Gather data

This commit is contained in:
mike
2026-01-09 22:42:33 +01:00
parent 183216e753
commit e8a1ab6d19
3 changed files with 37 additions and 28 deletions

View File

@@ -340,7 +340,7 @@ public class Main {
static PuzzleResult attempt(Rng rng, Dict dict, Opts opts) { static PuzzleResult attempt(Rng rng, Dict dict, Opts opts) {
TOTAL_ATTEMPTS.incrementAndGet(); TOTAL_ATTEMPTS.incrementAndGet();
var swe = new SwedishGenerator(); var swe = new SwedishGenerator();
var mask = swe.generateMask(rng, dict.lenCounts(), opts.pop, opts.gens); var mask = swe.generateMask(rng, opts.pop, opts.gens);
var filled = new CSP(rng).fillMask(mask, dict.index(), 200, opts.fillTimeout); var filled = new CSP(rng).fillMask(mask, dict.index(), 200, opts.fillTimeout);
TOTAL_NODES.addAndGet(filled.stats().nodes); TOTAL_NODES.addAndGet(filled.stats().nodes);

View File

@@ -281,16 +281,14 @@ public record SwedishGenerator() {
public static record Dict( public static record Dict(
DictEntry[] index, DictEntry[] index,
int[] lenCounts, int length) { int length) {
static final Gson GSON = new Gson(); static final Gson GSON = new Gson();
public Dict(Lemma[] wordz) { public Dict(Lemma[] wordz) {
var lenCounts = new int[MAX_WORD_LENGTH_PLUS_ONE];
var index = new DictEntry[MAX_WORD_LENGTH_PLUS_ONE]; var index = new DictEntry[MAX_WORD_LENGTH_PLUS_ONE];
Arrays.setAll(index, i -> new DictEntry(i)); Arrays.setAll(index, i -> new DictEntry(i));
for (var lemma : wordz) { for (var lemma : wordz) {
var L = lemma.word.length; var L = lemma.word.length;
lenCounts[L]++;
var entry = index[L]; var entry = index[L];
var idx = entry.words.size(); var idx = entry.words.size();
@@ -302,13 +300,13 @@ public record SwedishGenerator() {
entry.pos[i][letter].add(idx); entry.pos[i][letter].add(idx);
} }
} }
for (var len : lenCounts) { for (int i = MIN_LEN; i < index.length; i++) {
var len = index[i].words.size();
if (len <= 0) { if (len <= 0) {
System.out.println("No words for length " + len); throw new RuntimeException("No words for length " + i);
//throw new RuntimeException("Invalid word length: " + len);
} }
} }
this(index, lenCounts, Arrays.stream(lenCounts).sum()); this(index, Arrays.stream(index).mapToInt(i -> i.words.size()).sum());
} }
static Dict loadDict(String wordsPath) { static Dict loadDict(String wordsPath) {
String raw; String raw;
@@ -355,7 +353,6 @@ public record SwedishGenerator() {
return new Dict(map.toArray(Lemma[]::new)); return new Dict(map.toArray(Lemma[]::new));
} }
public int dictLength() { return Arrays.stream(lenCounts).sum(); }
} }
static int intersectSorted(int[] a, int aLen, int[] b, int bLen, int[] out) { static int intersectSorted(int[] a, int aLen, int[] b, int bLen, int[] out) {
@@ -424,7 +421,7 @@ public record SwedishGenerator() {
return false; return false;
} }
long maskFitness(Grid grid, int[] lenCounts) { long maskFitness(Grid grid) {
final long[] penalty = { 0 }; final long[] penalty = { 0 };
var clueCount = grid.clueCount(); var clueCount = grid.clueCount();
@@ -460,9 +457,9 @@ public record SwedishGenerator() {
hasSlots[0] = true; hasSlots[0] = true;
if (n < MIN_LEN) { if (n < MIN_LEN) {
penalty[0] += 8000; penalty[0] += 8000;
} else if (lenCounts[n] <= 0) { } /*else if (lenCounts[n] <= 0) {
penalty[0] += 12000; penalty[0] += 12000;
} }*/
var horiz = Slot.horiz(d) ? covH : covV; var horiz = Slot.horiz(d) ? covH : covV;
for (var i = 0; i < n; i++) horiz[Slot.offset(packedPos, i)] += 1; for (var i = 0; i < n; i++) horiz[Slot.offset(packedPos, i)] += 1;
} }
@@ -488,9 +485,9 @@ public record SwedishGenerator() {
hasSlots[0] = true; hasSlots[0] = true;
if (n < MIN_LEN) { if (n < MIN_LEN) {
penalty[0] += 8000; penalty[0] += 8000;
} else if (lenCounts[n] <= 0) { } /*else if (lenCounts[n] <= 0) {
penalty[0] += 12000; penalty[0] += 12000;
} }*/
var horiz = Slot.horiz(d) ? covH : covV; var horiz = Slot.horiz(d) ? covH : covV;
for (var i = 0; i < n; i++) horiz[Slot.offset(packedPos, i)] += 1; for (var i = 0; i < n; i++) horiz[Slot.offset(packedPos, i)] += 1;
} }
@@ -595,14 +592,14 @@ public record SwedishGenerator() {
return out; return out;
} }
Grid hillclimb(Rng rng, Grid start, int[] lenCounts, int limit) { Grid hillclimb(Rng rng, Grid start, int limit) {
var best = start; var best = start;
var bestF = maskFitness(best, lenCounts); var bestF = maskFitness(best);
var fails = 0; var fails = 0;
while (fails < limit) { while (fails < limit) {
var cand = mutate(rng, best); var cand = mutate(rng, best);
var f = maskFitness(cand, lenCounts); var f = maskFitness(cand);
if (f < bestF) { if (f < bestF) {
best = cand; best = cand;
bestF = f; bestF = f;
@@ -614,21 +611,21 @@ public record SwedishGenerator() {
return best; return best;
} }
public Grid generateMask(Rng rng, int[] lenCounts, int popSize, int gens) { public Grid generateMask(Rng rng, int popSize, int gens) {
class GridAndFit { class GridAndFit {
Grid grid; Grid grid;
Long fite; Long fite;
GridAndFit(Grid grid) { this.grid = grid; } GridAndFit(Grid grid) { this.grid = grid; }
long fit() { long fit() {
if (fite == null) this.fite = maskFitness(grid, lenCounts); if (fite == null) this.fite = maskFitness(grid);
return this.fite; return this.fite;
} }
} }
if (Main.VERBOSE) System.out.println("generateMask init pop: " + popSize); if (Main.VERBOSE) System.out.println("generateMask init pop: " + popSize);
var pop = new ArrayList<GridAndFit>(); var pop = new ArrayList<GridAndFit>();
for (var i = 0; i < popSize; i++) { for (var i = 0; i < popSize; i++) {
pop.add(new GridAndFit(hillclimb(rng, randomMask(rng), lenCounts, 180))); pop.add(new GridAndFit(hillclimb(rng, randomMask(rng), 180)));
} }
for (var gen = 0; gen < gens; gen++) { for (var gen = 0; gen < gens; gen++) {
@@ -640,7 +637,7 @@ public record SwedishGenerator() {
var p1 = pop.get(rng.randint(0, pop.size() - 1)); var p1 = pop.get(rng.randint(0, pop.size() - 1));
var p2 = pop.get(rng.randint(0, pop.size() - 1)); var p2 = pop.get(rng.randint(0, pop.size() - 1));
var child = crossover(rng, p1.grid, p2.grid); var child = crossover(rng, p1.grid, p2.grid);
children.add(new GridAndFit(hillclimb(rng, child, lenCounts, 70))); children.add(new GridAndFit(hillclimb(rng, child, 70)));
} }
pop.addAll(children); pop.addAll(children);

View File

@@ -103,6 +103,12 @@ public class SwedishGeneratorTest {
@Test @Test
void testLemmaAndDict() { void testLemmaAndDict() {
var l2a = new Lemma("IN", 1, "BIJ");
var l4a = new Lemma("INER", 1, "BIJER");
var l6a = new Lemma("INEREN", 1, "BIJERE");
var l7a = new Lemma("INERENA", 1, "BIJERE");
var l8a = new Lemma("INERENAE", 1, "BIJERE");
var l1 = new Lemma("APPLE", 5, "A fruit"); var l1 = new Lemma("APPLE", 5, "A fruit");
Assertions.assertArrayEquals("APPLE".getBytes(StandardCharsets.US_ASCII), l1.word()); Assertions.assertArrayEquals("APPLE".getBytes(StandardCharsets.US_ASCII), l1.word());
assertEquals(5, l1.word().length); assertEquals(5, l1.word().length);
@@ -110,10 +116,10 @@ public class SwedishGeneratorTest {
assertEquals((byte) 'A', l1.byteAt(0)); assertEquals((byte) 'A', l1.byteAt(0));
var l2 = new Lemma("AXE", 2, "A tool"); var l2 = new Lemma("AXE", 2, "A tool");
var dict = new Dict(new Lemma[]{ l1, l2 }); var dict = new Dict(new Lemma[]{ l1, l2, l2a, l4a, l6a, l7a, l8a });
assertEquals(1, dict.lenCounts()[3]); assertEquals(1, dict.index()[3].words().size());
assertEquals(1, dict.lenCounts()[5]); assertEquals(1, dict.index()[5].words().size());
var entry3 = dict.index()[3]; var entry3 = dict.index()[3];
assertEquals(1, entry3.words().size()); assertEquals(1, entry3.words().size());
@@ -173,10 +179,16 @@ public class SwedishGeneratorTest {
@Test @Test
void testCandidateInfoForPattern() { void testCandidateInfoForPattern() {
var l0 = new Lemma("IN", 1, "BIJ");
var l3a = new Lemma("INE", 1, "BIJE");
var l4a = new Lemma("INER", 1, "BIJER");
var l6a = new Lemma("INEREN", 1, "BIJERE");
var l7a = new Lemma("INERENA", 1, "BIJERE");
var l8a = new Lemma("INERENAE", 1, "BIJERE");
var l1 = new Lemma("APPLE", 1, "fruit"); var l1 = new Lemma("APPLE", 1, "fruit");
var l2 = new Lemma("APPLY", 1, "verb"); var l2 = new Lemma("APPLY", 1, "verb");
var l3 = new Lemma("BANAN", 1, "fruit"); var l3 = new Lemma("BANAN", 1, "fruit");
var dict = new Dict(new Lemma[]{ l1, l2, l3 }); var dict = new Dict(new Lemma[]{ l0, l1, l2, l3, l3a, l4a, l6a, l7a, l8a });
var gen = new SwedishGenerator(); var gen = new SwedishGenerator();
// Pattern "APP--" for length 5 // Pattern "APP--" for length 5
@@ -225,12 +237,12 @@ public class SwedishGeneratorTest {
lenCounts[8] = 10; // In case MAX_WORD_LENGTH is 8 lenCounts[8] = 10; // In case MAX_WORD_LENGTH is 8
// Empty grid should have high penalty (no slots) // Empty grid should have high penalty (no slots)
var f1 = gen.maskFitness(grid, lenCounts); var f1 = gen.maskFitness(grid);
assertTrue(f1 >= 1_000_000_000L); assertTrue(f1 >= 1_000_000_000L);
// Add a slot // Add a slot
grid.setClue(0, SwedishGenerator.OFFSETS[2].dbyte()); grid.setClue(0, SwedishGenerator.OFFSETS[2].dbyte());
var f2 = gen.maskFitness(grid, lenCounts); var f2 = gen.maskFitness(grid);
assertTrue(f2 < f1); assertTrue(f2 < f1);
} }
@@ -251,7 +263,7 @@ public class SwedishGeneratorTest {
var lenCounts = new int[12]; var lenCounts = new int[12];
Arrays.fill(lenCounts, 10); Arrays.fill(lenCounts, 10);
var g4 = gen.hillclimb(rng, g1, lenCounts, 10); var g4 = gen.hillclimb(rng, g1, 10);
assertNotNull(g4); assertNotNull(g4);
} }