Gather data

This commit is contained in:
mike
2026-01-09 22:42:33 +01:00
parent 183216e753
commit e8a1ab6d19
3 changed files with 37 additions and 28 deletions

View File

@@ -340,7 +340,7 @@ public class Main {
static PuzzleResult attempt(Rng rng, Dict dict, Opts opts) {
TOTAL_ATTEMPTS.incrementAndGet();
var swe = new SwedishGenerator();
var mask = swe.generateMask(rng, dict.lenCounts(), opts.pop, opts.gens);
var mask = swe.generateMask(rng, opts.pop, opts.gens);
var filled = new CSP(rng).fillMask(mask, dict.index(), 200, opts.fillTimeout);
TOTAL_NODES.addAndGet(filled.stats().nodes);

View File

@@ -281,16 +281,14 @@ public record SwedishGenerator() {
public static record Dict(
DictEntry[] index,
int[] lenCounts, int length) {
int length) {
static final Gson GSON = new Gson();
public Dict(Lemma[] wordz) {
var lenCounts = new int[MAX_WORD_LENGTH_PLUS_ONE];
var index = new DictEntry[MAX_WORD_LENGTH_PLUS_ONE];
Arrays.setAll(index, i -> new DictEntry(i));
for (var lemma : wordz) {
var L = lemma.word.length;
lenCounts[L]++;
var entry = index[L];
var idx = entry.words.size();
@@ -302,13 +300,13 @@ public record SwedishGenerator() {
entry.pos[i][letter].add(idx);
}
}
for (var len : lenCounts) {
for (int i = MIN_LEN; i < index.length; i++) {
var len = index[i].words.size();
if (len <= 0) {
System.out.println("No words for length " + len);
//throw new RuntimeException("Invalid word length: " + len);
throw new RuntimeException("No words for length " + i);
}
}
this(index, lenCounts, Arrays.stream(lenCounts).sum());
this(index, Arrays.stream(index).mapToInt(i -> i.words.size()).sum());
}
static Dict loadDict(String wordsPath) {
String raw;
@@ -355,7 +353,6 @@ public record SwedishGenerator() {
return new Dict(map.toArray(Lemma[]::new));
}
public int dictLength() { return Arrays.stream(lenCounts).sum(); }
}
static int intersectSorted(int[] a, int aLen, int[] b, int bLen, int[] out) {
@@ -424,7 +421,7 @@ public record SwedishGenerator() {
return false;
}
long maskFitness(Grid grid, int[] lenCounts) {
long maskFitness(Grid grid) {
final long[] penalty = { 0 };
var clueCount = grid.clueCount();
@@ -460,9 +457,9 @@ public record SwedishGenerator() {
hasSlots[0] = true;
if (n < MIN_LEN) {
penalty[0] += 8000;
} else if (lenCounts[n] <= 0) {
} /*else if (lenCounts[n] <= 0) {
penalty[0] += 12000;
}
}*/
var horiz = Slot.horiz(d) ? covH : covV;
for (var i = 0; i < n; i++) horiz[Slot.offset(packedPos, i)] += 1;
}
@@ -488,9 +485,9 @@ public record SwedishGenerator() {
hasSlots[0] = true;
if (n < MIN_LEN) {
penalty[0] += 8000;
} else if (lenCounts[n] <= 0) {
} /*else if (lenCounts[n] <= 0) {
penalty[0] += 12000;
}
}*/
var horiz = Slot.horiz(d) ? covH : covV;
for (var i = 0; i < n; i++) horiz[Slot.offset(packedPos, i)] += 1;
}
@@ -595,14 +592,14 @@ public record SwedishGenerator() {
return out;
}
Grid hillclimb(Rng rng, Grid start, int[] lenCounts, int limit) {
Grid hillclimb(Rng rng, Grid start, int limit) {
var best = start;
var bestF = maskFitness(best, lenCounts);
var bestF = maskFitness(best);
var fails = 0;
while (fails < limit) {
var cand = mutate(rng, best);
var f = maskFitness(cand, lenCounts);
var f = maskFitness(cand);
if (f < bestF) {
best = cand;
bestF = f;
@@ -614,21 +611,21 @@ public record SwedishGenerator() {
return best;
}
public Grid generateMask(Rng rng, int[] lenCounts, int popSize, int gens) {
public Grid generateMask(Rng rng, int popSize, int gens) {
class GridAndFit {
Grid grid;
Long fite;
GridAndFit(Grid grid) { this.grid = grid; }
long fit() {
if (fite == null) this.fite = maskFitness(grid, lenCounts);
if (fite == null) this.fite = maskFitness(grid);
return this.fite;
}
}
if (Main.VERBOSE) System.out.println("generateMask init pop: " + popSize);
var pop = new ArrayList<GridAndFit>();
for (var i = 0; i < popSize; i++) {
pop.add(new GridAndFit(hillclimb(rng, randomMask(rng), lenCounts, 180)));
pop.add(new GridAndFit(hillclimb(rng, randomMask(rng), 180)));
}
for (var gen = 0; gen < gens; gen++) {
@@ -640,7 +637,7 @@ public record SwedishGenerator() {
var p1 = pop.get(rng.randint(0, pop.size() - 1));
var p2 = pop.get(rng.randint(0, pop.size() - 1));
var child = crossover(rng, p1.grid, p2.grid);
children.add(new GridAndFit(hillclimb(rng, child, lenCounts, 70)));
children.add(new GridAndFit(hillclimb(rng, child, 70)));
}
pop.addAll(children);

View File

@@ -103,6 +103,12 @@ public class SwedishGeneratorTest {
@Test
void testLemmaAndDict() {
var l2a = new Lemma("IN", 1, "BIJ");
var l4a = new Lemma("INER", 1, "BIJER");
var l6a = new Lemma("INEREN", 1, "BIJERE");
var l7a = new Lemma("INERENA", 1, "BIJERE");
var l8a = new Lemma("INERENAE", 1, "BIJERE");
var l1 = new Lemma("APPLE", 5, "A fruit");
Assertions.assertArrayEquals("APPLE".getBytes(StandardCharsets.US_ASCII), l1.word());
assertEquals(5, l1.word().length);
@@ -110,10 +116,10 @@ public class SwedishGeneratorTest {
assertEquals((byte) 'A', l1.byteAt(0));
var l2 = new Lemma("AXE", 2, "A tool");
var dict = new Dict(new Lemma[]{ l1, l2 });
var dict = new Dict(new Lemma[]{ l1, l2, l2a, l4a, l6a, l7a, l8a });
assertEquals(1, dict.lenCounts()[3]);
assertEquals(1, dict.lenCounts()[5]);
assertEquals(1, dict.index()[3].words().size());
assertEquals(1, dict.index()[5].words().size());
var entry3 = dict.index()[3];
assertEquals(1, entry3.words().size());
@@ -173,10 +179,16 @@ public class SwedishGeneratorTest {
@Test
void testCandidateInfoForPattern() {
var l0 = new Lemma("IN", 1, "BIJ");
var l3a = new Lemma("INE", 1, "BIJE");
var l4a = new Lemma("INER", 1, "BIJER");
var l6a = new Lemma("INEREN", 1, "BIJERE");
var l7a = new Lemma("INERENA", 1, "BIJERE");
var l8a = new Lemma("INERENAE", 1, "BIJERE");
var l1 = new Lemma("APPLE", 1, "fruit");
var l2 = new Lemma("APPLY", 1, "verb");
var l3 = new Lemma("BANAN", 1, "fruit");
var dict = new Dict(new Lemma[]{ l1, l2, l3 });
var dict = new Dict(new Lemma[]{ l0, l1, l2, l3, l3a, l4a, l6a, l7a, l8a });
var gen = new SwedishGenerator();
// Pattern "APP--" for length 5
@@ -225,12 +237,12 @@ public class SwedishGeneratorTest {
lenCounts[8] = 10; // In case MAX_WORD_LENGTH is 8
// Empty grid should have high penalty (no slots)
var f1 = gen.maskFitness(grid, lenCounts);
var f1 = gen.maskFitness(grid);
assertTrue(f1 >= 1_000_000_000L);
// Add a slot
grid.setClue(0, SwedishGenerator.OFFSETS[2].dbyte());
var f2 = gen.maskFitness(grid, lenCounts);
var f2 = gen.maskFitness(grid);
assertTrue(f2 < f1);
}
@@ -251,7 +263,7 @@ public class SwedishGeneratorTest {
var lenCounts = new int[12];
Arrays.fill(lenCounts, 10);
var g4 = gen.hillclimb(rng, g1, lenCounts, 10);
var g4 = gen.hillclimb(rng, g1, 10);
assertNotNull(g4);
}