Gather data

This commit is contained in:
mike
2026-01-08 02:02:08 +01:00
parent 14b33022d2
commit 17880de6d5
3 changed files with 243 additions and 244 deletions

View File

@@ -176,7 +176,7 @@ public final class ExportFormat {
// Record bundling a Lemma with start row/col, a direction string, arrow row/col, a reversed flag and `complex`.
// Side-by-side diff rendering: each line shows the pre-commit text followed by the post-commit text.
// Only word() changes: post-commit it wraps lemma().word() in a new String, because this commit
// turns the Lemma `word` component into a char[].
public record WordOut(Lemma lemma, int startRow, int startCol, String direction, int arrowRow, int arrowCol, boolean isReversed, int complex) { public record WordOut(Lemma lemma, int startRow, int startCol, String direction, int arrowRow, int arrowCol, boolean isReversed, int complex) {
public String word() { return lemma().word(); } public String word() { return new String(lemma().word()); }
// Returns the lemma's clue list itself (no copy is made).
public ArrayList<String> clue() { return lemma.clue(); } public ArrayList<String> clue() { return lemma.clue(); }
} }

View File

@@ -90,7 +90,8 @@ public record SwedishGenerator(int[] buff) {
IntList[] intListBuffer, IntList[] intListBuffer,
long[] undoBuffer) { long[] undoBuffer) {
public Context() { this(new int[256], new int[256], new int[256], new int[256], new BitSet(256), new char[32], new IntList[32], new long[2048]); } public Context() { this(new int[256], new int[256], new int[256], new int[256], new BitSet(256), new char[32], new IntList[32], new long[2048]); }
/**
 * Copies every element of {@code chars} into this context's {@code pattern} buffer, starting at index 0.
 * Elements of {@code pattern} at index {@code chars.length} and beyond are left untouched.
 *
 * @param chars the characters to load into the front of {@code pattern}
 */
void setPatter(char[] chars) {
    final int count = chars.length;
    final char[] target = this.pattern;
    System.arraycopy(chars, 0, target, 0, count);
}
} }
static final class Rng { static final class Rng {
@@ -120,8 +121,8 @@ public record SwedishGenerator(int[] buff) {
record Grid(byte[] g) { record Grid(byte[] g) {
// Returns a new Grid backed by a clone of the byte array g. Identical in both diff columns.
Grid deepCopyGrid() { return new Grid(g.clone()); } Grid deepCopyGrid() { return new Grid(g.clone()); }
// Maps a (row, column) pair to an index into g.
// Left column (pre-commit): r * W + c. Right column (post-commit): r | (c << 3).
// NOTE(review): r | (c << 3) equals r + 8 * c only while 0 <= r <= 7; for r >= 8 the row bits overlap the
// column bits and different cells share an index. The layout now depends on the constant 8 instead of W —
// confirm the grid never has more than 8 rows and that g is sized for this layout.
private int offset(int r, int c) { return r * W + c; } private int offset(int r, int c) { return r | (c << 3); }
// Reports whether the cell at (r, c) counts as a letter cell.
// Left column (pre-commit): the negation of isDigitAt(r, c). Right column (post-commit): a bit test on the stored byte.
// NOTE(review): (b & 48) != 48 is false for every byte that has both bit 0x10 and bit 0x20 set, which covers
// '0'..'9' (0x30-0x39) but also 0x3A-0x3F and 0x70-0x7F (e.g. lowercase 'p'..'z'). Confirm the grid only ever
// stores bytes for which this matches the previous isDigitAt-based result.
boolean isLettercell(int r, int c) { return !isDigitAt(r, c); } boolean isLettercell(int r, int c) { return (g[offset(r, c)] & 48) != 48; }
// Returns the byte stored for (r, c), cast to char. Identical in both diff columns.
char getCharAt(int r, int c) { return (char) (g[offset(r, c)]); } char getCharAt(int r, int c) { return (char) (g[offset(r, c)]); }
// Returns the stored byte minus 48 (the code of '0'), i.e. the numeric value when the cell holds an ASCII digit.
// Identical in both diff columns.
int digitAt(int r, int c) { return g[offset(r, c)] - 48; } int digitAt(int r, int c) { return g[offset(r, c)] - 48; }
// Returns the raw byte stored for (r, c). Identical in both diff columns.
byte byteAt(int r, int c) { return g[offset(r, c)]; } byte byteAt(int r, int c) { return g[offset(r, c)]; }
@@ -176,15 +177,15 @@ public record SwedishGenerator(int[] buff) {
} }
} }
// Record holding an index, a word, a `simpel` value and a list of clue strings.
// Side-by-side diff rendering: left = pre-commit, right = post-commit. The commit replaces the
// `String word` + `int length` components with a single `char[] word`.
// NOTE(review): the implicit record accessor word() returns the char[] itself, so any caller can mutate the
// stored word — confirm that sharing the array is intended.
static record Lemma(int index, String word, int length, int simpel, ArrayList<String> clue) { static record Lemma(int index, char[] word, int simpel, ArrayList<String> clue) {
// Source of indices for the (word, simpel, clue) constructor below.
// NOTE(review): incremented with a plain ++ on a static field — presumably lemmas are created from one thread; verify.
static int LEMMA_COUNTER = 0; static int LEMMA_COUNTER = 0;
// Builds a lemma with an explicit index; the clue list starts with initial capacity 10 and `clu` as its only element.
// Post-commit the String is converted with toCharArray() instead of also passing word.length().
public Lemma(int index, String word, int simpel, String clu) { public Lemma(int index, String word, int simpel, String clu) {
this(index, word, word.length(), simpel, new ArrayList<String>(10)); this(index, word.toCharArray(), simpel, new ArrayList<String>(10));
clue.add(clu); clue.add(clu);
} }
// Builds a lemma whose index is the current LEMMA_COUNTER value, then advances the counter.
public Lemma(String word, int simpel, String clue) { this(LEMMA_COUNTER++, word, simpel, clue); } public Lemma(String word, int simpel, String clue) { this(LEMMA_COUNTER++, word, simpel, clue); }
// Returns the character at position idx of the word (String.charAt pre-commit, array indexing post-commit).
char charAt(int idx) { return word.charAt(idx); } char charAt(int idx) { return word[idx]; }
// hashCode and equals use `index` only: two Lemma instances with the same index are equal whatever their word.
@Override public int hashCode() { return index; } @Override public int hashCode() { return index; }
@Override public boolean equals(Object o) { return (o == this) || (o instanceof Lemma l && l.index == index); } @Override public boolean equals(Object o) { return (o == this) || (o instanceof Lemma l && l.index == index); }
} }
@@ -197,12 +198,12 @@ public record SwedishGenerator(int[] buff) {
Lemma[] lemmas = wordz.clone(); Lemma[] lemmas = wordz.clone();
Arrays.sort(lemmas, Comparator.comparingInt(wd -> wd.simpel)); Arrays.sort(lemmas, Comparator.comparingInt(wd -> wd.simpel));
var lenCounts = new int[12]; var lenCounts = new int[MAX_WORD_LENGTH+1];
var index = new DictEntry[12]; var index = new DictEntry[MAX_WORD_LENGTH+1];
Arrays.setAll(index, i -> new DictEntry(i)); Arrays.setAll(index, i -> new DictEntry(i));
int maxLength = -1; int maxLength = -1;
for (var lemma : lemmas) { for (var lemma : lemmas) {
var L = lemma.length(); var L = lemma.word.length;
if (L > maxLength) maxLength = L; if (L > maxLength) maxLength = L;
lenCounts[L]++; lenCounts[L]++;
@@ -282,10 +283,10 @@ public record SwedishGenerator(int[] buff) {
return Arrays.copyOf(buff, k); return Arrays.copyOf(buff, k);
} }
CandidateInfo candidateInfoForPattern(DictEntry entry, char[] pattern, int len) { CandidateInfo candidateInfoForPattern(Context ctx, DictEntry entry, int len) {
var ctx = CTX.get(); char[] pattern = ctx.pattern;
var listBuffer = ctx.intListBuffer; var listBuffer = ctx.intListBuffer;
int listCount = 0; int listCount = 0;
for (var i = 0; i < len; i++) { for (var i = 0; i < len; i++) {
var ch = pattern[i]; var ch = pattern[i];
if (isLetter(ch)) { if (isLetter(ch)) {
@@ -362,8 +363,7 @@ public record SwedishGenerator(int[] buff) {
long packedCs = 0; long packedCs = 0;
var n = 0; var n = 0;
while (rr >= 0 && rr < H && cc >= 0 && cc < W && n < MAX_WORD_LENGTH) { while (rr >= 0 && rr < H && cc >= 0 && cc < W && grid.isLettercell(rr, cc) && n < MAX_WORD_LENGTH) {
if (grid.isDigitAt(rr, cc)) break;
packedRs |= (long) rr << (n << 2); packedRs |= (long) rr << (n << 2);
packedCs |= (long) cc << (n << 2); packedCs |= (long) cc << (n << 2);
n++; n++;
@@ -772,7 +772,7 @@ public record SwedishGenerator(int[] buff) {
} }
var patLen = patternForSlot(grid, s, ctx.pattern); var patLen = patternForSlot(grid, s, ctx.pattern);
var info = candidateInfoForPattern(entry, ctx.pattern, patLen); var info = candidateInfoForPattern(ctx, entry, patLen);
if (info.count == 0) { if (info.count == 0) {
return new Pick(null, null, false); return new Pick(null, null, false);

View File

@@ -1,5 +1,6 @@
package puzzle; package puzzle;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import puzzle.SwedishGenerator.*; import puzzle.SwedishGenerator.*;
import java.util.ArrayList; import java.util.ArrayList;
@@ -11,230 +12,228 @@ import static org.junit.jupiter.api.Assertions.*;
public class SwedishGeneratorTest { public class SwedishGeneratorTest {
// Checks Rng with seed 123: two consecutive nextU32() values differ, a second Rng with the same seed reproduces
// the first value, and over 100 iterations randint(5, 10) stays in [5, 10] and nextFloat() in [0.0, 1.0].
// The commit only replaces explicit local types with `var` here (left = before, right = after).
@Test @Test
void testRng() { void testRng() {
Rng rng = new Rng(123); var rng = new Rng(123);
int val1 = rng.nextU32(); var val1 = rng.nextU32();
int val2 = rng.nextU32(); var val2 = rng.nextU32();
assertNotEquals(val1, val2); assertNotEquals(val1, val2);
Rng rng2 = new Rng(123); var rng2 = new Rng(123);
assertEquals(val1, rng2.nextU32()); assertEquals(val1, rng2.nextU32());
for (int i = 0; i < 100; i++) { for (var i = 0; i < 100; i++) {
int r = rng.randint(5, 10); var r = rng.randint(5, 10);
assertTrue(r >= 5 && r <= 10); assertTrue(r >= 5 && r <= 10);
double f = rng.nextFloat(); var f = rng.nextFloat();
assertTrue(f >= 0.0 && f <= 1.0); assertTrue(f >= 0.0 && f <= 1.0);
} }
} }
// Exercises Grid on an empty grid: stores 'A' at (0,0) and '1' at (0,1), then checks getCharAt/digitAt, that
// isLetterAt/isDigitAt/isLettercell classify (0,0) as a letter and (0,1) as a digit, and that deepCopyGrid()
// yields an independent copy (writing 'B' into the copy leaves 'A' in the original).
@Test @Test
void testGrid() { void testGrid() {
Grid grid = SwedishGenerator.makeEmptyGrid(); var grid = SwedishGenerator.makeEmptyGrid();
grid.setCharAt(0, 0, 'A'); grid.setCharAt(0, 0, 'A');
grid.setCharAt(0, 1, '1'); grid.setCharAt(0, 1, '1');
assertEquals('A', grid.getCharAt(0, 0)); assertEquals('A', grid.getCharAt(0, 0));
assertEquals(1, grid.digitAt(0, 1)); assertEquals(1, grid.digitAt(0, 1));
assertTrue(grid.isLetterAt(0, 0)); assertTrue(grid.isLetterAt(0, 0));
assertFalse(grid.isDigitAt(0, 0)); assertFalse(grid.isDigitAt(0, 0));
assertTrue(grid.isDigitAt(0, 1)); assertTrue(grid.isDigitAt(0, 1));
assertFalse(grid.isLetterAt(0, 1)); assertFalse(grid.isLetterAt(0, 1));
assertTrue(grid.isLettercell(0, 0)); assertTrue(grid.isLettercell(0, 0));
assertFalse(grid.isLettercell(0, 1)); assertFalse(grid.isLettercell(0, 1));
Grid copy = grid.deepCopyGrid(); var copy = grid.deepCopyGrid();
assertEquals('A', copy.getCharAt(0, 0)); assertEquals('A', copy.getCharAt(0, 0));
copy.setCharAt(0, 0, 'B'); copy.setCharAt(0, 0, 'B');
assertEquals('B', copy.getCharAt(0, 0)); assertEquals('B', copy.getCharAt(0, 0));
assertEquals('A', grid.getCharAt(0, 0)); assertEquals('A', grid.getCharAt(0, 0));
} }
// Adds 0..9 to a new IntList and checks that size() goes from 0 to 10 and that data() holds 0 at index 0
// and 9 at index 9.
@Test @Test
void testIntList() { void testIntList() {
IntList list = new IntList(); var list = new IntList();
assertEquals(0, list.size()); assertEquals(0, list.size());
for (int i = 0; i < 10; i++) { for (var i = 0; i < 10; i++) {
list.add(i); list.add(i);
} }
assertEquals(10, list.size()); assertEquals(10, list.size());
assertEquals(0, list.data()[0]); assertEquals(0, list.data()[0]);
assertEquals(9, list.data()[9]); assertEquals(9, list.data()[9]);
} }
// Builds lemmas "APPLE" and "AXE" plus a Dict over both, then checks the lemma accessors, the per-length counts
// (one word each at lengths 3 and 5), that the length-3 entry holds "AXE", and that its pos() lists for
// A at 0, X at 1 and E at 2 are non-empty.
// Post-commit (right column) word() is a char[], so the assertions switch to assertArrayEquals and word().length.
@Test @Test
void testLemmaAndDict() { void testLemmaAndDict() {
Lemma l1 = new Lemma("APPLE", 5, "A fruit"); var l1 = new Lemma("APPLE", 5, "A fruit");
assertEquals("APPLE", l1.word()); Assertions.assertArrayEquals("APPLE".toCharArray(), l1.word());
assertEquals(5, l1.length()); assertEquals(5, l1.word().length);
assertEquals(5, l1.simpel()); assertEquals(5, l1.simpel());
assertEquals('A', l1.charAt(0)); assertEquals('A', l1.charAt(0));
Lemma l2 = new Lemma("AXE", 2, "A tool"); var l2 = new Lemma("AXE", 2, "A tool");
Dict dict = new Dict(new Lemma[]{l1, l2}); var dict = new Dict(new Lemma[]{ l1, l2 });
assertEquals(1, dict.lenCounts()[3]); assertEquals(1, dict.lenCounts()[3]);
assertEquals(1, dict.lenCounts()[5]); assertEquals(1, dict.lenCounts()[5]);
DictEntry entry3 = dict.index()[3]; var entry3 = dict.index()[3];
assertEquals(1, entry3.words().size()); assertEquals(1, entry3.words().size());
assertEquals("AXE", entry3.words().get(0).word()); Assertions.assertArrayEquals("AXE".toCharArray(), entry3.words().getFirst().word());
// Check pos indexing // Check pos indexing
// AXE: A at 0, X at 1, E at 2 // AXE: A at 0, X at 1, E at 2
assertTrue(entry3.pos()[0]['A' - 'A'].size() > 0); assertTrue(entry3.pos()[0]['A' - 'A'].size() > 0);
assertTrue(entry3.pos()[1]['X' - 'A'].size() > 0); assertTrue(entry3.pos()[1]['X' - 'A'].size() > 0);
assertTrue(entry3.pos()[2]['E' - 'A'].size() > 0); assertTrue(entry3.pos()[2]['E' - 'A'].size() > 0);
} }
// Builds a Slot from key (2 << 8) | (3 << 4) | 5, rows [2, 3, 4] and columns [5, 5, 5] packed 4 bits per entry,
// and length 3. Checks that clueR/clueC/dir decode to 2/3/5, that horiz() is false, that r(i)/c(i) unpack the
// packed values, and that the static Slot.horiz is true for 2 and false for 3.
@Test @Test
void testSlot() { void testSlot() {
// key = (r << 8) | (c << 4) | d // key = (r << 8) | (c << 4) | d
int key = (2 << 8) | (3 << 4) | 5; var key = (2 << 8) | (3 << 4) | 5;
long rs = 0; long rs = 0;
long cs = 0; long cs = 0;
// rs: [2, 3, 4] -> packed 4-bit: 2 | (3<<4) | (4<<8) // rs: [2, 3, 4] -> packed 4-bit: 2 | (3<<4) | (4<<8)
rs |= 2L; rs |= 2L;
rs |= 3L << 4; rs |= 3L << 4;
rs |= 4L << 8; rs |= 4L << 8;
// cs: [5, 5, 5] // cs: [5, 5, 5]
cs |= 5L; cs |= 5L;
cs |= 5L << 4; cs |= 5L << 4;
cs |= 5L << 8; cs |= 5L << 8;
Slot s = new Slot(key, rs, cs, 3); var s = new Slot(key, rs, cs, 3);
assertEquals(2, s.clueR()); assertEquals(2, s.clueR());
assertEquals(3, s.clueC()); assertEquals(3, s.clueC());
assertEquals(5, s.dir()); assertEquals(5, s.dir());
assertFalse(s.horiz()); assertFalse(s.horiz());
assertEquals(2, s.r(0)); assertEquals(2, s.r(0));
assertEquals(3, s.r(1)); assertEquals(3, s.r(1));
assertEquals(4, s.r(2)); assertEquals(4, s.r(2));
assertEquals(5, s.c(0)); assertEquals(5, s.c(0));
assertEquals(5, s.c(1)); assertEquals(5, s.c(1));
assertEquals(5, s.c(2)); assertEquals(5, s.c(2));
assertTrue(Slot.horiz(2)); // right assertTrue(Slot.horiz(2)); // right
assertFalse(Slot.horiz(3)); // down assertFalse(Slot.horiz(3)); // down
} }
// Checks intersectSorted with a 10-element scratch buffer: {1,3,5,7,9} and {2,3,6,7,10} intersect to {3,7},
// and the disjoint inputs {1,2,3} and {4,5,6} give an empty result.
@Test @Test
void testIntersectSorted() { void testIntersectSorted() {
int[] buff = new int[10]; var buff = new int[10];
int[] a = {1, 3, 5, 7, 9}; var a = new int[]{ 1, 3, 5, 7, 9 };
int[] b = {2, 3, 6, 7, 10}; var b = new int[]{ 2, 3, 6, 7, 10 };
int[] res = SwedishGenerator.intersectSorted(buff, a, a.length, b, b.length); var res = SwedishGenerator.intersectSorted(buff, a, a.length, b, b.length);
assertArrayEquals(new int[]{3, 7}, res); assertArrayEquals(new int[]{ 3, 7 }, res);
int[] c = {1, 2, 3}; var c = new int[]{ 1, 2, 3 };
int[] d = {4, 5, 6}; var d = new int[]{ 4, 5, 6 };
res = SwedishGenerator.intersectSorted(buff, c, c.length, d, d.length); res = SwedishGenerator.intersectSorted(buff, c, c.length, d, d.length);
assertEquals(0, res.length); assertEquals(0, res.length);
} }
// Builds a Dict over APPLE, APPLY and BANAN and asks for candidates matching 'A','P','P',C_DASH,C_DASH at
// length 5; expects a count of 2 and a non-null indices array.
// Pre-commit (left) the pattern array is passed directly; post-commit (right) it is loaded into a new Context
// with setPatter and the Context is passed instead.
// The two columns are not aligned line-for-line below: the post-commit side has one extra statement, and the
// pre-commit side ends with three comment lines that the commit removed.
@Test @Test
void testCandidateInfoForPattern() { void testCandidateInfoForPattern() {
Lemma l1 = new Lemma("APPLE", 1, "fruit"); var l1 = new Lemma("APPLE", 1, "fruit");
Lemma l2 = new Lemma("APPLY", 1, "verb"); var l2 = new Lemma("APPLY", 1, "verb");
Lemma l3 = new Lemma("BANAN", 1, "fruit"); var l3 = new Lemma("BANAN", 1, "fruit");
Dict dict = new Dict(new Lemma[]{l1, l2, l3}); var dict = new Dict(new Lemma[]{ l1, l2, l3 });
SwedishGenerator gen = new SwedishGenerator(); var gen = new SwedishGenerator();
// Pattern "APP--" for length 5 // Pattern "APP--" for length 5
char[] pattern = {'A', 'P', 'P', SwedishGenerator.C_DASH, SwedishGenerator.C_DASH}; var context = new Context();
CandidateInfo info = gen.candidateInfoForPattern(dict.index()[5], pattern, 5); context.setPatter(new char[]{ 'A', 'P', 'P', SwedishGenerator.C_DASH, SwedishGenerator.C_DASH });
var info = gen.candidateInfoForPattern(context, dict.index()[5], 5);
assertEquals(2, info.count()); assertEquals(2, info.count());
assertNotNull(info.indices()); assertNotNull(info.indices());
// Indices in entry.words are based on sorted order of lemmas by 'simpel' }
// l1, l2, l3 all have simpel=1, so order might be original or depends on sort stability.
// Dict sorts by simpel.
}
// Puts the clue digit '2' at (0,0) of an empty grid and expects extractSlots to return exactly one slot with
// clueR 0, clueC 0, dir 2 and a length of at least 2. The length assertion is deliberately loose because,
// per the inline comments, the grid size in effect during the test run is not certain.
@Test @Test
void testForEachSlotAndExtractSlots() { void testForEachSlotAndExtractSlots() {
SwedishGenerator gen = new SwedishGenerator(); var gen = new SwedishGenerator();
Grid grid = SwedishGenerator.makeEmptyGrid(); var grid = SwedishGenerator.makeEmptyGrid();
// 3x3 grid (Config.PUZZLE_ROWS/COLS are 3 in test env) // 3x3 grid (Config.PUZZLE_ROWS/COLS are 3 in test env)
// Set '2' (right) at 0,0 // Set '2' (right) at 0,0
grid.setCharAt(0, 0, '2'); grid.setCharAt(0, 0, '2');
// This should detect a slot starting at 0,1 with length 2 (0,1 and 0,2) // This should detect a slot starting at 0,1 with length 2 (0,1 and 0,2)
ArrayList<Slot> slots = gen.extractSlots(grid); var slots = gen.extractSlots(grid);
// Depending on MAX_WORD_LENGTH and grid size. // Depending on MAX_WORD_LENGTH and grid size.
// In 3x3, if we have '2' at 0,0, rr=0, cc=1. // In 3x3, if we have '2' at 0,0, rr=0, cc=1.
// while loop: // while loop:
// 1. rr=0, cc=1, n=0 -> packedRs |= 0, packedCs |= 1, n=1, rr=0, cc=2 // 1. rr=0, cc=1, n=0 -> packedRs |= 0, packedCs |= 1, n=1, rr=0, cc=2
// 2. rr=0, cc=2, n=1 -> packedRs |= 0, packedCs |= 2<<4, n=2, rr=0, cc=3 (out) // 2. rr=0, cc=2, n=1 -> packedRs |= 0, packedCs |= 2<<4, n=2, rr=0, cc=3 (out)
// result: Slot with len 2. // result: Slot with len 2.
assertEquals(1, slots.size()); assertEquals(1, slots.size());
Slot s = slots.get(0); var s = slots.getFirst();
// MAX_WORD_LENGTH = Math.min(W, H). In tests with -DPUZZLE_ROWS=3 -DPUZZLE_COLS=3, it should be 3. // MAX_WORD_LENGTH = Math.min(W, H). In tests with -DPUZZLE_ROWS=3 -DPUZZLE_COLS=3, it should be 3.
// However, the test run might be using default Config values if not properly overridden in the test environment. // However, the test run might be using default Config values if not properly overridden in the test environment.
// If Actual was 8, it means MAX_WORD_LENGTH was at least 8. // If Actual was 8, it means MAX_WORD_LENGTH was at least 8.
assertTrue(s.len() >= 2); assertTrue(s.len() >= 2);
assertEquals(0, s.clueR()); assertEquals(0, s.clueR());
assertEquals(0, s.clueC()); assertEquals(0, s.clueC());
assertEquals(2, s.dir()); assertEquals(2, s.dir());
} }
// Checks maskFitness with lenCounts set to 10 at lengths 2 and 8: an empty grid scores at least 1_000_000_000,
// and the score drops strictly once the clue digit '2' is placed at (0,0).
@Test @Test
void testMaskFitnessBasic() { void testMaskFitnessBasic() {
SwedishGenerator gen = new SwedishGenerator(); var gen = new SwedishGenerator();
Grid grid = SwedishGenerator.makeEmptyGrid(); var grid = SwedishGenerator.makeEmptyGrid();
int[] lenCounts = new int[12]; var lenCounts = new int[12];
lenCounts[2] = 10; lenCounts[2] = 10;
lenCounts[8] = 10; // In case MAX_WORD_LENGTH is 8 lenCounts[8] = 10; // In case MAX_WORD_LENGTH is 8
// Empty grid should have high penalty (no slots) // Empty grid should have high penalty (no slots)
long f1 = gen.maskFitness(grid, lenCounts); var f1 = gen.maskFitness(grid, lenCounts);
assertTrue(f1 >= 1_000_000_000L); assertTrue(f1 >= 1_000_000_000L);
// Add a slot // Add a slot
grid.setCharAt(0, 0, '2'); grid.setCharAt(0, 0, '2');
long f2 = gen.maskFitness(grid, lenCounts); var f2 = gen.maskFitness(grid, lenCounts);
assertTrue(f2 < f1); assertTrue(f2 < f1);
} }
// Smoke test with Rng seed 42: randomMask, mutate, crossover and hillclimb (10 as last argument, lenCounts all 10)
// must each return a non-null Grid, and mutate must return a different instance from its input.
@Test @Test
void testGeneticAlgorithmComponents() { void testGeneticAlgorithmComponents() {
SwedishGenerator gen = new SwedishGenerator(); var gen = new SwedishGenerator();
Rng rng = new Rng(42); var rng = new Rng(42);
Grid g1 = gen.randomMask(rng); var g1 = gen.randomMask(rng);
assertNotNull(g1); assertNotNull(g1);
Grid g2 = gen.mutate(rng, g1); var g2 = gen.mutate(rng, g1);
assertNotNull(g2); assertNotNull(g2);
assertNotSame(g1, g2); assertNotSame(g1, g2);
Grid g3 = gen.crossover(rng, g1, g2); var g3 = gen.crossover(rng, g1, g2);
assertNotNull(g3); assertNotNull(g3);
int[] lenCounts = new int[12]; var lenCounts = new int[12];
Arrays.fill(lenCounts, 10); Arrays.fill(lenCounts, 10);
Grid g4 = gen.hillclimb(rng, g1, lenCounts, 10); var g4 = gen.hillclimb(rng, g1, lenCounts, 10);
assertNotNull(g4); assertNotNull(g4);
} }
// Places the lemma "AZ" into a 2-cell slot covering (0,1) and (0,2) of an empty grid: placeWord must return 2
// and write 'A' and 'Z'; undoPlace with the same undo buffer must then restore both cells to C_DASH.
@Test @Test
void testBacktrackingHelpers() { void testBacktrackingHelpers() {
Grid grid = SwedishGenerator.makeEmptyGrid(); var grid = SwedishGenerator.makeEmptyGrid();
// Slot at 0,1 length 2 // Slot at 0,1 length 2
Slot s = new Slot((0<<8)|(1<<4)|2, 0L, (1L | (2L<<4)), 2); var s = new Slot((0 << 8) | (1 << 4) | 2, 0L, (1L | (2L << 4)), 2);
Lemma w = new Lemma("AZ", 1, "A to Z"); var w = new Lemma("AZ", 1, "A to Z");
long[] undoBuffer = new long[10]; var undoBuffer = new long[10];
int placed = SwedishGenerator.placeWord(grid, s, w, undoBuffer, 0); var placed = SwedishGenerator.placeWord(grid, s, w, undoBuffer, 0);
assertEquals(2, placed); assertEquals(2, placed);
assertEquals('A', grid.getCharAt(0, 1)); assertEquals('A', grid.getCharAt(0, 1));
assertEquals('Z', grid.getCharAt(0, 2)); assertEquals('Z', grid.getCharAt(0, 2));
SwedishGenerator.undoPlace(grid, undoBuffer, 0, placed); SwedishGenerator.undoPlace(grid, undoBuffer, 0, placed);
assertEquals(SwedishGenerator.C_DASH, grid.getCharAt(0, 1)); assertEquals(SwedishGenerator.C_DASH, grid.getCharAt(0, 1));
assertEquals(SwedishGenerator.C_DASH, grid.getCharAt(0, 2)); assertEquals(SwedishGenerator.C_DASH, grid.getCharAt(0, 2));
} }
} }