Gather data

This commit is contained in:
mike
2026-01-08 02:02:08 +01:00
parent 14b33022d2
commit 17880de6d5
3 changed files with 243 additions and 244 deletions

View File

@@ -176,7 +176,7 @@ public final class ExportFormat {
public record WordOut(Lemma lemma, int startRow, int startCol, String direction, int arrowRow, int arrowCol, boolean isReversed, int complex) {
public String word() { return lemma().word(); }
public String word() { return new String(lemma().word()); }
public ArrayList<String> clue() { return lemma.clue(); }
}

View File

@@ -90,7 +90,8 @@ public record SwedishGenerator(int[] buff) {
IntList[] intListBuffer,
long[] undoBuffer) {
/** Creates a context whose scratch buffers are freshly allocated with fixed default sizes. */
public Context() { this(new int[256], new int[256], new int[256], new int[256], new BitSet(256), new char[32], new IntList[32], new long[2048]); }

/**
 * Copies all of {@code chars} into the start of this context's {@code pattern} buffer.
 * Elements of {@code pattern} beyond {@code chars.length} are left untouched.
 *
 * <p>NOTE(review): the name looks like a typo for {@code setPattern}; it is kept because
 * callers use it.
 *
 * @param chars characters to copy; must not be longer than the {@code pattern} buffer
 * @throws IndexOutOfBoundsException if {@code chars} is longer than the {@code pattern} buffer
 * @throws NullPointerException if {@code chars} is {@code null}
 */
void setPatter(char[] chars) { System.arraycopy(chars, 0, this.pattern, 0, chars.length); }
}
static final class Rng {
@@ -120,8 +121,8 @@ public record SwedishGenerator(int[] buff) {
record Grid(byte[] g) {
// Returns a new Grid backed by a clone of the cell array, so writes to the copy do not reach this grid.
Grid deepCopyGrid() { return new Grid(g.clone()); }
// NOTE(review): offset and isLettercell each appear twice below. The hunk header (-120,8 +121,8)
// suggests the first pair is the replaced version and the second pair the replacement — confirm
// against the checked-out file; both pairs cannot coexist in one record.
// Index of cell (r, c) with W cells per row, rows stored one after another.
private int offset(int r, int c) { return r * W + c; }
// A cell is a letter cell when isDigitAt (defined outside this excerpt) says it is not a digit cell.
boolean isLettercell(int r, int c) { return !isDigitAt(r, c); }
// Index of cell (r, c) with the row in the low three bits and the column shifted above them.
// NOTE(review): distinct cells only get distinct indices while r is in 0..7 — confirm H <= 8.
private int offset(int r, int c) { return r | (c << 3); }
// True unless both bits of the mask 48 (0x30) are set in the cell byte; ASCII digits '0'..'9' (48..57) have both set.
// NOTE(review): other bytes such as ':'..'?' and 'p'..'z' also have both bits set — presumably never stored in the grid; confirm.
boolean isLettercell(int r, int c) { return (g[offset(r, c)] & 48) != 48; }
// Cell byte at (r, c), cast to char.
char getCharAt(int r, int c) { return (char) (g[offset(r, c)]); }
// Cell byte at (r, c) minus 48 ('0'): the numeric value when the cell holds an ASCII digit.
int digitAt(int r, int c) { return g[offset(r, c)] - 48; }
// Raw cell byte at (r, c).
byte byteAt(int r, int c) { return g[offset(r, c)]; }
@@ -176,15 +177,15 @@ public record SwedishGenerator(int[] buff) {
}
}
static record Lemma(int index, String word, int length, int simpel, ArrayList<String> clue) {
static record Lemma(int index, char[] word, int simpel, ArrayList<String> clue) {
static int LEMMA_COUNTER = 0;
public Lemma(int index, String word, int simpel, String clu) {
this(index, word, word.length(), simpel, new ArrayList<String>(10));
this(index, word.toCharArray(), simpel, new ArrayList<String>(10));
clue.add(clu);
}
public Lemma(String word, int simpel, String clue) { this(LEMMA_COUNTER++, word, simpel, clue); }
char charAt(int idx) { return word.charAt(idx); }
char charAt(int idx) { return word[idx]; }
@Override public int hashCode() { return index; }
@Override public boolean equals(Object o) { return (o == this) || (o instanceof Lemma l && l.index == index); }
}
@@ -197,12 +198,12 @@ public record SwedishGenerator(int[] buff) {
Lemma[] lemmas = wordz.clone();
Arrays.sort(lemmas, Comparator.comparingInt(wd -> wd.simpel));
var lenCounts = new int[12];
var index = new DictEntry[12];
var lenCounts = new int[MAX_WORD_LENGTH+1];
var index = new DictEntry[MAX_WORD_LENGTH+1];
Arrays.setAll(index, i -> new DictEntry(i));
int maxLength = -1;
for (var lemma : lemmas) {
var L = lemma.length();
var L = lemma.word.length;
if (L > maxLength) maxLength = L;
lenCounts[L]++;
@@ -282,10 +283,10 @@ public record SwedishGenerator(int[] buff) {
return Arrays.copyOf(buff, k);
}
CandidateInfo candidateInfoForPattern(DictEntry entry, char[] pattern, int len) {
var ctx = CTX.get();
var listBuffer = ctx.intListBuffer;
int listCount = 0;
CandidateInfo candidateInfoForPattern(Context ctx, DictEntry entry, int len) {
char[] pattern = ctx.pattern;
var listBuffer = ctx.intListBuffer;
int listCount = 0;
for (var i = 0; i < len; i++) {
var ch = pattern[i];
if (isLetter(ch)) {
@@ -362,8 +363,7 @@ public record SwedishGenerator(int[] buff) {
long packedCs = 0;
var n = 0;
while (rr >= 0 && rr < H && cc >= 0 && cc < W && n < MAX_WORD_LENGTH) {
if (grid.isDigitAt(rr, cc)) break;
while (rr >= 0 && rr < H && cc >= 0 && cc < W && grid.isLettercell(rr, cc) && n < MAX_WORD_LENGTH) {
packedRs |= (long) rr << (n << 2);
packedCs |= (long) cc << (n << 2);
n++;
@@ -772,7 +772,7 @@ public record SwedishGenerator(int[] buff) {
}
var patLen = patternForSlot(grid, s, ctx.pattern);
var info = candidateInfoForPattern(entry, ctx.pattern, patLen);
var info = candidateInfoForPattern(ctx, entry, patLen);
if (info.count == 0) {
return new Pick(null, null, false);

View File

@@ -1,5 +1,6 @@
package puzzle;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import puzzle.SwedishGenerator.*;
import java.util.ArrayList;
@@ -10,231 +11,229 @@ import java.util.HashMap;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Unit tests for the building blocks of {@link SwedishGenerator}: the RNG, grid cell access,
 * the int list, lemma and dictionary indexing, slot packing, sorted intersection, pattern
 * lookup, slot extraction, mask fitness, the genetic operators, and word placement with undo.
 */
public class SwedishGeneratorTest {

    /** Equal seeds give the same first value; bounded draws stay inside their bounds. */
    @Test
    void testRng() {
        var rng = new Rng(123);
        var val1 = rng.nextU32();
        var val2 = rng.nextU32();
        assertNotEquals(val1, val2);

        var rng2 = new Rng(123);
        assertEquals(val1, rng2.nextU32());

        for (var i = 0; i < 100; i++) {
            var r = rng.randint(5, 10);
            assertTrue(r >= 5 && r <= 10, "randint(5, 10) returned " + r);
            var f = rng.nextFloat();
            assertTrue(f >= 0.0 && f <= 1.0, "nextFloat() returned " + f);
        }
    }

    /** Cell reads, letter/digit classification, and independence of a deep copy. */
    @Test
    void testGrid() {
        var grid = SwedishGenerator.makeEmptyGrid();
        grid.setCharAt(0, 0, 'A');
        grid.setCharAt(0, 1, '1');

        assertEquals('A', grid.getCharAt(0, 0));
        assertEquals(1, grid.digitAt(0, 1));
        assertTrue(grid.isLetterAt(0, 0));
        assertFalse(grid.isDigitAt(0, 0));
        assertTrue(grid.isDigitAt(0, 1));
        assertFalse(grid.isLetterAt(0, 1));
        assertTrue(grid.isLettercell(0, 0));
        assertFalse(grid.isLettercell(0, 1));

        // Writing to the copy must not change the original.
        var copy = grid.deepCopyGrid();
        assertEquals('A', copy.getCharAt(0, 0));
        copy.setCharAt(0, 0, 'B');
        assertEquals('B', copy.getCharAt(0, 0));
        assertEquals('A', grid.getCharAt(0, 0));
    }

    /** Appending ten values grows the list and keeps them in insertion order. */
    @Test
    void testIntList() {
        var list = new IntList();
        assertEquals(0, list.size());
        for (var i = 0; i < 10; i++) {
            list.add(i);
        }
        assertEquals(10, list.size());
        assertEquals(0, list.data()[0]);
        assertEquals(9, list.data()[9]);
    }

    /** Lemma accessors, and the dictionary's per-length counts and per-position letter index. */
    @Test
    void testLemmaAndDict() {
        var l1 = new Lemma("APPLE", 5, "A fruit");
        assertArrayEquals("APPLE".toCharArray(), l1.word());
        assertEquals(5, l1.word().length);
        assertEquals(5, l1.simpel());
        assertEquals('A', l1.charAt(0));

        var l2 = new Lemma("AXE", 2, "A tool");
        var dict = new Dict(new Lemma[]{ l1, l2 });
        assertEquals(1, dict.lenCounts()[3]);
        assertEquals(1, dict.lenCounts()[5]);

        var entry3 = dict.index()[3];
        assertEquals(1, entry3.words().size());
        assertArrayEquals("AXE".toCharArray(), entry3.words().getFirst().word());

        // Position index for AXE: A at 0, X at 1, E at 2.
        assertTrue(entry3.pos()[0]['A' - 'A'].size() > 0);
        assertTrue(entry3.pos()[1]['X' - 'A'].size() > 0);
        assertTrue(entry3.pos()[2]['E' - 'A'].size() > 0);
    }

    /** Unpacking of the slot key and of the 4-bit-per-cell packed row/column lists. */
    @Test
    void testSlot() {
        // key = (r << 8) | (c << 4) | d
        var key = (2 << 8) | (3 << 4) | 5;

        // rows [2, 3, 4], four bits each, first cell in the lowest bits
        long rs = 0;
        rs |= 2L;
        rs |= 3L << 4;
        rs |= 4L << 8;

        // columns [5, 5, 5]
        long cs = 0;
        cs |= 5L;
        cs |= 5L << 4;
        cs |= 5L << 8;

        var s = new Slot(key, rs, cs, 3);
        assertEquals(2, s.clueR());
        assertEquals(3, s.clueC());
        assertEquals(5, s.dir());
        assertFalse(s.horiz());
        assertEquals(2, s.r(0));
        assertEquals(3, s.r(1));
        assertEquals(4, s.r(2));
        assertEquals(5, s.c(0));
        assertEquals(5, s.c(1));
        assertEquals(5, s.c(2));

        assertTrue(Slot.horiz(2));  // right
        assertFalse(Slot.horiz(3)); // down
    }

    /** Intersection of two sorted arrays, with and without common elements. */
    @Test
    void testIntersectSorted() {
        var buff = new int[10];

        var a = new int[]{ 1, 3, 5, 7, 9 };
        var b = new int[]{ 2, 3, 6, 7, 10 };
        var res = SwedishGenerator.intersectSorted(buff, a, a.length, b, b.length);
        assertArrayEquals(new int[]{ 3, 7 }, res);

        var c = new int[]{ 1, 2, 3 };
        var d = new int[]{ 4, 5, 6 };
        res = SwedishGenerator.intersectSorted(buff, c, c.length, d, d.length);
        assertEquals(0, res.length);
    }

    /** The pattern "APP--" matches APPLE and APPLY but not BANAN. */
    @Test
    void testCandidateInfoForPattern() {
        var l1 = new Lemma("APPLE", 1, "fruit");
        var l2 = new Lemma("APPLY", 1, "verb");
        var l3 = new Lemma("BANAN", 1, "fruit");
        var dict = new Dict(new Lemma[]{ l1, l2, l3 });
        var gen = new SwedishGenerator();

        // The pattern is read from the context, so it is written there first.
        var context = new Context();
        context.setPatter(new char[]{ 'A', 'P', 'P', SwedishGenerator.C_DASH, SwedishGenerator.C_DASH });

        var info = gen.candidateInfoForPattern(context, dict.index()[5], 5);
        assertEquals(2, info.count());
        assertNotNull(info.indices());
    }

    /** A single "right" clue digit in the top-left corner yields exactly one slot. */
    @Test
    void testForEachSlotAndExtractSlots() {
        var gen = new SwedishGenerator();
        var grid = SwedishGenerator.makeEmptyGrid();

        // '2' is the "right" direction; the slot's first cell is therefore (0,1).
        grid.setCharAt(0, 0, '2');
        var slots = gen.extractSlots(grid);
        assertEquals(1, slots.size());

        // The slot's length depends on the configured grid size and MAX_WORD_LENGTH, which
        // may differ between environments (3x3 gives 2, the defaults gave 8), so only a
        // lower bound is checked.
        var s = slots.getFirst();
        assertTrue(s.len() >= 2, "slot length was " + s.len());
        assertEquals(0, s.clueR());
        assertEquals(0, s.clueC());
        assertEquals(2, s.dir());
    }

    /** Adding a slot to an empty grid must lower the penalty returned by maskFitness. */
    @Test
    void testMaskFitnessBasic() {
        var gen = new SwedishGenerator();
        var grid = SwedishGenerator.makeEmptyGrid();
        var lenCounts = new int[12];
        lenCounts[2] = 10;
        lenCounts[8] = 10; // in case MAX_WORD_LENGTH is 8

        // Empty grid: no slots, so a high penalty is expected.
        var f1 = gen.maskFitness(grid, lenCounts);
        assertTrue(f1 >= 1_000_000_000L, "fitness of empty grid was " + f1);

        // One clue cell creates a slot.
        grid.setCharAt(0, 0, '2');
        var f2 = gen.maskFitness(grid, lenCounts);
        assertTrue(f2 < f1, "fitness did not improve: " + f2 + " vs " + f1);
    }

    /** Smoke test: each genetic operator returns a grid, and mutate returns a new instance. */
    @Test
    void testGeneticAlgorithmComponents() {
        var gen = new SwedishGenerator();
        var rng = new Rng(42);

        var g1 = gen.randomMask(rng);
        assertNotNull(g1);

        var g2 = gen.mutate(rng, g1);
        assertNotNull(g2);
        assertNotSame(g1, g2);

        var g3 = gen.crossover(rng, g1, g2);
        assertNotNull(g3);

        var lenCounts = new int[12];
        Arrays.fill(lenCounts, 10);
        var g4 = gen.hillclimb(rng, g1, lenCounts, 10);
        assertNotNull(g4);
    }

    /** placeWord writes the word's letters into the slot; undoPlace restores the dashes. */
    @Test
    void testBacktrackingHelpers() {
        var grid = SwedishGenerator.makeEmptyGrid();

        // Clue key row 0, column 1, direction 2; the slot's cells are (0,1) and (0,2).
        var s = new Slot((0 << 8) | (1 << 4) | 2, 0L, (1L | (2L << 4)), 2);
        var w = new Lemma("AZ", 1, "A to Z");
        var undoBuffer = new long[10];

        var placed = SwedishGenerator.placeWord(grid, s, w, undoBuffer, 0);
        assertEquals(2, placed);
        assertEquals('A', grid.getCharAt(0, 1));
        assertEquals('Z', grid.getCharAt(0, 2));

        SwedishGenerator.undoPlace(grid, undoBuffer, 0, placed);
        assertEquals(SwedishGenerator.C_DASH, grid.getCharAt(0, 1));
        assertEquals(SwedishGenerator.C_DASH, grid.getCharAt(0, 2));
    }
}