From 183216e753c1cfc7bc2410299b1032c141e0afa9 Mon Sep 17 00:00:00 2001 From: mike Date: Fri, 9 Jan 2026 22:25:15 +0100 Subject: [PATCH] Gather data --- src/main/java/puzzle/Main.java | 8 +- src/main/java/puzzle/SwedishGenerator.java | 110 +++++++++--------- src/test/java/puzzle/ExportFormatTest.java | 4 +- src/test/java/puzzle/MainTest.java | 12 +- .../java/puzzle/SwedishGeneratorTest.java | 14 +-- 5 files changed, 75 insertions(+), 73 deletions(-) diff --git a/src/main/java/puzzle/Main.java b/src/main/java/puzzle/Main.java index a74b23a..39bd059 100644 --- a/src/main/java/puzzle/Main.java +++ b/src/main/java/puzzle/Main.java @@ -16,7 +16,7 @@ import java.util.stream.Collectors; import static puzzle.Export.*; import static puzzle.SwedishGenerator.*; -import static puzzle.SwedishGenerator.loadWords; +import static puzzle.SwedishGenerator.Dict.loadDict; public class Main { @@ -244,11 +244,11 @@ public class Main { PuzzleResult generatePuzzle(Opts opts) { var tLoad0 = System.nanoTime(); - var dict = loadWords(opts.wordsPath); + var dict = loadDict(opts.wordsPath); var tLoad1 = System.nanoTime(); section("Load"); - info(String.format(Locale.ROOT, "words : %,d", dict.dictLength() )); + info(String.format(Locale.ROOT, "words : %,d", dict.length() )); info(String.format(Locale.ROOT, "loadTime : %.3f s", (tLoad1 - tLoad0) / 1e9)); section("Search"); @@ -332,7 +332,7 @@ public class Main { if (TOTAL_SUCCESS.get() > 0) { info(String.format(Locale.ROOT, "avgSimplic : %.2f", TOTAL_SIMPLICITY.get() / 100.0 / TOTAL_SUCCESS.get())); } - info(String.format(Locale.ROOT, "dictWords : %,d", dict.dictLength())); + info(String.format(Locale.ROOT, "dictWords : %,d", dict.length())); return resFinal; } diff --git a/src/main/java/puzzle/SwedishGenerator.java b/src/main/java/puzzle/SwedishGenerator.java index 7fab84f..db97753 100644 --- a/src/main/java/puzzle/SwedishGenerator.java +++ b/src/main/java/puzzle/SwedishGenerator.java @@ -32,6 +32,9 @@ public record SwedishGenerator() { record CandidateInfo(int[] indices, int count) { } + //@formatter:off + @FunctionalInterface interface SlotVisitor { void visit(int key, long packedPos, int len); } + //@formatter:on static final int BAR_LEN = 22; static final int C = Config.PUZZLE_COLS; static final double CROSS_Y = (C - 1) / 2.0; @@ -193,11 +196,10 @@ public record SwedishGenerator() { double nextFloat() { return (nextU32() & 0xFFFFFFFFL) / 4294967295.0; } } - static final byte _48 = 48; - record Grid(byte[] g, long[] bo) { public Grid(byte[] g) { this(g, new long[2]); } + static Grid createEmpty() { return new Grid(new byte[SIZE], new long[2]); } int digitAt(int r, int c) { return g[offset(r, c)] - 48; } int digitAt(int index) { return g[index] - 48; } public static int r(int offset) { return offset & 7; } @@ -245,7 +247,6 @@ public record SwedishGenerator() { for (var hi = bo[1]; hi != 0; hi &= hi - 1) processSlot(this, visitor, 64 + Long.numberOfTrailingZeros(hi)); } } - static Grid makeEmptyGrid() { return new Grid(new byte[SIZE], new long[2]); } static final class IntList { @@ -280,8 +281,9 @@ public record SwedishGenerator() { public static record Dict( DictEntry[] index, - int[] lenCounts) { + int[] lenCounts, int length) { + static final Gson GSON = new Gson(); public Dict(Lemma[] wordz) { var lenCounts = new int[MAX_WORD_LENGTH_PLUS_ONE]; var index = new DictEntry[MAX_WORD_LENGTH_PLUS_ONE]; @@ -300,55 +302,62 @@ public record SwedishGenerator() { entry.pos[i][letter].add(idx); } } - this(index, lenCounts); - } - public int dictLength() { return Arrays.stream(lenCounts).sum(); } - } - - static final Gson GSON = new Gson(); - static Dict loadWords(String wordsPath) { - String raw; - try { - raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8); - } catch (IOException e) { - e.printStackTrace(); - raw = "WOORD,level_1_to_10,hint\nEU,2,hint\nUUR,2,hint\nAUTO,2,hint\nBOOM,2,hint\nHUIS,2,hint\nKAT,2,hint\nZEE,2,hint\nRODE,2,hint\nDRAAD,2,hint\nKENNIS,2,hint\nNETWERK,2,hint\nPAKTE,2,hint\n"; - } - - var map = new ArrayList(); - var first = true; - for (var line : raw.split("\\R")) { - if (line.isBlank()) { - System.err.println("Empty line: " + line); - continue; + for (var len : lenCounts) { + if (len <= 0) { + System.out.println("No words for length " + len); + //throw new RuntimeException("Invalid word length: " + len); + } } - var parts = line.split(",", 4); - var word = parts[0].trim(); - if (first && word.equalsIgnoreCase("WOORD")) { + this(index, lenCounts, Arrays.stream(lenCounts).sum()); + } + static Dict loadDict(String wordsPath) { + String raw; + try { + raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8); + } catch (IOException e) { + e.printStackTrace(); + raw = "WOORD,level_1_to_10,hint\nEU,2,hint\nUUR,2,hint\nAUTO,2,hint\nBOOM,2,hint\nHUIS,2,hint\nKAT,2,hint\nZEE,2,hint\nRODE,2,hint\nDRAAD,2,hint\nKENNIS,2,hint\nNETWERK,2,hint\nPAKTE,2,hint\n"; + } + + var map = new ArrayList(); + var first = true; + for (var line : raw.split("\\R")) { + if (line.isBlank()) { + System.err.println("Empty line: " + line); + continue; + } + var parts = line.split(",", 4); + var word = parts[0].trim(); + if (first && word.equalsIgnoreCase("WOORD")) { + first = false; + continue; + } first = false; - continue; - } - first = false; - var s = word.toUpperCase(Locale.ROOT); - if (s.matches("^[A-Z]{2,8}$")) { + var s = word.toUpperCase(Locale.ROOT); + if (!s.matches("^[A-Z]{2,8}$")) { + System.err.println("Invalid word: " + line); + continue; + } // CSV has level 1-10. llmScores use 10-level. - int score = 10 - Integer.parseInt(parts[1].trim()); + int score = 10 - Integer.parseInt(parts[1].trim()); + if (score < 1) { + if (Main.VERBOSE) System.err.println("Word too complex: " + line); + continue; + } int simpel = Integer.parseInt(parts[2].trim()); var rawClue = parts[3].trim(); if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) { rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\""); } - if (score >= 1) { - map.add(new Lemma(s, simpel, GSON.fromJson(rawClue, String[].class))); - } - } else { - System.err.println("Invalid word: " + line); + map.add(new Lemma(s, simpel, GSON.fromJson(rawClue, String[].class))); } + + return new Dict(map.toArray(Lemma[]::new)); } - - return new Dict(map.toArray(Lemma[]::new)); + public int dictLength() { return Arrays.stream(lenCounts).sum(); } } + static int intersectSorted(int[] a, int aLen, int[] b, int bLen, int[] out) { int i = 0, j = 0, k = 0; while (i < aLen && j < bLen) { @@ -367,6 +376,7 @@ public record SwedishGenerator() { static record Slot(int key, long packedPos) { static Slot from(int key, long packedPos, int len) { return new Slot(key, packedPos | ((long) len << 56)); } + void undoPlace(Grid grid, int mask) { for (int i = 0, len = len(); i < len; i++) if ((mask & (1L << i)) != 0) grid.clear(pos(i)); } public int len() { return (int) (packedPos >>> 56); } public int clueR() { return Grid.r((key >>> 4)); } public int clueIndex() { return key >>> 4; } @@ -377,16 +387,8 @@ public record SwedishGenerator() { public static boolean horiz(int key) { return ((key & 15) & 1) == 0; } public static int offset(long packedPos, int i) { return (int) ((packedPos >> (i * 7)) & 127); } } - static void undoPlace(Grid grid, Slot s, int mask) { - for (int i = 0, len = s.len(); i < len; i++) if ((mask & (1L << i)) != 0) grid.clear(s.pos(i)); - } - @FunctionalInterface - interface SlotVisitor { - - void visit(int key, long packedPos, int len); - } - private static void processSlot(Grid grid, SlotVisitor visitor, int idx) { + private static void processSlot(Grid grid, SwedishGenerator.SlotVisitor visitor, int idx) { var d = grid.digitAt(idx); var nbrs16 = OFFSETS[d]; int r = Grid.r(idx), c = Grid.c(idx), rr = r + nbrs16.r, cc = c + nbrs16.c; @@ -546,7 +548,7 @@ public record SwedishGenerator() { } Grid randomMask(Rng rng) { - var g = makeEmptyGrid(); + var g = Grid.createEmpty(); int placed = 0, guard = 0; while (placed < TARGET_CLUES && guard++ < 4000) { @@ -583,7 +585,7 @@ public record SwedishGenerator() { return g; } Grid crossover(Rng rng, Grid a, Grid b) { - var out = makeEmptyGrid(); + var out = Grid.createEmpty(); var theta = rng.nextFloat() * Math.PI; var nx = Math.cos(theta); var ny = Math.sin(theta); @@ -877,7 +879,7 @@ public record SwedishGenerator() { assigned.remove(k); used.clear(w.index); - undoPlace(grid, s, ctx.undo[depth]); + s.undoPlace(grid, ctx.undo[depth]); } stats.backtracks++; return false; @@ -913,7 +915,7 @@ public record SwedishGenerator() { assigned.remove(k); used.clear(w.index); - undoPlace(grid, s, ctx.undo[depth]); + s.undoPlace(grid, ctx.undo[depth]); } stats.backtracks++; diff --git a/src/test/java/puzzle/ExportFormatTest.java b/src/test/java/puzzle/ExportFormatTest.java index 465ff7e..f3086ca 100644 --- a/src/test/java/puzzle/ExportFormatTest.java +++ b/src/test/java/puzzle/ExportFormatTest.java @@ -17,7 +17,7 @@ public class ExportFormatTest { @Test void testExportFormatFromFilled() { var swe = new SwedishGenerator(); - var grid = SwedishGenerator.makeEmptyGrid(); + var grid = Grid.createEmpty(); // Place a '2' (right) at (0,0) grid.setClue(0, (byte) '2'); @@ -76,7 +76,7 @@ public class ExportFormatTest { @Test void testExportFormatEmpty() { var swe = new SwedishGenerator(); - var grid = SwedishGenerator.makeEmptyGrid(); + var grid = Grid.createEmpty(); var fillResult = new FillResult(true, new Gridded(grid), new HashMap<>(), null); var puzzleResult = new PuzzleResult(swe, null, null, fillResult); diff --git a/src/test/java/puzzle/MainTest.java b/src/test/java/puzzle/MainTest.java index ae0cf47..5fc21cb 100644 --- a/src/test/java/puzzle/MainTest.java +++ b/src/test/java/puzzle/MainTest.java @@ -24,7 +24,7 @@ public class MainTest { @Test void testExtractSlots() { var generator = new SwedishGenerator(); - var grid = makeEmptyGrid(); + var grid = Grid.createEmpty(); // Set up digits on the grid to create slots. // '2' (right) at (0,0) -> slot at (0,1), (0,2) @@ -60,7 +60,7 @@ public class MainTest { @Test void testForEachSlot() { var generator = new SwedishGenerator(); - var grid = makeEmptyGrid(); + var grid = Grid.createEmpty(); grid.setClue(0, (byte) '2'); // right var count = new AtomicInteger(0); @@ -82,7 +82,7 @@ public class MainTest { } @Test public void testGridBasics() { - var grid = makeEmptyGrid(); + var grid = Grid.createEmpty(); // Test set/get grid.setCharAt(0, 0, 'A'); @@ -115,7 +115,7 @@ public class MainTest { @Test public void testGridDeepCopy() { - var grid = makeEmptyGrid(); + var grid = Grid.createEmpty(); grid.setCharAt(0, 0, 'A'); grid.setCharAt(0, 1, 'B'); grid.setCharAt(1, 0, 'C'); @@ -131,7 +131,7 @@ public class MainTest { @Test public void testMini() { - var grid = makeEmptyGrid(); + var grid = Grid.createEmpty(); grid.setCharAt(1, 1, '1'); Assertions.assertTrue(grid.isDigitAt(1, 1)); } @@ -148,7 +148,7 @@ public class MainTest { opts.tries = 1; opts.verbose = false; - var dict = loadWords(opts.wordsPath); + var dict = Dict.loadDict(opts.wordsPath); // Act PuzzleResult res = null; diff --git a/src/test/java/puzzle/SwedishGeneratorTest.java b/src/test/java/puzzle/SwedishGeneratorTest.java index 60ea97f..4856a4d 100644 --- a/src/test/java/puzzle/SwedishGeneratorTest.java +++ b/src/test/java/puzzle/SwedishGeneratorTest.java @@ -69,7 +69,7 @@ public class SwedishGeneratorTest { @Test void testGrid() { - var grid = SwedishGenerator.makeEmptyGrid(); + var grid = Grid.createEmpty(); grid.setCharAt(0, 0, 'A'); grid.setCharAt(0, 1, '1'); @@ -191,7 +191,7 @@ public class SwedishGeneratorTest { @Test void testForEachSlotAndExtractSlots() { var gen = new SwedishGenerator(); - var grid = SwedishGenerator.makeEmptyGrid(); + var grid = Grid.createEmpty(); // 3x3 grid (Config.PUZZLE_ROWS/COLS are 3 in test env) // Set '2' (right) at 0,0 grid.setClue(0, (byte) '2'); @@ -219,7 +219,7 @@ public class SwedishGeneratorTest { @Test void testMaskFitnessBasic() { var gen = new SwedishGenerator(); - var grid = SwedishGenerator.makeEmptyGrid(); + var grid = Grid.createEmpty(); var lenCounts = new int[12]; lenCounts[2] = 10; lenCounts[8] = 10; // In case MAX_WORD_LENGTH is 8 @@ -257,7 +257,7 @@ public class SwedishGeneratorTest { @Test void testPlaceWord() { - var grid = SwedishGenerator.makeEmptyGrid(); + var grid = Grid.createEmpty(); // Slot at (0,0) length 3, horizontal (right) // key = (r << 8) | (c << 4) | d. Here we just need a valid slot for placeWord. // r(i) and c(i) are used by placeWord. @@ -286,7 +286,7 @@ public class SwedishGeneratorTest { assertEquals('C', grid.byteAt(0, 2)); // 4. Partial placement then conflict (rollback) - grid = SwedishGenerator.makeEmptyGrid(); + grid = Grid.createEmpty(); grid.setCharAt(0, 2, 'X'); // Conflict at the end assertFalse(SwedishGenerator.placeWord(grid, s, w1, undoBuffer, 3)); // Verify grid is still empty (except for 'X') @@ -297,7 +297,7 @@ public class SwedishGeneratorTest { @Test void testBacktrackingHelpers() { - var grid = SwedishGenerator.makeEmptyGrid(); + var grid = Grid.createEmpty(); // Slot at 0,1 length 2 var packedPos = ((long) Grid.offset(0, 1)) | (((long) Grid.offset(0, 2)) << 7); var s = Slot.from((0 << 8) | (1 << 4) | 2, packedPos, 2); @@ -310,7 +310,7 @@ public class SwedishGeneratorTest { assertEquals('Z', grid.byteAt(0, 2)); assertEquals(0b11L, undoBuffer[0]); - SwedishGenerator.undoPlace(grid, s, undoBuffer[0]); + s.undoPlace(grid, undoBuffer[0]); assertEquals(SwedishGenerator.DASH, grid.byteAt(0, 1)); assertEquals(SwedishGenerator.DASH, grid.byteAt(0, 2)); }