From 9102dcb922b57879d35129926296b315201ab93f Mon Sep 17 00:00:00 2001 From: mike Date: Sat, 17 Jan 2026 13:22:04 +0100 Subject: [PATCH] introduce bitloops --- src/main/java/puzzle/Export.java | 28 +- src/main/java/puzzle/Main.java | 74 +++-- src/main/java/puzzle/Masker.java | 19 +- src/main/java/puzzle/Meta.java | 72 +++++ src/main/java/puzzle/SwedishGenerator.java | 31 +- .../java/puzzle/CsvIndexService.java | 20 +- src/test/java/puzzle/DictCodeGen.java | 193 ++++++++++++ .../java/puzzle/DictJavaGeneratorMulti.java | 279 ++++++++++++++++++ src/test/java/puzzle/ExportFormatTest.java | 16 +- src/test/java/puzzle/MainTest.java | 56 +++- 10 files changed, 706 insertions(+), 82 deletions(-) create mode 100644 src/main/java/puzzle/Meta.java rename src/{main => test}/java/puzzle/CsvIndexService.java (95%) create mode 100644 src/test/java/puzzle/DictCodeGen.java create mode 100644 src/test/java/puzzle/DictJavaGeneratorMulti.java diff --git a/src/main/java/puzzle/Export.java b/src/main/java/puzzle/Export.java index fa0e89a..035b734 100644 --- a/src/main/java/puzzle/Export.java +++ b/src/main/java/puzzle/Export.java @@ -12,10 +12,6 @@ import puzzle.SwedishGenerator.DictEntry; import puzzle.SwedishGenerator.FillResult; import puzzle.SwedishGenerator.Grid; import puzzle.SwedishGenerator.Slotinfo; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -197,7 +193,7 @@ public record Export() { } } - record Placed(long lemma, int slotKey, int[] cells) { + record Placed(long lemma, int shardIdx, int slotKey, int[] cells) { static final char[] DIRECTION = { Placed.VERTICAL, Placed.HORIZONTAL, Placed.VERTICAL, Placed.HORIZONTAL }; public static final char HORIZONTAL = 'h'; @@ -214,9 +210,10 @@ public record Export() { public record WordOut(String word, int[] cell, int startRow, int startCol, char direction, int arrowRow, int arrowCol, boolean isReversed, int complex, String[] clue) { - public WordOut(long l, int startRow, int startCol, char d, int arrowRow, int arrowCol, boolean isReversed) { + public WordOut(long l, int shardIdx, int startRow, int startCol, char d, int arrowRow, int arrowCol, boolean isReversed) { + val meta = Meta.readRecord(Meta.shardKey(l), shardIdx); this(Lemma.asWord(l), new int[]{ arrowRow, arrowCol, startRow, startCol }, startRow, startCol, d, arrowRow, arrowCol, isReversed, - CsvIndexService.simpel(Lemma.unpackIndex(l)), CsvIndexService.clues(Lemma.unpackIndex(l))); + meta.simpel(), meta.clues()); } } @@ -230,7 +227,7 @@ public record Export() { for (var n = 1; n < slots.length; n++) { if (slots[n].assign().w != X) { k++; - simpel += CsvIndexService.simpel(Lemma.unpackIndex(slots[n].assign().w)); + simpel += Meta.readRecord(Meta.shardKey(slots[n].assign().w), slots[n].assign().shardIdx).simpel();//.simpel(Lemma.unpackIndex(slots[n].assign().w)); } } simpel = k == 0 ? 0 : simpel / k; @@ -239,7 +236,7 @@ public record Export() { public ExportedPuzzle exportFormatFromFilled(int difficulty, Rewards rewards) { var placed = new ArrayList(); for (var slot : slots) { - placed.add(new Placed(slot.assign().w, slot.key(), Gridded.walk((byte) slot.key(), slot.lo(), slot.hi()).toArray())); + placed.add(new Placed(slot.assign().w, slot.assign().shardIdx, slot.key(), Gridded.walk((byte) slot.key(), slot.lo(), slot.hi()).toArray())); } // If nothing placed: return full grid mapped to letters/# only @@ -285,6 +282,7 @@ public record Export() { int MIN_R = minR, MIN_C = minC; var wordsOut = placed.stream().map(p -> new WordOut( p.lemma, + p.shardIdx, p.startRow() - MIN_R, p.startCol() - MIN_C, p.direction(), @@ -298,19 +296,9 @@ public record Export() { interface Dicts { - static Dict loadDict(String wordsPath) { - try { - var map = new LongArrayList(100_000); - Files.lines(Path.of(wordsPath), StandardCharsets.UTF_8).forEach(line -> CsvIndexService.lineToLemma(line, map::add)); - return makeDict(map.toArray()); - } catch (IOException e) { - e.printStackTrace(); - throw new RuntimeException("Failed to load dictionary from " + wordsPath, e); - } - } static Dict makeDict(long[] wordz) { var index = new DictEntryDTO[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE]; - Arrays.setAll(index, i -> new DictEntryDTO(i)); + Arrays.setAll(index, DictEntryDTO::new); for (var lemma : wordz) { var L = Lemma.length(lemma); diff --git a/src/main/java/puzzle/Main.java b/src/main/java/puzzle/Main.java index 010e149..4d1c0bf 100644 --- a/src/main/java/puzzle/Main.java +++ b/src/main/java/puzzle/Main.java @@ -4,6 +4,7 @@ import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; import lombok.val; +import puzzle.Masker.Clues; import puzzle.SwedishGenerator.Rng; import java.io.IOException; @@ -15,11 +16,10 @@ import java.time.format.DateTimeFormatter; import java.util.*; import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; -import static puzzle.CsvIndexService.SC; import static puzzle.Export.*; import static puzzle.SwedishGenerator.*; -import static puzzle.Export.Dicts.loadDict; public class Main { @@ -44,11 +44,12 @@ public class Main { @NoArgsConstructor public static class Opts { + static int SSIZE = 20; public int seed = (int) (System.nanoTime() ^ System.currentTimeMillis()); - public int clueSize = 20; - public int pop = 40; - public int offspring = 60; - public int gens = 500; + public int clueSize = SSIZE; + public int pop = SSIZE * 2; + public int offspring = SSIZE * 3; + public int gens = 600; public String wordsPath = "nl_score_hints_v3.csv"; public double minSimplicity = 0; // 0 means no limit public int threads = Math.max(1, Runtime.getRuntime().availableProcessors()); @@ -59,16 +60,7 @@ public class Main { } void main(String[] args) { - var csv = Paths.get("nl_score_hints_v3.csv"); - var idx = Paths.get("nl_score_hints_v3.idx"); - try { - val scv = new CsvIndexService(csv, idx); - scv.ensureLoaded(); - ScopedValue.where(SC, scv).run(() -> _main(args)); - } catch (IOException e) { - throw new RuntimeException(e); - } - + _main(args); } public void _main(String[] args) { var opts = parseArgs(args); @@ -274,7 +266,7 @@ public class Main { PuzzleResult generatePuzzle(Opts opts) { var tLoad0 = System.nanoTime(); - var dict = loadDict(opts.wordsPath); + var dict = DictData.DICT;//loadDict(opts.wordsPath); var tLoad1 = System.nanoTime(); section("Load"); @@ -294,10 +286,9 @@ public class Main { try { // Keep at least some tasks in flight - final var service = CsvIndexService.SC.get(); for (int i = 0; i < opts.threads; i++) { final int attemptIdx = ++submitted; - completionService.submit(() -> ScopedValue.where(CsvIndexService.SC, service).call(() -> attempt(new Rng(opts.seed + attemptIdx), dict, opts))); + completionService.submit(() -> attempt(new Rng(opts.seed + attemptIdx), dict, opts)); } while (System.currentTimeMillis() < deadline) { @@ -314,7 +305,7 @@ public class Main { // Submit another task if we still have time if (System.currentTimeMillis() < deadline) { final int attemptIdx = ++submitted; - completionService.submit(() -> ScopedValue.where(CsvIndexService.SC, service).call(() -> attempt(new Rng(opts.seed + attemptIdx), dict, opts))); + completionService.submit(() -> attempt(new Rng(opts.seed + attemptIdx), dict, opts)); } } if (resFinal == null) warn("status : UNSOLVED (timeout)"); @@ -381,11 +372,44 @@ public class Main { return null; } } + static Clues generateClues() { + String simple = "000 3000\n" + + " 3 \n" + + " 31 \n" + + " 3\n" + + "1 \n" + + "1 \n" + + "1 2\n" + + "1 222 3"; + String sampleComplex = "1 0000\n" + + "1 \n" + + "00 01 \n" + + " 1 \n" + + " 1 \n" + + " 2 1 \n" + + " 1 \n" + + "221 22\n"; + String def = " 30000\n" + + "0 001 \n" + + " 1 \n" + + " 3 \n" + + " 3 \n" + + " 32 \n" + + " 32 2\n" + + "2222 3"; + return Clues.parse(sampleComplex + ); + } + static Clues generateNewClues(Rng rng, Opts opts) { + var masker = new Masker(rng, new int[STACK_SIZE], Masker.Clues.createEmpty()); + var mask = masker.generateMask(opts.clueSize, opts.pop, opts.gens, opts.offspring); + return mask; + } static PuzzleResult _attempt(Rng rng, Dict dict, Opts opts) { long t0 = System.currentTimeMillis(); TOTAL_ATTEMPTS.incrementAndGet(); - var masker = new Masker(rng, new int[STACK_SIZE], Masker.Clues.createEmpty()); - var mask = masker.generateMask(opts.clueSize, opts.pop, opts.gens, opts.offspring); + val mask = generateNewClues(rng, opts); + //val mask = generateClues(); if (mask == null) return null; val multiThreaded = Thread.currentThread().getName().contains("pool"); var slots = Masker.extractSlots(mask, dict.index()); @@ -423,7 +447,9 @@ public class Main { "[ATTEMPT] thread=%s | status=%s | nodes=%d | backtracks=%d | nps=%d | simplicity=%s | time=%.1fs%n", name, status, filled.nodes(), filled.backtracks(), nps, simplicity, totalTime ); - + if (!filled.ok()) { + System.out.println(Arrays.stream(new Clued(mask).gridToString().split("\n")).map(s -> "\"" + s + "\\n\" +").collect(Collectors.joining("\n"))); + } if (filled.ok() && (opts.minSimplicity <= 0 || filled.stats().simplicity >= opts.minSimplicity)) { return new PuzzleResult(new Clued(mask), new Gridded(grid), slotInfo, filled); } @@ -441,7 +467,7 @@ public class Main { record JsonExportedPuzzle(String date, String theme, int difficulty, Rewards rewards, String[] grid, WordOut[] words) { } private static String toJson(ExportedPuzzle puzzle, String date, String theme) { - return CsvIndexService.GSON.toJson(new JsonExportedPuzzle(date, theme, puzzle.difficulty(), puzzle.rewards(), puzzle.grid(), puzzle.words())); + return Meta.GSON.toJson(new JsonExportedPuzzle(date, theme, puzzle.difficulty(), puzzle.rewards(), puzzle.grid(), puzzle.words())); } private static String escapeJson(String s) { diff --git a/src/main/java/puzzle/Masker.java b/src/main/java/puzzle/Masker.java index e0a6f61..f90841b 100644 --- a/src/main/java/puzzle/Masker.java +++ b/src/main/java/puzzle/Masker.java @@ -436,6 +436,23 @@ public record Masker(Rng rng, int[] stack, Clues cache) { public long rhi() { return rhi; } public static Clues createEmpty() { return new Clues(0, 0, 0, 0, 0, 0); } + public static Clues parse(String s) { + var c = createEmpty(); + var lines = s.split("\n"); + for (int r = 0; r < Math.min(lines.length, R); r++) { + var line = lines[r]; + for (int col = 0; col < Math.min(line.length(), C); col++) { + char ch = line.charAt(col); + if (ch >= '0' && ch <= '3') { + int idx = Grid.offset(r, col); + byte dir = (byte) (ch - '0'); + if ((idx & 64) == 0) c.setClueLo(1L << idx, dir); + else c.setClueHi(1L << (idx & 63), dir); + } + } + } + return c; + } public boolean cluelessLo(int idx) { if (!isClueLo(idx)) return false; clearClueLo(~(1L << idx)); @@ -506,7 +523,7 @@ public record Masker(Rng rng, int[] stack, Clues cache) { } } - static record Slot(int key, long lo, long hi, DictEntry entry) { + public record Slot(int key, long lo, long hi, DictEntry entry) { static final int BIT_FOR_DIR = 2; static Slot from(int key, long lo, long hi, DictEntry entry) { return new Slot(key, lo, hi, entry); } diff --git a/src/main/java/puzzle/Meta.java b/src/main/java/puzzle/Meta.java new file mode 100644 index 0000000..141f7a8 --- /dev/null +++ b/src/main/java/puzzle/Meta.java @@ -0,0 +1,72 @@ +package puzzle; + +import com.google.gson.Gson; +import lombok.val; +import puzzle.SwedishGenerator.Lemma; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.stream.IntStream; +public class Meta { + + static final Gson GSON = new Gson(); + private static final int VERSION = 1; + + static record ShardLem(long w, int simpel, String[] clues) { } + + static final int SHARD_MAGIC = 0x49445831; // "IDX1" + static ShardLem readRecord(Path shardFile, int i) { + try (FileChannel ch = FileChannel.open(shardFile, StandardOpenOption.READ)) { + ByteBuffer hdr = ByteBuffer.allocate(12); + ch.read(hdr); + hdr.flip(); + int magic = hdr.getInt(); + int ver = hdr.getInt(); + int n = hdr.getInt(); + if (magic != SHARD_MAGIC || ver != VERSION) throw new IOException("Bad shard"); + if (i < 0 || i >= n) throw new IndexOutOfBoundsException(); + + long tableStart = 12L; + long dataStart = 12L + (long) n * 4L; + + int offI = readIntAt(ch, tableStart + (long) i * 4L); + int offIp = (i + 1 < n) ? readIntAt(ch, tableStart + (long) (i + 1) * 4L) + : (int) (ch.size() - dataStart); + + int len = offIp - offI; + ByteBuffer buf = ByteBuffer.allocate(len); + ch.position(dataStart + offI); + ch.read(buf); + buf.flip(); + var string = StandardCharsets.UTF_8.decode(buf).toString(); + val parts = string.split("\t", 3); + return new ShardLem(Lemma.pack(parts[0]), Integer.parseInt(parts[1]), GSON.fromJson(parts[2], String[].class)); + } catch (Exception e) { + e.printStackTrace(); + return new ShardLem(Lemma.pack("XXX"), -1, new String[0]); + } + } + static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle").resolve(sId + ".idx")).toArray( + Path[]::new); + static Path shardKey(long word) { + int L = Lemma.length(word); + return SHARDS[L]; + } + static String shardKey(String word) { + int L = word.length(); + char ch = word.charAt(0); + if (ch < 'A' || ch > 'Z') ch = '_'; + ///return "" + L + ch; // e.g. "6Z" + return "" + L; // e.g. "6Z" + } + static int readIntAt(FileChannel ch, long pos) throws IOException { + ByteBuffer b = ByteBuffer.allocate(4); + ch.position(pos); + ch.read(b); + b.flip(); + return b.getInt(); + } +} diff --git a/src/main/java/puzzle/SwedishGenerator.java b/src/main/java/puzzle/SwedishGenerator.java index a1c4572..d100ecf 100644 --- a/src/main/java/puzzle/SwedishGenerator.java +++ b/src/main/java/puzzle/SwedishGenerator.java @@ -114,8 +114,14 @@ public class SwedishGenerator { x = y; return y; } - public int randint2bit() { return nextU32() & 3; } - public byte randint2bitByte() { return (byte) (nextU32() & 3); } + public int randint2bit() { return nextU32() & 3; } + public byte randint2bitByte() { + var b = (byte) (nextU32() & 3); + /*if (b == 3) { + return 1; + }*/ + return b; + } public int randint(int max) { return (int) (((nextU32() & 0xFFFFFFFFL) % ((long) max - 0L + 1L))); } public int randint0_SIZE() { return (int) (((nextU32() & 0xFFFFFFFFL) % RANGE_0_SIZE)); } public int randint0_624() { return (int) (((nextU32() & 0xFFFFFFFFL) % RANGE_0_624)); } @@ -166,9 +172,10 @@ public class SwedishGenerator { static class Assign { long w; + int shardIdx; } - static record Slotinfo(int key, long lo, long hi, int score, Assign assign, DictEntry entry) { + public static record Slotinfo(int key, long lo, long hi, int score, Assign assign, DictEntry entry) { public static int wordCount(int k, Slotinfo[] arr) { for (var n = 1; n < arr.length; n++) if (arr[n].assign.w != X) k++; @@ -380,15 +387,17 @@ public class SwedishGenerator { for (var t = 0; t < tries; t++) { var r = rng.nextFloat(); //int idxInArray = rng.biasedIndexPow3(L - 1); - var w = entry.words[idxs[(int) (r * r * r * (L - 1))]]; - var lemIdx = Lemma.unpackIndex(w); + var arrIndex = (int) (r * r * r * (L - 1)); + var w = entry.words[idxs[arrIndex]]; + var lemIdx = Lemma.unpackIndex(w); if (Bit1029.get(used, lemIdx)) continue; low = glo; top = ghi; if (!placeWord(k, slo, shi, w)) continue; Bit1029.set(used, lemIdx); - s.assign.w = w; + s.assign.w = w; + s.assign.shardIdx = arrIndex; if (backtrack(depth + 1)) return true; s.assign.w = X; Bit1029.clear(used, lemIdx); @@ -403,16 +412,18 @@ public class SwedishGenerator { var tries = Math.min(MAX_TRIES_PER_SLOT, N); for (var t = 0; t < tries; t++) { - double r = rng.nextFloat(); - var w = entry.words[(int) (r * r * r * (N - 1))]; - var lemIdx = Lemma.unpackIndex(w); + double r = rng.nextFloat(); + var shardIndx = (int) (r * r * r * (N - 1)); + var w = entry.words[shardIndx]; + var lemIdx = Lemma.unpackIndex(w); if (Bit1029.get(used, lemIdx)) continue; low = glo; top = ghi; if (!placeWord(k, slo, shi, w)) continue; Bit1029.set(used, lemIdx); - s.assign.w = w; + s.assign.w = w; + s.assign.shardIdx = shardIndx; if (backtrack(depth + 1)) return true; s.assign.w = X; Bit1029.clear(used, lemIdx); diff --git a/src/main/java/puzzle/CsvIndexService.java b/src/test/java/puzzle/CsvIndexService.java similarity index 95% rename from src/main/java/puzzle/CsvIndexService.java rename to src/test/java/puzzle/CsvIndexService.java index 854f673..2d83781 100644 --- a/src/main/java/puzzle/CsvIndexService.java +++ b/src/test/java/puzzle/CsvIndexService.java @@ -1,12 +1,18 @@ package puzzle; -import com.google.gson.Gson; import puzzle.SwedishGenerator.Lemma; -import java.io.*; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.Closeable; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; -import java.nio.file.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; import java.util.Arrays; import java.util.function.LongConsumer; import static java.nio.charset.StandardCharsets.US_ASCII; @@ -15,7 +21,6 @@ public final class CsvIndexService implements Closeable { static final ScopedValue SC = ScopedValue.newInstance(); - static final Gson GSON = new Gson(); private static final int MAGIC = 0x4C494458; // "LIDX" private static final int VERSION = 1; static int SIMPEL_IDX = 3; @@ -41,7 +46,7 @@ public final class CsvIndexService if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) { rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\""); } - return GSON.fromJson(rawClue, String[].class); + return Meta.GSON.fromJson(rawClue, String[].class); } public static void lineToLemma(String line, LongConsumer ok) { if (line.isBlank()) { @@ -50,10 +55,6 @@ public final class CsvIndexService var parts = line.split(",", 5); var id = Integer.parseInt(parts[0].trim()); var word = parts[1].trim(); - /* if (!word.matches("^[A-Z]{2,8}$")) { - throw new RuntimeException("Invalid word:" + line); - }*/ - int score = Integer.parseInt(parts[2].trim()); if (score < 1) { if (Main.VERBOSE) System.err.println("Word too complex: " + line); @@ -249,4 +250,5 @@ public final class CsvIndexService offsets = null; } } + } diff --git a/src/test/java/puzzle/DictCodeGen.java b/src/test/java/puzzle/DictCodeGen.java new file mode 100644 index 0000000..c7ab484 --- /dev/null +++ b/src/test/java/puzzle/DictCodeGen.java @@ -0,0 +1,193 @@ +package puzzle; + +import lombok.val; +import org.junit.jupiter.api.Test; +import puzzle.Export.Dicts; +import java.io.BufferedWriter; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; + +public final class DictCodeGen { + + + public static void main(String[] args) throws Exception { + + DictJavaGenerator.main(args); // gebruikt jouw makeDict logic + } + /** + * Generates Java source files for dictionary data, split by word length (2..8), + * and further chunked to avoid "code too large" / constant pool issues. + * + * Output: + * - DictDataL2.java .. DictDataL8.java (arrays chunked) + * - DictData.java (aggregator that builds Dict) + * + * Usage: + * java puzzle.codegen.DictJavaGenerator + * + * Example: + * java puzzle.codegen.DictJavaGenerator nl_score_hints_v3.csv src/main/java puzzle + */ + public final class DictJavaGenerator { + + // tune if needed + private static final int WORDS_CHUNK = 8_192>>>5; // longs per chunk + private static final int POS_CHUNK = 8_192>>>5; // longs per chunk + + public static void main(String[] args) throws Exception { + Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv"); + Path outDir = Path.of(args.length > 1 ? args[1] : "/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle"); + String pkg = "puzzle"; + + SwedishGenerator.Dict dict = buildDict(wordsFile); + + Files.createDirectories(outDir); + + // emit L2..L8 + for (int L = 2; L <= 8; L++) { + var entry = dict.index()[L]; + if (entry == null || entry.words() == null || entry.words().length == 0) { + throw new IllegalStateException("No words for length " + L); + } + writeLengthClass(outDir, pkg, "DictDataL" + L, L, entry); + } + + // emit aggregator + writeAggregator(outDir, pkg, "DictData", dict.length()); + + System.out.println("Generated dictionary sources into: " + outDir.toAbsolutePath()); + } + + private static SwedishGenerator.Dict buildDict(Path wordsPath) throws IOException { + var map = new LongArrayList(100_000); + try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) { + lines.forEach(line -> CsvIndexService.lineToLemma(line, map::add)); + } + return Dicts.makeDict(map.toArray()); + } + + private static void writeAggregator(Path outDir, String pkg, String cls, int totalLen) throws IOException { + Path out = outDir.resolve(cls + ".java"); + try (BufferedWriter w = Files.newBufferedWriter(out, StandardCharsets.UTF_8, + StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) { + + w.write("package " + pkg + ";\n\n"); + w.write("public final class " + cls + " {\n"); + w.write(" private " + cls + "() {}\n\n"); + w.write(" public static final SwedishGenerator.Dict DICT = build();\n\n"); + w.write(" private static SwedishGenerator.Dict build() {\n"); + w.write(" SwedishGenerator.DictEntry[] idx = new SwedishGenerator.DictEntry[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];\n"); + w.write(" idx[2] = DictDataL2.entry();\n"); + w.write(" idx[3] = DictDataL3.entry();\n"); + w.write(" idx[4] = DictDataL4.entry();\n"); + w.write(" idx[5] = DictDataL5.entry();\n"); + w.write(" idx[6] = DictDataL6.entry();\n"); + w.write(" idx[7] = DictDataL7.entry();\n"); + w.write(" idx[8] = DictDataL8.entry();\n"); + w.write(" return new SwedishGenerator.Dict(idx, " + totalLen + ");\n"); + w.write(" }\n"); + w.write("}\n"); + } + } + + private static void writeLengthClass(Path outDir, String pkg, String cls, int L, SwedishGenerator.DictEntry e) throws IOException { + Path out = outDir.resolve(cls + ".java"); + try (BufferedWriter w = Files.newBufferedWriter(out, StandardCharsets.UTF_8, + StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) { + + w.write("package " + pkg + ";\n\n"); + w.write("public final class " + cls + " {\n"); + w.write(" private " + cls + "() {}\n\n"); + + long[] words = e.words(); + + // flatten posBitsets: [rows][cols] -> flat[] + long[][] bs = e.posBitsets(); + int rows = bs.length; + int cols = bs[0].length; + long[] flat = new long[rows * cols]; + int t = 0; + for (int r = 0; r < rows; r++) { + System.arraycopy(bs[r], 0, flat, t, cols); + t += cols; + } + + w.write(" static final int LEN = " + L + ";\n"); + w.write(" static final int ROWS = " + rows + ";\n"); + w.write(" static final int COLS = " + cols + ";\n"); + w.write(" static final int WORDS_LEN = " + words.length + ";\n"); + w.write(" static final int POS_LEN = " + flat.length + ";\n\n"); + + // chunked arrays + int wordChunks = emitLongArrayChunked(w, "WORDS", words, WORDS_CHUNK); + int posChunks = emitLongArrayChunked(w, "POS", flat, POS_CHUNK); + + // joiners + emitJoiner(w, "WORDS", "WORDS", words.length, wordChunks); + emitJoiner(w, "POS", "POS", flat.length, posChunks); + + // entry builder + w.write(" public static SwedishGenerator.DictEntry entry() {\n"); + w.write(" long[] words = WORDS();\n"); + w.write(" long[] flat = POS();\n"); + w.write(" long[][] pos = reshape(flat, ROWS, COLS);\n"); + w.write(" return new SwedishGenerator.DictEntry(words, pos, words.length, (words.length + 63) >>> 6);\n"); + w.write(" }\n\n"); + + // helpers + w.write(" private static int copy(long[] dst, int at, long[] src) {\n"); + w.write(" System.arraycopy(src, 0, dst, at, src.length);\n"); + w.write(" return at + src.length;\n"); + w.write(" }\n\n"); + + w.write(" private static long[][] reshape(long[] flat, int rows, int cols) {\n"); + w.write(" long[][] out = new long[rows][cols];\n"); + w.write(" int k = 0;\n"); + w.write(" for (int r = 0; r < rows; r++) {\n"); + w.write(" System.arraycopy(flat, k, out[r], 0, cols);\n"); + w.write(" k += cols;\n"); + w.write(" }\n"); + w.write(" return out;\n"); + w.write(" }\n"); + + w.write("}\n"); + } + } + + /** Emits baseName_0..k arrays and returns chunkCount. */ + private static int emitLongArrayChunked(BufferedWriter w, String baseName, long[] data, int chunkSize) throws IOException { + int chunks = (data.length + chunkSize - 1) / chunkSize; + for (int ci = 0; ci < chunks; ci++) { + int from = ci * chunkSize; + int to = Math.min(data.length, from + chunkSize); + + w.write(" static final long[] " + baseName + "_" + ci + " = new long[] {\n"); + for (int i = from; i < to; i++) { + w.write(" " + toLongLiteral(data[i]) + (i + 1 < to ? "," : "") + "\n"); + } + w.write(" };\n\n"); + } + return chunks; + } + + private static void emitJoiner(BufferedWriter w, String funcName, String baseName, int totalLen, int chunks) throws IOException { + w.write(" static long[] " + funcName + "() {\n"); + w.write(" long[] out = new long[" + totalLen + "];\n"); + w.write(" int k = 0;\n"); + for (int ci = 0; ci < chunks; ci++) { + w.write(" k = copy(out, k, " + baseName + "_" + ci + ");\n"); + } + w.write(" return out;\n"); + w.write(" }\n\n"); + } + + private static String toLongLiteral(long v) { + // compact unsigned hex literal + return "0x" + Long.toUnsignedString(v, 16) + "L"; + } + } + +} diff --git a/src/test/java/puzzle/DictJavaGeneratorMulti.java b/src/test/java/puzzle/DictJavaGeneratorMulti.java new file mode 100644 index 0000000..b176bde --- /dev/null +++ b/src/test/java/puzzle/DictJavaGeneratorMulti.java @@ -0,0 +1,279 @@ +package puzzle; + +import org.junit.jupiter.api.Test; +import puzzle.Export.Dicts; + +import java.io.*; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.*; +import java.util.Arrays; + +public final class DictJavaGeneratorMulti { + + // Smaller = more files, but safer for javac/class limits. + private static final int WORDS_CHUNK = 8_192; + private static final int POS_CHUNK = 8_192; + @Test + public void dictCodeGen15() { + System.out.println(DictData.DICT); + } + public static void main(String[] args) throws Exception { + Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv"); + Path outDir = Path.of(args.length > 1 ? args[1] : "/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle"); + String pkg = "puzzle"; + + SwedishGenerator.Dict dict = buildDict(wordsFile); + + Files.createDirectories(outDir); + + // Generate L2..L8 + for (int L = 2; L <= 8; L++) { + var entry = dict.index()[L]; + if (entry == null || entry.words() == null || entry.words().length == 0) { + throw new IllegalStateException("No words for length " + L); + } + writeLengthBundle(outDir, pkg, L, entry); + } + + // Aggregator + writeAggregator(outDir, pkg, "DictData", dict.length()); + generateHintShards(wordsFile, outDir); + System.out.println("Generated sources into: " + outDir.toAbsolutePath()); + } + + private static SwedishGenerator.Dict buildDict(Path wordsPath) throws IOException { + var map = new LongArrayList(100_000); + try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) { + lines.forEach(line -> CsvIndexService.lineToLemma(line, map::add)); + } + return Dicts.makeDict(map.toArray()); + } + + static final int VERSION = 1; + static String wordFromLine(String line) { + // ID,WORD,*,*,"JSON" + var parts = line.split(",", 5); + return parts[1].trim(); + } + static final class IntArrayList { + + int[] a; + int size; + IntArrayList(int cap) { a = new int[cap]; } + void add(int v) { + if (size == a.length) a = Arrays.copyOf(a, a.length * 2); + a[size++] = v; + } + int size() { return size; } + int get(int i) { return a[i]; } + int[] toArray() { return Arrays.copyOf(a, size); } + } + + static final class ShardBuilder { + + final IntArrayList offsets = new IntArrayList(4096); + final ByteArrayOutputStream data = new ByteArrayOutputStream(1 << 20); // grows + void addRecord(byte[] rec) throws IOException { + offsets.add(data.size()); + data.write(rec); + } + } + static void generateHintShards(Path csv, Path outDir) throws IOException { + Files.createDirectories(outDir); + + var builders = new java.util.HashMap(256); + + try (var lines = Files.lines(csv, StandardCharsets.UTF_8)) { + lines.forEach(line -> { + if (line == null || line.isBlank()) return; + + String word = wordFromLine(line); + String[] clues = CsvIndexService.lineToClue(line); + int simpel = CsvIndexService.lineToSimpel(line); + + // serialize to: WORD \t JSON \n + // (als je al JSON string wilt bewaren: gebruik Gson.toJson(clues)) + String json = Meta.GSON.toJson(clues); + String recStr = word + "\t" + simpel + "\t" + json + "\n"; + byte[] rec = recStr.getBytes(StandardCharsets.UTF_8); + + String key = Meta.shardKey(word); + ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder()); + try { + sb.addRecord(rec); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + } catch (UncheckedIOException uioe) { + throw uioe.getCause(); + } + + // flush all shards to disk as .idx (e.g. 6Z.idx) + for (var e : builders.entrySet()) { + writeIndexedShard(outDir.resolve(e.getKey() + ".idx"), e.getValue()); + } + } + static void writeIndexedShard(Path out, ShardBuilder sb) throws IOException { + int n = sb.offsets.size(); + int[] offs = sb.offsets.toArray(); + byte[] data = sb.data.toByteArray(); + + try (FileChannel ch = FileChannel.open(out, + StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, + StandardOpenOption.WRITE)) { + + // header + ByteBuffer hdr = ByteBuffer.allocate(12); + hdr.putInt(Meta.SHARD_MAGIC).putInt(VERSION).putInt(n).flip(); + ch.write(hdr); + + // offsets table (int per record) + ByteBuffer tbl = ByteBuffer.allocate(n * 4); + for (int i = 0; i < n; i++) tbl.putInt(offs[i]); + tbl.flip(); + ch.write(tbl); + + // data + ch.write(ByteBuffer.wrap(data)); + } + } + + private static void writeAggregator(Path outDir, String pkg, String cls, int totalLen) throws IOException { + Path out = outDir.resolve(cls + ".java"); + try (BufferedWriter w = writer(out)) { + w.write("package " + pkg + ";\n\n"); + w.write("public final class " + cls + " {\n"); + w.write(" private " + cls + "() {}\n\n"); + w.write(" public static final SwedishGenerator.Dict DICT = build();\n\n"); + w.write(" private static SwedishGenerator.Dict build() {\n"); + w.write(" SwedishGenerator.DictEntry[] idx = new SwedishGenerator.DictEntry[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];\n"); + for (int L = 2; L <= 8; L++) w.write(" idx[" + L + "] = DictDataL" + L + ".entry();\n"); + w.write(" return new SwedishGenerator.Dict(idx, " + totalLen + ");\n"); + w.write(" }\n"); + w.write("}\n"); + } + } + + private static void writeLengthBundle(Path outDir, String pkg, int L, SwedishGenerator.DictEntry e) throws IOException { + long[] words = e.words(); + + // flatten posBitsets: [rows][cols] -> flat[] + long[][] bs = e.posBitsets(); + int rows = bs.length; + int cols = bs[0].length; + long[] flat = new long[rows * cols]; + int t = 0; + for (int r = 0; r < rows; r++) { + System.arraycopy(bs[r], 0, flat, t, cols); + t += cols; + } + + String base = "DictDataL" + L; + + // 1) chunk classes + int wChunks = writeChunkClasses(outDir, pkg, base + "W", words, WORDS_CHUNK); + int pChunks = writeChunkClasses(outDir, pkg, base + "P", flat, POS_CHUNK); + + // 2) assembler class + writeLengthAssembler(outDir, pkg, base, L, rows, cols, words.length, flat.length, wChunks, pChunks); + } + + /** Writes classes like Prefix0..PrefixN each with static final long[] DATA. Returns chunk count. */ + private static int writeChunkClasses(Path outDir, String pkg, String prefix, long[] data, int chunkSize) throws IOException { + int chunks = (data.length + chunkSize - 1) / chunkSize; + for (int ci = 0; ci < chunks; ci++) { + int from = ci * chunkSize; + int to = Math.min(data.length, from + chunkSize); + + Path out = outDir.resolve(prefix + ci + ".java"); + try (BufferedWriter w = writer(out)) { + w.write("package " + pkg + ";\n\n"); + w.write("public final class " + prefix + ci + " {\n"); + w.write(" private " + prefix + ci + "() {}\n"); + w.write(" public static final long[] DATA = new long[] {\n"); + for (int i = from; i < to; i++) { + w.write(" " + toLongLiteral(data[i]) + (i + 1 < to ? "," : "") + "\n"); + } + w.write(" };\n"); + w.write("}\n"); + } + } + return chunks; + } + + private static void writeLengthAssembler(Path outDir, String pkg, String cls, int L, + int rows, int cols, + int wordsLen, int posLen, + int wChunks, int pChunks) throws IOException { + Path out = outDir.resolve(cls + ".java"); + try (BufferedWriter w = writer(out)) { + w.write("package " + pkg + ";\n\n"); + w.write("public final class " + cls + " {\n"); + w.write(" private " + cls + "() {}\n\n"); + + w.write(" static final int LEN = " + L + ";\n"); + w.write(" static final int ROWS = " + rows + ";\n"); + w.write(" static final int COLS = " + cols + ";\n"); + w.write(" static final int WORDS_LEN = " + wordsLen + ";\n"); + w.write(" static final int POS_LEN = " + posLen + ";\n\n"); + + // assemble words + w.write(" private static long[] words() {\n"); + w.write(" long[] out = new long[WORDS_LEN];\n"); + w.write(" int k = 0;\n"); + for (int ci = 0; ci < wChunks; ci++) { + w.write(" k = copy(out, k, DictDataL" + L + "W" + ci + ".DATA);\n"); + } + w.write(" return out;\n"); + w.write(" }\n\n"); + + // assemble pos + w.write(" private static long[] posFlat() {\n"); + w.write(" long[] out = new long[POS_LEN];\n"); + w.write(" int k = 0;\n"); + for (int ci = 0; ci < pChunks; ci++) { + w.write(" k = copy(out, k, DictDataL" + L + "P" + ci + ".DATA);\n"); + } + w.write(" return out;\n"); + w.write(" }\n\n"); + + // entry + w.write(" public static SwedishGenerator.DictEntry entry() {\n"); + w.write(" long[] wds = words();\n"); + w.write(" long[] flat = posFlat();\n"); + w.write(" long[][] pos = reshape(flat, ROWS, COLS);\n"); + w.write(" return new SwedishGenerator.DictEntry(wds, pos, wds.length, (wds.length + 63) >>> 6);\n"); + w.write(" }\n\n"); + + // helpers + w.write(" private static int copy(long[] dst, int at, long[] src) {\n"); + w.write(" System.arraycopy(src, 0, dst, at, src.length);\n"); + w.write(" return at + src.length;\n"); + w.write(" }\n\n"); + + w.write(" private static long[][] reshape(long[] flat, int rows, int cols) {\n"); + w.write(" long[][] out = new long[rows][cols];\n"); + w.write(" int k = 0;\n"); + w.write(" for (int r = 0; r < rows; r++) {\n"); + w.write(" System.arraycopy(flat, k, out[r], 0, cols);\n"); + w.write(" k += cols;\n"); + w.write(" }\n"); + w.write(" return out;\n"); + w.write(" }\n"); + + w.write("}\n"); + } + } + + private static BufferedWriter writer(Path out) throws IOException { + return Files.newBufferedWriter(out, StandardCharsets.UTF_8, + StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE); + } + + private static String toLongLiteral(long v) { + return "0x" + Long.toUnsignedString(v, 16) + "L"; + } +} diff --git a/src/test/java/puzzle/ExportFormatTest.java b/src/test/java/puzzle/ExportFormatTest.java index fe2b4dc..6d01251 100644 --- a/src/test/java/puzzle/ExportFormatTest.java +++ b/src/test/java/puzzle/ExportFormatTest.java @@ -10,9 +10,11 @@ import puzzle.Export.PuzzleResult; import puzzle.Export.Rewards; import puzzle.SwedishGenerator.Assign; import puzzle.SwedishGenerator.FillResult; +import puzzle.SwedishGenerator.Lemma; import puzzle.SwedishGenerator.Slotinfo; import puzzle.SwedishGeneratorTest.Idx; import java.io.IOException; +import java.nio.file.Path; import java.nio.file.Paths; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -69,7 +71,7 @@ public class ExportFormatTest { var fillResult = new FillResult(true, 0, 0, 0, 0, new FillStats()); var puzzleResult = new PuzzleResult(new Clued(clues), grid, new Slotinfo[]{ - new Slotinfo(key, lo, 0L, 0, new Assign(TEST), null) + new Slotinfo(key, lo, 0L, 0, new Assign(TEST, 0), null) }, fillResult); var rewards = new Rewards(10, 5, 1); @@ -134,5 +136,15 @@ public class ExportFormatTest { throw new RuntimeException(e); } } - + @Test + void testShardToClue() { + val index = 1; + val word = DictData.DICT.index()[3].words()[index]; + val assigned = new Assign(word, index); + val lemma = Lemma.unpackIndex(word); + var word1 = Lemma.asWord(word); + val shard = Meta.shardKey(assigned.w); + val clue = Meta.readRecord(shard, index); + assertNotNull(clue); + } } diff --git a/src/test/java/puzzle/MainTest.java b/src/test/java/puzzle/MainTest.java index 0d7a06d..d8777da 100644 --- a/src/test/java/puzzle/MainTest.java +++ b/src/test/java/puzzle/MainTest.java @@ -13,6 +13,10 @@ import puzzle.Export.Rewards; import puzzle.Main.Opts; import puzzle.Masker.Clues; import puzzle.SwedishGenerator.Rng; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -43,7 +47,16 @@ public class MainTest { this.tries = 1; this.verbose = false; }}; - static final Dict dict = Dicts.loadDict(opts.wordsPath); + static final Dict dict = loadDict(opts.wordsPath); + public static Dict loadDict(String wordsPath) { + var map = new LongArrayList(100_000); + try (var lines = Files.lines(Path.of(wordsPath), StandardCharsets.UTF_8)) { + lines.forEach(line -> CsvIndexService.lineToLemma(line, map::add)); + return Dicts.makeDict(map.toArray()); + } catch (IOException e) { + throw new RuntimeException("Failed to load dictionary from " + wordsPath, e); + } + } @Test void testExtractSlots() { @@ -173,25 +186,35 @@ public class MainTest { } @Test void testFiller2() { - val mask = "1 000000\n" + - "1 \n" + - "1 \n" + - "3 3 \n" + - "3 0 3 \n" + - "3 \n" + - "3 \n" + - "222 3"; + val rng = new Rng(-343913721); + val mask = Clues.parse( + "1 000000\n" + + "1 \n" + + "1 \n" + + "3 3 \n" + + "3 0 3 \n" + + "3 \n" + + "3 \n" + + "222 3"); + Assertions.assertEquals(20, mask.clueCount()); + var slots = Masker.extractSlots(mask, dict.index()); + val slotInfo = Masker.scoreSlots(new int[slots.length], slots); + var grid = mask.toGrid(); + var filled = fillMask(rng, slotInfo, grid, false); + // val res = new PuzzleResult(new Clued(mask), new Gridded(grid), slotInfo, filled).exportFormatFromFilled(0, new Rewards(0, 0, 0)); } @Test void testFiller() { val rng = new Rng(-343913721); - val mask = new Clues( - 74732156493031040L, - 193L, - 281475397248512L, - 128L, - 422762372923520L, - 192L); + val mask = Clues.parse( + " 3 300\n" + + " 1 \n" + + " 1 \n" + + " 3 0 \n" + + " 31 \n" + + " 1 \n" + + " 1 2\n" + + "21 22 3"); var slots = Masker.extractSlots(mask, dict.index()); val slotInfo = Masker.scoreSlots(new int[slots.length], slots); var grid = mask.toGrid(); @@ -204,6 +227,7 @@ public class MainTest { var g = new Gridded(grid); g.gridToString(mask); var aa = new PuzzleResult(new Clued(mask), g, slotInfo, filled).exportFormatFromFilled(1, new Rewards(1, 1, 1)); + System.out.println(String.join("\n", aa.grid())); } @Test