package puzzle;

import lombok.val;
import puzzle.Export.Dicts;
import puzzle.Export.IntListDTO;
import puzzle.SwedishGenerator.Lemma;

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.HashMap;

/**
 * Command-line generator that reads a word list (CSV) and emits Java source files embedding the
 * resulting dictionary as {@code long[]} literals.
 *
 * <p>For every word length in {@code MIN_LEN..MAX_LEN} it writes:
 * <ul>
 *   <li>chunk classes {@code DictDataL<len>W<i>} (words) and {@code DictDataL<len>P<i>}
 *       (flattened position bitsets), each exposing {@code static long[] get()};</li>
 *   <li>an assembler class {@code DictDataL<len>} that concatenates the chunks and builds a
 *       {@code SwedishGenerator.DictEntry};</li>
 * </ul>
 * plus one aggregator class that wires all per-length entries into a {@code SwedishGenerator.Dict}.
 */
public final class DictJavaGeneratorMulti {

    // Smaller = more files, but safer for javac/class limits.
    private static final int WORDS_CHUNK = 8_192;
    private static final int POS_CHUNK = 8_192;

    /** Inclusive range of word lengths for which sources are generated. */
    private static final int MIN_LEN = 2;
    private static final int MAX_LEN = 8;

    /** Name prefix shared by the per-length assembler classes and their chunk classes. */
    private static final String LENGTH_CLASS_PREFIX = "DictDataL";

    /** Left shift applied to the packed (record index, len) value before it is OR-ed into a word. */
    private static final int PACK_SHIFT = 40;
    /** Number of low bits of the packed value that hold {@code len}. */
    private static final int LEN_BITS = 3;
    /** Record indices at or above this value would lose bits when shifted into a 64-bit long. */
    private static final int MAX_RECORDS_PER_SHARD = 1 << (Long.SIZE - PACK_SHIFT - LEN_BITS);

    /**
     * Entry point.
     *
     * @param args {@code args[0]}: optional path of the words CSV (default
     *             {@code nl_score_hints_v3.csv}); {@code args[1]}: optional output directory
     *             (default {@code src/main/generated-sources/puzzle})
     * @throws IllegalStateException if the dictionary has no words for one of the generated lengths
     */
    public static void main(String[] args) throws Exception {
        Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv");
        Path outDir = Path.of(args.length > 1 ? args[1] : "src/main/generated-sources/puzzle");
        String pkg = "puzzle";

        // Key type is whatever Meta.shardKey returns; that type is not visible in this file,
        // so the map is keyed by Object (primitive keys are boxed).
        // NOTE(review): the builders filled by buildDict are not written out anywhere in this
        // method (writeIndexedShard is not called here) - confirm shards are persisted elsewhere.
        HashMap<Object, ShardBuilder> builders = new HashMap<>(16);
        SwedishGenerator.Dict dict = buildDict(wordsFile, builders);

        Files.createDirectories(outDir);

        // Generate one bundle of classes per word length.
        for (int L = MIN_LEN; L <= MAX_LEN; L++) {
            var entry = dict.index()[L];
            if (entry == null || entry.words() == null || entry.words().length == 0) {
                throw new IllegalStateException("No words for length " + L);
            }
            writeLengthBundle(outDir, pkg, L, entry);
        }

        // Aggregator
        writeAggregator(outDir, pkg, "DictData", dict.length());
        System.out.println("Generated sources into: " + outDir.toAbsolutePath());
    }

    /**
     * Reads the words file line by line, appends one record per lemma to the shard builder selected
     * by {@code Meta.shardKey}, and builds the dictionary from the packed word values.
     *
     * @param wordsPath UTF-8 words file
     * @param builders  shard builders by shard key; missing builders are created on demand
     * @return the dictionary produced by {@code Dicts.makeDict}
     * @throws IOException           if the words file cannot be read
     * @throws UncheckedIOException  if appending a record fails while streaming lines
     * @throws IllegalStateException if a shard receives more records than fit in the packed index
     */
    private static SwedishGenerator.Dict buildDict(Path wordsPath, HashMap<Object, ShardBuilder> builders)
            throws IOException {
        var map = new LongArrayList(100_000);
        try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
            lines.forEach(line -> {
                CsvIndexService.lineToLemma(line, w -> {
                    long len = Lemma.length0(w);
                    String word = Lemma.asWord(w);
                    String[] clues = CsvIndexService.lineToClue(line);
                    int simpel = CsvIndexService.lineToSimpel(line);

                    // Record layout: WORD \t SIMPEL \t JSON \n (clues serialized as JSON via Gson).
                    String json = Meta.GSON.toJson(clues);
                    String recStr = word + "\t" + simpel + "\t" + json + "\n";
                    byte[] rec = recStr.getBytes(StandardCharsets.UTF_8);

                    var key = Meta.shardKey(w);
                    ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
                    try {
                        int recordIndex = sb.addRecord(rec);
                        if (recordIndex >= MAX_RECORDS_PER_SHARD) {
                            // Shifting further would silently drop the high bits of the index.
                            throw new IllegalStateException(
                                    "Too many records in shard " + key + ": index " + recordIndex
                                            + " does not fit in the packed word");
                        }
                        // NOTE(review): len is OR-ed into the low LEN_BITS bits; this presumably
                        // relies on Lemma.length0 returning a value in 0..7 - confirm.
                        long index = ((long) recordIndex << LEN_BITS) | len;
                        // NOTE(review): assumes w itself has no bits set at or above PACK_SHIFT - confirm.
                        map.add(w | (index << PACK_SHIFT));
                    } catch (IOException e) {
                        throw new UncheckedIOException(e);
                    }
                });
            });
        }
        return Dicts.makeDict(map.toArray());
    }

    /** Version number written into the header of every shard file. */
    static final int VERSION = 1;

    /** Accumulates the records of one shard in memory together with each record's start offset. */
    static final class ShardBuilder {
        /** Start offset (in bytes, within {@link #data}) of every record, in insertion order. */
        final IntListDTO offsets = new IntListDTO(4096);
        /** Concatenated record bytes; the stream grows as needed. */
        final ByteArrayOutputStream data = new ByteArrayOutputStream(1 << 20);

        /**
         * Appends a record.
         *
         * @param rec raw record bytes
         * @return the index of the record within this shard (number of records added before it)
         * @throws IOException declared by {@link OutputStream#write(byte[])}
         */
        int addRecord(byte[] rec) throws IOException {
            int byteOffset = data.size();
            val recordIndex = offsets.size();
            offsets.add(byteOffset);
            data.write(rec);
            return recordIndex;
        }
    }

    /**
     * Writes a shard to {@code out}, replacing any existing file. Layout, all ints written through
     * a default (big-endian) {@link ByteBuffer}: magic, {@link #VERSION}, record count, one offset
     * per record, then the record bytes.
     *
     * @param out target file
     * @param sb  shard content
     * @throws IOException if the file cannot be opened or written
     */
    static void writeIndexedShard(Path out, ShardBuilder sb) throws IOException {
        int n = sb.offsets.size();
        int[] offs = sb.offsets.toArray();
        byte[] data = sb.data.toByteArray();

        try (FileChannel ch = FileChannel.open(out,
                StandardOpenOption.CREATE,
                StandardOpenOption.TRUNCATE_EXISTING,
                StandardOpenOption.WRITE)) {
            // header
            ByteBuffer hdr = ByteBuffer.allocate(3 * Integer.BYTES);
            hdr.putInt(Meta.SHARD_MAGIC).putInt(VERSION).putInt(n).flip();
            writeFully(ch, hdr);

            // offsets table (int per record)
            ByteBuffer tbl = ByteBuffer.allocate(n * Integer.BYTES);
            for (int i = 0; i < n; i++) {
                tbl.putInt(offs[i]);
            }
            tbl.flip();
            writeFully(ch, tbl);

            // data
            writeFully(ch, ByteBuffer.wrap(data));
        }
    }

    /** Writes the whole buffer; a single channel write is allowed to consume only part of it. */
    private static void writeFully(FileChannel ch, ByteBuffer buf) throws IOException {
        while (buf.hasRemaining()) {
            ch.write(buf);
        }
    }

    /**
     * Writes the aggregator class {@code cls}, whose static {@code DICT} field is built from the
     * per-length assembler classes.
     *
     * @param outDir   output directory
     * @param pkg      package name of the generated class
     * @param cls      simple name of the generated class
     * @param totalLen value passed as second argument to the generated {@code SwedishGenerator.Dict}
     * @throws IOException if the file cannot be written
     */
    private static void writeAggregator(Path outDir, String pkg, String cls, int totalLen) throws IOException {
        Path out = outDir.resolve(cls + ".java");
        try (BufferedWriter w = writer(out)) {
            w.write("package " + pkg + ";\n\n");
            w.write("public final class " + cls + " {\n");
            w.write(" private " + cls + "() {}\n\n");
            w.write(" public static final SwedishGenerator.Dict DICT = build();\n\n");
            w.write(" private static SwedishGenerator.Dict build() {\n");
            w.write(" SwedishGenerator.DictEntry[] idx = new SwedishGenerator.DictEntry[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];\n");
            for (int L = MIN_LEN; L <= MAX_LEN; L++) {
                w.write(" idx[" + L + "] = " + LENGTH_CLASS_PREFIX + L + ".entry();\n");
            }
            w.write(" return new SwedishGenerator.Dict(idx, " + totalLen + ");\n");
            w.write(" }\n");
            w.write("}\n");
        }
    }

    /**
     * Writes all classes for one word length: the word chunks, the flattened position-bitset
     * chunks and the assembler class.
     *
     * @param outDir output directory
     * @param pkg    package name of the generated classes
     * @param L      word length
     * @param e      dictionary entry for that length
     * @throws IOException           if a file cannot be written
     * @throws IllegalStateException if the entry has no position bitsets
     */
    private static void writeLengthBundle(Path outDir, String pkg, int L, SwedishGenerator.DictEntry e)
            throws IOException {
        long[] words = e.words();

        long[][] bs = e.posBitsets();
        if (bs == null || bs.length == 0) {
            // Fail with a clear message instead of an index error on bs[0] below.
            throw new IllegalStateException("No position bitsets for length " + L);
        }

        // flatten posBitsets: [rows][cols] -> flat[]; the column count is taken from the first row
        int rows = bs.length;
        int cols = bs[0].length;
        long[] flat = new long[rows * cols];
        int t = 0;
        for (int r = 0; r < rows; r++) {
            System.arraycopy(bs[r], 0, flat, t, cols);
            t += cols;
        }

        String base = LENGTH_CLASS_PREFIX + L;

        // 1) chunk classes
        int wChunks = writeChunkClasses(outDir, pkg, base + "W", words, WORDS_CHUNK);
        int pChunks = writeChunkClasses(outDir, pkg, base + "P", flat, POS_CHUNK);

        // 2) assembler class
        writeLengthAssembler(outDir, pkg, base, L, rows, cols, words.length, flat.length, wChunks, pChunks);
    }

    /**
     * Writes classes {@code <prefix>0 .. <prefix>N}, each with a {@code static long[] get()} that
     * returns one slice of {@code data} as an array literal.
     *
     * @param outDir    output directory
     * @param pkg       package name of the generated classes
     * @param prefix    class name prefix; the chunk number is appended
     * @param data      values to emit
     * @param chunkSize maximum number of values per class
     * @return the number of chunk classes written (0 when {@code data} is empty)
     * @throws IOException if a file cannot be written
     */
    private static int writeChunkClasses(Path outDir, String pkg, String prefix, long[] data, int chunkSize)
            throws IOException {
        int chunks = (data.length + chunkSize - 1) / chunkSize;
        for (int ci = 0; ci < chunks; ci++) {
            int from = ci * chunkSize;
            int to = Math.min(data.length, from + chunkSize);
            String name = prefix + ci;

            Path out = outDir.resolve(name + ".java");
            try (BufferedWriter w = writer(out)) {
                w.write("package " + pkg + ";\n\n");
                w.write("public final class " + name + " {\n");
                w.write(" private " + name + "() {}\n");
                w.write(" public static long[] get() {\n");
                w.write(" return new long[] { \n");
                for (int i = from; i < to; i++) {
                    // no trailing comma after the last element of the chunk
                    w.write(" " + toLongLiteral(data[i]) + (i + 1 < to ? "," : "") + "\n");
                }
                w.write(" };\n");
                w.write(" }\n");
                w.write("}\n");
            }
        }
        return chunks;
    }

    /**
     * Writes the per-length assembler class {@code cls}: size constants, {@code words()} and
     * {@code posFlat()} that concatenate the chunk classes, an {@code entry()} factory, and the
     * {@code copy}/{@code reshape} helpers used by them.
     *
     * @param outDir   output directory
     * @param pkg      package name of the generated class
     * @param cls      simple name of the generated class
     * @param L        word length (also selects the chunk class names)
     * @param rows     row count of the position bitsets
     * @param cols     column count of the position bitsets
     * @param wordsLen total number of word values
     * @param posLen   total number of flattened bitset values
     * @param wChunks  number of word chunk classes
     * @param pChunks  number of bitset chunk classes
     * @throws IOException if the file cannot be written
     */
    private static void writeLengthAssembler(Path outDir, String pkg, String cls, int L, int rows, int cols,
                                             int wordsLen, int posLen, int wChunks, int pChunks)
            throws IOException {
        Path out = outDir.resolve(cls + ".java");
        try (BufferedWriter w = writer(out)) {
            w.write("package " + pkg + ";\n\n");
            w.write("public final class " + cls + " {\n");
            w.write(" private " + cls + "() {}\n\n");
            w.write(" static final int LEN = " + L + ";\n");
            w.write(" static final int ROWS = " + rows + ";\n");
            w.write(" static final int COLS = " + cols + ";\n");
            w.write(" static final int WORDS_LEN = " + wordsLen + ";\n");
            w.write(" static final int POS_LEN = " + posLen + ";\n\n");

            // assemble words
            writeConcatMethod(w, "words", LENGTH_CLASS_PREFIX + L + "W", "WORDS_LEN", wChunks);

            // assemble pos
            writeConcatMethod(w, "posFlat", LENGTH_CLASS_PREFIX + L + "P", "POS_LEN", pChunks);

            // entry
            w.write(" public static SwedishGenerator.DictEntry entry() {\n");
            w.write(" long[] wds = words();\n");
            w.write(" long[] flat = posFlat();\n");
            w.write(" long[][] pos = reshape(flat, ROWS, COLS);\n");
            w.write(" return new SwedishGenerator.DictEntry(wds, pos, wds.length, (wds.length + 63) >>> 6);\n");
            w.write(" }\n\n");

            // helpers
            w.write(" private static int copy(long[] dst, int at, long[] src) {\n");
            w.write(" System.arraycopy(src, 0, dst, at, src.length);\n");
            w.write(" return at + src.length;\n");
            w.write(" }\n\n");
            w.write(" private static long[][] reshape(long[] flat, int rows, int cols) {\n");
            w.write(" long[][] out = new long[rows][cols];\n");
            w.write(" int k = 0;\n");
            w.write(" for (int r = 0; r < rows; r++) {\n");
            w.write(" System.arraycopy(flat, k, out[r], 0, cols);\n");
            w.write(" k += cols;\n");
            w.write(" }\n");
            w.write(" return out;\n");
            w.write(" }\n");
            w.write("}\n");
        }
    }

    /**
     * Emits a generated {@code private static long[] <method>()} that returns the single chunk
     * directly when there is exactly one, and otherwise copies every chunk into a new array of
     * size {@code lenConst}.
     */
    private static void writeConcatMethod(BufferedWriter w, String method, String chunkPrefix,
                                          String lenConst, int chunks) throws IOException {
        w.write(" private static long[] " + method + "() {\n");
        if (chunks == 1) {
            w.write(" return " + chunkPrefix + "0.get();\n");
        } else {
            w.write(" long[] out = new long[" + lenConst + "];\n");
            w.write(" int k = 0;\n");
            for (int ci = 0; ci < chunks; ci++) {
                w.write(" k = copy(out, k, " + chunkPrefix + ci + ".get());\n");
            }
            w.write(" return out;\n");
        }
        w.write(" }\n\n");
    }

    /** Opens {@code out} as a UTF-8 writer, creating the file or truncating an existing one. */
    private static BufferedWriter writer(Path out) throws IOException {
        return Files.newBufferedWriter(out, StandardCharsets.UTF_8,
                StandardOpenOption.CREATE,
                StandardOpenOption.TRUNCATE_EXISTING,
                StandardOpenOption.WRITE);
    }

    /** Formats {@code v} as a Java {@code long} hex literal using its unsigned 64-bit value. */
    private static String toLongLiteral(long v) {
        return "0x" + Long.toUnsignedString(v, 16) + "L";
    }
}