diff --git a/src/main/java/puzzle/Export.java b/src/main/java/puzzle/Export.java index 035b734..b23480c 100644 --- a/src/main/java/puzzle/Export.java +++ b/src/main/java/puzzle/Export.java @@ -1,6 +1,7 @@ package puzzle; import lombok.Getter; +import lombok.NoArgsConstructor; import lombok.experimental.Accessors; import lombok.experimental.Delegate; import lombok.val; @@ -339,13 +340,18 @@ public record Export() { @Getter @Accessors(fluent = true) + @NoArgsConstructor static final class IntListDTO { int[] data = new int[8]; int size = 0; + public IntListDTO(int size) { + data = new int[size]; + } void add(int v) { if (size >= data.length) data = Arrays.copyOf(data, data.length * 2); data[size++] = v; } + int[] toArray() { return Arrays.copyOf(data, size); } } } diff --git a/src/main/java/puzzle/SwedishGenerator.java b/src/main/java/puzzle/SwedishGenerator.java index d100ecf..d826542 100644 --- a/src/main/java/puzzle/SwedishGenerator.java +++ b/src/main/java/puzzle/SwedishGenerator.java @@ -388,7 +388,8 @@ public class SwedishGenerator { var r = rng.nextFloat(); //int idxInArray = rng.biasedIndexPow3(L - 1); var arrIndex = (int) (r * r * r * (L - 1)); - var w = entry.words[idxs[arrIndex]]; + var shardIdx = idxs[arrIndex]; + var w = entry.words[shardIdx]; var lemIdx = Lemma.unpackIndex(w); if (Bit1029.get(used, lemIdx)) continue; low = glo; @@ -397,7 +398,7 @@ public class SwedishGenerator { Bit1029.set(used, lemIdx); s.assign.w = w; - s.assign.shardIdx = arrIndex; + s.assign.shardIdx = shardIdx; if (backtrack(depth + 1)) return true; s.assign.w = X; Bit1029.clear(used, lemIdx); diff --git a/src/test/java/puzzle/DictCodeGen.java b/src/test/java/puzzle/DictCodeGen.java index c7ab484..325724c 100644 --- a/src/test/java/puzzle/DictCodeGen.java +++ b/src/test/java/puzzle/DictCodeGen.java @@ -1,8 +1,7 @@ package puzzle; -import lombok.val; -import org.junit.jupiter.api.Test; import puzzle.Export.Dicts; +import puzzle.SwedishGenerator.Dict; import java.io.BufferedWriter; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -12,7 +11,6 @@ import java.nio.file.StandardOpenOption; public final class DictCodeGen { - public static void main(String[] args) throws Exception { DictJavaGenerator.main(args); // gebruikt jouw makeDict logic @@ -34,17 +32,30 @@ public final class DictCodeGen { public final class DictJavaGenerator { // tune if needed - private static final int WORDS_CHUNK = 8_192>>>5; // longs per chunk - private static final int POS_CHUNK = 8_192>>>5; // longs per chunk + private static final int WORDS_CHUNK = 8_192 >>> 5; // longs per chunk + private static final int POS_CHUNK = 8_192 >>> 5; // longs per chunk public static void main(String[] args) throws Exception { - Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv"); - Path outDir = Path.of(args.length > 1 ? args[1] : "/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle"); - String pkg = "puzzle"; + Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv"); + Path outDir = Path.of(args.length > 1 ? args[1] : "/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle"); + writeDict(wordsFile, outDir); + } + public static Dict writeDict(Path wordsFile, Path outDir) { - SwedishGenerator.Dict dict = buildDict(wordsFile); + String pkg = "puzzle"; - Files.createDirectories(outDir); + SwedishGenerator.Dict dict = null; + try { + dict = buildDict(wordsFile); + } catch (IOException e) { + throw new RuntimeException(e); + } + + try { + Files.createDirectories(outDir); + } catch (IOException e) { + throw new RuntimeException(e); + } // emit L2..L8 for (int L = 2; L <= 8; L++) { @@ -52,13 +63,22 @@ public final class DictCodeGen { if (entry == null || entry.words() == null || entry.words().length == 0) { throw new IllegalStateException("No words for length " + L); } - writeLengthClass(outDir, pkg, "DictDataL" + L, L, entry); + try { + writeLengthClass(outDir, pkg, "DictDataL" + L, L, entry); + } catch (IOException e) { + throw new RuntimeException(e); + } } // emit aggregator - writeAggregator(outDir, pkg, "DictData", dict.length()); + try { + writeAggregator(outDir, pkg, "DictData", dict.length()); + } catch (IOException e) { + throw new RuntimeException(e); + } System.out.println("Generated dictionary sources into: " + outDir.toAbsolutePath()); + return dict; } private static SwedishGenerator.Dict buildDict(Path wordsPath) throws IOException { diff --git a/src/test/java/puzzle/DictJavaGeneratorMulti.java b/src/test/java/puzzle/DictJavaGeneratorMulti.java index b176bde..d282199 100644 --- a/src/test/java/puzzle/DictJavaGeneratorMulti.java +++ b/src/test/java/puzzle/DictJavaGeneratorMulti.java @@ -1,7 +1,11 @@ package puzzle; import org.junit.jupiter.api.Test; +import puzzle.DictCodeGen.DictJavaGenerator; import puzzle.Export.Dicts; +import puzzle.Export.IntListDTO; +import puzzle.SwedishGenerator.Dict; +import puzzle.SwedishGenerator.Lemma; import java.io.*; import java.nio.ByteBuffer; @@ -39,7 +43,10 @@ public final class DictJavaGeneratorMulti { // Aggregator writeAggregator(outDir, pkg, "DictData", dict.length()); - generateHintShards(wordsFile, outDir); + var csv = Paths.get("nl_score_hints_v3.csv"); + var idx = Paths.get("nl_score_hints_v3.idx"); + + ScopedValue.where(CsvIndexService.SC, new CsvIndexService(csv, idx)).run(() -> generateHintShards(dict, outDir)); System.out.println("Generated sources into: " + outDir.toAbsolutePath()); } @@ -57,23 +64,10 @@ public final class DictJavaGeneratorMulti { var parts = line.split(",", 5); return parts[1].trim(); } - static final class IntArrayList { - - int[] a; - int size; - IntArrayList(int cap) { a = new int[cap]; } - void add(int v) { - if (size == a.length) a = Arrays.copyOf(a, a.length * 2); - a[size++] = v; - } - int size() { return size; } - int get(int i) { return a[i]; } - int[] toArray() { return Arrays.copyOf(a, size); } - } static final class ShardBuilder { - final IntArrayList offsets = new IntArrayList(4096); + final IntListDTO offsets = new IntListDTO(4096); final ByteArrayOutputStream data = new ByteArrayOutputStream(1 << 20); // grows void addRecord(byte[] rec) throws IOException { offsets.add(data.size()); @@ -116,6 +110,50 @@ public final class DictJavaGeneratorMulti { writeIndexedShard(outDir.resolve(e.getKey() + ".idx"), e.getValue()); } } + static void generateHintShards(Dict dict, Path outDir) { + try { + Files.createDirectories(outDir); + } catch (IOException e) { + throw new RuntimeException(e); + } + + var builders = new java.util.HashMap(256); + + for (var index : dict.index()) { + long[] words = index.words(); + for (int shardIdx = 0; shardIdx < words.length; shardIdx++) { + var w = words[shardIdx]; + String word = Lemma.asWord(w); + int wIdx = Lemma.unpackIndex(w); + String[] clues = CsvIndexService.clues(wIdx); + int simpel = CsvIndexService.simpel(wIdx); + + // serialize to: WORD \t JSON \n + // (als je al JSON string wilt bewaren: gebruik Gson.toJson(clues)) + String json = Meta.GSON.toJson(clues); + String recStr = word + "\t" + simpel + "\t" + json + "\n"; + byte[] rec = recStr.getBytes(StandardCharsets.UTF_8); + + var key = Meta.shardKey(w); + ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder()); + try { + sb.addRecord(rec); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + } + + // flush all shards to disk as .idx (e.g. 6Z.idx) + for (var e : builders.entrySet()) { + try { + writeIndexedShard(e.getKey(), e.getValue()); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + } + } static void writeIndexedShard(Path out, ShardBuilder sb) throws IOException { int n = sb.offsets.size(); int[] offs = sb.offsets.toArray();