introduce bitloops

This commit is contained in:
mike
2026-01-20 19:06:59 +01:00
parent dadde53f76
commit ddce9addb5
28 changed files with 87335 additions and 87363 deletions

View File

@@ -15,19 +15,19 @@ public final class DictJavaGeneratorMulti {
// Smaller = more files, but safer for javac/class limits.
private static final int WORDS_CHUNK = 8_192;
private static final int POS_CHUNK = 8_192;
public static final int THRESS = 800;
public static void main(String[] args) throws Exception {
Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v4.csv");
Path outDir = Path.of(args.length > 1 ? args[1] : "src/main/generated-sources/puzzle/dict" + THRESS);
String pkg = "puzzle.dict" + THRESS;
HashMap<String, ShardBuilder> builders = new HashMap<String, ShardBuilder>(16);
var THRESS = 900;
var wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v4.csv");
var outDir = Path.of(args.length > 1 ? args[1] : "src/main/generated-sources/puzzle/dict" + THRESS);
var pkg = "puzzle.dict" + THRESS;
var builders = new HashMap<String, ShardBuilder>(16);
SwedishGenerator.Dict dict = buildDict(wordsFile, builders, THRESS);
var dict = buildDict(wordsFile, builders, THRESS);
Files.createDirectories(outDir);
// Generate L2..L8
for (int L = 2; L <= 8; L++) {
for (var L = 2; L <= 8; L++) {
var entry = dict.index()[L];
if (entry == null || entry.words() == null || entry.words().length == 0) {
throw new IllegalStateException("No words for length " + L);
@@ -36,7 +36,7 @@ public final class DictJavaGeneratorMulti {
}
// Aggregator
writeAggregator(outDir, pkg, "DictData", dict.length());
writeAggregator(outDir, pkg, "DictData", dict.length(), THRESS);
System.out.println("Generated sources into: " + outDir.toAbsolutePath());
}
@@ -48,21 +48,21 @@ public final class DictJavaGeneratorMulti {
var map = new LongArrayList(100_000);
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
lines.forEach(line -> {
var parts = line.split(",", 4);
var word = parts[0].trim();
long w = SwedishGenerator.Lemma.from(word.getBytes(US_ASCII));
var parts = line.split(",", 4);
var word = parts[0].trim();
var w = SwedishGenerator.Lemma.from(word.getBytes(US_ASCII));
if (!word.equals(SwedishGenerator.Lemma.asWord(w, Export.BYTES.get()))) {
throw new RuntimeException();
}
int score = Integer.parseInt(parts[1].trim());
var score = Integer.parseInt(parts[1].trim());
var simpel = Integer.parseInt(parts[CsvIndexService.SIMPEL_IDX].trim());
if (score < 1 || simpel > thress) {
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
return;
}
var key = shardKey(w);
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
var key = shardKey(w);
var sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
map.add(Lemma.pack(w, sb.addRecord()));
});
}
@@ -80,23 +80,23 @@ public final class DictJavaGeneratorMulti {
val idx = entry.words().size();
val pos = entry.pos();
entry.words().add(lemma);
int i = 0;
for (long w = lemma & Lemma.LETTER_MASK; w != 0; w >>>= 5, i++) {
var i = 0;
for (var w = lemma & Lemma.LETTER_MASK; w != 0; w >>>= 5, i++) {
pos[i][(int) ((w & 31) - 1)].add(idx);
}
}
for (int i = 2; i < index.length; i++) if (index[i].words().size() <= 0) throw new RuntimeException("No words for length " + i);
for (var i = 2; i < index.length; i++) if (index[i].words().size() <= 0) throw new RuntimeException("No words for length " + i);
return new Dict(Arrays.stream(index).map(i -> {
var words = i.words().toArray();
int numWords = words.length;
int numLongs = (numWords + 63) >>> 6;
var numWords = words.length;
var numLongs = (numWords + 63) >>> 6;
var bitsets = new long[i.pos().length * 26][numLongs];
for (int p = 0; p < i.pos().length; p++) {
for (int l = 0; l < 26; l++) {
for (var p = 0; p < i.pos().length; p++) {
for (var l = 0; l < 26; l++) {
var list = i.pos()[p][l];
var bs = bitsets[p * 26 + l];
for (int k = 0; k < list.size(); k++) {
int wordIdx = list.data()[k];
for (var k = 0; k < list.size(); k++) {
var wordIdx = list.data()[k];
bs[wordIdx >>> 6] |= (1L << (wordIdx & 63));
}
}
@@ -117,18 +117,16 @@ public final class DictJavaGeneratorMulti {
}
}
static final int VERSION = 1;
private static void writeAggregator(Path outDir, String pkg, String cls, int totalLen) throws IOException {
Path out = outDir.resolve(cls + ".java");
try (BufferedWriter w = writer(out)) {
private static void writeAggregator(Path outDir, String pkg, String cls, int totalLen, int thress) throws IOException {
var out = outDir.resolve(cls + ".java");
try (var w = writer(out)) {
w.write("package " + pkg + ";\n\n");
w.write("public final class " + cls + " {\n");
w.write(" private " + cls + "() {}\n\n");
w.write(" public static final puzzle.SwedishGenerator.Dict DICT" + THRESS + " = build();\n\n");
w.write(" public static final puzzle.SwedishGenerator.Dict DICT" + thress + " = build();\n\n");
w.write(" private static puzzle.SwedishGenerator.Dict build() {\n");
w.write(" puzzle.SwedishGenerator.DictEntry[] idx = new puzzle.SwedishGenerator.DictEntry[puzzle.SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];\n");
for (int L = 2; L <= 8; L++) w.write(" idx[" + L + "] = DictDataL" + L + ".entry();\n");
for (var L = 2; L <= 8; L++) w.write(" idx[" + L + "] = DictDataL" + L + ".entry();\n");
w.write(" return new puzzle.SwedishGenerator.Dict(idx, " + totalLen + ");\n");
w.write(" }\n");
w.write("}\n");
@@ -136,24 +134,24 @@ public final class DictJavaGeneratorMulti {
}
private static void writeLengthBundle(Path outDir, String pkg, int L, SwedishGenerator.DictEntry e) throws IOException {
long[] words = e.words();
var words = e.words();
// flatten posBitsets: [rows][cols] -> flat[]
long[][] bs = e.posBitsets();
int rows = bs.length;
int cols = bs[0].length;
long[] flat = new long[rows * cols];
int t = 0;
for (int r = 0; r < rows; r++) {
var bs = e.posBitsets();
var rows = bs.length;
var cols = bs[0].length;
var flat = new long[rows * cols];
var t = 0;
for (var r = 0; r < rows; r++) {
System.arraycopy(bs[r], 0, flat, t, cols);
t += cols;
}
String base = "DictDataL" + L;
var base = "DictDataL" + L;
// 1) chunk classes
int wChunks = writeChunkClasses(outDir, pkg, base + "W", words, WORDS_CHUNK);
int pChunks = writeChunkClasses(outDir, pkg, base + "P", flat, POS_CHUNK);
var wChunks = writeChunkClasses(outDir, pkg, base + "W", words, WORDS_CHUNK);
var pChunks = writeChunkClasses(outDir, pkg, base + "P", flat, POS_CHUNK);
// 2) assembler class
writeLengthAssembler(outDir, pkg, base, L, rows, cols, words.length, flat.length, wChunks, pChunks);
@@ -161,20 +159,20 @@ public final class DictJavaGeneratorMulti {
/** Writes classes like Prefix0..PrefixN each with static long[] DATA. Returns chunk count. */
private static int writeChunkClasses(Path outDir, String pkg, String prefix, long[] data, int chunkSize) throws IOException {
int chunks = (data.length + chunkSize - 1) / chunkSize;
for (int ci = 0; ci < chunks; ci++) {
int from = ci * chunkSize;
int to = Math.min(data.length, from + chunkSize);
var chunks = (data.length + chunkSize - 1) / chunkSize;
for (var ci = 0; ci < chunks; ci++) {
var from = ci * chunkSize;
var to = Math.min(data.length, from + chunkSize);
Path out = outDir.resolve(prefix + ci + ".java");
try (BufferedWriter w = writer(out)) {
var out = outDir.resolve(prefix + ci + ".java");
try (var w = writer(out)) {
w.write("package " + pkg + ";\n\n");
w.write("public final class " + prefix + ci + " {\n");
w.write(" private " + prefix + ci + "() {}\n");
w.write(" public static long[] get() {\n");
w.write(" return new long[] { \n");
for (int i = from; i < to; i++) {
for (var i = from; i < to; i++) {
w.write(" " + toLongLiteral(data[i]) + (i + 1 < to ? "," : "") + "\n");
}
w.write(" };\n");
@@ -189,8 +187,8 @@ public final class DictJavaGeneratorMulti {
int rows, int cols,
int wordsLen, int posLen,
int wChunks, int pChunks) throws IOException {
Path out = outDir.resolve(cls + ".java");
try (BufferedWriter w = writer(out)) {
var out = outDir.resolve(cls + ".java");
try (var w = writer(out)) {
w.write("package " + pkg + ";\n\n");
w.write("public final class " + cls + " {\n");
w.write(" private " + cls + "() {}\n\n");
@@ -203,13 +201,13 @@ public final class DictJavaGeneratorMulti {
// assemble words
w.write(" private static long[] words() {\n");
String wPrefix = "DictDataL" + L + "W";
var wPrefix = "DictDataL" + L + "W";
if (wChunks == 1) {
w.write(" return " + wPrefix + "0.get();\n");
} else {
w.write(" long[] out = new long[WORDS_LEN];\n");
w.write(" int k = 0;\n");
for (int ci = 0; ci < wChunks; ci++) {
for (var ci = 0; ci < wChunks; ci++) {
w.write(" k = copy(out, k, " + wPrefix + ci + ".get());\n");
}
w.write(" return out;\n");
@@ -218,13 +216,13 @@ public final class DictJavaGeneratorMulti {
// assemble pos
w.write(" private static long[] posFlat() {\n");
String pPrefix = "DictDataL" + L + "P";
var pPrefix = "DictDataL" + L + "P";
if (pChunks == 1) {
w.write(" return " + pPrefix + "0.get();\n");
} else {
w.write(" long[] out = new long[POS_LEN];\n");
w.write(" int k = 0;\n");
for (int ci = 0; ci < pChunks; ci++) {
for (var ci = 0; ci < pChunks; ci++) {
w.write(" k = copy(out, k, " + pPrefix + ci + ".get());\n");
}
w.write(" return out;\n");
@@ -271,33 +269,6 @@ public final class DictJavaGeneratorMulti {
static int SIMPEL_IDX = 2;
/**
 * Reads the integer stored in the {@code SIMPEL_IDX} column of one CSV line.
 *
 * <p>The line is split on commas into at most four fields; the selected field
 * is trimmed before being parsed as a decimal integer.
 *
 * @param line one CSV record
 * @return the parsed value of the field at index {@code SIMPEL_IDX}
 * @throws NumberFormatException if the selected field is not a valid integer
 * @throws ArrayIndexOutOfBoundsException if the line has too few fields
 */
public static int lineToSimpel(String line) {
    final String[] columns = line.split(",", 4);
    final String simpelField = columns[SIMPEL_IDX].trim();
    return Integer.parseInt(simpelField);
}
/**
 * Extracts the clue list from the fourth column of one CSV line.
 *
 * <p>The line is split on commas into at most four fields, so any commas
 * inside the fourth field are preserved. If that field is wrapped in double
 * quotes, the wrapping quotes are removed and doubled quotes ({@code ""}) are
 * collapsed to a single quote. The result is then deserialized into a
 * {@code String[]} through {@code Meta.GSON}.
 *
 * @param line one CSV record; must not be blank
 * @return the clue strings decoded from the fourth field
 * @throws RuntimeException if {@code line} is blank
 * @throws ArrayIndexOutOfBoundsException if the line has fewer than four fields
 */
public static String[] lineToClue(String line) {
    if (line.isBlank()) {
        throw new RuntimeException("Empty line");
    }
    var parts = line.split(",", 4);
    var rawClue = parts[3].trim();
    // Require at least two characters: a field consisting of a single '"' both
    // starts and ends with a quote, and substring(1, 0) would throw.
    if (rawClue.length() >= 2 && rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
        rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
    }
    return Meta.GSON.fromJson(rawClue, String[].class);
}
/**
 * Parses one CSV line and, if the word passes the filter, hands its encoded
 * lemma to {@code ok}.
 *
 * <p>Field 0 is the word, field 1 the score and field {@code SIMPEL_IDX} the
 * simpel value. A line is rejected (and reported on stderr when
 * {@code Main.VERBOSE} is set) if its score is below 1 or its simpel value
 * exceeds {@code THRESS}. Otherwise the word's US-ASCII bytes are passed
 * through {@code Lemma.from} and the result is given to the consumer.
 *
 * @param line one CSV record; must not be blank
 * @param ok   receives the value returned by {@code Lemma.from} for accepted words
 * @throws RuntimeException if {@code line} is blank
 * @throws NumberFormatException if the score or simpel field is not an integer
 */
public static void lineToLemma(String line, LongConsumer ok) {
    if (line.isBlank()) {
        throw new RuntimeException("Empty line");
    }
    var parts = line.split(",", 4);
    var word = parts[0].trim();
    int score = Integer.parseInt(parts[1].trim());
    // Use the shared column constant rather than a literal so this stays in
    // step with lineToSimpel.
    int simpel = Integer.parseInt(parts[SIMPEL_IDX].trim());
    if (score < 1 || simpel > THRESS) {
        if (Main.VERBOSE) {
            System.err.println("Word too complex: " + line);
        }
        return;
    }
    ok.accept(Lemma.from(word.getBytes(US_ASCII)));
}
}
record DictEntryDTO(LongArrayList words, IntListDTO[][] pos) {
@@ -313,14 +284,10 @@ public final class DictJavaGeneratorMulti {
int[] data = new int[8];
int size = 0;
/**
 * Creates a list whose backing array has the given length.
 *
 * <p>The parameter shadows the {@code size} field, so only the backing array
 * is replaced here; the {@code size} field itself is not assigned.
 *
 * @param size length of the freshly allocated backing array
 */
public IntListDTO(int size) {
    this.data = new int[size];
}
/**
 * Appends {@code v} to the end of the list, enlarging the backing array when
 * it is full.
 *
 * @param v value to append
 */
void add(int v) {
    if (size >= data.length) {
        // Doubling a zero-length array yields zero again, so start from 1 in
        // that case (same rule the long list's grow() uses).
        int newCapacity = data.length == 0 ? 1 : data.length * 2;
        data = Arrays.copyOf(data, newCapacity);
    }
    data[size++] = v;
}
/**
 * Returns a fresh array holding the first {@code size} stored values.
 *
 * @return a copy of the used prefix of the backing array
 */
int[] toArray() {
    final int[] snapshot = new int[size];
    System.arraycopy(data, 0, snapshot, 0, size);
    return snapshot;
}
}
}
@@ -342,8 +309,8 @@ public final class DictJavaGeneratorMulti {
}
void grow() {
int newCap = a.length == 0 ? 1 : a.length * 2;
long[] n = new long[newCap];
var newCap = a.length == 0 ? 1 : a.length * 2;
var n = new long[newCap];
System.arraycopy(a, 0, n, 0, size);
a = n;
}

View File

@@ -57,7 +57,7 @@ public class MainTest {
this.tries = 1;
this.verbose = false;
}};
static final Dict dict = DictData.DICT;//loadDict(opts.wordsPath);
static final Dict dict = DictData.DICT;//loadDict(opts.wordsPath);
@Test
void testExtractSlots() {
@@ -169,18 +169,6 @@ public class MainTest {
clues.setClueLo(idx.lo, CLUE_LEFT);
Assertions.assertTrue(clues.isClueLo(idx.index));
}
/*@Test
void testMaskerCreation() {
var masker = new Masker(new Rng(12348), new int[STACK_SIZE], Masker.Clues.createEmpty());
var mask = masker.generateMask(opts.clueSize, opts.pop, opts.gens, opts.offspring);
val clued = new Clued(mask);
val map = clued.stream().collect(Collectors.toMap(ClueAt::index, ClueAt::clue));
Assertions.assertEquals(4, map.size());
Assertions.assertEquals(RIGHT.dir, map.get(0));
Assertions.assertEquals(RIGHT.dir, map.get(2));
Assertions.assertEquals(RIGHT.dir, map.get(5));
Assertions.assertEquals(LEFT.dir, map.get(71));
}*/
@Test
void testFiller2() {
val rng = new Rng(-343913721);