introduce bitloops

This commit is contained in:
mike
2026-01-17 16:03:16 +01:00
parent 9bd85c81a3
commit bfa19ec585
18 changed files with 40847 additions and 41296 deletions

View File

@@ -1,40 +1,13 @@
package puzzle;
import puzzle.SwedishGenerator.Lemma;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.function.LongConsumer;
import static java.nio.charset.StandardCharsets.US_ASCII;
public final class CsvIndexService
implements Closeable {
public final class CsvIndexService {
static final ScopedValue<CsvIndexService> SC = ScopedValue.newInstance();
private static final int MAGIC = 0x4C494458; // "LIDX"
private static final int VERSION = 1;
static int SIMPEL_IDX = 3;
private final Path csvPath;
private final Path idxPath;
static int SIMPEL_IDX = 3;
private volatile long[] offsets; // lazy
private volatile FileChannel csvChannel; // open once
private final Object lock = new Object();
/**
 * Creates a service for a CSV file and its offset-index file.
 * Only the two paths are stored here; no file is opened by the constructor.
 *
 * @param csvPath path of the CSV file to read lines from
 * @param idxPath path of the index file holding the line offsets
 */
public CsvIndexService(Path csvPath, Path idxPath) {
this.csvPath = csvPath;
this.idxPath = idxPath;
}
public static int lineToSimpel(String line) {
var parts = line.split(",", 5);
return Integer.parseInt(parts[SIMPEL_IDX].trim());
@@ -60,195 +33,6 @@ public final class CsvIndexService
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
return;
}
ok.accept(Lemma.pack(id, word.getBytes(US_ASCII)));
ok.accept(Lemma.packW(word.getBytes(US_ASCII)));
}
/**
 * Looks up the value that {@code lineToSimpel} parses from the CSV line at
 * {@code index}, using the service currently bound to {@code SC}.
 *
 * @param index 0-based line index handed to {@code getLine}
 * @return the parsed value, or {@code -1} when no service is bound to {@code SC}
 * @throws RuntimeException wrapping any failure while fetching or parsing the line
 */
public static int simpel(int index) {
    try {
        if (SC.isBound()) {
            return lineToSimpel(SC.get().getLine(index));
        }
        return -1;
    } catch (Exception e) {
        e.printStackTrace();
        // The message used to say "clues" (copy-paste from clues()), which pointed
        // anyone reading a stack trace at the wrong lookup.
        throw new RuntimeException("Failed to get simpel for index " + index, e);
    }
}
/**
 * Returns the clues that {@code lineToClue} extracts from the CSV line at
 * {@code index}, using the service currently bound to {@code SC}.
 *
 * @param index 0-based line index handed to {@code getLine}
 * @return the clues, or an empty array when no service is bound to {@code SC}
 * @throws RuntimeException wrapping any failure while fetching or parsing the line
 */
public static String[] clues(int index) {
    if (!SC.isBound()) {
        return new String[0];
    }
    try {
        final String csvLine = SC.get().getLine(index);
        return lineToClue(csvLine);
    } catch (Exception failure) {
        failure.printStackTrace();
        throw new RuntimeException("Failed to get clues for index " + index, failure);
    }
}
/**
 * Fetches one line (0-based line index) with a self-healing index: when the
 * line found at the stored offset does not start with {@code "<lineIndex>,"},
 * the index is rebuilt once and the read is retried.
 *
 * @param lineIndex 0-based line index
 * @return the line text without its line terminator
 * @throws IOException      if loading, rebuilding or reading fails
 * @throws RuntimeException if the line still does not match after the rebuild
 */
public String getLine(int lineIndex) throws IOException {
    ensureLoaded();

    final String firstAttempt = readLineAt(lineIndex);
    if (!startsWithIndex(firstAttempt, lineIndex)) {
        // Mismatch: rebuild the index and try exactly one more time.
        final String secondAttempt;
        synchronized (lock) {
            rebuildIndexLocked();
            secondAttempt = readLineAt(lineIndex);
        }
        if (!startsWithIndex(secondAttempt, lineIndex)) {
            throw new RuntimeException("Index mismatch after rebuild. Requested=" + lineIndex + ", got line=" + preview(secondAttempt));
        }
        return secondAttempt;
    }
    return firstAttempt;
}
/**
 * Makes sure the CSV channel is open and the offset table is available.
 * The index file is used when it exists and can be read; otherwise the index
 * is rebuilt from the CSV.
 *
 * <p>The channel is published to {@code csvChannel} only after the offsets
 * were obtained. If loading fails, the freshly opened channel is closed again
 * instead of being left open and then replaced by the next call.
 *
 * @throws IOException if the CSV cannot be opened or the index cannot be built
 */
public void ensureLoaded() throws IOException {
    if (offsets != null && csvChannel != null && csvChannel.isOpen()) return;
    synchronized (lock) {
        // Re-check under the lock: another thread may have finished loading meanwhile.
        if (offsets != null && csvChannel != null && csvChannel.isOpen()) return;

        var opened = FileChannel.open(csvPath, StandardOpenOption.READ);
        try {
            long[] loaded = null;
            if (Files.exists(idxPath)) {
                try {
                    loaded = readIndex(idxPath);
                } catch (IOException ignored) {
                    // Unreadable or corrupt index file: fall through and rebuild it.
                }
            }
            if (loaded != null) {
                offsets = loaded;
            } else {
                rebuildIndexLocked();
            }
            csvChannel = opened;
        } catch (IOException | RuntimeException e) {
            // Do not leak the channel when the index could not be produced.
            try {
                opened.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }
}
/**
 * Scans the CSV for line offsets, writes them to the index file and then
 * publishes them to {@code offsets}. The visible callers invoke this while
 * holding {@code lock}.
 *
 * @throws IOException if scanning the CSV or writing the index fails
 */
private void rebuildIndexLocked() throws IOException {
    final long[] freshOffsets = buildOffsets(csvPath);
    writeIndex(idxPath, freshOffsets);
    offsets = freshOffsets;
}
/**
 * Reads the line that starts at the stored offset for {@code lineIndex}.
 * Bytes are collected up to the next {@code '\n'} (or end of file), every
 * {@code '\r'} byte is dropped, and the result is decoded as UTF-8.
 *
 * @param lineIndex 0-based line index into the offset table
 * @return the decoded line without its terminator
 * @throws IndexOutOfBoundsException if {@code lineIndex} is outside the offset table
 * @throws IOException               if reading from the CSV channel fails
 */
private String readLineAt(int lineIndex) throws IOException {
    final long[] table = offsets;
    if (lineIndex < 0 || lineIndex >= table.length) {
        throw new IndexOutOfBoundsException("lineIndex=" + lineIndex + ", max=" + (table.length - 1));
    }

    long filePos = table[lineIndex];
    // Read in blocks (faster than byte-by-byte) until a newline shows up.
    final byte[] chunk = new byte[8192];
    byte[] lineBytes = new byte[256];
    int used = 0;

    int got;
    while ((got = csvChannel.read(ByteBuffer.wrap(chunk), filePos)) >= 0) {
        filePos += got;
        for (int k = 0; k < got; k++) {
            final byte current = chunk[k];
            if (current == '\n') {
                return new String(lineBytes, 0, used, StandardCharsets.UTF_8);
            }
            if (current != '\r') {
                if (used == lineBytes.length) {
                    lineBytes = Arrays.copyOf(lineBytes, lineBytes.length * 2);
                }
                lineBytes[used++] = current;
            }
        }
    }
    // End of file reached without a trailing newline.
    return new String(lineBytes, 0, used, StandardCharsets.UTF_8);
}
/**
 * Checks whether {@code line} starts with {@code "<lineIndex>,"}, i.e. a run of
 * ASCII digits equal to {@code lineIndex} followed by a comma.
 *
 * @param line      the line to inspect; {@code null} or empty yields {@code false}
 * @param lineIndex the expected leading number
 * @return {@code true} only when the text before the first comma is all digits
 *         and its value equals {@code lineIndex}
 */
private static boolean startsWithIndex(String line, int lineIndex) {
    final int digitCount = (line == null) ? -1 : line.indexOf(',');
    if (digitCount < 1) {
        return false;
    }
    // Fast parse without split(); bail out as soon as the value leaves int range.
    long parsed = 0L;
    int at = 0;
    while (at < digitCount) {
        final int digit = line.charAt(at++) - '0';
        if (digit < 0 || digit > 9) {
            return false;
        }
        parsed = parsed * 10 + digit;
        if (parsed > Integer.MAX_VALUE) {
            return false;
        }
    }
    return parsed == lineIndex;
}
/**
 * Shortens a string for use in messages.
 *
 * @param s the text, may be {@code null}
 * @return {@code "null"} for a null input, the text itself when it has at most
 *         120 characters, otherwise its first 120 characters followed by {@code "..."}
 */
private static String preview(String s) {
    if (s == null) {
        return "null";
    }
    if (s.length() > 120) {
        return s.substring(0, 120) + "...";
    }
    return s;
}
/**
 * Builds the line-offset table of a file by scanning it for {@code '\n'} bytes.
 * Entry 0 is always offset 0; every newline adds the offset of the byte that
 * follows it. The returned array is trimmed to the number of offsets found.
 *
 * <p>The running file position is a {@code long}. It used to be an {@code int},
 * which overflowed for files larger than 2 GiB and produced wrong offsets.
 *
 * @param path the file to scan
 * @return the byte offsets at which lines start
 * @throws IOException if the file cannot be opened or read
 */
public static long[] buildOffsets(Path path) throws IOException {
    try (var ch = FileChannel.open(path, StandardOpenOption.READ)) {
        var offs = new long[131072]; // start capacity, grows when needed
        var count = 0;
        offs[count++] = 0L;
        var buf = ByteBuffer.allocateDirect(1 << 20);
        long pos = 0L; // absolute file position of buf[0]
        while (true) {
            buf.clear();
            var n = ch.read(buf);
            if (n < 0) break;
            buf.flip();
            for (var i = 0; i < n; i++) {
                if (buf.get(i) == (byte) '\n') {
                    if (count == offs.length) offs = Arrays.copyOf(offs, offs.length * 2);
                    offs[count++] = pos + i + 1;
                }
            }
            pos += n;
        }
        return Arrays.copyOf(offs, count);
    }
}
/**
 * Writes an offset table to {@code out}, creating or truncating the file.
 * Layout: {@code MAGIC} (int), {@code VERSION} (int), entry count (int), then
 * every offset as a long, all via {@link DataOutputStream}.
 *
 * @param out     target index file
 * @param offsets the offsets to store
 * @throws IOException if the file cannot be written
 */
public static void writeIndex(Path out, long[] offsets) throws IOException {
    try (var sink = new DataOutputStream(new BufferedOutputStream(Files.newOutputStream(
        out, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)))) {
        sink.writeInt(MAGIC);
        sink.writeInt(VERSION);
        final int count = offsets.length;
        sink.writeInt(count);
        for (int i = 0; i < count; i++) {
            sink.writeLong(offsets[i]);
        }
    }
}
/**
 * Reads an offset table previously stored by {@code writeIndex}.
 * The header (magic, version, entry count) is validated before any entry is read.
 *
 * <p>The entry count is also checked against the file size, so a corrupt
 * header raises an {@link IOException} instead of attempting a huge array
 * allocation.
 *
 * @param in the index file
 * @return the stored offsets
 * @throws IOException if the file is not a supported index file, is corrupt or
 *                     truncated, or cannot be read
 */
public static long[] readIndex(Path in) throws IOException {
    try (var dis = new DataInputStream(new BufferedInputStream(Files.newInputStream(in)))) {
        if (dis.readInt() != MAGIC) throw new IOException("Not a LIDX file");
        var version = dis.readInt();
        if (version != VERSION) throw new IOException("Unsupported version: " + version);
        var n = dis.readInt();
        if (n < 0) throw new IOException("Corrupt length: " + n);
        // Header is three ints; each entry is one long.
        long required = 3L * Integer.BYTES + (long) n * Long.BYTES;
        if (Files.size(in) < required) {
            throw new IOException("Corrupt length: " + n + " entries do not fit in the index file");
        }
        var offsets = new long[n];
        for (var i = 0; i < n; i++) offsets[i] = dis.readLong();
        return offsets;
    }
}
/**
 * Closes the CSV channel, if one is open, and forgets the loaded offsets.
 * Runs under {@code lock}.
 *
 * @throws IOException if closing the channel fails
 */
@Override
public void close() throws IOException {
    synchronized (lock) {
        final FileChannel current = csvChannel;
        if (current != null) {
            current.close();
        }
        csvChannel = null;
        offsets = null;
    }
}
}

View File

@@ -1,213 +0,0 @@
package puzzle;
import puzzle.Export.Dicts;
import puzzle.SwedishGenerator.Dict;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
public final class DictCodeGen {
/** Entry point; passes the arguments unchanged to {@code DictJavaGenerator.main}. */
public static void main(String[] args) throws Exception {
DictJavaGenerator.main(args); // uses your makeDict logic
}
/**
* Generates Java source files for dictionary data, split by word length (2..8),
* and further chunked to avoid "code too large" / constant pool issues.
*
* Output:
* - DictDataL2.java .. DictDataL8.java (arrays chunked)
* - DictData.java (aggregator that builds Dict)
*
* Usage:
* java puzzle.DictCodeGen [wordsFile] [outDir]
*
* Both arguments are optional. The generated package is always "puzzle" and the
* files are written directly into outDir.
*
* Example:
* java puzzle.DictCodeGen nl_score_hints_v3.csv src/main/java/puzzle
*/
public final class DictJavaGenerator {
// tune if needed
private static final int WORDS_CHUNK = 8_192 >>> 5; // longs per chunk
private static final int POS_CHUNK = 8_192 >>> 5; // longs per chunk
/**
 * Command-line entry point.
 *
 * @param args optional: {@code args[0]} is the words CSV, {@code args[1]} the
 *             output directory; built-in defaults are used for missing values
 * @throws Exception if generation fails
 */
public static void main(String[] args) throws Exception {
    final String wordsArg = args.length > 0 ? args[0] : "nl_score_hints_v3.csv";
    final String outArg = args.length > 1 ? args[1] : "/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle";
    final Path wordsFile = Path.of(wordsArg);
    final Path outDir = Path.of(outArg);
    writeDict(wordsFile, outDir);
}
/**
 * Builds the dictionary from {@code wordsFile} and writes the generated
 * sources into {@code outDir}: one class per word length 2..8
 * ({@code DictDataL2} .. {@code DictDataL8}) plus the {@code DictData} aggregator.
 *
 * @param wordsFile the words CSV
 * @param outDir    directory that receives the generated files (created if missing)
 * @return the dictionary that was built
 * @throws IllegalStateException if a length between 2 and 8 has no words
 * @throws RuntimeException      wrapping any {@link IOException}
 */
public static Dict writeDict(Path wordsFile, Path outDir) {
    final String pkg = "puzzle";
    final SwedishGenerator.Dict dict;
    try {
        dict = buildDict(wordsFile);
        Files.createDirectories(outDir);

        // One generated class per word length, L2..L8.
        for (int len = 2; len <= 8; len++) {
            final var entry = dict.index()[len];
            final boolean missing = entry == null || entry.words() == null || entry.words().length == 0;
            if (missing) {
                throw new IllegalStateException("No words for length " + len);
            }
            writeLengthClass(outDir, pkg, "DictDataL" + len, len, entry);
        }

        // Aggregator that ties the per-length classes together.
        writeAggregator(outDir, pkg, "DictData", dict.length());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    System.out.println("Generated dictionary sources into: " + outDir.toAbsolutePath());
    return dict;
}
/**
 * Reads the words file line by line (UTF-8), lets
 * {@code CsvIndexService.lineToLemma} push accepted entries into a list, and
 * turns the collected values into a dictionary via {@code Dicts.makeDict}.
 *
 * @param wordsPath the words CSV
 * @return the dictionary built from the collected values
 * @throws IOException if the file cannot be opened
 */
private static SwedishGenerator.Dict buildDict(Path wordsPath) throws IOException {
    final var collected = new LongArrayList(100_000);
    try (var csvLines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
        for (var it = csvLines.iterator(); it.hasNext(); ) {
            CsvIndexService.lineToLemma(it.next(), collected::add);
        }
    }
    return Dicts.makeDict(collected.toArray());
}
/**
 * Writes {@code <cls>.java} into {@code outDir}: a final class whose static
 * {@code DICT} field is built from {@code DictDataL2.entry()} ..
 * {@code DictDataL8.entry()}.
 *
 * @param outDir   directory that receives the file
 * @param pkg      package name written into the generated source
 * @param cls      name of the generated class (and file)
 * @param totalLen value emitted as second argument of the generated {@code Dict} constructor call
 * @throws IOException if the file cannot be written
 */
private static void writeAggregator(Path outDir, String pkg, String cls, int totalLen) throws IOException {
    final Path target = outDir.resolve(cls + ".java");
    try (BufferedWriter w = Files.newBufferedWriter(target, StandardCharsets.UTF_8,
        StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
        // Class header and the DICT constant.
        w.write("package " + pkg + ";\n\n");
        w.write("public final class " + cls + " {\n");
        w.write(" private " + cls + "() {}\n\n");
        w.write(" public static final SwedishGenerator.Dict DICT = build();\n\n");

        // build(): one index slot per word length 2..8.
        w.write(" private static SwedishGenerator.Dict build() {\n");
        w.write(" SwedishGenerator.DictEntry[] idx = new SwedishGenerator.DictEntry[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];\n");
        for (int len = 2; len <= 8; len++) {
            w.write(" idx[" + len + "] = DictDataL" + len + ".entry();\n");
        }
        w.write(" return new SwedishGenerator.Dict(idx, " + totalLen + ");\n");
        w.write(" }\n");
        w.write("}\n");
    }
}
/**
 * Writes {@code <cls>.java} into {@code outDir} for one word length. The
 * generated class holds the entry's words and its position bitsets (flattened
 * row by row) as chunked {@code long[]} constants, joiner methods that glue
 * the chunks back together, and an {@code entry()} factory that rebuilds the
 * {@code DictEntry}.
 *
 * @param outDir directory that receives the file
 * @param pkg    package name written into the generated source
 * @param cls    name of the generated class (and file)
 * @param L      word length, emitted as the {@code LEN} constant
 * @param e      dictionary entry providing {@code words()} and {@code posBitsets()}
 * @throws IOException if the file cannot be written
 */
private static void writeLengthClass(Path outDir, String pkg, String cls, int L, SwedishGenerator.DictEntry e) throws IOException {
Path out = outDir.resolve(cls + ".java");
try (BufferedWriter w = Files.newBufferedWriter(out, StandardCharsets.UTF_8,
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
w.write("package " + pkg + ";\n\n");
w.write("public final class " + cls + " {\n");
w.write(" private " + cls + "() {}\n\n");
long[] words = e.words();
// flatten posBitsets: [rows][cols] -> flat[]
long[][] bs = e.posBitsets();
int rows = bs.length;
// NOTE(review): bs[0] throws ArrayIndexOutOfBoundsException when posBitsets() has no rows;
// the column count of row 0 is applied to every row.
int cols = bs[0].length;
long[] flat = new long[rows * cols];
int t = 0;
for (int r = 0; r < rows; r++) {
System.arraycopy(bs[r], 0, flat, t, cols);
t += cols;
}
// dimensions needed by the generated code to rebuild the 2D array
w.write(" static final int LEN = " + L + ";\n");
w.write(" static final int ROWS = " + rows + ";\n");
w.write(" static final int COLS = " + cols + ";\n");
w.write(" static final int WORDS_LEN = " + words.length + ";\n");
w.write(" static final int POS_LEN = " + flat.length + ";\n\n");
// chunked arrays
int wordChunks = emitLongArrayChunked(w, "WORDS", words, WORDS_CHUNK);
int posChunks = emitLongArrayChunked(w, "POS", flat, POS_CHUNK);
// joiners
emitJoiner(w, "WORDS", "WORDS", words.length, wordChunks);
emitJoiner(w, "POS", "POS", flat.length, posChunks);
// entry builder
w.write(" public static SwedishGenerator.DictEntry entry() {\n");
w.write(" long[] words = WORDS();\n");
w.write(" long[] flat = POS();\n");
w.write(" long[][] pos = reshape(flat, ROWS, COLS);\n");
w.write(" return new SwedishGenerator.DictEntry(words, pos, words.length, (words.length + 63) >>> 6);\n");
w.write(" }\n\n");
// helpers
// copy(): used by the generated joiner methods to append one chunk
w.write(" private static int copy(long[] dst, int at, long[] src) {\n");
w.write(" System.arraycopy(src, 0, dst, at, src.length);\n");
w.write(" return at + src.length;\n");
w.write(" }\n\n");
// reshape(): inverse of the flattening done above
w.write(" private static long[][] reshape(long[] flat, int rows, int cols) {\n");
w.write(" long[][] out = new long[rows][cols];\n");
w.write(" int k = 0;\n");
w.write(" for (int r = 0; r < rows; r++) {\n");
w.write(" System.arraycopy(flat, k, out[r], 0, cols);\n");
w.write(" k += cols;\n");
w.write(" }\n");
w.write(" return out;\n");
w.write(" }\n");
w.write("}\n");
}
}
/**
 * Emits {@code data} as a series of {@code static final long[]} constants
 * named {@code baseName_0}, {@code baseName_1}, ..., each holding at most
 * {@code chunkSize} values.
 *
 * @param w         writer receiving the generated source
 * @param baseName  prefix of the generated array names
 * @param data      values to emit
 * @param chunkSize maximum number of values per generated array
 * @return the number of arrays that were emitted
 * @throws IOException if writing fails
 */
private static int emitLongArrayChunked(BufferedWriter w, String baseName, long[] data, int chunkSize) throws IOException {
    final int total = data.length;
    final int chunkCount = (total + chunkSize - 1) / chunkSize;
    int start = 0;
    for (int chunk = 0; chunk < chunkCount; chunk++, start += chunkSize) {
        final int stop = Math.min(total, start + chunkSize);
        w.write(" static final long[] " + baseName + "_" + chunk + " = new long[] {\n");
        for (int at = start; at < stop; at++) {
            // No trailing comma after the last literal of a chunk.
            final String separator = (at + 1 < stop) ? "," : "";
            w.write(" " + toLongLiteral(data[at]) + separator + "\n");
        }
        w.write(" };\n\n");
    }
    return chunkCount;
}
/**
 * Emits a static method {@code funcName()} that allocates a {@code long[]} of
 * {@code totalLen} elements and fills it by calling {@code copy} once for each
 * of {@code baseName_0} .. {@code baseName_(chunks-1)}.
 *
 * @param w        writer receiving the generated source
 * @param funcName name of the generated method
 * @param baseName prefix of the chunk arrays to join
 * @param totalLen length of the array the generated method allocates
 * @param chunks   number of chunk arrays
 * @throws IOException if writing fails
 */
private static void emitJoiner(BufferedWriter w, String funcName, String baseName, int totalLen, int chunks) throws IOException {
    final StringBuilder src = new StringBuilder();
    src.append(" static long[] ").append(funcName).append("() {\n");
    src.append(" long[] out = new long[").append(totalLen).append("];\n");
    src.append(" int k = 0;\n");
    for (int chunk = 0; chunk < chunks; chunk++) {
        src.append(" k = copy(out, k, ").append(baseName).append('_').append(chunk).append(");\n");
    }
    src.append(" return out;\n");
    src.append(" }\n\n");
    w.write(src.toString());
}
/**
 * Formats a value as a Java {@code long} literal in unsigned hexadecimal,
 * e.g. {@code 255} becomes {@code 0xffL} and {@code -1} becomes
 * {@code 0xffffffffffffffffL}.
 *
 * @param v the value to format
 * @return the literal text
 */
private static String toLongLiteral(long v) {
    // toHexString renders the two's-complement bits, i.e. the unsigned value.
    final String hexDigits = Long.toHexString(v);
    return "0x" + hexDigits + "L";
}
}
}

View File

@@ -1,10 +1,8 @@
package puzzle;
import org.junit.jupiter.api.Test;
import puzzle.DictCodeGen.DictJavaGenerator;
import lombok.val;
import puzzle.Export.Dicts;
import puzzle.Export.IntListDTO;
import puzzle.SwedishGenerator.Dict;
import puzzle.SwedishGenerator.Lemma;
import java.io.*;
@@ -12,23 +10,20 @@ import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.Arrays;
import java.util.HashMap;
public final class DictJavaGeneratorMulti {
// Smaller = more files, but safer for javac/class limits.
private static final int WORDS_CHUNK = 8_192;
private static final int POS_CHUNK = 8_192;
// Smoke test: prints the generated DictData.DICT; there are no assertions.
@Test
public void dictCodeGen15() {
System.out.println(DictData.DICT);
}
public static void main(String[] args) throws Exception {
Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv");
Path outDir = Path.of(args.length > 1 ? args[1] : "/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle");
String pkg = "puzzle";
Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv");
Path outDir = Path.of(args.length > 1 ? args[1] : "src/main/generated-sources/puzzle");
String pkg = "puzzle";
HashMap<Path, ShardBuilder> builders = new HashMap<Path, ShardBuilder>(16);
SwedishGenerator.Dict dict = buildDict(wordsFile);
SwedishGenerator.Dict dict = buildDict(wordsFile, builders);
Files.createDirectories(outDir);
@@ -43,117 +38,60 @@ public final class DictJavaGeneratorMulti {
// Aggregator
writeAggregator(outDir, pkg, "DictData", dict.length());
var csv = Paths.get("nl_score_hints_v3.csv");
var idx = Paths.get("nl_score_hints_v3.idx");
//var csv = Paths.get("nl_score_hints_v3.csv");
//var idx = Paths.get("nl_score_hints_v3.idx");
ScopedValue.where(CsvIndexService.SC, new CsvIndexService(csv, idx)).run(() -> generateHintShards(dict, outDir));
//ScopedValue.where(CsvIndexService.SC, new CsvIndexService(csv, idx)).run(() -> generateHintShards(dict, builders, outDir));
System.out.println("Generated sources into: " + outDir.toAbsolutePath());
}
private static SwedishGenerator.Dict buildDict(Path wordsPath) throws IOException {
private static SwedishGenerator.Dict buildDict(Path wordsPath, HashMap<Path, ShardBuilder> builders) throws IOException {
var map = new LongArrayList(100_000);
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
lines.forEach(line -> CsvIndexService.lineToLemma(line, map::add));
lines.forEach(line -> {
CsvIndexService.lineToLemma(line, w -> {
long len = Lemma.length0(w);
String word = Lemma.asWord(w);
String[] clues = CsvIndexService.lineToClue(line);
int simpel = CsvIndexService.lineToSimpel(line);
// serialize to: WORD \t JSON \n
// (als je al JSON string wilt bewaren: gebruik Gson.toJson(clues))
String json = Meta.GSON.toJson(clues);
String recStr = word + "\t" + simpel + "\t" + json + "\n";
byte[] rec = recStr.getBytes(StandardCharsets.UTF_8);
var key = Meta.shardKey(w);
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
try {
long index = ((long) sb.addRecord(rec) << 3) | len;
map.add(w | (index << 40));
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
});
}
return Dicts.makeDict(map.toArray());
}
static final int VERSION = 1;
/**
 * Extracts the word column from a CSV line shaped like
 * {@code ID,WORD,*,*,"JSON"}: the second comma-separated field, trimmed.
 *
 * @param line a CSV line containing at least one comma
 * @return the trimmed second field
 */
static String wordFromLine(String line) {
    // Limit 5 keeps any commas inside the trailing JSON column in one piece.
    final String[] columns = line.split(",", 5);
    final String rawWord = columns[1];
    return rawWord.trim();
}
static final class ShardBuilder {
final IntListDTO offsets = new IntListDTO(4096);
final ByteArrayOutputStream data = new ByteArrayOutputStream(1 << 20); // grows
void addRecord(byte[] rec) throws IOException {
offsets.add(data.size());
int addRecord(byte[] rec) throws IOException {
var size = data.size();
val currSize = offsets.size();
offsets.add(size);
data.write(rec);
return currSize;
}
}
/**
 * Reads the CSV line by line, groups every non-blank line into a shard chosen
 * by {@code Meta.shardKey(word)}, and writes each shard to
 * {@code <outDir>/<key>.idx}. A record is the UTF-8 encoding of
 * {@code word TAB simpel TAB cluesAsJson NEWLINE}.
 *
 * @param csv    the CSV file to read
 * @param outDir directory that receives the shard files (created if missing)
 * @throws IOException if reading the CSV or writing a shard fails
 */
static void generateHintShards(Path csv, Path outDir) throws IOException {
    Files.createDirectories(outDir);
    final var shards = new java.util.HashMap<String, ShardBuilder>(256);

    try (var csvLines = Files.lines(csv, StandardCharsets.UTF_8)) {
        csvLines
            .filter(row -> row != null && !row.isBlank())
            .forEach(row -> {
                final String word = wordFromLine(row);
                final String[] clues = CsvIndexService.lineToClue(row);
                final int simpel = CsvIndexService.lineToSimpel(row);

                // Serialized record: WORD \t SIMPEL \t JSON \n
                final String json = Meta.GSON.toJson(clues);
                final byte[] rec = (word + "\t" + simpel + "\t" + json + "\n").getBytes(StandardCharsets.UTF_8);

                final String key = Meta.shardKey(word);
                final ShardBuilder target = shards.computeIfAbsent(key, k -> new ShardBuilder());
                try {
                    target.addRecord(rec);
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            });
    } catch (UncheckedIOException wrapped) {
        // Surface the original checked exception to the caller.
        throw wrapped.getCause();
    }

    // Flush all shards to disk as <key>.idx (e.g. 6Z.idx).
    for (var shard : shards.entrySet()) {
        final Path shardFile = outDir.resolve(shard.getKey() + ".idx");
        writeIndexedShard(shardFile, shard.getValue());
    }
}
/**
 * Walks every word of {@code dict}, looks up its clues and "simpel" value
 * through {@code CsvIndexService}, groups the resulting records by
 * {@code Meta.shardKey(w)} and writes every shard to the path that is its key.
 * A record is the UTF-8 encoding of {@code word TAB simpel TAB cluesAsJson NEWLINE}.
 *
 * <p>Null slots in {@code dict.index()} are skipped. The generated aggregator
 * only fills the slots for lengths 2..8, so iterating the array without a
 * null check throws a {@link NullPointerException}.
 *
 * @param dict   dictionary whose words are exported
 * @param outDir directory that is created if missing
 * @throws RuntimeException     wrapping an {@link IOException} from creating the
 *                              directory or writing a shard
 * @throws UncheckedIOException if adding a record to a shard fails
 */
static void generateHintShards(Dict dict, Path outDir) {
    try {
        Files.createDirectories(outDir);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    var builders = new java.util.HashMap<Path, ShardBuilder>(256);
    for (var index : dict.index()) {
        if (index == null) continue; // no entry for this word length
        for (long w : index.words()) {
            String word = Lemma.asWord(w);
            int wIdx = Lemma.unpackIndex(w);
            String[] clues = CsvIndexService.clues(wIdx);
            int simpel = CsvIndexService.simpel(wIdx);
            // Serialized record: WORD \t SIMPEL \t JSON \n
            String json = Meta.GSON.toJson(clues);
            String recStr = word + "\t" + simpel + "\t" + json + "\n";
            byte[] rec = recStr.getBytes(StandardCharsets.UTF_8);
            var key = Meta.shardKey(w);
            ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
            try {
                sb.addRecord(rec);
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        }
    }
    // Flush all shards to disk; the map key is the target path of the shard.
    for (var e : builders.entrySet()) {
        try {
            writeIndexedShard(e.getKey(), e.getValue());
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
    }
}
static void writeIndexedShard(Path out, ShardBuilder sb) throws IOException {
int n = sb.offsets.size();
int[] offs = sb.offsets.toArray();

View File

@@ -126,25 +126,67 @@ public class ExportFormatTest {
}
}
@Test
void testIndex() {
var csv = Paths.get("nl_score_hints_v3.csv");
var idx = Paths.get("nl_score_hints_v3.idx");
try (var svc = new CsvIndexService(csv, idx)) {
System.out.println(svc.getLine(1319));
} catch (IOException e) {
throw new RuntimeException(e);
void testShardToClue() {
for (int length = 2; length <= 8; length++) {
val entry = DictData.DICT.index()[length];
if (entry == null) continue;
val words = entry.words();
for (int i = 0; i < Math.min(words.length, 5); i++) {
val wordVal = words[i];
val word = Lemma.asWord(wordVal);
val assigned = new Assign(wordVal, i);
val shard = Meta.shardKey(assigned.w);
val clueRec = Meta.readRecord(shard, i);
assertNotNull(clueRec);
assertEquals(word, Lemma.asWord(clueRec.w()));
assertTrue(clueRec.simpel() >= 0);
assertTrue(clueRec.clues().length > 0);
}
}
}
@Test
void testShardToClue() {
val index = 1;
val word = DictData.DICT.index()[3].words()[index];
val assigned = new Assign(word, index);
val lemma = Lemma.unpackIndex(word);
var word1 = Lemma.asWord(word);
val shard = Meta.shardKey(assigned.w);
val clue = Meta.readRecord(shard, index);
assertNotNull(clue);
void testSpecificWords() {
// These words are known to be in the CSV and likely in the dictionary
String[] testWords = {"EEN", "NAAR", "IEDEREEN"};
for (String wStr : testWords) {
long w = Lemma.pack(wStr);
int L = wStr.length();
var entry = DictData.DICT.index()[L];
if (entry == null) continue;
// Find index of word in entry
int idx = -1;
long[] words = entry.words();
for (int i = 0; i < words.length; i++) {
if (Lemma.asWord(words[i]).equals(wStr)) {
idx = i;
break;
}
}
if (idx != -1) {
val shard = Meta.shardKey(w);
val clueRec = Meta.readRecord(shard, idx);
assertNotNull(clueRec);
assertEquals(wStr, Lemma.asWord(clueRec.w()));
// Check some expected complexity values (from CSV head output, column 3)
if (wStr.equals("EEN")) {
assertEquals(451, clueRec.simpel());
assertEquals("het getal 1", clueRec.clues()[0]);
}
if (wStr.equals("NAAR")) {
assertEquals(497, clueRec.simpel());
assertEquals("in de richting van", clueRec.clues()[0]);
}
if (wStr.equals("IEDEREEN")) {
assertEquals(501, clueRec.simpel());
assertEquals("elke persoon", clueRec.clues()[0]);
}
assertTrue(clueRec.clues().length > 0);
}
}
}
}

View File

@@ -47,7 +47,7 @@ public class MainTest {
this.tries = 1;
this.verbose = false;
}};
static final Dict dict = loadDict(opts.wordsPath);
static final Dict dict = DictData.DICT;//loadDict(opts.wordsPath);
public static Dict loadDict(String wordsPath) {
var map = new LongArrayList(100_000);
try (var lines = Files.lines(Path.of(wordsPath), StandardCharsets.UTF_8)) {
@@ -192,7 +192,7 @@ public class MainTest {
"1 \n" +
"1 \n" +
"3 3 \n" +
"3 0 3 \n" +
"3 0 3 \n" +
"3 \n" +
"3 \n" +
"222 3");
@@ -200,8 +200,8 @@ public class MainTest {
var slots = Masker.extractSlots(mask, dict.index());
val slotInfo = Masker.scoreSlots(new int[slots.length], slots);
var grid = mask.toGrid();
var filled = fillMask(rng, slotInfo, grid, false);
// val res = new PuzzleResult(new Clued(mask), new Gridded(grid), slotInfo, filled).exportFormatFromFilled(0, new Rewards(0, 0, 0));
// var filled = fillMask(rng, slotInfo, grid, false);
// val res = new PuzzleResult(new Clued(mask), new Gridded(grid), slotInfo, filled).exportFormatFromFilled(0, new Rewards(0, 0, 0));
}
@Test
void testFiller() {
@@ -236,7 +236,7 @@ public class MainTest {
int foundSeed = -1;
for (int i = 0; i < 50; i++) {
int seed = opts.seed + i;
res = Main.attempt(new Rng(seed), dict, opts);
res = Main.attempt(new Rng(seed), DictData.DICT, opts);
if (res != null && res.filled().ok()) {
foundSeed = seed;
System.out.println("[DEBUG_LOG] Seed found: " + seed);

View File

@@ -8,6 +8,7 @@ import puzzle.Export.Dicts;
import puzzle.Export.Gridded;
import puzzle.Export.IntListDTO;
import puzzle.Export.LetterVisit.LetterAt;
import puzzle.Masker.Slot;
import java.nio.charset.StandardCharsets;
import java.util.stream.Collectors;
@@ -26,39 +27,39 @@ public class SwedishGeneratorTest {
public static Context get() { return CTX.get(); }
}
static final long TEST = Lemma.from(0, "TEST");
static final long TEST = Lemma.from("TEST");
static final long[] WORDS = new long[]{
Lemma.from(1, "AT"),
Lemma.from(2, "CAT"),
Lemma.from(3, "DOGS"),
Lemma.from(4, "APPLE"),
Lemma.from(5, "APPLY"),
Lemma.from(6, "BANAN"),
Lemma.from(7, "BANANA"),
Lemma.from(8, "BANANAS"),
Lemma.from(9, "BANANASS") // length 8
Lemma.from("AT"),
Lemma.from("CAT"),
Lemma.from("DOGS"),
Lemma.from("APPLE"),
Lemma.from("APPLY"),
Lemma.from("BANAN"),
Lemma.from("BANANA"),
Lemma.from("BANANAS"),
Lemma.from("BANANASS") // length 8
};
static final long l2a = Lemma.from(10, "IN");
static final long l4a = Lemma.from(11, "INER");
static final long l6a = Lemma.from(12, "INEREN");
static final long l7a = Lemma.from(13, "INERENA");
static final long l8a = Lemma.from(14, "INERENAE");
static final long l1 = Lemma.from(15, "APPLE");
static final long l2 = Lemma.from(16, "AXE");
static final long l2a = Lemma.from("IN");
static final long l4a = Lemma.from("INER");
static final long l6a = Lemma.from("INEREN");
static final long l7a = Lemma.from("INERENA");
static final long l8a = Lemma.from("INERENAE");
static final long l1 = Lemma.from("APPLE");
static final long l2 = Lemma.from("AXE");
static final long[] WORDS2 = new long[]{ Lemma.from(17, "IN"),
Lemma.from(18, "APPLE"),
Lemma.from(19, "APPLY"),
Lemma.from(20, "BANAN"),
Lemma.from(21, "INE"),
Lemma.from(22, "INER"),
Lemma.from(23, "INEREN"),
Lemma.from(24, "INERENA"),
Lemma.from(25, "INERENAE") };
static final long ABC = Lemma.from(26, "ABC");
static final long ABD = Lemma.from(27, "ABD");
static final long AZ = Lemma.from(28, "AZ");
static final long AB = Lemma.from(29, "AB");
static final long[] WORDS2 = new long[]{ Lemma.from("IN"),
Lemma.from("APPLE"),
Lemma.from("APPLY"),
Lemma.from("BANAN"),
Lemma.from("INE"),
Lemma.from("INER"),
Lemma.from("INEREN"),
Lemma.from("INERENA"),
Lemma.from("INERENAE") };
static final long ABC = Lemma.from("ABC");
static final long ABD = Lemma.from("ABD");
static final long AZ = Lemma.from("AZ");
static final long AB = Lemma.from("AB");
static final byte LETTER_A = ((byte) 'A') & 31;
static final byte LETTER_B = ((byte) 'B') & 31;
static final byte LETTER_C = ((byte) 'C') & 31;
@@ -139,7 +140,7 @@ public class SwedishGeneratorTest {
@Test
void testPatternForSlotAllLetters() {
var grid = new Gridded(createEmpty());
var key = Masker.Slot.packSlotKey(OFF_0_0, CLUE_RIGHT);
var key = Slot.packSlotKey(OFF_0_0, CLUE_RIGHT);
val clues = Masker.Clues.createEmpty();
clues.setClueLo(IDX_0_0.lo, CLUE_RIGHT);
GridBuilder.placeWord(grid.grid(), grid.grid().g, key, (1L << OFF_0_1) | (1L << OFF_0_2) | (1L << OFF_0_3), 0L, ABC);
@@ -152,9 +153,9 @@ public class SwedishGeneratorTest {
@Test
void testPatternForSlotMixed() {
var grid = createEmpty();
GridBuilder.placeWord(grid, grid.g, Masker.Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_0, 0, Lemma.from(0, "A"));
GridBuilder.placeWord(grid, grid.g, Masker.Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_2_0, 0, Lemma.from(0, "C"));
var key = Masker.Slot.packSlotKey(OFF_1_0, CLUE_RIGHT);
GridBuilder.placeWord(grid, grid.g, Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_0, 0, Lemma.from("A"));
GridBuilder.placeWord(grid, grid.g, Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_2_0, 0, Lemma.from("C"));
var key = Slot.packSlotKey(OFF_1_0, CLUE_RIGHT);
var pattern = patternForSlot(grid.lo, grid.hi, grid.g, key, 7L, 0L);
assertEquals(14081L, pattern);
}
@@ -162,7 +163,7 @@ public class SwedishGeneratorTest {
@Test
void testPatternForSlotAllDashes() {
var grid = createEmpty();
var key = Masker.Slot.packSlotKey(1 << Masker.Slot.BIT_FOR_DIR, CLUE_RIGHT);
var key = Slot.packSlotKey(1 << Slot.BIT_FOR_DIR, CLUE_RIGHT);
var pattern = patternForSlot(grid.lo, grid.hi, grid.g, key, 7L, 0L);
assertEquals(0L, pattern);
}
@@ -170,8 +171,8 @@ public class SwedishGeneratorTest {
@Test
void testPatternForSlotSingleLetter() {
var grid = createEmpty();
GridBuilder.placeWord(grid, grid.g, Masker.Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_0, 0, Lemma.from(0, "A"));
var key = Masker.Slot.packSlotKey(1, CLUE_RIGHT);
GridBuilder.placeWord(grid, grid.g, Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_0, 0, Lemma.from("A"));
var key = Slot.packSlotKey(1, CLUE_RIGHT);
var pattern = patternForSlot(grid.lo, grid.hi, grid.g, key, 7L, 0L);
assertEquals(1L, pattern);
}
@@ -196,7 +197,7 @@ public class SwedishGeneratorTest {
@Test
void testGrid() {
var grid = new Gridded(createEmpty());
GridBuilder.placeWord(grid.grid(), grid.grid().g, Masker.Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_0, 0, Lemma.from(0, "A"));
GridBuilder.placeWord(grid.grid(), grid.grid().g, Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_0, 0, Lemma.from("A"));
val arr = grid.stream(Masker.Clues.createEmpty()).collect(Collectors.toMap(LetterAt::index, LetterAt::letter));
assertEquals(1, arr.size());
assertEquals(LETTER_A, arr.get(OFF_0_0));
@@ -232,11 +233,11 @@ public class SwedishGeneratorTest {
@Test
void testSlot() {
System.out.println("[DEBUG_LOG] Slot.BIT_FOR_DIR = " + Masker.Slot.BIT_FOR_DIR);
System.out.println("[DEBUG_LOG] Slot.BIT_FOR_DIR = " + Slot.BIT_FOR_DIR);
// key = (r << 8) | (c << 4) | d
var offset = OFF_2_3;
System.out.println("[DEBUG_LOG] Grid.offset(2, 3) = " + offset);
var key = Masker.Slot.packSlotKey(offset, CLUE_DOWN);
var key = Slot.packSlotKey(offset, CLUE_DOWN);
System.out.println("[DEBUG_LOG] key = " + key);
long lo = 0;
// pos 0: (2, 5)
@@ -246,10 +247,10 @@ public class SwedishGeneratorTest {
// pos 2: (4, 5)
lo |= 1L << OFF_4_5;
System.out.println("[DEBUG_LOG] s.dir() = " + Masker.Slot.dir(key));
assertEquals(OFF_2_3, Masker.Slot.clueIndex(key));
assertEquals(CLUE_DOWN, Masker.Slot.dir(key));
assertFalse(Masker.Slot.horiz(key));
System.out.println("[DEBUG_LOG] s.dir() = " + Slot.dir(key));
assertEquals(OFF_2_3, Slot.clueIndex(key));
assertEquals(CLUE_DOWN, Slot.dir(key));
assertFalse(Slot.horiz(key));
var cells = Gridded.walk((byte) key, lo, 0L).toArray();
assertEquals(2, SwedishGenerator.IT[cells[0]].r());
assertEquals(3, SwedishGenerator.IT[cells[1]].r());
@@ -258,8 +259,8 @@ public class SwedishGeneratorTest {
assertEquals(5, SwedishGenerator.IT[cells[1]].c());
assertEquals(5, SwedishGenerator.IT[cells[2]].c());
assertTrue(Masker.Slot.horiz(CLUE_RIGHT)); // right
assertFalse(Masker.Slot.horiz(CLUE_DOWN)); // down
assertTrue(Slot.horiz(CLUE_RIGHT)); // right
assertFalse(Slot.horiz(CLUE_DOWN)); // down
}
static long packPattern(String s) {
@@ -295,9 +296,9 @@ public class SwedishGeneratorTest {
assertEquals(1, slots.length);
var s = slots[0];
assertTrue(Masker.Slot.length(s.lo(), s.hi()) >= 2);
assertEquals(OFF_0_0, Masker.Slot.clueIndex(s.key()));
assertEquals(CLUE_RIGHT, Masker.Slot.dir(s.key()));
assertTrue(Slot.length(s.lo(), s.hi()) >= 2);
assertEquals(OFF_0_0, Slot.clueIndex(s.key()));
assertEquals(CLUE_RIGHT, Slot.dir(s.key()));
}
@Test
@@ -336,7 +337,7 @@ public class SwedishGeneratorTest {
void testPlaceWord() {
var grid = new Gridded(createEmpty());
// Slot at OFF_0_0 length 3, horizontal (right)
var key = Masker.Slot.packSlotKey(0, CLUE_RIGHT);
var key = Slot.packSlotKey(0, CLUE_RIGHT);
var lo = (1L << OFF_0_0) | (1L << OFF_0_1) | (1L << OFF_0_2);
val hi = 0L;
var w1 = ABC;
@@ -362,7 +363,7 @@ public class SwedishGeneratorTest {
// 4. Partial placement then conflict (rollback)
grid = new Gridded(createEmpty());
GridBuilder.placeWord(grid.grid(), grid.grid().g, Masker.Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_2, 0, Lemma.from(0, "X")); // Conflict at the end
GridBuilder.placeWord(grid.grid(), grid.grid().g, Slot.packSlotKey(0, CLUE_RIGHT), 1L << OFF_0_2, 0, Lemma.from("X")); // Conflict at the end
assertFalse(GridBuilder.placeWord(grid.grid(), grid.grid().g, key, lo, hi, w1));
map = grid.stream(Masker.Clues.createEmpty()).collect(Collectors.toMap(LetterAt::index, LetterAt::letter));
assertEquals(1, map.size());
@@ -373,7 +374,7 @@ public class SwedishGeneratorTest {
void testBacktrackingHelpers() {
var grid = new Gridded(createEmpty());
// Slot at 0,1 length 2
var key = Masker.Slot.packSlotKey(0, CLUE_RIGHT);
var key = Slot.packSlotKey(0, CLUE_RIGHT);
var lo = (1L << OFF_0_1) | (1L << OFF_0_2);
var w = AZ;
val low = grid.grid().lo;
@@ -401,8 +402,8 @@ public class SwedishGeneratorTest {
assertTrue(Slotinfo.increasing(CLUE_DOWN)); // Down
assertFalse(Slotinfo.increasing(CLUE_UP)); // Up
assertTrue(Slotinfo.increasing(Masker.Slot.packSlotKey(0, CLUE_RIGHT)));
assertFalse(Slotinfo.increasing(Masker.Slot.packSlotKey(0, CLUE_LEFT)));
assertTrue(Slotinfo.increasing(Slot.packSlotKey(0, CLUE_RIGHT)));
assertFalse(Slotinfo.increasing(Slot.packSlotKey(0, CLUE_LEFT)));
// 2. Test slotScore
val counts = new byte[SIZE];