introduce bitloops

This commit is contained in:
mike
2026-01-17 13:22:04 +01:00
parent 0c56fafeaa
commit 9102dcb922
10 changed files with 706 additions and 82 deletions

View File

@@ -12,10 +12,6 @@ import puzzle.SwedishGenerator.DictEntry;
import puzzle.SwedishGenerator.FillResult;
import puzzle.SwedishGenerator.Grid;
import puzzle.SwedishGenerator.Slotinfo;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -197,7 +193,7 @@ public record Export() {
}
}
record Placed(long lemma, int slotKey, int[] cells) {
record Placed(long lemma, int shardIdx, int slotKey, int[] cells) {
static final char[] DIRECTION = { Placed.VERTICAL, Placed.HORIZONTAL, Placed.VERTICAL, Placed.HORIZONTAL };
public static final char HORIZONTAL = 'h';
@@ -214,9 +210,10 @@ public record Export() {
public record WordOut(String word, int[] cell, int startRow, int startCol, char direction, int arrowRow, int arrowCol, boolean isReversed, int complex, String[] clue) {
public WordOut(long l, int startRow, int startCol, char d, int arrowRow, int arrowCol, boolean isReversed) {
public WordOut(long l, int shardIdx, int startRow, int startCol, char d, int arrowRow, int arrowCol, boolean isReversed) {
val meta = Meta.readRecord(Meta.shardKey(l), shardIdx);
this(Lemma.asWord(l), new int[]{ arrowRow, arrowCol, startRow, startCol }, startRow, startCol, d, arrowRow, arrowCol, isReversed,
CsvIndexService.simpel(Lemma.unpackIndex(l)), CsvIndexService.clues(Lemma.unpackIndex(l)));
meta.simpel(), meta.clues());
}
}
@@ -230,7 +227,7 @@ public record Export() {
for (var n = 1; n < slots.length; n++) {
if (slots[n].assign().w != X) {
k++;
simpel += CsvIndexService.simpel(Lemma.unpackIndex(slots[n].assign().w));
simpel += Meta.readRecord(Meta.shardKey(slots[n].assign().w), slots[n].assign().shardIdx).simpel();//.simpel(Lemma.unpackIndex(slots[n].assign().w));
}
}
simpel = k == 0 ? 0 : simpel / k;
@@ -239,7 +236,7 @@ public record Export() {
public ExportedPuzzle exportFormatFromFilled(int difficulty, Rewards rewards) {
var placed = new ArrayList<Placed>();
for (var slot : slots) {
placed.add(new Placed(slot.assign().w, slot.key(), Gridded.walk((byte) slot.key(), slot.lo(), slot.hi()).toArray()));
placed.add(new Placed(slot.assign().w, slot.assign().shardIdx, slot.key(), Gridded.walk((byte) slot.key(), slot.lo(), slot.hi()).toArray()));
}
// If nothing placed: return full grid mapped to letters/# only
@@ -285,6 +282,7 @@ public record Export() {
int MIN_R = minR, MIN_C = minC;
var wordsOut = placed.stream().map(p -> new WordOut(
p.lemma,
p.shardIdx,
p.startRow() - MIN_R,
p.startCol() - MIN_C,
p.direction(),
@@ -298,19 +296,9 @@ public record Export() {
interface Dicts {
static Dict loadDict(String wordsPath) {
try {
var map = new LongArrayList(100_000);
Files.lines(Path.of(wordsPath), StandardCharsets.UTF_8).forEach(line -> CsvIndexService.lineToLemma(line, map::add));
return makeDict(map.toArray());
} catch (IOException e) {
e.printStackTrace();
throw new RuntimeException("Failed to load dictionary from " + wordsPath, e);
}
}
static Dict makeDict(long[] wordz) {
var index = new DictEntryDTO[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];
Arrays.setAll(index, i -> new DictEntryDTO(i));
Arrays.setAll(index, DictEntryDTO::new);
for (var lemma : wordz) {
var L = Lemma.length(lemma);

View File

@@ -4,6 +4,7 @@ import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.val;
import puzzle.Masker.Clues;
import puzzle.SwedishGenerator.Rng;
import java.io.IOException;
@@ -15,11 +16,10 @@ import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import static puzzle.CsvIndexService.SC;
import static puzzle.Export.*;
import static puzzle.SwedishGenerator.*;
import static puzzle.Export.Dicts.loadDict;
public class Main {
@@ -44,11 +44,12 @@ public class Main {
@NoArgsConstructor
public static class Opts {
static int SSIZE = 20;
public int seed = (int) (System.nanoTime() ^ System.currentTimeMillis());
public int clueSize = 20;
public int pop = 40;
public int offspring = 60;
public int gens = 500;
public int clueSize = SSIZE;
public int pop = SSIZE * 2;
public int offspring = SSIZE * 3;
public int gens = 600;
public String wordsPath = "nl_score_hints_v3.csv";
public double minSimplicity = 0; // 0 means no limit
public int threads = Math.max(1, Runtime.getRuntime().availableProcessors());
@@ -59,16 +60,7 @@ public class Main {
}
void main(String[] args) {
var csv = Paths.get("nl_score_hints_v3.csv");
var idx = Paths.get("nl_score_hints_v3.idx");
try {
val scv = new CsvIndexService(csv, idx);
scv.ensureLoaded();
ScopedValue.where(SC, scv).run(() -> _main(args));
} catch (IOException e) {
throw new RuntimeException(e);
}
_main(args);
}
public void _main(String[] args) {
var opts = parseArgs(args);
@@ -274,7 +266,7 @@ public class Main {
PuzzleResult generatePuzzle(Opts opts) {
var tLoad0 = System.nanoTime();
var dict = loadDict(opts.wordsPath);
var dict = DictData.DICT;//loadDict(opts.wordsPath);
var tLoad1 = System.nanoTime();
section("Load");
@@ -294,10 +286,9 @@ public class Main {
try {
// Keep at least some tasks in flight
final var service = CsvIndexService.SC.get();
for (int i = 0; i < opts.threads; i++) {
final int attemptIdx = ++submitted;
completionService.submit(() -> ScopedValue.where(CsvIndexService.SC, service).call(() -> attempt(new Rng(opts.seed + attemptIdx), dict, opts)));
completionService.submit(() -> attempt(new Rng(opts.seed + attemptIdx), dict, opts));
}
while (System.currentTimeMillis() < deadline) {
@@ -314,7 +305,7 @@ public class Main {
// Submit another task if we still have time
if (System.currentTimeMillis() < deadline) {
final int attemptIdx = ++submitted;
completionService.submit(() -> ScopedValue.where(CsvIndexService.SC, service).call(() -> attempt(new Rng(opts.seed + attemptIdx), dict, opts)));
completionService.submit(() -> attempt(new Rng(opts.seed + attemptIdx), dict, opts));
}
}
if (resFinal == null) warn("status : UNSOLVED (timeout)");
@@ -381,11 +372,44 @@ public class Main {
return null;
}
}
/**
 * Returns a fixed, hand-written clue mask instead of a generated one.
 * <p>
 * Three sample layouts are declared, but only {@code sampleComplex} is parsed and returned;
 * {@code simple} and {@code def} are currently unused. In each layout the digits 0-3 mark clue
 * cells and every other character is skipped by {@code Clues.parse}.
 * NOTE(review): presumably kept to reproduce a specific mask while debugging — confirm before removing.
 */
static Clues generateClues() {
// Unused alternative layout.
String simple = "000 3000\n" +
" 3 \n" +
" 31 \n" +
" 3\n" +
"1 \n" +
"1 \n" +
"1 2\n" +
"1 222 3";
// The layout that is actually parsed below.
String sampleComplex = "1 0000\n" +
"1 \n" +
"00 01 \n" +
" 1 \n" +
" 1 \n" +
" 2 1 \n" +
" 1 \n" +
"221 22\n";
// Unused alternative layout.
String def = " 30000\n" +
"0 001 \n" +
" 1 \n" +
" 3 \n" +
" 3 \n" +
" 32 \n" +
" 32 2\n" +
"2222 3";
return Clues.parse(sampleComplex
);
}
/**
 * Generates a fresh clue mask with a new {@code Masker} seeded from {@code rng}.
 *
 * @param rng  random source handed to the masker
 * @param opts supplies {@code clueSize}, {@code pop}, {@code gens} and {@code offspring}
 * @return whatever {@code Masker.generateMask} returns; the caller in this file checks it for null
 */
static Clues generateNewClues(Rng rng, Opts opts) {
    return new Masker(rng, new int[STACK_SIZE], Masker.Clues.createEmpty())
            .generateMask(opts.clueSize, opts.pop, opts.gens, opts.offspring);
}
static PuzzleResult _attempt(Rng rng, Dict dict, Opts opts) {
long t0 = System.currentTimeMillis();
TOTAL_ATTEMPTS.incrementAndGet();
var masker = new Masker(rng, new int[STACK_SIZE], Masker.Clues.createEmpty());
var mask = masker.generateMask(opts.clueSize, opts.pop, opts.gens, opts.offspring);
val mask = generateNewClues(rng, opts);
//val mask = generateClues();
if (mask == null) return null;
val multiThreaded = Thread.currentThread().getName().contains("pool");
var slots = Masker.extractSlots(mask, dict.index());
@@ -423,7 +447,9 @@ public class Main {
"[ATTEMPT] thread=%s | status=%s | nodes=%d | backtracks=%d | nps=%d | simplicity=%s | time=%.1fs%n",
name, status, filled.nodes(), filled.backtracks(), nps, simplicity, totalTime
);
if (!filled.ok()) {
System.out.println(Arrays.stream(new Clued(mask).gridToString().split("\n")).map(s -> "\"" + s + "\\n\" +").collect(Collectors.joining("\n")));
}
if (filled.ok() && (opts.minSimplicity <= 0 || filled.stats().simplicity >= opts.minSimplicity)) {
return new PuzzleResult(new Clued(mask), new Gridded(grid), slotInfo, filled);
}
@@ -441,7 +467,7 @@ public class Main {
record JsonExportedPuzzle(String date, String theme, int difficulty, Rewards rewards, String[] grid, WordOut[] words) { }
private static String toJson(ExportedPuzzle puzzle, String date, String theme) {
return CsvIndexService.GSON.toJson(new JsonExportedPuzzle(date, theme, puzzle.difficulty(), puzzle.rewards(), puzzle.grid(), puzzle.words()));
return Meta.GSON.toJson(new JsonExportedPuzzle(date, theme, puzzle.difficulty(), puzzle.rewards(), puzzle.grid(), puzzle.words()));
}
private static String escapeJson(String s) {

View File

@@ -436,6 +436,23 @@ public record Masker(Rng rng, int[] stack, Clues cache) {
public long rhi() { return rhi; }
public static Clues createEmpty() { return new Clues(0, 0, 0, 0, 0, 0); }
/**
 * Builds a clue mask from a textual grid.
 * <p>
 * The text is split on {@code '\n'}; row {@code r}, column {@code col} maps to
 * {@code Grid.offset(r, col)}. A digit {@code '0'..'3'} sets a clue with that direction value;
 * any other character is ignored. Rows beyond {@code R} and columns beyond {@code C} are dropped.
 *
 * @param s grid text, one row per line
 * @return a new {@code Clues} holding the parsed clue cells
 */
public static Clues parse(String s) {
    var clues = createEmpty();
    var rows = s.split("\n");
    int rowLimit = Math.min(rows.length, R);
    for (int r = 0; r < rowLimit; r++) {
        var text = rows[r];
        int colLimit = Math.min(text.length(), C);
        for (int col = 0; col < colLimit; col++) {
            char ch = text.charAt(col);
            if (ch < '0' || ch > '3') continue;
            int idx = Grid.offset(r, col);
            byte dir = (byte) (ch - '0');
            // Bit 6 of the offset selects the high word; the low 6 bits pick the bit within it.
            if ((idx & 64) != 0) clues.setClueHi(1L << (idx & 63), dir);
            else clues.setClueLo(1L << idx, dir);
        }
    }
    return clues;
}
public boolean cluelessLo(int idx) {
if (!isClueLo(idx)) return false;
clearClueLo(~(1L << idx));
@@ -506,7 +523,7 @@ public record Masker(Rng rng, int[] stack, Clues cache) {
}
}
static record Slot(int key, long lo, long hi, DictEntry entry) {
public record Slot(int key, long lo, long hi, DictEntry entry) {
static final int BIT_FOR_DIR = 2;
static Slot from(int key, long lo, long hi, DictEntry entry) { return new Slot(key, lo, hi, entry); }

View File

@@ -0,0 +1,72 @@
package puzzle;
import com.google.gson.Gson;
import lombok.val;
import puzzle.SwedishGenerator.Lemma;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.stream.IntStream;
/**
 * Read-side access to the per-word-length hint shards ({@code <length>.idx}).
 * <p>
 * Shard layout, as consumed by {@link #readRecord}: a 12-byte header (magic, version and record
 * count {@code n}, each a 4-byte int in {@link ByteBuffer}'s default big-endian order), then
 * {@code n} int offsets relative to the start of the data section, then the data section holding
 * UTF-8 records of the form {@code WORD \t SIMPEL \t JSON-array-of-clues}.
 */
public class Meta {

    static final Gson GSON = new Gson();
    private static final int VERSION = 1;
    static final int SHARD_MAGIC = 0x49445831; // "IDX1"
    /** Size of the fixed header: magic + version + record count. */
    private static final int HEADER_BYTES = 12;

    /**
     * One decoded shard record.
     *
     * @param w      the record's word, packed with {@code Lemma.pack}
     * @param simpel the integer parsed from the record's second field
     * @param clues  the clue strings parsed from the record's JSON array
     */
    static record ShardLem(long w, int simpel, String[] clues) { }

    /**
     * Reads record {@code i} from {@code shardFile}.
     * <p>
     * This method never throws: any I/O, header, range or parse failure is printed to stderr and
     * a placeholder record (word {@code "XXX"}, simpel {@code -1}, no clues) is returned instead.
     *
     * @param shardFile shard to read
     * @param i         zero-based record index within the shard
     * @return the decoded record, or the placeholder on any failure
     */
    static ShardLem readRecord(Path shardFile, int i) {
        try (FileChannel ch = FileChannel.open(shardFile, StandardOpenOption.READ)) {
            ByteBuffer hdr = readFully(ch, 0L, HEADER_BYTES);
            int magic = hdr.getInt();
            int ver = hdr.getInt();
            int n = hdr.getInt();
            if (magic != SHARD_MAGIC || ver != VERSION) throw new IOException("Bad shard");
            if (i < 0 || i >= n) throw new IndexOutOfBoundsException();

            long tableStart = HEADER_BYTES;
            long dataStart = tableStart + (long) n * 4L;

            int offI = readIntAt(ch, tableStart + (long) i * 4L);
            // The last record has no successor offset; it runs to the end of the file.
            int offIp = (i + 1 < n) ? readIntAt(ch, tableStart + (long) (i + 1) * 4L)
                                    : (int) (ch.size() - dataStart);

            ByteBuffer buf = readFully(ch, dataStart + offI, offIp - offI);
            var text = StandardCharsets.UTF_8.decode(buf).toString();
            var parts = text.split("\t", 3);
            return new ShardLem(Lemma.pack(parts[0]), Integer.parseInt(parts[1]), GSON.fromJson(parts[2], String[].class));
        } catch (IOException | RuntimeException e) {
            // Deliberate best-effort: callers always get a usable record.
            e.printStackTrace();
            return new ShardLem(Lemma.pack("XXX"), -1, new String[0]);
        }
    }

    // NOTE(review): absolute developer path; it equals the default output directory of the
    // generators shown alongside this class. Confirm how this is meant to be deployed.
    static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle").resolve(sId + ".idx")).toArray(
            Path[]::new);

    /**
     * Returns the shard file for a packed lemma, selected by {@code Lemma.length(word)}.
     * Lengths outside {@code 0..9} fail with {@link ArrayIndexOutOfBoundsException}.
     */
    static Path shardKey(long word) {
        int L = Lemma.length(word);
        return SHARDS[L];
    }

    /**
     * Returns the shard key for a plain word: its length in decimal, e.g. {@code "6"}.
     * The first letter is no longer part of the key, so the previously unused first-character
     * lookup (which threw on an empty word) has been removed.
     */
    static String shardKey(String word) {
        return "" + word.length();
    }

    /**
     * Reads one big-endian int at absolute file position {@code pos}.
     * Uses a positional read, so the channel's own position is left untouched.
     *
     * @throws IOException if the file ends before four bytes are available
     */
    static int readIntAt(FileChannel ch, long pos) throws IOException {
        return readFully(ch, pos, 4).getInt();
    }

    /**
     * Reads exactly {@code len} bytes starting at {@code pos}, looping because a single
     * {@code FileChannel.read} may return fewer bytes than requested.
     *
     * @return a buffer flipped for reading, containing exactly {@code len} bytes
     * @throws IOException if end of file is reached first
     */
    private static ByteBuffer readFully(FileChannel ch, long pos, int len) throws IOException {
        ByteBuffer buf = ByteBuffer.allocate(len);
        while (buf.hasRemaining()) {
            if (ch.read(buf, pos + buf.position()) < 0) {
                throw new IOException("Unexpected end of shard at offset " + (pos + buf.position()));
            }
        }
        buf.flip();
        return buf;
    }
}

View File

@@ -114,8 +114,14 @@ public class SwedishGenerator {
x = y;
return y;
}
public int randint2bit() { return nextU32() & 3; }
public byte randint2bitByte() { return (byte) (nextU32() & 3); }
public int randint2bit() { return nextU32() & 3; }
/** Returns the low two bits of the next 32-bit draw as a byte, i.e. a value in 0..3. */
public byte randint2bitByte() {
    return (byte) (nextU32() & 3);
}
public int randint(int max) { return (int) (((nextU32() & 0xFFFFFFFFL) % ((long) max - 0L + 1L))); }
public int randint0_SIZE() { return (int) (((nextU32() & 0xFFFFFFFFL) % RANGE_0_SIZE)); }
public int randint0_624() { return (int) (((nextU32() & 0xFFFFFFFFL) % RANGE_0_624)); }
@@ -166,9 +172,10 @@ public class SwedishGenerator {
static class Assign {
long w;
int shardIdx;
}
static record Slotinfo(int key, long lo, long hi, int score, Assign assign, DictEntry entry) {
public static record Slotinfo(int key, long lo, long hi, int score, Assign assign, DictEntry entry) {
public static int wordCount(int k, Slotinfo[] arr) {
for (var n = 1; n < arr.length; n++) if (arr[n].assign.w != X) k++;
@@ -380,15 +387,17 @@ public class SwedishGenerator {
for (var t = 0; t < tries; t++) {
var r = rng.nextFloat();
//int idxInArray = rng.biasedIndexPow3(L - 1);
var w = entry.words[idxs[(int) (r * r * r * (L - 1))]];
var lemIdx = Lemma.unpackIndex(w);
var arrIndex = (int) (r * r * r * (L - 1));
var w = entry.words[idxs[arrIndex]];
var lemIdx = Lemma.unpackIndex(w);
if (Bit1029.get(used, lemIdx)) continue;
low = glo;
top = ghi;
if (!placeWord(k, slo, shi, w)) continue;
Bit1029.set(used, lemIdx);
s.assign.w = w;
s.assign.w = w;
s.assign.shardIdx = arrIndex;
if (backtrack(depth + 1)) return true;
s.assign.w = X;
Bit1029.clear(used, lemIdx);
@@ -403,16 +412,18 @@ public class SwedishGenerator {
var tries = Math.min(MAX_TRIES_PER_SLOT, N);
for (var t = 0; t < tries; t++) {
double r = rng.nextFloat();
var w = entry.words[(int) (r * r * r * (N - 1))];
var lemIdx = Lemma.unpackIndex(w);
double r = rng.nextFloat();
var shardIndx = (int) (r * r * r * (N - 1));
var w = entry.words[shardIndx];
var lemIdx = Lemma.unpackIndex(w);
if (Bit1029.get(used, lemIdx)) continue;
low = glo;
top = ghi;
if (!placeWord(k, slo, shi, w)) continue;
Bit1029.set(used, lemIdx);
s.assign.w = w;
s.assign.w = w;
s.assign.shardIdx = shardIndx;
if (backtrack(depth + 1)) return true;
s.assign.w = X;
Bit1029.clear(used, lemIdx);

View File

@@ -1,12 +1,18 @@
package puzzle;
import com.google.gson.Gson;
import puzzle.SwedishGenerator.Lemma;
import java.io.*;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.function.LongConsumer;
import static java.nio.charset.StandardCharsets.US_ASCII;
@@ -15,7 +21,6 @@ public final class CsvIndexService
implements Closeable {
static final ScopedValue<CsvIndexService> SC = ScopedValue.newInstance();
static final Gson GSON = new Gson();
private static final int MAGIC = 0x4C494458; // "LIDX"
private static final int VERSION = 1;
static int SIMPEL_IDX = 3;
@@ -41,7 +46,7 @@ public final class CsvIndexService
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
}
return GSON.fromJson(rawClue, String[].class);
return Meta.GSON.fromJson(rawClue, String[].class);
}
public static void lineToLemma(String line, LongConsumer ok) {
if (line.isBlank()) {
@@ -50,10 +55,6 @@ public final class CsvIndexService
var parts = line.split(",", 5);
var id = Integer.parseInt(parts[0].trim());
var word = parts[1].trim();
/* if (!word.matches("^[A-Z]{2,8}$")) {
throw new RuntimeException("Invalid word:" + line);
}*/
int score = Integer.parseInt(parts[2].trim());
if (score < 1) {
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
@@ -249,4 +250,5 @@ public final class CsvIndexService
offsets = null;
}
}
}

View File

@@ -0,0 +1,193 @@
package puzzle;
import lombok.val;
import org.junit.jupiter.api.Test;
import puzzle.Export.Dicts;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
public final class DictCodeGen {
/**
 * Entry point; forwards all arguments unchanged to {@code DictJavaGenerator.main}.
 *
 * @param args passed through as-is
 */
public static void main(String[] args) throws Exception {
DictJavaGenerator.main(args); // uses your makeDict logic
}
/**
* Generates Java source files for dictionary data, split by word length (2..8),
* with each array further chunked to avoid "code too large" / constant pool issues.
*
* Output:
* - DictDataL2.java .. DictDataL8.java (arrays chunked)
* - DictData.java (aggregator that builds Dict)
*
* Usage:
* java puzzle.DictCodeGen [wordsFile] [outDir]
*
* Both arguments are optional and fall back to built-in defaults. The package of the
* generated sources is fixed to "puzzle"; a third argument is not read.
*
* Example:
* java puzzle.DictCodeGen nl_score_hints_v3.csv src/main/java
*/
public final class DictJavaGenerator {
// tune if needed
// 8_192 >>> 5 evaluates to 256, so each emitted array constant holds at most 256 longs.
private static final int WORDS_CHUNK = 8_192>>>5; // longs per chunk
private static final int POS_CHUNK = 8_192>>>5; // longs per chunk
/**
 * Builds the dictionary from the words file and writes one source file per word length 2..8
 * plus the {@code DictData} aggregator.
 *
 * @param args {@code args[0]} words file (optional), {@code args[1]} output directory (optional)
 * @throws IllegalStateException if any length 2..8 has no words
 */
public static void main(String[] args) throws Exception {
    String wordsArg = args.length > 0 ? args[0] : "nl_score_hints_v3.csv";
    String outArg = args.length > 1 ? args[1] : "/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle";
    Path wordsFile = Path.of(wordsArg);
    Path outDir = Path.of(outArg);
    String pkg = "puzzle";

    SwedishGenerator.Dict dict = buildDict(wordsFile);
    Files.createDirectories(outDir);

    // One source file per word length.
    for (int len = 2; len <= 8; len++) {
        var entry = dict.index()[len];
        boolean missing = entry == null || entry.words() == null || entry.words().length == 0;
        if (missing) {
            throw new IllegalStateException("No words for length " + len);
        }
        writeLengthClass(outDir, pkg, "DictDataL" + len, len, entry);
    }

    // Aggregator that ties the per-length classes together.
    writeAggregator(outDir, pkg, "DictData", dict.length());
    System.out.println("Generated dictionary sources into: " + outDir.toAbsolutePath());
}
/**
 * Reads the words file line by line, collects every lemma that
 * {@code CsvIndexService.lineToLemma} emits, and builds the dictionary from them.
 *
 * @param wordsPath UTF-8 words file
 * @return the dictionary produced by {@code Dicts.makeDict}
 * @throws IOException if the file cannot be opened
 */
private static SwedishGenerator.Dict buildDict(Path wordsPath) throws IOException {
    var lemmas = new LongArrayList(100_000);
    try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
        for (var it = lines.iterator(); it.hasNext(); ) {
            CsvIndexService.lineToLemma(it.next(), lemmas::add);
        }
    }
    return Dicts.makeDict(lemmas.toArray());
}
/**
 * Writes {@code <cls>.java}: a class exposing a static {@code DICT} assembled from
 * {@code DictDataL2.entry()} .. {@code DictDataL8.entry()}.
 *
 * @param outDir   directory receiving the file
 * @param pkg      package name written into the source
 * @param cls      simple name of the generated class
 * @param totalLen value written as the second argument of the generated {@code Dict} constructor call
 */
private static void writeAggregator(Path outDir, String pkg, String cls, int totalLen) throws IOException {
    Path target = outDir.resolve(cls + ".java");
    try (BufferedWriter w = Files.newBufferedWriter(target, StandardCharsets.UTF_8,
            StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
        w.write("package " + pkg + ";\n\n");
        w.write("public final class " + cls + " {\n");
        w.write(" private " + cls + "() {}\n\n");
        w.write(" public static final SwedishGenerator.Dict DICT = build();\n\n");
        w.write(" private static SwedishGenerator.Dict build() {\n");
        w.write(" SwedishGenerator.DictEntry[] idx = new SwedishGenerator.DictEntry[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];\n");
        for (int len = 2; len <= 8; len++) {
            w.write(" idx[" + len + "] = DictDataL" + len + ".entry();\n");
        }
        w.write(" return new SwedishGenerator.Dict(idx, " + totalLen + ");\n");
        w.write(" }\n");
        w.write("}\n");
    }
}
/**
 * Writes {@code <cls>.java} for one word length: the entry's words and its flattened position
 * bitsets as chunked {@code long[]} constants, joiner methods that reassemble them, and an
 * {@code entry()} factory that rebuilds the {@code DictEntry}.
 *
 * @param outDir directory receiving the file
 * @param pkg    package name written into the source
 * @param cls    simple name of the generated class
 * @param L      word length, emitted as the {@code LEN} constant
 * @param e      dictionary entry supplying {@code words()} and {@code posBitsets()}
 */
private static void writeLengthClass(Path outDir, String pkg, String cls, int L, SwedishGenerator.DictEntry e) throws IOException {
Path out = outDir.resolve(cls + ".java");
try (BufferedWriter w = Files.newBufferedWriter(out, StandardCharsets.UTF_8,
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
w.write("package " + pkg + ";\n\n");
w.write("public final class " + cls + " {\n");
w.write(" private " + cls + "() {}\n\n");
long[] words = e.words();
// flatten posBitsets: [rows][cols] -> flat[]
long[][] bs = e.posBitsets();
int rows = bs.length;
// Reads the column count from the first row, so this requires at least one row.
int cols = bs[0].length;
long[] flat = new long[rows * cols];
int t = 0;
for (int r = 0; r < rows; r++) {
System.arraycopy(bs[r], 0, flat, t, cols);
t += cols;
}
w.write(" static final int LEN = " + L + ";\n");
w.write(" static final int ROWS = " + rows + ";\n");
w.write(" static final int COLS = " + cols + ";\n");
w.write(" static final int WORDS_LEN = " + words.length + ";\n");
w.write(" static final int POS_LEN = " + flat.length + ";\n\n");
// chunked arrays
int wordChunks = emitLongArrayChunked(w, "WORDS", words, WORDS_CHUNK);
int posChunks = emitLongArrayChunked(w, "POS", flat, POS_CHUNK);
// joiners
emitJoiner(w, "WORDS", "WORDS", words.length, wordChunks);
emitJoiner(w, "POS", "POS", flat.length, posChunks);
// entry builder
w.write(" public static SwedishGenerator.DictEntry entry() {\n");
w.write(" long[] words = WORDS();\n");
w.write(" long[] flat = POS();\n");
w.write(" long[][] pos = reshape(flat, ROWS, COLS);\n");
w.write(" return new SwedishGenerator.DictEntry(words, pos, words.length, (words.length + 63) >>> 6);\n");
w.write(" }\n\n");
// helpers
w.write(" private static int copy(long[] dst, int at, long[] src) {\n");
w.write(" System.arraycopy(src, 0, dst, at, src.length);\n");
w.write(" return at + src.length;\n");
w.write(" }\n\n");
w.write(" private static long[][] reshape(long[] flat, int rows, int cols) {\n");
w.write(" long[][] out = new long[rows][cols];\n");
w.write(" int k = 0;\n");
w.write(" for (int r = 0; r < rows; r++) {\n");
w.write(" System.arraycopy(flat, k, out[r], 0, cols);\n");
w.write(" k += cols;\n");
w.write(" }\n");
w.write(" return out;\n");
w.write(" }\n");
w.write("}\n");
}
}
/**
 * Emits {@code data} as consecutive {@code static final long[]} constants named
 * {@code baseName_0}, {@code baseName_1}, ... with at most {@code chunkSize} elements each.
 *
 * @return the number of constants written
 */
private static int emitLongArrayChunked(BufferedWriter w, String baseName, long[] data, int chunkSize) throws IOException {
    int chunks = (data.length + chunkSize - 1) / chunkSize;
    int from = 0;
    for (int ci = 0; ci < chunks; ci++) {
        int to = Math.min(data.length, from + chunkSize);
        w.write(" static final long[] " + baseName + "_" + ci + " = new long[] {\n");
        for (int i = from; i < to; i++) {
            // No trailing comma after the last element of a chunk.
            String sep = (i + 1 < to) ? "," : "";
            w.write(" " + toLongLiteral(data[i]) + sep + "\n");
        }
        w.write(" };\n\n");
        from = to;
    }
    return chunks;
}
/**
 * Emits a static method {@code funcName()} that allocates a {@code long[totalLen]} and copies
 * {@code baseName_0 .. baseName_(chunks-1)} into it in order.
 */
private static void emitJoiner(BufferedWriter w, String funcName, String baseName, int totalLen, int chunks) throws IOException {
    var src = new StringBuilder();
    src.append(" static long[] ").append(funcName).append("() {\n");
    src.append(" long[] out = new long[").append(totalLen).append("];\n");
    src.append(" int k = 0;\n");
    for (int ci = 0; ci < chunks; ci++) {
        src.append(" k = copy(out, k, ").append(baseName).append("_").append(ci).append(");\n");
    }
    src.append(" return out;\n");
    src.append(" }\n\n");
    w.write(src.toString());
}
/** Formats {@code v} as a Java long literal in unsigned lowercase hex, e.g. {@code 0xffL}. */
private static String toLongLiteral(long v) {
    return "0x" + Long.toHexString(v) + "L";
}
}
}

View File

@@ -0,0 +1,279 @@
package puzzle;
import org.junit.jupiter.api.Test;
import puzzle.Export.Dicts;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.Arrays;
public final class DictJavaGeneratorMulti {
// Smaller = more files, but safer for javac/class limits.
// Each chunk becomes its own generated class holding at most this many longs.
private static final int WORDS_CHUNK = 8_192;
private static final int POS_CHUNK = 8_192;
/**
 * Prints {@code DictData.DICT}; the test makes no assertions.
 * NOTE(review): presumably a smoke check that the generated sources load — confirm.
 */
@Test
public void dictCodeGen15() {
System.out.println(DictData.DICT);
}
/**
 * Builds the dictionary, writes the per-length chunk and assembler classes for lengths 2..8,
 * the {@code DictData} aggregator, and finally the hint shards.
 *
 * @param args {@code args[0]} words file (optional), {@code args[1]} output directory (optional)
 * @throws IllegalStateException if any length 2..8 has no words
 */
public static void main(String[] args) throws Exception {
    boolean hasWords = args.length > 0;
    boolean hasOut = args.length > 1;
    Path wordsFile = Path.of(hasWords ? args[0] : "nl_score_hints_v3.csv");
    Path outDir = Path.of(hasOut ? args[1] : "/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle");
    String pkg = "puzzle";

    SwedishGenerator.Dict dict = buildDict(wordsFile);
    Files.createDirectories(outDir);

    // Per-length bundles.
    for (int len = 2; len <= 8; len++) {
        var entry = dict.index()[len];
        if (entry == null || entry.words() == null || entry.words().length == 0) {
            throw new IllegalStateException("No words for length " + len);
        }
        writeLengthBundle(outDir, pkg, len, entry);
    }

    writeAggregator(outDir, pkg, "DictData", dict.length());
    generateHintShards(wordsFile, outDir);
    System.out.println("Generated sources into: " + outDir.toAbsolutePath());
}
/**
 * Collects every lemma that {@code CsvIndexService.lineToLemma} emits for the lines of
 * {@code wordsPath} and builds the dictionary with {@code Dicts.makeDict}.
 *
 * @throws IOException if the file cannot be opened
 */
private static SwedishGenerator.Dict buildDict(Path wordsPath) throws IOException {
    var lemmas = new LongArrayList(100_000);
    try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
        for (var it = lines.iterator(); it.hasNext(); ) {
            CsvIndexService.lineToLemma(it.next(), lemmas::add);
        }
    }
    long[] packed = lemmas.toArray();
    return Dicts.makeDict(packed);
}
// Format version written into every shard header by writeIndexedShard.
// NOTE(review): the shard reader in Meta checks the header version against its own constant;
// keep the two values in sync.
static final int VERSION = 1;
/**
 * Extracts the word column from a CSV line shaped like {@code ID,WORD,*,*,"JSON"}.
 * The line is split on the first four commas and the second field is returned trimmed.
 *
 * @throws ArrayIndexOutOfBoundsException if the line contains no comma
 */
static String wordFromLine(String line) {
    String[] fields = line.split(",", 5);
    String rawWord = fields[1];
    return rawWord.trim();
}
/**
 * Minimal growable list of primitive ints.
 * <p>
 * Fixes over the previous version: growth now works from an initial capacity of 0 (doubling 0
 * stayed 0, so the first {@code add} threw), and {@code get} rejects indices at or beyond
 * {@code size} instead of returning unused backing slots.
 */
static final class IntArrayList {
    int[] a;
    int size;

    /** @param cap initial capacity; may be 0 */
    IntArrayList(int cap) { a = new int[cap]; }

    /** Appends {@code v}, growing the backing array when it is full. */
    void add(int v) {
        if (size == a.length) {
            // Always grow by at least one slot so a zero-capacity list can accept elements.
            a = Arrays.copyOf(a, Math.max(a.length + 1, a.length * 2));
        }
        a[size++] = v;
    }

    /** Number of elements added so far. */
    int size() { return size; }

    /**
     * Returns the element at {@code i}.
     *
     * @throws IndexOutOfBoundsException if {@code i} is not in {@code [0, size)}
     */
    int get(int i) { return a[java.util.Objects.checkIndex(i, size)]; }

    /** Returns a copy of the stored elements, trimmed to {@code size}. */
    int[] toArray() { return Arrays.copyOf(a, size); }
}
/**
 * Accumulates the records of one shard in memory: the raw record bytes plus, for each record,
 * the byte offset at which it starts within {@code data}.
 */
static final class ShardBuilder {
    final IntArrayList offsets = new IntArrayList(4096);
    final ByteArrayOutputStream data = new ByteArrayOutputStream(1 << 20); // grows

    /** Appends {@code rec}, recording the offset it starts at before writing it. */
    void addRecord(byte[] rec) throws IOException {
        int start = data.size();
        offsets.add(start);
        data.write(rec);
    }
}
/**
 * Splits the hint CSV into indexed shard files, one per shard key, written as
 * {@code <key>.idx} under {@code outDir}.
 * <p>
 * Every non-blank line becomes one record {@code WORD \t SIMPEL \t JSON \n} (UTF-8), appended to
 * the shard chosen by {@code Meta.shardKey(word)} in file order.
 * NOTE(review): records are indexed by their position among ALL non-blank lines of a shard;
 * readers that look records up by dictionary index rely on the dictionary containing the same
 * lines in the same order — confirm against {@code CsvIndexService.lineToLemma} and
 * {@code Dicts.makeDict}.
 *
 * @param csv    UTF-8 hint file
 * @param outDir directory receiving the shard files; created if missing
 */
static void generateHintShards(Path csv, Path outDir) throws IOException {
    Files.createDirectories(outDir);
    var shards = new java.util.HashMap<String, ShardBuilder>(256);
    try (var lines = Files.lines(csv, StandardCharsets.UTF_8)) {
        for (var it = lines.iterator(); it.hasNext(); ) {
            String line = it.next();
            if (line == null || line.isBlank()) continue;
            String word = wordFromLine(line);
            String[] clues = CsvIndexService.lineToClue(line);
            int simpel = CsvIndexService.lineToSimpel(line);
            // Clues are stored as a JSON array so the reader can decode them with Gson.
            String json = Meta.GSON.toJson(clues);
            byte[] rec = (word + "\t" + simpel + "\t" + json + "\n").getBytes(StandardCharsets.UTF_8);
            shards.computeIfAbsent(Meta.shardKey(word), k -> new ShardBuilder()).addRecord(rec);
        }
    } catch (UncheckedIOException uioe) {
        // Read failures surface from the stream wrapped; rethrow the underlying IOException.
        throw uioe.getCause();
    }
    // Flush every shard to disk as <key>.idx.
    for (var shard : shards.entrySet()) {
        writeIndexedShard(outDir.resolve(shard.getKey() + ".idx"), shard.getValue());
    }
}
/**
 * Writes one shard file: a 12-byte header (magic, version, record count), the per-record
 * offsets table (one int per record), then the concatenated record bytes.
 * <p>
 * Each section is written in a loop until its buffer is drained, because a single
 * {@code FileChannel.write} is not guaranteed to write all remaining bytes.
 *
 * @param out target file; created or truncated
 * @param sb  records and offsets to persist
 */
static void writeIndexedShard(Path out, ShardBuilder sb) throws IOException {
    int n = sb.offsets.size();
    int[] offs = sb.offsets.toArray();
    byte[] data = sb.data.toByteArray();
    try (FileChannel ch = FileChannel.open(out,
            StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING,
            StandardOpenOption.WRITE)) {
        // header
        ByteBuffer hdr = ByteBuffer.allocate(12);
        hdr.putInt(Meta.SHARD_MAGIC).putInt(VERSION).putInt(n).flip();
        writeFully(ch, hdr);
        // offsets table (int per record)
        ByteBuffer tbl = ByteBuffer.allocate(n * 4);
        for (int off : offs) tbl.putInt(off);
        tbl.flip();
        writeFully(ch, tbl);
        // data
        writeFully(ch, ByteBuffer.wrap(data));
    }
}

/** Writes {@code buf} to {@code ch} until no bytes remain. */
private static void writeFully(FileChannel ch, ByteBuffer buf) throws IOException {
    while (buf.hasRemaining()) {
        ch.write(buf);
    }
}
/**
 * Writes {@code <cls>.java}: a class exposing a static {@code DICT} assembled from
 * {@code DictDataL2.entry()} .. {@code DictDataL8.entry()}.
 *
 * @param totalLen value written as the second argument of the generated {@code Dict} constructor call
 */
private static void writeAggregator(Path outDir, String pkg, String cls, int totalLen) throws IOException {
    var src = new StringBuilder();
    src.append("package " + pkg + ";\n\n");
    src.append("public final class " + cls + " {\n");
    src.append(" private " + cls + "() {}\n\n");
    src.append(" public static final SwedishGenerator.Dict DICT = build();\n\n");
    src.append(" private static SwedishGenerator.Dict build() {\n");
    src.append(" SwedishGenerator.DictEntry[] idx = new SwedishGenerator.DictEntry[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];\n");
    for (int len = 2; len <= 8; len++) {
        src.append(" idx[" + len + "] = DictDataL" + len + ".entry();\n");
    }
    src.append(" return new SwedishGenerator.Dict(idx, " + totalLen + ");\n");
    src.append(" }\n");
    src.append("}\n");

    try (BufferedWriter w = writer(outDir.resolve(cls + ".java"))) {
        w.write(src.toString());
    }
}
/**
 * Writes all generated classes for one word length: the word chunks ({@code DictDataL<L>W<i>}),
 * the flattened position-bitset chunks ({@code DictDataL<L>P<i>}) and the assembler class
 * {@code DictDataL<L>}.
 *
 * @param L word length
 * @param e dictionary entry supplying {@code words()} and {@code posBitsets()}
 */
private static void writeLengthBundle(Path outDir, String pkg, int L, SwedishGenerator.DictEntry e) throws IOException {
    long[] words = e.words();

    // Flatten posBitsets row by row: [rows][cols] -> flat[rows * cols].
    long[][] bitsets = e.posBitsets();
    int rows = bitsets.length;
    int cols = bitsets[0].length;
    long[] flat = new long[rows * cols];
    for (int r = 0; r < rows; r++) {
        System.arraycopy(bitsets[r], 0, flat, r * cols, cols);
    }

    String base = "DictDataL" + L;
    int wordChunkCount = writeChunkClasses(outDir, pkg, base + "W", words, WORDS_CHUNK);
    int posChunkCount = writeChunkClasses(outDir, pkg, base + "P", flat, POS_CHUNK);
    writeLengthAssembler(outDir, pkg, base, L, rows, cols, words.length, flat.length, wordChunkCount, posChunkCount);
}
/**
 * Writes one class per chunk, named {@code <prefix>0}, {@code <prefix>1}, ..., each holding a
 * {@code public static final long[] DATA} with at most {@code chunkSize} elements.
 *
 * @return the number of chunk classes written
 */
private static int writeChunkClasses(Path outDir, String pkg, String prefix, long[] data, int chunkSize) throws IOException {
    int chunks = (data.length + chunkSize - 1) / chunkSize;
    for (int ci = 0, from = 0; ci < chunks; ci++, from += chunkSize) {
        int to = Math.min(data.length, from + chunkSize);
        String cls = prefix + ci;
        try (BufferedWriter w = writer(outDir.resolve(cls + ".java"))) {
            w.write("package " + pkg + ";\n\n");
            w.write("public final class " + cls + " {\n");
            w.write(" private " + cls + "() {}\n");
            w.write(" public static final long[] DATA = new long[] {\n");
            for (int i = from; i < to; i++) {
                // No trailing comma after the last element of the chunk.
                String sep = (i + 1 < to) ? "," : "";
                w.write(" " + toLongLiteral(data[i]) + sep + "\n");
            }
            w.write(" };\n");
            w.write("}\n");
        }
    }
    return chunks;
}
/**
 * Writes {@code <cls>.java}: the per-length class that copies the chunk classes' {@code DATA}
 * arrays back into one words array and one flat position array, reshapes the latter into
 * {@code [ROWS][COLS]}, and exposes the result through {@code entry()}.
 *
 * @param L        word length; used for the {@code LEN} constant and the chunk class names
 * @param rows     emitted as {@code ROWS}
 * @param cols     emitted as {@code COLS}
 * @param wordsLen emitted as {@code WORDS_LEN}, the size of the assembled words array
 * @param posLen   emitted as {@code POS_LEN}, the size of the assembled flat position array
 * @param wChunks  number of {@code DictDataL<L>W<i>} classes to copy from
 * @param pChunks  number of {@code DictDataL<L>P<i>} classes to copy from
 */
private static void writeLengthAssembler(Path outDir, String pkg, String cls, int L,
int rows, int cols,
int wordsLen, int posLen,
int wChunks, int pChunks) throws IOException {
Path out = outDir.resolve(cls + ".java");
try (BufferedWriter w = writer(out)) {
w.write("package " + pkg + ";\n\n");
w.write("public final class " + cls + " {\n");
w.write(" private " + cls + "() {}\n\n");
w.write(" static final int LEN = " + L + ";\n");
w.write(" static final int ROWS = " + rows + ";\n");
w.write(" static final int COLS = " + cols + ";\n");
w.write(" static final int WORDS_LEN = " + wordsLen + ";\n");
w.write(" static final int POS_LEN = " + posLen + ";\n\n");
// assemble words
w.write(" private static long[] words() {\n");
w.write(" long[] out = new long[WORDS_LEN];\n");
w.write(" int k = 0;\n");
for (int ci = 0; ci < wChunks; ci++) {
w.write(" k = copy(out, k, DictDataL" + L + "W" + ci + ".DATA);\n");
}
w.write(" return out;\n");
w.write(" }\n\n");
// assemble pos
w.write(" private static long[] posFlat() {\n");
w.write(" long[] out = new long[POS_LEN];\n");
w.write(" int k = 0;\n");
for (int ci = 0; ci < pChunks; ci++) {
w.write(" k = copy(out, k, DictDataL" + L + "P" + ci + ".DATA);\n");
}
w.write(" return out;\n");
w.write(" }\n\n");
// entry
w.write(" public static SwedishGenerator.DictEntry entry() {\n");
w.write(" long[] wds = words();\n");
w.write(" long[] flat = posFlat();\n");
w.write(" long[][] pos = reshape(flat, ROWS, COLS);\n");
w.write(" return new SwedishGenerator.DictEntry(wds, pos, wds.length, (wds.length + 63) >>> 6);\n");
w.write(" }\n\n");
// helpers
w.write(" private static int copy(long[] dst, int at, long[] src) {\n");
w.write(" System.arraycopy(src, 0, dst, at, src.length);\n");
w.write(" return at + src.length;\n");
w.write(" }\n\n");
w.write(" private static long[][] reshape(long[] flat, int rows, int cols) {\n");
w.write(" long[][] out = new long[rows][cols];\n");
w.write(" int k = 0;\n");
w.write(" for (int r = 0; r < rows; r++) {\n");
w.write(" System.arraycopy(flat, k, out[r], 0, cols);\n");
w.write(" k += cols;\n");
w.write(" }\n");
w.write(" return out;\n");
w.write(" }\n");
w.write("}\n");
}
}
/** Opens {@code out} for UTF-8 writing, creating the file or truncating an existing one. */
private static BufferedWriter writer(Path out) throws IOException {
    StandardOpenOption[] overwrite = {
            StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE
    };
    return Files.newBufferedWriter(out, StandardCharsets.UTF_8, overwrite);
}
/** Formats {@code v} as a Java long literal in unsigned lowercase hex, e.g. {@code 0xffL}. */
private static String toLongLiteral(long v) {
    String hex = Long.toHexString(v);
    return "0x" + hex + "L";
}
}

View File

@@ -10,9 +10,11 @@ import puzzle.Export.PuzzleResult;
import puzzle.Export.Rewards;
import puzzle.SwedishGenerator.Assign;
import puzzle.SwedishGenerator.FillResult;
import puzzle.SwedishGenerator.Lemma;
import puzzle.SwedishGenerator.Slotinfo;
import puzzle.SwedishGeneratorTest.Idx;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
@@ -69,7 +71,7 @@ public class ExportFormatTest {
var fillResult = new FillResult(true, 0, 0, 0, 0, new FillStats());
var puzzleResult = new PuzzleResult(new Clued(clues), grid, new Slotinfo[]{
new Slotinfo(key, lo, 0L, 0, new Assign(TEST), null)
new Slotinfo(key, lo, 0L, 0, new Assign(TEST, 0), null)
}, fillResult);
var rewards = new Rewards(10, 5, 1);
@@ -134,5 +136,15 @@ public class ExportFormatTest {
throw new RuntimeException(e);
}
}
/**
 * Checks that a dictionary word and the shard record stored under the same index refer to the
 * same word.
 * <p>
 * {@code Meta.readRecord} never returns null (it returns a placeholder on failure), so a bare
 * not-null check cannot fail; the word comparison is the assertion that matters.
 * NOTE(review): assumes {@code Lemma.asWord} yields the plain word for both a dictionary lemma
 * and a value produced by {@code Lemma.pack} — confirm.
 */
@Test
void testShardToClue() {
    val index = 1;
    val word = DictData.DICT.index()[3].words()[index];
    val assigned = new Assign(word, index);
    val shard = Meta.shardKey(assigned.w);
    val clue = Meta.readRecord(shard, assigned.shardIdx);
    assertNotNull(clue);
    assertEquals(Lemma.asWord(word), Lemma.asWord(clue.w()),
                 "shard record at index " + index + " should describe the dictionary word at the same index");
}
}

View File

@@ -13,6 +13,10 @@ import puzzle.Export.Rewards;
import puzzle.Main.Opts;
import puzzle.Masker.Clues;
import puzzle.SwedishGenerator.Rng;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -43,7 +47,16 @@ public class MainTest {
this.tries = 1;
this.verbose = false;
}};
static final Dict dict = Dicts.loadDict(opts.wordsPath);
static final Dict dict = loadDict(opts.wordsPath);
/**
 * Loads the dictionary from a UTF-8 words file: every lemma that
 * {@code CsvIndexService.lineToLemma} emits is collected and passed to {@code Dicts.makeDict}.
 *
 * @param wordsPath path of the words file
 * @throws RuntimeException wrapping the {@link IOException} if the file cannot be opened
 */
public static Dict loadDict(String wordsPath) {
    var lemmas = new LongArrayList(100_000);
    try (var lines = Files.lines(Path.of(wordsPath), StandardCharsets.UTF_8)) {
        lines.forEach(line -> CsvIndexService.lineToLemma(line, lemmas::add));
    } catch (IOException e) {
        throw new RuntimeException("Failed to load dictionary from " + wordsPath, e);
    }
    return Dicts.makeDict(lemmas.toArray());
}
@Test
void testExtractSlots() {
@@ -173,25 +186,35 @@ public class MainTest {
}
@Test
void testFiller2() {
val mask = "1 000000\n" +
"1 \n" +
"1 \n" +
"3 3 \n" +
"3 0 3 \n" +
"3 \n" +
"3 \n" +
"222 3";
val rng = new Rng(-343913721);
val mask = Clues.parse(
"1 000000\n" +
"1 \n" +
"1 \n" +
"3 3 \n" +
"3 0 3 \n" +
"3 \n" +
"3 \n" +
"222 3");
Assertions.assertEquals(20, mask.clueCount());
var slots = Masker.extractSlots(mask, dict.index());
val slotInfo = Masker.scoreSlots(new int[slots.length], slots);
var grid = mask.toGrid();
var filled = fillMask(rng, slotInfo, grid, false);
// val res = new PuzzleResult(new Clued(mask), new Gridded(grid), slotInfo, filled).exportFormatFromFilled(0, new Rewards(0, 0, 0));
}
@Test
void testFiller() {
val rng = new Rng(-343913721);
val mask = new Clues(
74732156493031040L,
193L,
281475397248512L,
128L,
422762372923520L,
192L);
val mask = Clues.parse(
" 3 300\n" +
" 1 \n" +
" 1 \n" +
" 3 0 \n" +
" 31 \n" +
" 1 \n" +
" 1 2\n" +
"21 22 3");
var slots = Masker.extractSlots(mask, dict.index());
val slotInfo = Masker.scoreSlots(new int[slots.length], slots);
var grid = mask.toGrid();
@@ -204,6 +227,7 @@ public class MainTest {
var g = new Gridded(grid);
g.gridToString(mask);
var aa = new PuzzleResult(new Clued(mask), g, slotInfo, filled).exportFormatFromFilled(1, new Rewards(1, 1, 1));
System.out.println(String.join("\n", aa.grid()));
}
@Test