introduce bitloops

This commit is contained in:
mike
2026-01-20 09:47:55 +01:00
parent a764f45041
commit 8b7827cfc2
10 changed files with 252 additions and 94 deletions

View File

@@ -235,8 +235,8 @@ public record Export() {
public record WordOut(String word, int[] cell, int startRow, int startCol, char direction, int arrowRow, int arrowCol, boolean isReversed, int complex, String[] clue) {
public WordOut(Path shard,long l, int startRow, int startCol, char d, int arrowRow, int arrowCol, boolean isReversed, byte[] bytes) {
val meta = Meta.readRecord(shard, Lemma.unpackShardIndex(l));
public WordOut(long l, int startRow, int startCol, char d, int arrowRow, int arrowCol, boolean isReversed, byte[] bytes) {
val meta = Meta.lookup(l);
this(Lemma.asWord(l, bytes), new int[]{ arrowRow, arrowCol, startRow, startCol }, startRow, startCol, d, arrowRow, arrowCol, isReversed,
meta.simpel(), meta.clues());
}
@@ -244,27 +244,15 @@ public record Export() {
public record ExportedPuzzle(String[] grid, WordOut[] words, int difficulty, Rewards rewards) { }
public record PuzzleResult(Clued clues, Gridded grid, Slotinfo[] slots, FillResult filled, int thress) {
public record PuzzleResult(Clued clues, Gridded grid, Slotinfo[] slots, FillResult filled) {
public Path shardKey(long word) {
return shardKey(this.thress, word);
}
public static Path shardKey(int thress, long word) {
if (thress <= 0)
return Path.of("src/main/generated-sources/puzzle").resolve(Lemma.unpackSize(word) + 1 + ".idx");
else
return Path.of("src/main/generated-sources/puzzle/dict" + thress).resolve(Lemma.unpackSize(word) + 1 + ".idx");
}
public long calcSimpel( Slotinfo[] slots) {
return calcSimpel(thress, slots);
}
static public long calcSimpel(int thress, Slotinfo[] slots) {
static public long calcSimpel(Slotinfo[] slots) {
int k = 0;
long simpel = 0L;
for (var n = 1; n < slots.length; n++) {
if (slots[n].assign().w != X) {
k++;
simpel += Meta.readRecord(shardKey(thress,slots[n].assign().w), Lemma.unpackShardIndex(slots[n].assign().w)).simpel();
simpel += Meta.lookup(slots[n].assign().w).simpel();
}
}
simpel = k == 0 ? 0 : simpel / k;
@@ -311,7 +299,6 @@ public record Export() {
int MIN_R = minR, MIN_C = minC;
val bytes = BYTES.get();
var wordsOut = Arrays.stream(placed).map(p -> new WordOut(
shardKey( p.lemma),
p.lemma,
p.startRow() - MIN_R,
p.startCol() - MIN_C,

View File

@@ -395,7 +395,7 @@ public class Main {
TOTAL_NODES.addAndGet(filled.nodes());
TOTAL_BACKTRACKS.addAndGet(filled.backtracks());
if (filled.ok()) {
val simpel = PuzzleResult.calcSimpel(THRESS,slotInfo);
val simpel = PuzzleResult.calcSimpel(slotInfo);
TOTAL_SUCCESS.incrementAndGet();
TOTAL_SIMPLICITY.addAndGet(simpel * 100);
}
@@ -414,7 +414,7 @@ public class Main {
System.out.println(Arrays.stream(new Clued(mask).gridToString().split("\n")).map(s -> "\"" + s + "\\n\" +").collect(Collectors.joining("\n")));
}
if (filled.ok() && (opts.minSimplicity <= 0 || filled.stats().simplicity >= opts.minSimplicity)) {
return new PuzzleResult(new Clued(mask), new Gridded(grid, mask), slotInfo, filled, THRESS);
return new PuzzleResult(new Clued(mask), new Gridded(grid, mask), slotInfo, filled);
}
if (opts.verbose && filled.ok()) {

View File

@@ -1,59 +1,127 @@
package puzzle;
import module java.base;
import com.google.gson.Gson;
import lombok.val;
import puzzle.SwedishGenerator.Lemma;
import static puzzle.SwedishGenerator.THRESS;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Locale;
public class Meta {
static final Gson GSON = new Gson();
private static final int VERSION = 1;
static record ShardLem(long w, int simpel, String[] clues) { }
static final Gson GSON = new Gson();
static final int VERSION = 1;
static final int SHARD_MAGIC = 0x49445831; // "IDX1"
static ShardLem readRecord(Path shardFile, int i) {
static final int MAP_MAGIC = 0x4D415031; // "MAP1"
static final ByteOrder ORDER = ByteOrder.BIG_ENDIAN;
static record ShardRec(String word, long w, int simpel, String[] clues) { }
static final Path projectRoot = Path.of("").toAbsolutePath().normalize(); // current working dir
static final Path dir = projectRoot.resolve("src/main/resources/shards");
static final Path shardData = dir.resolve("shard0.data");
static final Path shardMap = dir.resolve("shard0.map");
/**
 * Normalizes a word to the form used as shard key input.
 *
 * <p>Important: the same normalization must be applied when building the shard and when
 * querying it, otherwise keys will not match.
 *
 * @param s word to normalize
 * @return {@code s} upper-cased with the locale-independent {@link Locale#ROOT} rules
 */
static String normWord(String s) {
    final String upper = s.toUpperCase(Locale.ROOT);
    return upper;
}
/**
 * Looks up {@code target} in a memory-mapped map file and returns its position in the key table.
 *
 * <p>Layout read here, all values in {@link #ORDER}: int magic, int version, int n, followed by
 * n long keys. The keys are binary-searched with signed {@code long} comparison, so the table
 * must be sorted ascending in that order.
 *
 * @param mapFile map file to search
 * @param target  key to find
 * @return zero-based position of {@code target} in the key table, or -1 when it is absent
 * @throws IOException if the file cannot be opened or mapped, is shorter than its header or
 *                     than the key table it announces, or has an unexpected magic or version
 */
static int findIndexInMapMmap(Path mapFile, long target) throws IOException {
    final int headerSize = 12;
    try (FileChannel ch = FileChannel.open(mapFile, StandardOpenOption.READ)) {
        final long size = ch.size();
        // Reject a truncated file up front instead of failing inside the buffer accessors.
        if (size < headerSize) throw new IOException("Bad map file");
        ByteBuffer mbb = ch.map(FileChannel.MapMode.READ_ONLY, 0, size).order(ORDER);
        int magic = mbb.getInt(0);
        int ver = mbb.getInt(4);
        int n = mbb.getInt(8);
        if (magic != MAP_MAGIC || ver != VERSION) throw new IOException("Bad map file");
        // The announced key count must fit in the mapped bytes; this also guarantees that the
        // int offset computed below stays inside the mapping and cannot overflow.
        if (n < 0 || headerSize + (long) n * Long.BYTES > size) {
            throw new IOException("Bad map file: " + n + " keys do not fit in " + size + " bytes");
        }
        int lo = 0, hi = n - 1;
        while (lo <= hi) {
            int mid = (lo + hi) >>> 1;
            long key = mbb.getLong(headerSize + mid * Long.BYTES);
            if (key < target) lo = mid + 1;
            else if (key > target) hi = mid - 1;
            else return mid;
        }
        return -1;
    }
}
// --- Read record i from shard.data (your format) ---
static ShardLem readRecord(Path shardFile, int i) throws IOException {
try (FileChannel ch = FileChannel.open(shardFile, StandardOpenOption.READ)) {
ByteBuffer hdr = ByteBuffer.allocate(12);
ByteBuffer hdr = ByteBuffer.allocate(12).order(ORDER);
ch.read(hdr);
hdr.flip();
int magic = hdr.getInt();
int ver = hdr.getInt();
int n = hdr.getInt();
if (magic != SHARD_MAGIC || ver != VERSION) throw new IOException("Bad shard");
if (magic != SHARD_MAGIC || ver != VERSION) throw new IOException("Bad shard file");
if (i < 0 || i >= n) throw new IndexOutOfBoundsException();
long tableStart = 12L;
long dataStart = 12L + (long) n * 4L;
int offI = readIntAt(ch, tableStart + (long) i * 4L);
int offIp = (i + 1 < n) ? readIntAt(ch, tableStart + (long) (i + 1) * 4L)
: (int) (ch.size() - dataStart);
int offIp = (i + 1 < n)
? readIntAt(ch, tableStart + (long) (i + 1) * 4L)
: (int) (ch.size() - dataStart);
int len = offIp - offI;
ByteBuffer buf = ByteBuffer.allocate(len);
ch.position(dataStart + offI);
ch.read(buf);
buf.flip();
var string = StandardCharsets.UTF_8.decode(buf).toString();
val parts = string.split("\t", 3);
return new ShardLem(Lemma.from(parts[0]), Integer.parseInt(parts[1]), GSON.fromJson(parts[2], String[].class));
} catch (Exception e) {
return new ShardLem(Lemma.from("XXX"), -1, new String[0]);
String s = StandardCharsets.UTF_8.decode(buf).toString();
String[] parts = s.split("\t", 3);
long w = Lemma.from(normWord(parts[0]));
int simpel = Integer.parseInt(parts[1]);
String[] clues = GSON.fromJson(parts[2], String[].class);
return new ShardLem(w, simpel, clues);
}
}
static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("src/main/generated-sources/puzzle" + (THRESS == 0 ? "" : "/dict" + THRESS)).resolve(sId + ".idx"))
.toArray(
Path[]::new);
static Path shardKey(long word) {
return SHARDS[Lemma.unpackSize(word) + 1];
}
static int readIntAt(FileChannel ch, long pos) throws IOException {
ByteBuffer b = ByteBuffer.allocate(4);
ByteBuffer b = ByteBuffer.allocate(4).order(ORDER);
ch.position(pos);
ch.read(b);
b.flip();
return b.getInt();
}
/**
 * Resolves the shard record for a packed lemma.
 *
 * <p>The lemma is reduced with {@code Lemma.pack43} before it is searched in {@link #shardMap};
 * the position found there is the record index read from {@link #shardData}.
 *
 * @param w packed lemma
 * @return the record stored for {@code w}
 * @throws java.util.NoSuchElementException if the map file has no entry for {@code w}
 * @throws java.io.UncheckedIOException     if the map or data file cannot be read
 */
public static ShardLem lookup(long w) {
    final long key = Lemma.pack43(w);
    try {
        final int i = findIndexInMapMmap(shardMap, key);
        if (i < 0) {
            // Thrown as-is: the previous version threw inside its own catch-all and re-wrapped it.
            throw new java.util.NoSuchElementException("NOT FOUND: w=" + w + " key=" + key + " in " + shardMap);
        }
        return readRecord(shardData, i);
    } catch (IOException e) {
        // Keep the cause attached; no printStackTrace, the caller decides how to report it.
        throw new java.io.UncheckedIOException("Lookup failed for w=" + w + " in " + shardData, e);
    }
}
public record ShardLem(long w, int simpel, String[] clues) { }
}

View File

@@ -26,7 +26,7 @@ import static java.nio.charset.StandardCharsets.US_ASCII;
*/
@SuppressWarnings("ALL")
public record SwedishGenerator() {
public static final int THRESS = 800;
public static final long X = 0L;
public static final int LOG_EVERY_MS = 200;
public static final int BAR_LEN = 22;
@@ -97,7 +97,7 @@ public record SwedishGenerator() {
public static interface Lemma {
static final long LETTER_MASK = (1L << 40) - 1; // low 40 bits
static final long INDEX_MASK = (1L << 24) - 1; // 24 bits
static final long INDEX_MASK = (1L << 43) - 1; // low 43 bits: 40 letter bits + 3 size bits; the shard index is packed from bit 43 up
static long from(byte[] word) { return packShiftIn(word) | ((long) (word.length - 1) << 40); }
static long pack(long w, int shardIndex) { return w | (((long) shardIndex) << 43) | ((long) length0(w)) << 40; }
@@ -117,7 +117,10 @@ public record SwedishGenerator() {
static int unpackIndex(long w) { return (int) (w >>> 40); }
static int unpackShardIndex(long w) { return (int) (w >>> 43); }
static int unpackSize(long w) { return (int) (w >>> 40) & 7; }
static int unpackLetters(long w) { return (int) (w & LETTER_MASK); }
static int unpackLetters(long w) { return (int) (w & LETTER_MASK); }
/** Keeps only the bits of {@code w} selected by {@code INDEX_MASK} and clears everything above them. */
static long pack43(long w) { return INDEX_MASK & w; }
}
public static record Slotinfo(int key, long lo, long hi, int score, Assign assign, DictEntry entry, int minL) {

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,135 @@
package puzzle;
import lombok.val;
import puzzle.DictJavaGeneratorMulti.CsvIndexService;
import puzzle.Meta.ShardLem;
import puzzle.Meta.ShardRec;
import puzzle.SwedishGenerator.Lemma;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.function.LongConsumer;
import static java.nio.charset.StandardCharsets.US_ASCII;
public class BuildMeta2 {
/**
 * Extracts the word from one comma-separated line and passes its packed lemma to {@code ok}.
 *
 * <p>The line is split into at most five fields; the second field, trimmed and encoded as
 * US-ASCII, is the word.
 *
 * @param line one input line
 * @param ok   receiver of the packed lemma
 * @throws RuntimeException if {@code line} is blank
 */
public static void lineToLemma(String line, LongConsumer ok) {
    if (line.isBlank()) throw new RuntimeException("Empty line");
    final String[] columns = line.split(",", 5);
    final byte[] ascii = columns[1].trim().getBytes(US_ASCII);
    final long packed = SwedishGenerator.Lemma.from(ascii);
    ok.accept(packed);
}
/**
 * Writes one shard as a pair of files: a data file with the records and a map file with the
 * sorted lookup keys.
 *
 * <p>Records are sorted by {@code w}; a record's index in the data file equals the position of
 * its key in the map file. Layout, all integers in {@code Meta.ORDER}:
 * <ul>
 *   <li>data: int magic, int version, int n, n int offsets (relative to the start of the record
 *       area), then the records back to back, each the UTF-8 bytes of
 *       {@code word TAB simpel TAB clues-as-JSON};</li>
 *   <li>map: int magic, int version, int n, then the n long keys in ascending order.</li>
 * </ul>
 *
 * @param shardData data file to write; created or truncated
 * @param shardMap  map file to write; created or truncated
 * @param records   records to store; the list passed in is not modified
 * @throws IOException         if a directory or file cannot be created or written
 * @throws ArithmeticException if the record area or offset table exceeds the int range
 */
static void buildShard(Path shardData, Path shardMap, List<ShardRec> records) throws IOException {
    // The map is sorted by w; record index i == position in this sorted copy.
    List<ShardRec> sorted = new ArrayList<>(records);
    sorted.sort(Comparator.comparingLong(ShardRec::w));
    int n = sorted.size();

    List<byte[]> recBytes = new ArrayList<>(n);
    int[] offsets = new int[n];
    int off = 0;
    for (int i = 0; i < n; i++) {
        ShardRec r = sorted.get(i);
        // Store the real word + simpel + JSON clues.
        String line = r.word() + "\t" + r.simpel() + "\t" + Meta.GSON.toJson(r.clues());
        byte[] bytes = line.getBytes(StandardCharsets.UTF_8);
        recBytes.add(bytes);
        offsets[i] = off;
        // Fail loudly instead of wrapping around and writing a corrupt offset table.
        off = Math.addExact(off, bytes.length);
    }

    // Opening with CREATE does not create missing parent directories.
    for (Path target : List.of(shardData, shardMap)) {
        Path parent = target.toAbsolutePath().getParent();
        if (parent != null) Files.createDirectories(parent);
    }

    try (FileChannel ch = FileChannel.open(shardData,
            StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
        ByteBuffer hdr = ByteBuffer.allocate(12).order(Meta.ORDER);
        hdr.putInt(Meta.SHARD_MAGIC).putInt(Meta.VERSION).putInt(n);
        hdr.flip();
        writeFully(ch, hdr);

        ByteBuffer table = ByteBuffer.allocate(Math.multiplyExact(n, Integer.BYTES)).order(Meta.ORDER);
        for (int i = 0; i < n; i++) table.putInt(offsets[i]);
        table.flip();
        writeFully(ch, table);

        // Header and table were written in full, so the channel is already at the record area.
        for (byte[] b : recBytes) writeFully(ch, ByteBuffer.wrap(b));
    }

    try (FileChannel ch = FileChannel.open(shardMap,
            StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
        ByteBuffer hdr = ByteBuffer.allocate(12).order(Meta.ORDER);
        hdr.putInt(Meta.MAP_MAGIC).putInt(Meta.VERSION).putInt(n);
        hdr.flip();
        writeFully(ch, hdr);

        ByteBuffer keys = ByteBuffer.allocate(Math.multiplyExact(n, Long.BYTES)).order(Meta.ORDER);
        for (ShardRec r : sorted) keys.putLong(r.w());
        keys.flip();
        writeFully(ch, keys);
    }
}

/** Writes every remaining byte of {@code buf}; a single channel write may stop short. */
private static void writeFully(FileChannel ch, ByteBuffer buf) throws IOException {
    while (buf.hasRemaining()) {
        ch.write(buf);
    }
}
/**
 * Builds the shard files from {@code nl_score_hints_v3.csv}, then queries three sample words
 * against the freshly written files and prints what was found.
 *
 * @param args unused
 * @throws Exception if building or reading the shard fails
 */
public static void main2(String[] args) throws Exception {
    buildShard(Meta.shardData, Meta.shardMap, buildDict(Path.of("nl_score_hints_v3.csv")));

    for (String qRaw : new String[]{ "FIETS", "huis", "kiwi" }) {
        final String q = Meta.normWord(qRaw);
        final long w = Lemma.from(q);
        final int i = Meta.findIndexInMapMmap(Meta.shardMap, w);
        System.out.println("\nQuery: " + qRaw + " (norm=" + q + ") w=" + w + " -> i=" + i);
        if (i < 0) {
            System.out.println(" NOT FOUND");
            continue;
        }
        final ShardLem rec = Meta.readRecord(Meta.shardData, i);
        System.out.println(" simpel=" + rec.simpel());
        System.out.println(" clues=" + Arrays.toString(rec.clues()));
    }

    System.out.println("\nFiles written to: " + Meta.dir);
    System.out.println(" " + Meta.shardData);
    System.out.println(" " + Meta.shardMap);
} // --- Demo main ---
/**
 * Looks up three sample words through {@code Meta.lookup} and prints each record.
 * Every lemma gets the value 3897 OR-ed in from bit 43 upward before the lookup.
 *
 * @param args unused
 * @throws Exception if a lookup fails
 */
public static void main(String[] args) throws Exception {
    final long highBits = 3897L << 43L;
    for (String qRaw : new String[]{ "FIETS", "HUIS", "KIWI" }) {
        final long w = Lemma.from(qRaw) | highBits;
        final ShardLem rec = Meta.lookup(w);
        System.out.println(rec);
    }
}
/**
 * Reads the words file line by line and turns every line into a {@code ShardRec}.
 *
 * @param wordsPath UTF-8 text file with one entry per line
 * @return the records in file order
 * @throws IOException if the file cannot be opened
 */
private static List<ShardRec> buildDict(Path wordsPath) throws IOException {
    final ArrayList<ShardRec> recs = new ArrayList<>();
    try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
        for (var it = lines.iterator(); it.hasNext(); ) {
            final String line = it.next();
            lineToLemma(line, w -> {
                // Same evaluation order as before: word, clues, simpel.
                final String word = SwedishGenerator.Lemma.asWord(w, Export.BYTES.get());
                final String[] clues = CsvIndexService.lineToClue(line);
                final int simpel = CsvIndexService.lineToSimpel(line);
                recs.add(new ShardRec(word, w, simpel, clues));
            });
        }
    }
    return recs;
}
}

View File

@@ -10,7 +10,6 @@ import puzzle.SwedishGenerator.Dict;
import puzzle.SwedishGenerator.DictEntry;
import puzzle.SwedishGenerator.Lemma;
import static java.nio.charset.StandardCharsets.US_ASCII;
import static puzzle.SwedishGenerator.THRESS;
public final class DictJavaGeneratorMulti {
@@ -22,7 +21,7 @@ public final class DictJavaGeneratorMulti {
Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv");
Path outDir = Path.of(args.length > 1 ? args[1] : "src/main/generated-sources/puzzle/dict" + THRESS);
String pkg = "puzzle.dict" + THRESS;
HashMap<Path, ShardBuilder> builders = new HashMap<Path, ShardBuilder>(16);
HashMap<String, ShardBuilder> builders = new HashMap<String, ShardBuilder>(16);
SwedishGenerator.Dict dict = buildDict(wordsFile, builders);
@@ -40,15 +39,13 @@ public final class DictJavaGeneratorMulti {
// Aggregator
writeAggregator(outDir, pkg, "DictData", dict.length());
System.out.println("Generated sources into: " + outDir.toAbsolutePath());
builders.forEach(DictJavaGeneratorMulti::writeIndexedShard);
}
static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("src/main/generated-sources/puzzle/dict"+THRESS).resolve(sId + ".idx")).toArray(
Path[]::new);
static Path shardKey(long word) {
return SHARDS[Lemma.unpackSize(word) + 1];
public static final int THRESS = 800;
/**
 * Returns the builder key for a packed word: its unpacked size plus one, as a decimal string.
 *
 * @param word packed lemma
 * @return {@code unpackSize(word) + 1} rendered as text, e.g. "5" for an unpacked size of 4
 */
static String shardKey(long word) {
    // Add first, then convert: "" + size + 1 concatenates and would yield "41" instead of "5".
    return String.valueOf(Lemma.unpackSize(word) + 1);
}
private static SwedishGenerator.Dict buildDict(Path wordsPath, HashMap<Path, ShardBuilder> builders) throws IOException {
private static SwedishGenerator.Dict buildDict(Path wordsPath, HashMap<String, ShardBuilder> builders) throws IOException {
var map = new LongArrayList(100_000);
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
lines.forEach(line -> {
@@ -63,7 +60,7 @@ public final class DictJavaGeneratorMulti {
String recStr = word + "\t" + simpel + "\t" + json + "\n";
byte[] rec = recStr.getBytes(StandardCharsets.UTF_8);
var key = shardKey(w) ;
var key = shardKey(w);
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
try {
map.add(Lemma.pack(w, sb.addRecord(rec)));
@@ -129,32 +126,6 @@ public final class DictJavaGeneratorMulti {
}
static final int VERSION = 1;
static void writeIndexedShard(Path out, ShardBuilder sb) {
int n = sb.offsets.size();
int[] offs = sb.offsets.toArray();
byte[] data = sb.data.toByteArray();
try (FileChannel ch = FileChannel.open(out,
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING,
StandardOpenOption.WRITE)) {
// header
ByteBuffer hdr = ByteBuffer.allocate(12);
hdr.putInt(Meta.SHARD_MAGIC).putInt(VERSION).putInt(n).flip();
ch.write(hdr);
// offsets table (int per record)
ByteBuffer tbl = ByteBuffer.allocate(n * 4);
for (int i = 0; i < n; i++) tbl.putInt(offs[i]);
tbl.flip();
ch.write(tbl);
// data
ch.write(ByteBuffer.wrap(data));
}catch (IOException e){
throw new RuntimeException("Failed to write shard to " + out, e);
}
}
private static void writeAggregator(Path outDir, String pkg, String cls, int totalLen) throws IOException {
Path out = outDir.resolve(cls + ".java");
@@ -330,7 +301,7 @@ public final class DictJavaGeneratorMulti {
var word = parts[1].trim();
int score = Integer.parseInt(parts[2].trim());
int simpel = Integer.parseInt(parts[3].trim());
if (score < 1 || simpel>THRESS) {
if (score < 1 || simpel > THRESS) {
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
return;
}

View File

@@ -24,7 +24,6 @@ import static puzzle.SwedishGenerator.FillStats;
import static puzzle.SwedishGenerator.R;
import static puzzle.Masker.Slot;
import static puzzle.GridBuilder.placeWord;
import static puzzle.SwedishGenerator.THRESS;
import static puzzle.SwedishGeneratorTest.OFF_0_1;
import static puzzle.SwedishGeneratorTest.OFF_0_2;
import static puzzle.SwedishGeneratorTest.OFF_0_3;
@@ -52,7 +51,7 @@ public class ExportFormatTest {
var fillResult = new FillResult(true, 0, 0, 0, 0, new FillStats());
var puzzleResult = new PuzzleResult(new Clued(clues), grid, new Slotinfo[]{
new Slotinfo(key, lo, 0L, 0, new Assign(TEST), null, 0)
}, fillResult, 0);
}, fillResult);
var rewards = new Rewards(10, 5, 1);
var exported = puzzleResult.exportFormatFromFilled(2, rewards);
@@ -92,7 +91,7 @@ public class ExportFormatTest {
var grid = SwedishGeneratorTest.createEmpty();
val clues = Clues.createEmpty();
var fillResult = new FillResult(true, 0, 0, 0, 0, new FillStats());
var puzzleResult = new PuzzleResult(new Clued(clues), new Gridded(grid, clues), new Slotinfo[0], fillResult, 0);
var puzzleResult = new PuzzleResult(new Clued(clues), new Gridded(grid, clues), new Slotinfo[0], fillResult);
var exported = puzzleResult.exportFormatFromFilled(1, new Rewards(0, 0, 0));
@@ -118,9 +117,7 @@ public class ExportFormatTest {
for (int i = 0; i < Math.min(words.length, 5); i++) {
val wordVal = words[i];
val word = Lemma.asWord(wordVal, Export.BYTES.get());
val assigned = new Assign(wordVal);
val shard = shardKey(assigned.w);
val clueRec = Meta.readRecord(shard, i);
val clueRec = Meta.lookup(wordVal);
assertNotNull(clueRec);
assertEquals(word, Lemma.asWord(clueRec.w(), Export.BYTES.get()));
@@ -151,8 +148,7 @@ public class ExportFormatTest {
}
if (idx != -1) {
val shard = shardKey(w);
val clueRec = Meta.readRecord(shard, idx);
val clueRec = Meta.lookup(w);
assertNotNull(clueRec);
assertEquals(wStr, Lemma.asWord(clueRec.w(), Export.BYTES.get()));
// Check some expected complexity values (from CSV head output, column 3)

View File

@@ -11,7 +11,6 @@ import puzzle.Export.LetterVisit.LetterAt;
import puzzle.Export.PuzzleResult;
import puzzle.Export.Rewards;
import puzzle.Main.Opts;
import puzzle.Masker.Slot;
import puzzle.SwedishGenerator.Rng;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -21,7 +20,6 @@ import static puzzle.Export.Clue.DOWN;
import static puzzle.Export.Clue.LEFT;
import static puzzle.Export.Clue.RIGHT;
import static puzzle.Export.Clue.UP;
import static puzzle.Masker.Slot;
import static puzzle.SwedishGenerator.Dict;
import static puzzle.SwedishGenerator.Lemma;
import static puzzle.SwedishGenerator.Slotinfo;
@@ -224,7 +222,7 @@ public class MainTest {
Assertions.assertEquals(-1L, grid.hi);
var g = new Gridded(grid, mask.c());
g.gridToString(mask.c());
var aa = new PuzzleResult(mask, g, slotInfo, filled,0).exportFormatFromFilled(1, new Rewards(1, 1, 1));
var aa = new PuzzleResult(mask, g, slotInfo, filled).exportFormatFromFilled(1, new Rewards(1, 1, 1));
System.out.println(String.join("\n", aa.grid()));
}