introduce bitloops
This commit is contained in:
135
src/test/java/puzzle/BuildMeta2.java
Normal file
135
src/test/java/puzzle/BuildMeta2.java
Normal file
@@ -0,0 +1,135 @@
|
||||
package puzzle;
|
||||
|
||||
import lombok.val;
|
||||
import puzzle.DictJavaGeneratorMulti.CsvIndexService;
|
||||
import puzzle.Meta.ShardLem;
|
||||
import puzzle.Meta.ShardRec;
|
||||
import puzzle.SwedishGenerator.Lemma;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.function.LongConsumer;
|
||||
import static java.nio.charset.StandardCharsets.US_ASCII;
|
||||
public class BuildMeta2 {
|
||||
|
||||
public static void lineToLemma(String line, LongConsumer ok) {
|
||||
if (line.isBlank()) {
|
||||
throw new RuntimeException("Empty line");
|
||||
}
|
||||
var parts = line.split(",", 5);
|
||||
var word = parts[1].trim();
|
||||
ok.accept(SwedishGenerator.Lemma.from(word.getBytes(US_ASCII)));
|
||||
}
|
||||
// --- Build demo files: shard.data + shard.map ---
|
||||
static void buildShard(Path shardData, Path shardMap, List<ShardRec> records) throws IOException {
|
||||
records = new ArrayList<>(records);
|
||||
// map is sorted by w; record index i == positie in deze gesorteerde lijst
|
||||
records.sort(Comparator.comparingLong(ShardRec::w));
|
||||
|
||||
int n = records.size();
|
||||
List<byte[]> recBytes = new ArrayList<>(n);
|
||||
int[] offsets = new int[n];
|
||||
|
||||
int off = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
ShardRec r = records.get(i);
|
||||
// schrijf het echte woord weg + simpel + JSON hints
|
||||
String line = r.word() + "\t" + r.simpel() + "\t" + Meta.GSON.toJson(r.clues());
|
||||
byte[] bytes = line.getBytes(StandardCharsets.UTF_8);
|
||||
recBytes.add(bytes);
|
||||
offsets[i] = off;
|
||||
|
||||
off += bytes.length;
|
||||
}
|
||||
|
||||
long headerSize = 12L;
|
||||
long tableSize = (long) n * 4L;
|
||||
long dataStart = headerSize + tableSize;
|
||||
|
||||
try (FileChannel ch = FileChannel.open(shardData,
|
||||
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
|
||||
|
||||
ByteBuffer hdr = ByteBuffer.allocate(12).order(Meta.ORDER);
|
||||
hdr.putInt(Meta.SHARD_MAGIC).putInt(Meta.VERSION).putInt(n);
|
||||
hdr.flip();
|
||||
ch.write(hdr);
|
||||
|
||||
ByteBuffer table = ByteBuffer.allocate(n * 4).order(Meta.ORDER);
|
||||
for (int i = 0; i < n; i++) table.putInt(offsets[i]);
|
||||
table.flip();
|
||||
ch.write(table);
|
||||
|
||||
ch.position(dataStart);
|
||||
for (byte[] b : recBytes) ch.write(ByteBuffer.wrap(b));
|
||||
}
|
||||
|
||||
try (FileChannel ch = FileChannel.open(shardMap,
|
||||
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
|
||||
|
||||
ByteBuffer hdr = ByteBuffer.allocate(12).order(Meta.ORDER);
|
||||
hdr.putInt(Meta.MAP_MAGIC).putInt(Meta.VERSION).putInt(n);
|
||||
hdr.flip();
|
||||
ch.write(hdr);
|
||||
|
||||
ByteBuffer keys = ByteBuffer.allocate(n * 8).order(Meta.ORDER);
|
||||
for (ShardRec r : records) keys.putLong(r.w());
|
||||
keys.flip();
|
||||
ch.write(keys);
|
||||
}
|
||||
}
|
||||
|
||||
public static void main2(String[] args) throws Exception {
|
||||
val records = buildDict(Path.of("nl_score_hints_v3.csv"));
|
||||
|
||||
buildShard(Meta.shardData, Meta.shardMap, records);
|
||||
|
||||
for (String qRaw : List.of("FIETS", "huis", "kiwi")) {
|
||||
String q = Meta.normWord(qRaw);
|
||||
long w = Lemma.from(q);
|
||||
int i = Meta.findIndexInMapMmap(Meta.shardMap, w);
|
||||
|
||||
System.out.println("\nQuery: " + qRaw + " (norm=" + q + ") w=" + w + " -> i=" + i);
|
||||
if (i >= 0) {
|
||||
ShardLem rec = Meta.readRecord(Meta.shardData, i);
|
||||
System.out.println(" simpel=" + rec.simpel());
|
||||
System.out.println(" clues=" + Arrays.toString(rec.clues()));
|
||||
} else {
|
||||
System.out.println(" NOT FOUND");
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println("\nFiles written to: " + Meta.dir);
|
||||
System.out.println(" " + Meta.shardData);
|
||||
System.out.println(" " + Meta.shardMap);
|
||||
} // --- Demo main ---
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
for (String qRaw : List.of("FIETS", "HUIS", "KIWI")) {
|
||||
long w = Lemma.from(qRaw) | (3897L << 43L);
|
||||
ShardLem rec = Meta.lookup(w);
|
||||
System.out.println(rec);
|
||||
}
|
||||
}
|
||||
private static List<ShardRec> buildDict(Path wordsPath) throws IOException {
|
||||
var recs = new ArrayList<ShardRec>();
|
||||
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
|
||||
lines.forEach(line -> {
|
||||
lineToLemma(line, w -> {
|
||||
String word = SwedishGenerator.Lemma.asWord(w, Export.BYTES.get());
|
||||
String[] clues = CsvIndexService.lineToClue(line);
|
||||
int simpel = CsvIndexService.lineToSimpel(line);
|
||||
recs.add(new ShardRec(word, w, simpel, clues));
|
||||
});
|
||||
});
|
||||
}
|
||||
return recs;
|
||||
}
|
||||
}
|
||||
@@ -10,19 +10,18 @@ import puzzle.SwedishGenerator.Dict;
|
||||
import puzzle.SwedishGenerator.DictEntry;
|
||||
import puzzle.SwedishGenerator.Lemma;
|
||||
import static java.nio.charset.StandardCharsets.US_ASCII;
|
||||
import static puzzle.SwedishGenerator.THRESS;
|
||||
|
||||
public final class DictJavaGeneratorMulti {
|
||||
|
||||
// Smaller = more files, but safer for javac/class limits.
|
||||
private static final int WORDS_CHUNK = 8_192;
|
||||
private static final int POS_CHUNK = 8_192;
|
||||
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv");
|
||||
Path outDir = Path.of(args.length > 1 ? args[1] : "src/main/generated-sources/puzzle/dict" + THRESS);
|
||||
String pkg = "puzzle.dict" + THRESS;
|
||||
HashMap<Path, ShardBuilder> builders = new HashMap<Path, ShardBuilder>(16);
|
||||
HashMap<String, ShardBuilder> builders = new HashMap<String, ShardBuilder>(16);
|
||||
|
||||
SwedishGenerator.Dict dict = buildDict(wordsFile, builders);
|
||||
|
||||
@@ -40,15 +39,13 @@ public final class DictJavaGeneratorMulti {
|
||||
// Aggregator
|
||||
writeAggregator(outDir, pkg, "DictData", dict.length());
|
||||
System.out.println("Generated sources into: " + outDir.toAbsolutePath());
|
||||
builders.forEach(DictJavaGeneratorMulti::writeIndexedShard);
|
||||
|
||||
}
|
||||
static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("src/main/generated-sources/puzzle/dict"+THRESS).resolve(sId + ".idx")).toArray(
|
||||
Path[]::new);
|
||||
static Path shardKey(long word) {
|
||||
return SHARDS[Lemma.unpackSize(word) + 1];
|
||||
public static final int THRESS = 800;
|
||||
static String shardKey(long word) {
|
||||
return ""+Lemma.unpackSize(word) + 1;
|
||||
}
|
||||
private static SwedishGenerator.Dict buildDict(Path wordsPath, HashMap<Path, ShardBuilder> builders) throws IOException {
|
||||
private static SwedishGenerator.Dict buildDict(Path wordsPath, HashMap<String, ShardBuilder> builders) throws IOException {
|
||||
var map = new LongArrayList(100_000);
|
||||
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
|
||||
lines.forEach(line -> {
|
||||
@@ -63,7 +60,7 @@ public final class DictJavaGeneratorMulti {
|
||||
String recStr = word + "\t" + simpel + "\t" + json + "\n";
|
||||
byte[] rec = recStr.getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
var key = shardKey(w) ;
|
||||
var key = shardKey(w);
|
||||
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
|
||||
try {
|
||||
map.add(Lemma.pack(w, sb.addRecord(rec)));
|
||||
@@ -129,32 +126,6 @@ public final class DictJavaGeneratorMulti {
|
||||
}
|
||||
|
||||
static final int VERSION = 1;
|
||||
static void writeIndexedShard(Path out, ShardBuilder sb) {
|
||||
int n = sb.offsets.size();
|
||||
int[] offs = sb.offsets.toArray();
|
||||
byte[] data = sb.data.toByteArray();
|
||||
|
||||
try (FileChannel ch = FileChannel.open(out,
|
||||
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING,
|
||||
StandardOpenOption.WRITE)) {
|
||||
|
||||
// header
|
||||
ByteBuffer hdr = ByteBuffer.allocate(12);
|
||||
hdr.putInt(Meta.SHARD_MAGIC).putInt(VERSION).putInt(n).flip();
|
||||
ch.write(hdr);
|
||||
|
||||
// offsets table (int per record)
|
||||
ByteBuffer tbl = ByteBuffer.allocate(n * 4);
|
||||
for (int i = 0; i < n; i++) tbl.putInt(offs[i]);
|
||||
tbl.flip();
|
||||
ch.write(tbl);
|
||||
|
||||
// data
|
||||
ch.write(ByteBuffer.wrap(data));
|
||||
}catch (IOException e){
|
||||
throw new RuntimeException("Failed to write shard to " + out, e);
|
||||
}
|
||||
}
|
||||
|
||||
private static void writeAggregator(Path outDir, String pkg, String cls, int totalLen) throws IOException {
|
||||
Path out = outDir.resolve(cls + ".java");
|
||||
@@ -330,7 +301,7 @@ public final class DictJavaGeneratorMulti {
|
||||
var word = parts[1].trim();
|
||||
int score = Integer.parseInt(parts[2].trim());
|
||||
int simpel = Integer.parseInt(parts[3].trim());
|
||||
if (score < 1 || simpel>THRESS) {
|
||||
if (score < 1 || simpel > THRESS) {
|
||||
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -24,7 +24,6 @@ import static puzzle.SwedishGenerator.FillStats;
|
||||
import static puzzle.SwedishGenerator.R;
|
||||
import static puzzle.Masker.Slot;
|
||||
import static puzzle.GridBuilder.placeWord;
|
||||
import static puzzle.SwedishGenerator.THRESS;
|
||||
import static puzzle.SwedishGeneratorTest.OFF_0_1;
|
||||
import static puzzle.SwedishGeneratorTest.OFF_0_2;
|
||||
import static puzzle.SwedishGeneratorTest.OFF_0_3;
|
||||
@@ -52,7 +51,7 @@ public class ExportFormatTest {
|
||||
var fillResult = new FillResult(true, 0, 0, 0, 0, new FillStats());
|
||||
var puzzleResult = new PuzzleResult(new Clued(clues), grid, new Slotinfo[]{
|
||||
new Slotinfo(key, lo, 0L, 0, new Assign(TEST), null, 0)
|
||||
}, fillResult, 0);
|
||||
}, fillResult);
|
||||
|
||||
var rewards = new Rewards(10, 5, 1);
|
||||
var exported = puzzleResult.exportFormatFromFilled(2, rewards);
|
||||
@@ -92,7 +91,7 @@ public class ExportFormatTest {
|
||||
var grid = SwedishGeneratorTest.createEmpty();
|
||||
val clues = Clues.createEmpty();
|
||||
var fillResult = new FillResult(true, 0, 0, 0, 0, new FillStats());
|
||||
var puzzleResult = new PuzzleResult(new Clued(clues), new Gridded(grid, clues), new Slotinfo[0], fillResult, 0);
|
||||
var puzzleResult = new PuzzleResult(new Clued(clues), new Gridded(grid, clues), new Slotinfo[0], fillResult);
|
||||
|
||||
var exported = puzzleResult.exportFormatFromFilled(1, new Rewards(0, 0, 0));
|
||||
|
||||
@@ -118,9 +117,7 @@ public class ExportFormatTest {
|
||||
for (int i = 0; i < Math.min(words.length, 5); i++) {
|
||||
val wordVal = words[i];
|
||||
val word = Lemma.asWord(wordVal, Export.BYTES.get());
|
||||
val assigned = new Assign(wordVal);
|
||||
val shard = shardKey(assigned.w);
|
||||
val clueRec = Meta.readRecord(shard, i);
|
||||
val clueRec = Meta.lookup(wordVal);
|
||||
|
||||
assertNotNull(clueRec);
|
||||
assertEquals(word, Lemma.asWord(clueRec.w(), Export.BYTES.get()));
|
||||
@@ -151,8 +148,7 @@ public class ExportFormatTest {
|
||||
}
|
||||
|
||||
if (idx != -1) {
|
||||
val shard = shardKey(w);
|
||||
val clueRec = Meta.readRecord(shard, idx);
|
||||
val clueRec = Meta.lookup(w);
|
||||
assertNotNull(clueRec);
|
||||
assertEquals(wStr, Lemma.asWord(clueRec.w(), Export.BYTES.get()));
|
||||
// Check some expected complexity values (from CSV head output, column 3)
|
||||
|
||||
@@ -11,7 +11,6 @@ import puzzle.Export.LetterVisit.LetterAt;
|
||||
import puzzle.Export.PuzzleResult;
|
||||
import puzzle.Export.Rewards;
|
||||
import puzzle.Main.Opts;
|
||||
import puzzle.Masker.Slot;
|
||||
import puzzle.SwedishGenerator.Rng;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
@@ -21,7 +20,6 @@ import static puzzle.Export.Clue.DOWN;
|
||||
import static puzzle.Export.Clue.LEFT;
|
||||
import static puzzle.Export.Clue.RIGHT;
|
||||
import static puzzle.Export.Clue.UP;
|
||||
import static puzzle.Masker.Slot;
|
||||
import static puzzle.SwedishGenerator.Dict;
|
||||
import static puzzle.SwedishGenerator.Lemma;
|
||||
import static puzzle.SwedishGenerator.Slotinfo;
|
||||
@@ -224,7 +222,7 @@ public class MainTest {
|
||||
Assertions.assertEquals(-1L, grid.hi);
|
||||
var g = new Gridded(grid, mask.c());
|
||||
g.gridToString(mask.c());
|
||||
var aa = new PuzzleResult(mask, g, slotInfo, filled,0).exportFormatFromFilled(1, new Rewards(1, 1, 1));
|
||||
var aa = new PuzzleResult(mask, g, slotInfo, filled).exportFormatFromFilled(1, new Rewards(1, 1, 1));
|
||||
System.out.println(String.join("\n", aa.grid()));
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user