introduce bitloops
This commit is contained in:
@@ -1,38 +0,0 @@
|
||||
package puzzle;
|
||||
|
||||
import module java.base;
|
||||
import puzzle.SwedishGenerator.Lemma;
|
||||
import static java.nio.charset.StandardCharsets.US_ASCII;
|
||||
|
||||
public final class CsvIndexService {
|
||||
|
||||
static int SIMPEL_IDX = 3;
|
||||
|
||||
public static int lineToSimpel(String line) {
|
||||
var parts = line.split(",", 5);
|
||||
return Integer.parseInt(parts[SIMPEL_IDX].trim());
|
||||
}
|
||||
public static String[] lineToClue(String line) {
|
||||
if (line.isBlank()) throw new RuntimeException("Empty line");
|
||||
var parts = line.split(",", 5);
|
||||
var rawClue = parts[4].trim();
|
||||
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
||||
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
||||
}
|
||||
return Meta.GSON.fromJson(rawClue, String[].class);
|
||||
}
|
||||
public static void lineToLemma(String line, LongConsumer ok) {
|
||||
if (line.isBlank()) {
|
||||
throw new RuntimeException("Empty line");
|
||||
}
|
||||
var parts = line.split(",", 5);
|
||||
var id = Integer.parseInt(parts[0].trim());
|
||||
var word = parts[1].trim();
|
||||
int score = Integer.parseInt(parts[2].trim());
|
||||
if (score < 1) {
|
||||
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
|
||||
return;
|
||||
}
|
||||
ok.accept(Lemma.from(word.getBytes(US_ASCII)));
|
||||
}
|
||||
}
|
||||
@@ -1,20 +1,27 @@
|
||||
package puzzle;
|
||||
|
||||
import module java.base;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.Accessors;
|
||||
import lombok.val;
|
||||
import puzzle.Export.Dicts;
|
||||
import puzzle.Export.IntListDTO;
|
||||
import puzzle.DictJavaGeneratorMulti.DictEntryDTO.IntListDTO;
|
||||
import puzzle.SwedishGenerator.Dict;
|
||||
import puzzle.SwedishGenerator.DictEntry;
|
||||
import puzzle.SwedishGenerator.Lemma;
|
||||
import static java.nio.charset.StandardCharsets.US_ASCII;
|
||||
import static puzzle.SwedishGenerator.THRESS;
|
||||
|
||||
public final class DictJavaGeneratorMulti {
|
||||
|
||||
// Smaller = more files, but safer for javac/class limits.
|
||||
private static final int WORDS_CHUNK = 8_192;
|
||||
private static final int POS_CHUNK = 8_192;
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv");
|
||||
Path outDir = Path.of(args.length > 1 ? args[1] : "src/main/generated-sources/puzzle");
|
||||
String pkg = "puzzle";
|
||||
Path outDir = Path.of(args.length > 1 ? args[1] : "src/main/generated-sources/puzzle/dict" + THRESS);
|
||||
String pkg = "puzzle.dict" + THRESS;
|
||||
HashMap<Path, ShardBuilder> builders = new HashMap<Path, ShardBuilder>(16);
|
||||
|
||||
SwedishGenerator.Dict dict = buildDict(wordsFile, builders);
|
||||
@@ -33,10 +40,16 @@ public final class DictJavaGeneratorMulti {
|
||||
// Aggregator
|
||||
writeAggregator(outDir, pkg, "DictData", dict.length());
|
||||
System.out.println("Generated sources into: " + outDir.toAbsolutePath());
|
||||
builders.forEach(DictJavaGeneratorMulti::writeIndexedShard);
|
||||
|
||||
}
|
||||
static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("src/main/generated-sources/puzzle/dict"+THRESS).resolve(sId + ".idx")).toArray(
|
||||
Path[]::new);
|
||||
static Path shardKey(long word) {
|
||||
return SHARDS[Lemma.unpackSize(word) + 1];
|
||||
}
|
||||
|
||||
private static SwedishGenerator.Dict buildDict(Path wordsPath, HashMap<Path, ShardBuilder> builders) throws IOException {
|
||||
var map = new Export.LongArrayList(100_000);
|
||||
var map = new LongArrayList(100_000);
|
||||
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
|
||||
lines.forEach(line -> {
|
||||
CsvIndexService.lineToLemma(line, w -> {
|
||||
@@ -50,7 +63,7 @@ public final class DictJavaGeneratorMulti {
|
||||
String recStr = word + "\t" + simpel + "\t" + json + "\n";
|
||||
byte[] rec = recStr.getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
var key = Meta.shardKey(w);
|
||||
var key = shardKey(w) ;
|
||||
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
|
||||
try {
|
||||
map.add(Lemma.pack(w, sb.addRecord(rec)));
|
||||
@@ -64,7 +77,43 @@ public final class DictJavaGeneratorMulti {
|
||||
return Dicts.makeDict(map.toArray());
|
||||
}
|
||||
|
||||
static final int VERSION = 1;
|
||||
interface Dicts {
|
||||
|
||||
static Dict makeDict(long[] wordz) {
|
||||
var index = new DictEntryDTO[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];
|
||||
Arrays.setAll(index, DictEntryDTO::new);
|
||||
for (var lemma : wordz) {
|
||||
var L = Lemma.unpackSize(lemma) + 1;//Lemma.unpackSize(lemma) + 2;
|
||||
val entry = index[L];
|
||||
val idx = entry.words().size();
|
||||
val pos = entry.pos();
|
||||
entry.words().add(lemma);
|
||||
int i = 0;
|
||||
for (long w = lemma & Lemma.LETTER_MASK; w != 0; w >>>= 5, i++) {
|
||||
pos[i][(int) ((w & 31) - 1)].add(idx);
|
||||
}
|
||||
}
|
||||
for (int i = 2; i < index.length; i++) if (index[i].words().size() <= 0) throw new RuntimeException("No words for length " + i);
|
||||
return new Dict(Arrays.stream(index).map(i -> {
|
||||
var words = i.words().toArray();
|
||||
int numWords = words.length;
|
||||
int numLongs = (numWords + 63) >>> 6;
|
||||
var bitsets = new long[i.pos().length * 26][numLongs];
|
||||
for (int p = 0; p < i.pos().length; p++) {
|
||||
for (int l = 0; l < 26; l++) {
|
||||
var list = i.pos()[p][l];
|
||||
var bs = bitsets[p * 26 + l];
|
||||
for (int k = 0; k < list.size(); k++) {
|
||||
int wordIdx = list.data()[k];
|
||||
bs[wordIdx >>> 6] |= (1L << (wordIdx & 63));
|
||||
}
|
||||
}
|
||||
}
|
||||
return new DictEntry(words, bitsets, words.length, (words.length + 63) >>> 6);
|
||||
}).toArray(DictEntry[]::new),
|
||||
Arrays.stream(index).mapToInt(i -> i.words().size()).sum());
|
||||
}
|
||||
}
|
||||
|
||||
static final class ShardBuilder {
|
||||
|
||||
@@ -79,7 +128,8 @@ public final class DictJavaGeneratorMulti {
|
||||
}
|
||||
}
|
||||
|
||||
static void writeIndexedShard(Path out, ShardBuilder sb) throws IOException {
|
||||
static final int VERSION = 1;
|
||||
static void writeIndexedShard(Path out, ShardBuilder sb) {
|
||||
int n = sb.offsets.size();
|
||||
int[] offs = sb.offsets.toArray();
|
||||
byte[] data = sb.data.toByteArray();
|
||||
@@ -101,6 +151,8 @@ public final class DictJavaGeneratorMulti {
|
||||
|
||||
// data
|
||||
ch.write(ByteBuffer.wrap(data));
|
||||
}catch (IOException e){
|
||||
throw new RuntimeException("Failed to write shard to " + out, e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,11 +162,11 @@ public final class DictJavaGeneratorMulti {
|
||||
w.write("package " + pkg + ";\n\n");
|
||||
w.write("public final class " + cls + " {\n");
|
||||
w.write(" private " + cls + "() {}\n\n");
|
||||
w.write(" public static final SwedishGenerator.Dict DICT = build();\n\n");
|
||||
w.write(" private static SwedishGenerator.Dict build() {\n");
|
||||
w.write(" SwedishGenerator.DictEntry[] idx = new SwedishGenerator.DictEntry[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];\n");
|
||||
w.write(" public static final puzzle.SwedishGenerator.Dict DICT" + THRESS + " = build();\n\n");
|
||||
w.write(" private static puzzle.SwedishGenerator.Dict build() {\n");
|
||||
w.write(" puzzle.SwedishGenerator.DictEntry[] idx = new puzzle.SwedishGenerator.DictEntry[puzzle.SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];\n");
|
||||
for (int L = 2; L <= 8; L++) w.write(" idx[" + L + "] = DictDataL" + L + ".entry();\n");
|
||||
w.write(" return new SwedishGenerator.Dict(idx, " + totalLen + ");\n");
|
||||
w.write(" return new puzzle.SwedishGenerator.Dict(idx, " + totalLen + ");\n");
|
||||
w.write(" }\n");
|
||||
w.write("}\n");
|
||||
}
|
||||
@@ -217,11 +269,11 @@ public final class DictJavaGeneratorMulti {
|
||||
w.write(" }\n\n");
|
||||
|
||||
// entry
|
||||
w.write(" public static SwedishGenerator.DictEntry entry() {\n");
|
||||
w.write(" public static puzzle.SwedishGenerator.DictEntry entry() {\n");
|
||||
w.write(" long[] wds = words();\n");
|
||||
w.write(" long[] flat = posFlat();\n");
|
||||
w.write(" long[][] pos = reshape(flat, ROWS, COLS);\n");
|
||||
w.write(" return new SwedishGenerator.DictEntry(wds, pos, wds.length, (wds.length + 63) >>> 6);\n");
|
||||
w.write(" return new puzzle.SwedishGenerator.DictEntry(wds, pos, wds.length, (wds.length + 63) >>> 6);\n");
|
||||
w.write(" }\n\n");
|
||||
|
||||
// helpers
|
||||
@@ -252,4 +304,87 @@ public final class DictJavaGeneratorMulti {
|
||||
private static String toLongLiteral(long v) {
|
||||
return "0x" + Long.toUnsignedString(v, 16) + "L";
|
||||
}
|
||||
public static final class CsvIndexService {
|
||||
|
||||
static int SIMPEL_IDX = 3;
|
||||
|
||||
public static int lineToSimpel(String line) {
|
||||
var parts = line.split(",", 5);
|
||||
return Integer.parseInt(parts[SIMPEL_IDX].trim());
|
||||
}
|
||||
public static String[] lineToClue(String line) {
|
||||
if (line.isBlank()) throw new RuntimeException("Empty line");
|
||||
var parts = line.split(",", 5);
|
||||
var rawClue = parts[4].trim();
|
||||
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
||||
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
||||
}
|
||||
return Meta.GSON.fromJson(rawClue, String[].class);
|
||||
}
|
||||
public static void lineToLemma(String line, LongConsumer ok) {
|
||||
if (line.isBlank()) {
|
||||
throw new RuntimeException("Empty line");
|
||||
}
|
||||
var parts = line.split(",", 5);
|
||||
var id = Integer.parseInt(parts[0].trim());
|
||||
var word = parts[1].trim();
|
||||
int score = Integer.parseInt(parts[2].trim());
|
||||
int simpel = Integer.parseInt(parts[3].trim());
|
||||
if (score < 1 || simpel>THRESS) {
|
||||
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
|
||||
return;
|
||||
}
|
||||
ok.accept(Lemma.from(word.getBytes(US_ASCII)));
|
||||
}
|
||||
}
|
||||
|
||||
record DictEntryDTO(LongArrayList words, IntListDTO[][] pos) {
|
||||
|
||||
public DictEntryDTO(int L) {
|
||||
this(new LongArrayList(1024), new IntListDTO[L][26]);
|
||||
for (var i = 0; i < L; i++) for (var j = 0; j < 26; j++) pos[i][j] = new IntListDTO();
|
||||
}
|
||||
@Getter
|
||||
@Accessors(fluent = true)
|
||||
@NoArgsConstructor
|
||||
static final class IntListDTO {
|
||||
|
||||
int[] data = new int[8];
|
||||
int size = 0;
|
||||
public IntListDTO(int size) {
|
||||
data = new int[size];
|
||||
}
|
||||
void add(int v) {
|
||||
if (size >= data.length) data = Arrays.copyOf(data, data.length * 2);
|
||||
data[size++] = v;
|
||||
}
|
||||
int[] toArray() { return Arrays.copyOf(data, size); }
|
||||
}
|
||||
}
|
||||
|
||||
static final class LongArrayList {
|
||||
|
||||
long[] a;
|
||||
int size;
|
||||
|
||||
LongArrayList(int initialCapacity) {
|
||||
if (initialCapacity < 0) throw new IllegalArgumentException();
|
||||
a = new long[initialCapacity];
|
||||
}
|
||||
|
||||
int size() { return size; }
|
||||
|
||||
void add(long v) {
|
||||
if (size == a.length) grow();
|
||||
a[size++] = v;
|
||||
}
|
||||
|
||||
void grow() {
|
||||
int newCap = a.length == 0 ? 1 : a.length * 2;
|
||||
long[] n = new long[newCap];
|
||||
System.arraycopy(a, 0, n, 0, size);
|
||||
a = n;
|
||||
}
|
||||
long[] toArray() { return Arrays.copyOf(a, this.size); }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@ import static puzzle.SwedishGenerator.FillStats;
|
||||
import static puzzle.SwedishGenerator.R;
|
||||
import static puzzle.Masker.Slot;
|
||||
import static puzzle.GridBuilder.placeWord;
|
||||
import static puzzle.SwedishGenerator.THRESS;
|
||||
import static puzzle.SwedishGeneratorTest.OFF_0_1;
|
||||
import static puzzle.SwedishGeneratorTest.OFF_0_2;
|
||||
import static puzzle.SwedishGeneratorTest.OFF_0_3;
|
||||
@@ -51,7 +52,7 @@ public class ExportFormatTest {
|
||||
var fillResult = new FillResult(true, 0, 0, 0, 0, new FillStats());
|
||||
var puzzleResult = new PuzzleResult(new Clued(clues), grid, new Slotinfo[]{
|
||||
new Slotinfo(key, lo, 0L, 0, new Assign(TEST), null, 0)
|
||||
}, fillResult);
|
||||
}, fillResult, 0);
|
||||
|
||||
var rewards = new Rewards(10, 5, 1);
|
||||
var exported = puzzleResult.exportFormatFromFilled(2, rewards);
|
||||
@@ -91,7 +92,7 @@ public class ExportFormatTest {
|
||||
var grid = SwedishGeneratorTest.createEmpty();
|
||||
val clues = Clues.createEmpty();
|
||||
var fillResult = new FillResult(true, 0, 0, 0, 0, new FillStats());
|
||||
var puzzleResult = new PuzzleResult(new Clued(clues), new Gridded(grid, clues), new Slotinfo[0], fillResult);
|
||||
var puzzleResult = new PuzzleResult(new Clued(clues), new Gridded(grid, clues), new Slotinfo[0], fillResult, 0);
|
||||
|
||||
var exported = puzzleResult.exportFormatFromFilled(1, new Rewards(0, 0, 0));
|
||||
|
||||
@@ -104,6 +105,10 @@ public class ExportFormatTest {
|
||||
assertTrue(row.matches("#+"));
|
||||
}
|
||||
}
|
||||
Path shardKey(long word) {
|
||||
return Path.of("src/main/generated-sources/puzzle").resolve(Lemma.unpackSize(word) + 1 + ".idx");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testShardToClue() {
|
||||
for (int length = 2; length <= 8; length++) {
|
||||
@@ -114,7 +119,7 @@ public class ExportFormatTest {
|
||||
val wordVal = words[i];
|
||||
val word = Lemma.asWord(wordVal, Export.BYTES.get());
|
||||
val assigned = new Assign(wordVal);
|
||||
val shard = Meta.shardKey(assigned.w);
|
||||
val shard = shardKey(assigned.w);
|
||||
val clueRec = Meta.readRecord(shard, i);
|
||||
|
||||
assertNotNull(clueRec);
|
||||
@@ -146,7 +151,7 @@ public class ExportFormatTest {
|
||||
}
|
||||
|
||||
if (idx != -1) {
|
||||
val shard = Meta.shardKey(w);
|
||||
val shard = shardKey(w);
|
||||
val clueRec = Meta.readRecord(shard, idx);
|
||||
assertNotNull(clueRec);
|
||||
assertEquals(wStr, Lemma.asWord(clueRec.w(), Export.BYTES.get()));
|
||||
|
||||
@@ -224,7 +224,7 @@ public class MainTest {
|
||||
Assertions.assertEquals(-1L, grid.hi);
|
||||
var g = new Gridded(grid, mask.c());
|
||||
g.gridToString(mask.c());
|
||||
var aa = new PuzzleResult(mask, g, slotInfo, filled).exportFormatFromFilled(1, new Rewards(1, 1, 1));
|
||||
var aa = new PuzzleResult(mask, g, slotInfo, filled,0).exportFormatFromFilled(1, new Rewards(1, 1, 1));
|
||||
System.out.println(String.join("\n", aa.grid()));
|
||||
|
||||
}
|
||||
|
||||
@@ -5,9 +5,8 @@ import lombok.val;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import puzzle.Export.Clued;
|
||||
import puzzle.Export.Dicts;
|
||||
import puzzle.Export.Gridded;
|
||||
import puzzle.Export.IntListDTO;
|
||||
import puzzle.DictJavaGeneratorMulti.DictEntryDTO.IntListDTO;
|
||||
import puzzle.Export.LetterVisit.LetterAt;
|
||||
import puzzle.Masker.Clues;
|
||||
import puzzle.Masker.Slot;
|
||||
@@ -245,7 +244,7 @@ public class SwedishGeneratorTest {
|
||||
assertEquals(LETTER_A, Lemma.byteAt(INERENAE, 6));
|
||||
assertEquals(LETTER_E, Lemma.byteAt(INERENAE, 7));
|
||||
|
||||
var dict = Dicts.makeDict(new long[]{ APPLE, EXE, IN, INER, INEREN, INERENA, INERENAE });
|
||||
var dict = DictJavaGeneratorMulti.Dicts.makeDict(new long[]{ APPLE, EXE, IN, INER, INEREN, INERENA, INERENAE });
|
||||
|
||||
assertEquals(1, dict.index()[3].words().length);
|
||||
assertEquals(1, dict.index()[5].words().length);
|
||||
@@ -301,7 +300,7 @@ public class SwedishGeneratorTest {
|
||||
|
||||
@Test
|
||||
void testCandidateInfoForPattern() {
|
||||
var dict = Dicts.makeDict(WORDS2);
|
||||
var dict = DictJavaGeneratorMulti.Dicts.makeDict(WORDS2);
|
||||
|
||||
// Pattern "APP--" for length 5
|
||||
var info = candidateInfoForPattern(Context.get().bitset(), packPattern("APP"), dict.index()[5].posBitsets(), dict.index()[5].numlong());
|
||||
@@ -315,7 +314,7 @@ public class SwedishGeneratorTest {
|
||||
// This should detect a slot starting at 0,1 with length 2 (0,1 and 0,2)
|
||||
var clues = Masker.Clues.createEmpty();
|
||||
clues.setClueLo(IDX_0_0.lo, CLUE_RIGHT);
|
||||
var dict = Dicts.makeDict(WORDS2);
|
||||
var dict = DictJavaGeneratorMulti.Dicts.makeDict(WORDS2);
|
||||
var slots = Masker.extractSlots(clues, dict.index());
|
||||
assertEquals(1, slots.length);
|
||||
var s = slots[0];
|
||||
@@ -437,7 +436,7 @@ public class SwedishGeneratorTest {
|
||||
val counts = new byte[SIZE];
|
||||
counts[1] = 2;
|
||||
counts[2] = 3;
|
||||
var dict = Dicts.makeDict(WORDS);
|
||||
var dict = DictJavaGeneratorMulti.Dicts.makeDict(WORDS);
|
||||
var entry5 = dict.index()[5];
|
||||
// cross = (counts[1]-1) + (counts[2]-1) = 1 + 2 = 3
|
||||
// score = 3 * 10 + len(2) = 32
|
||||
|
||||
Reference in New Issue
Block a user