introduce bitloops
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
package puzzle;
|
package puzzle;
|
||||||
|
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
import lombok.experimental.Accessors;
|
import lombok.experimental.Accessors;
|
||||||
import lombok.experimental.Delegate;
|
import lombok.experimental.Delegate;
|
||||||
import lombok.val;
|
import lombok.val;
|
||||||
@@ -339,13 +340,18 @@ public record Export() {
|
|||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
@Accessors(fluent = true)
|
@Accessors(fluent = true)
|
||||||
|
@NoArgsConstructor
|
||||||
static final class IntListDTO {
|
static final class IntListDTO {
|
||||||
|
|
||||||
int[] data = new int[8];
|
int[] data = new int[8];
|
||||||
int size = 0;
|
int size = 0;
|
||||||
|
public IntListDTO(int size) {
|
||||||
|
data = new int[size];
|
||||||
|
}
|
||||||
void add(int v) {
|
void add(int v) {
|
||||||
if (size >= data.length) data = Arrays.copyOf(data, data.length * 2);
|
if (size >= data.length) data = Arrays.copyOf(data, data.length * 2);
|
||||||
data[size++] = v;
|
data[size++] = v;
|
||||||
}
|
}
|
||||||
|
int[] toArray() { return Arrays.copyOf(data, size); }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -388,7 +388,8 @@ public class SwedishGenerator {
|
|||||||
var r = rng.nextFloat();
|
var r = rng.nextFloat();
|
||||||
//int idxInArray = rng.biasedIndexPow3(L - 1);
|
//int idxInArray = rng.biasedIndexPow3(L - 1);
|
||||||
var arrIndex = (int) (r * r * r * (L - 1));
|
var arrIndex = (int) (r * r * r * (L - 1));
|
||||||
var w = entry.words[idxs[arrIndex]];
|
var shardIdx = idxs[arrIndex];
|
||||||
|
var w = entry.words[shardIdx];
|
||||||
var lemIdx = Lemma.unpackIndex(w);
|
var lemIdx = Lemma.unpackIndex(w);
|
||||||
if (Bit1029.get(used, lemIdx)) continue;
|
if (Bit1029.get(used, lemIdx)) continue;
|
||||||
low = glo;
|
low = glo;
|
||||||
@@ -397,7 +398,7 @@ public class SwedishGenerator {
|
|||||||
|
|
||||||
Bit1029.set(used, lemIdx);
|
Bit1029.set(used, lemIdx);
|
||||||
s.assign.w = w;
|
s.assign.w = w;
|
||||||
s.assign.shardIdx = arrIndex;
|
s.assign.shardIdx = shardIdx;
|
||||||
if (backtrack(depth + 1)) return true;
|
if (backtrack(depth + 1)) return true;
|
||||||
s.assign.w = X;
|
s.assign.w = X;
|
||||||
Bit1029.clear(used, lemIdx);
|
Bit1029.clear(used, lemIdx);
|
||||||
|
|||||||
@@ -1,8 +1,7 @@
|
|||||||
package puzzle;
|
package puzzle;
|
||||||
|
|
||||||
import lombok.val;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
import puzzle.Export.Dicts;
|
import puzzle.Export.Dicts;
|
||||||
|
import puzzle.SwedishGenerator.Dict;
|
||||||
import java.io.BufferedWriter;
|
import java.io.BufferedWriter;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
@@ -12,7 +11,6 @@ import java.nio.file.StandardOpenOption;
|
|||||||
|
|
||||||
public final class DictCodeGen {
|
public final class DictCodeGen {
|
||||||
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
DictJavaGenerator.main(args); // gebruikt jouw makeDict logic
|
DictJavaGenerator.main(args); // gebruikt jouw makeDict logic
|
||||||
@@ -34,17 +32,30 @@ public final class DictCodeGen {
|
|||||||
public final class DictJavaGenerator {
|
public final class DictJavaGenerator {
|
||||||
|
|
||||||
// tune if needed
|
// tune if needed
|
||||||
private static final int WORDS_CHUNK = 8_192>>>5; // longs per chunk
|
private static final int WORDS_CHUNK = 8_192 >>> 5; // longs per chunk
|
||||||
private static final int POS_CHUNK = 8_192>>>5; // longs per chunk
|
private static final int POS_CHUNK = 8_192 >>> 5; // longs per chunk
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv");
|
Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv");
|
||||||
Path outDir = Path.of(args.length > 1 ? args[1] : "/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle");
|
Path outDir = Path.of(args.length > 1 ? args[1] : "/home/mike/dev/puzzle-generator/src/main/generated-sources/puzzle");
|
||||||
|
writeDict(wordsFile, outDir);
|
||||||
|
}
|
||||||
|
public static Dict writeDict(Path wordsFile, Path outDir) {
|
||||||
|
|
||||||
String pkg = "puzzle";
|
String pkg = "puzzle";
|
||||||
|
|
||||||
SwedishGenerator.Dict dict = buildDict(wordsFile);
|
SwedishGenerator.Dict dict = null;
|
||||||
|
try {
|
||||||
|
dict = buildDict(wordsFile);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
Files.createDirectories(outDir);
|
Files.createDirectories(outDir);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
|
||||||
// emit L2..L8
|
// emit L2..L8
|
||||||
for (int L = 2; L <= 8; L++) {
|
for (int L = 2; L <= 8; L++) {
|
||||||
@@ -52,13 +63,22 @@ public final class DictCodeGen {
|
|||||||
if (entry == null || entry.words() == null || entry.words().length == 0) {
|
if (entry == null || entry.words() == null || entry.words().length == 0) {
|
||||||
throw new IllegalStateException("No words for length " + L);
|
throw new IllegalStateException("No words for length " + L);
|
||||||
}
|
}
|
||||||
|
try {
|
||||||
writeLengthClass(outDir, pkg, "DictDataL" + L, L, entry);
|
writeLengthClass(outDir, pkg, "DictDataL" + L, L, entry);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// emit aggregator
|
// emit aggregator
|
||||||
|
try {
|
||||||
writeAggregator(outDir, pkg, "DictData", dict.length());
|
writeAggregator(outDir, pkg, "DictData", dict.length());
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
|
||||||
System.out.println("Generated dictionary sources into: " + outDir.toAbsolutePath());
|
System.out.println("Generated dictionary sources into: " + outDir.toAbsolutePath());
|
||||||
|
return dict;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static SwedishGenerator.Dict buildDict(Path wordsPath) throws IOException {
|
private static SwedishGenerator.Dict buildDict(Path wordsPath) throws IOException {
|
||||||
|
|||||||
@@ -1,7 +1,11 @@
|
|||||||
package puzzle;
|
package puzzle;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
import puzzle.DictCodeGen.DictJavaGenerator;
|
||||||
import puzzle.Export.Dicts;
|
import puzzle.Export.Dicts;
|
||||||
|
import puzzle.Export.IntListDTO;
|
||||||
|
import puzzle.SwedishGenerator.Dict;
|
||||||
|
import puzzle.SwedishGenerator.Lemma;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
@@ -39,7 +43,10 @@ public final class DictJavaGeneratorMulti {
|
|||||||
|
|
||||||
// Aggregator
|
// Aggregator
|
||||||
writeAggregator(outDir, pkg, "DictData", dict.length());
|
writeAggregator(outDir, pkg, "DictData", dict.length());
|
||||||
generateHintShards(wordsFile, outDir);
|
var csv = Paths.get("nl_score_hints_v3.csv");
|
||||||
|
var idx = Paths.get("nl_score_hints_v3.idx");
|
||||||
|
|
||||||
|
ScopedValue.where(CsvIndexService.SC, new CsvIndexService(csv, idx)).run(() -> generateHintShards(dict, outDir));
|
||||||
System.out.println("Generated sources into: " + outDir.toAbsolutePath());
|
System.out.println("Generated sources into: " + outDir.toAbsolutePath());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -57,23 +64,10 @@ public final class DictJavaGeneratorMulti {
|
|||||||
var parts = line.split(",", 5);
|
var parts = line.split(",", 5);
|
||||||
return parts[1].trim();
|
return parts[1].trim();
|
||||||
}
|
}
|
||||||
static final class IntArrayList {
|
|
||||||
|
|
||||||
int[] a;
|
|
||||||
int size;
|
|
||||||
IntArrayList(int cap) { a = new int[cap]; }
|
|
||||||
void add(int v) {
|
|
||||||
if (size == a.length) a = Arrays.copyOf(a, a.length * 2);
|
|
||||||
a[size++] = v;
|
|
||||||
}
|
|
||||||
int size() { return size; }
|
|
||||||
int get(int i) { return a[i]; }
|
|
||||||
int[] toArray() { return Arrays.copyOf(a, size); }
|
|
||||||
}
|
|
||||||
|
|
||||||
static final class ShardBuilder {
|
static final class ShardBuilder {
|
||||||
|
|
||||||
final IntArrayList offsets = new IntArrayList(4096);
|
final IntListDTO offsets = new IntListDTO(4096);
|
||||||
final ByteArrayOutputStream data = new ByteArrayOutputStream(1 << 20); // grows
|
final ByteArrayOutputStream data = new ByteArrayOutputStream(1 << 20); // grows
|
||||||
void addRecord(byte[] rec) throws IOException {
|
void addRecord(byte[] rec) throws IOException {
|
||||||
offsets.add(data.size());
|
offsets.add(data.size());
|
||||||
@@ -116,6 +110,50 @@ public final class DictJavaGeneratorMulti {
|
|||||||
writeIndexedShard(outDir.resolve(e.getKey() + ".idx"), e.getValue());
|
writeIndexedShard(outDir.resolve(e.getKey() + ".idx"), e.getValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
static void generateHintShards(Dict dict, Path outDir) {
|
||||||
|
try {
|
||||||
|
Files.createDirectories(outDir);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
var builders = new java.util.HashMap<Path, ShardBuilder>(256);
|
||||||
|
|
||||||
|
for (var index : dict.index()) {
|
||||||
|
long[] words = index.words();
|
||||||
|
for (int shardIdx = 0; shardIdx < words.length; shardIdx++) {
|
||||||
|
var w = words[shardIdx];
|
||||||
|
String word = Lemma.asWord(w);
|
||||||
|
int wIdx = Lemma.unpackIndex(w);
|
||||||
|
String[] clues = CsvIndexService.clues(wIdx);
|
||||||
|
int simpel = CsvIndexService.simpel(wIdx);
|
||||||
|
|
||||||
|
// serialize to: WORD \t JSON \n
|
||||||
|
// (als je al JSON string wilt bewaren: gebruik Gson.toJson(clues))
|
||||||
|
String json = Meta.GSON.toJson(clues);
|
||||||
|
String recStr = word + "\t" + simpel + "\t" + json + "\n";
|
||||||
|
byte[] rec = recStr.getBytes(StandardCharsets.UTF_8);
|
||||||
|
|
||||||
|
var key = Meta.shardKey(w);
|
||||||
|
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
|
||||||
|
try {
|
||||||
|
sb.addRecord(rec);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new UncheckedIOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// flush all shards to disk as <key>.idx (e.g. 6Z.idx)
|
||||||
|
for (var e : builders.entrySet()) {
|
||||||
|
try {
|
||||||
|
writeIndexedShard(e.getKey(), e.getValue());
|
||||||
|
} catch (IOException ex) {
|
||||||
|
throw new RuntimeException(ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
static void writeIndexedShard(Path out, ShardBuilder sb) throws IOException {
|
static void writeIndexedShard(Path out, ShardBuilder sb) throws IOException {
|
||||||
int n = sb.offsets.size();
|
int n = sb.offsets.size();
|
||||||
int[] offs = sb.offsets.toArray();
|
int[] offs = sb.offsets.toArray();
|
||||||
|
|||||||
Reference in New Issue
Block a user