introduce bitloops
This commit is contained in:
@@ -10,19 +10,18 @@ import puzzle.SwedishGenerator.Dict;
|
||||
import puzzle.SwedishGenerator.DictEntry;
|
||||
import puzzle.SwedishGenerator.Lemma;
|
||||
import static java.nio.charset.StandardCharsets.US_ASCII;
|
||||
import static puzzle.SwedishGenerator.THRESS;
|
||||
|
||||
public final class DictJavaGeneratorMulti {
|
||||
|
||||
// Smaller = more files, but safer for javac/class limits.
|
||||
private static final int WORDS_CHUNK = 8_192;
|
||||
private static final int POS_CHUNK = 8_192;
|
||||
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Path wordsFile = Path.of(args.length > 0 ? args[0] : "nl_score_hints_v3.csv");
|
||||
Path outDir = Path.of(args.length > 1 ? args[1] : "src/main/generated-sources/puzzle/dict" + THRESS);
|
||||
String pkg = "puzzle.dict" + THRESS;
|
||||
HashMap<Path, ShardBuilder> builders = new HashMap<Path, ShardBuilder>(16);
|
||||
HashMap<String, ShardBuilder> builders = new HashMap<String, ShardBuilder>(16);
|
||||
|
||||
SwedishGenerator.Dict dict = buildDict(wordsFile, builders);
|
||||
|
||||
@@ -40,15 +39,13 @@ public final class DictJavaGeneratorMulti {
|
||||
// Aggregator
|
||||
writeAggregator(outDir, pkg, "DictData", dict.length());
|
||||
System.out.println("Generated sources into: " + outDir.toAbsolutePath());
|
||||
builders.forEach(DictJavaGeneratorMulti::writeIndexedShard);
|
||||
|
||||
}
|
||||
static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("src/main/generated-sources/puzzle/dict"+THRESS).resolve(sId + ".idx")).toArray(
|
||||
Path[]::new);
|
||||
static Path shardKey(long word) {
|
||||
return SHARDS[Lemma.unpackSize(word) + 1];
|
||||
public static final int THRESS = 800;
|
||||
static String shardKey(long word) {
|
||||
return ""+Lemma.unpackSize(word) + 1;
|
||||
}
|
||||
private static SwedishGenerator.Dict buildDict(Path wordsPath, HashMap<Path, ShardBuilder> builders) throws IOException {
|
||||
private static SwedishGenerator.Dict buildDict(Path wordsPath, HashMap<String, ShardBuilder> builders) throws IOException {
|
||||
var map = new LongArrayList(100_000);
|
||||
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
|
||||
lines.forEach(line -> {
|
||||
@@ -63,7 +60,7 @@ public final class DictJavaGeneratorMulti {
|
||||
String recStr = word + "\t" + simpel + "\t" + json + "\n";
|
||||
byte[] rec = recStr.getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
var key = shardKey(w) ;
|
||||
var key = shardKey(w);
|
||||
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
|
||||
try {
|
||||
map.add(Lemma.pack(w, sb.addRecord(rec)));
|
||||
@@ -129,32 +126,6 @@ public final class DictJavaGeneratorMulti {
|
||||
}
|
||||
|
||||
static final int VERSION = 1;
|
||||
static void writeIndexedShard(Path out, ShardBuilder sb) {
|
||||
int n = sb.offsets.size();
|
||||
int[] offs = sb.offsets.toArray();
|
||||
byte[] data = sb.data.toByteArray();
|
||||
|
||||
try (FileChannel ch = FileChannel.open(out,
|
||||
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING,
|
||||
StandardOpenOption.WRITE)) {
|
||||
|
||||
// header
|
||||
ByteBuffer hdr = ByteBuffer.allocate(12);
|
||||
hdr.putInt(Meta.SHARD_MAGIC).putInt(VERSION).putInt(n).flip();
|
||||
ch.write(hdr);
|
||||
|
||||
// offsets table (int per record)
|
||||
ByteBuffer tbl = ByteBuffer.allocate(n * 4);
|
||||
for (int i = 0; i < n; i++) tbl.putInt(offs[i]);
|
||||
tbl.flip();
|
||||
ch.write(tbl);
|
||||
|
||||
// data
|
||||
ch.write(ByteBuffer.wrap(data));
|
||||
}catch (IOException e){
|
||||
throw new RuntimeException("Failed to write shard to " + out, e);
|
||||
}
|
||||
}
|
||||
|
||||
private static void writeAggregator(Path outDir, String pkg, String cls, int totalLen) throws IOException {
|
||||
Path out = outDir.resolve(cls + ".java");
|
||||
@@ -330,7 +301,7 @@ public final class DictJavaGeneratorMulti {
|
||||
var word = parts[1].trim();
|
||||
int score = Integer.parseInt(parts[2].trim());
|
||||
int simpel = Integer.parseInt(parts[3].trim());
|
||||
if (score < 1 || simpel>THRESS) {
|
||||
if (score < 1 || simpel > THRESS) {
|
||||
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
|
||||
return;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user