introduce bitloops

This commit is contained in:
mike
2026-01-17 20:24:45 +01:00
parent 8ff9d661e3
commit 938d2ac66b
9 changed files with 46 additions and 53 deletions

View File

@@ -33,6 +33,6 @@ public final class CsvIndexService {
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
return;
}
ok.accept(Lemma.packW(word.getBytes(US_ASCII)));
ok.accept(Lemma.from(word.getBytes(US_ASCII)));
}
}

View File

@@ -46,8 +46,6 @@ public final class DictJavaGeneratorMulti {
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
lines.forEach(line -> {
CsvIndexService.lineToLemma(line, w -> {
long len = Lemma.length0(w);
String word = Lemma.asWord(w);
String[] clues = CsvIndexService.lineToClue(line);
int simpel = CsvIndexService.lineToSimpel(line);
@@ -61,8 +59,7 @@ public final class DictJavaGeneratorMulti {
var key = Meta.shardKey(w);
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
try {
long index = ((long) sb.addRecord(rec) << 3) | len;
map.add(w | (index << 40));
map.add(Lemma.pack(w, sb.addRecord(rec)));
} catch (IOException e) {
throw new UncheckedIOException(e);
}
@@ -165,7 +162,7 @@ public final class DictJavaGeneratorMulti {
w.write("package " + pkg + ";\n\n");
w.write("public final class " + prefix + ci + " {\n");
w.write(" private " + prefix + ci + "() {}\n");
w.write(" public static long[] get() {\n");
w.write(" return new long[] { \n");
for (int i = from; i < to; i++) {

View File

@@ -151,7 +151,7 @@ public class ExportFormatTest {
// These words are known to be in the CSV and likely in the dictionary
String[] testWords = { "EEN", "NAAR", "IEDEREEN" };
for (String wStr : testWords) {
long w = Lemma.pack(wStr);
long w = Lemma.from(wStr);
int L = wStr.length();
var entry = DictData.DICT.index()[L];
if (entry == null) continue;

View File

@@ -48,15 +48,6 @@ public class MainTest {
this.verbose = false;
}};
static final Dict dict = DictData.DICT;//loadDict(opts.wordsPath);
/**
 * Builds a {@code Dict} from the word list file at {@code wordsPath}.
 *
 * <p>The file is read line by line as UTF-8. Each line is handed to
 * {@code CsvIndexService.lineToLemma} together with {@code map::add} as the
 * callback, so every value that call reports is appended to {@code map}.
 * The collected values are then passed to {@code Dicts.makeDict}.
 *
 * @param wordsPath file-system path of the word list to read
 * @return the result of {@code Dicts.makeDict} for the collected values
 * @throws RuntimeException if opening the file fails with an
 *         {@link IOException}; the original exception is kept as the cause
 */
public static Dict loadDict(String wordsPath) {
// Accumulator for the values reported by lineToLemma.
// NOTE(review): 100_000 is presumably an initial-capacity hint — confirm against LongArrayList.
var map = new LongArrayList(100_000);
// try-with-resources closes the line stream when the block exits.
try (var lines = Files.lines(Path.of(wordsPath), StandardCharsets.UTF_8)) {
lines.forEach(line -> CsvIndexService.lineToLemma(line, map::add));
return Dicts.makeDict(map.toArray());
} catch (IOException e) {
// NOTE(review): read errors raised while iterating the stream surface as
// UncheckedIOException (per Files.lines) and are not wrapped by this handler.
throw new RuntimeException("Failed to load dictionary from " + wordsPath, e);
}
}
@Test
void testExtractSlots() {

View File

@@ -187,7 +187,7 @@ public class SwedishGeneratorTest {
assertEquals(val1, rng2.nextU32());
for (var i = 0; i < 100; i++) {
var r = rng.randint(5);
var r = rng.randint(6);
assertTrue(r >= 0 && r <= 5);
var f = rng.nextFloat();
assertTrue(f >= 0.0 && f <= 1.0);
@@ -217,8 +217,8 @@ public class SwedishGeneratorTest {
@Test
void testLemmaAndDict() {
Assertions.assertEquals(Lemma.pack("APPLE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(l1));
assertEquals(5, Lemma.length(l1));
Assertions.assertEquals(Lemma.packShiftIn("APPLE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(l1));
assertEquals(4, Lemma.unpackSize(l1));
assertEquals(LETTER_A, Lemma.byteAt(l1, 0));
var dict = Dicts.makeDict(new long[]{ l1, l2, l2a, l4a, l6a, l7a, l8a });
@@ -228,7 +228,7 @@ public class SwedishGeneratorTest {
var entry3 = dict.index()[3];
assertEquals(1, entry3.words().length);
assertEquals(Lemma.pack("AXE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(entry3.words()[0]));
assertEquals(Lemma.packShiftIn("AXE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(entry3.words()[0]));
}
@Test