introduce bitloops
This commit is contained in:
@@ -33,6 +33,6 @@ public final class CsvIndexService {
|
||||
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
|
||||
return;
|
||||
}
|
||||
ok.accept(Lemma.packW(word.getBytes(US_ASCII)));
|
||||
ok.accept(Lemma.from(word.getBytes(US_ASCII)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,8 +46,6 @@ public final class DictJavaGeneratorMulti {
|
||||
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
|
||||
lines.forEach(line -> {
|
||||
CsvIndexService.lineToLemma(line, w -> {
|
||||
long len = Lemma.length0(w);
|
||||
|
||||
String word = Lemma.asWord(w);
|
||||
String[] clues = CsvIndexService.lineToClue(line);
|
||||
int simpel = CsvIndexService.lineToSimpel(line);
|
||||
@@ -61,8 +59,7 @@ public final class DictJavaGeneratorMulti {
|
||||
var key = Meta.shardKey(w);
|
||||
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
|
||||
try {
|
||||
long index = ((long) sb.addRecord(rec) << 3) | len;
|
||||
map.add(w | (index << 40));
|
||||
map.add(Lemma.pack(w, sb.addRecord(rec)));
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException(e);
|
||||
}
|
||||
@@ -165,7 +162,7 @@ public final class DictJavaGeneratorMulti {
|
||||
w.write("package " + pkg + ";\n\n");
|
||||
w.write("public final class " + prefix + ci + " {\n");
|
||||
w.write(" private " + prefix + ci + "() {}\n");
|
||||
|
||||
|
||||
w.write(" public static long[] get() {\n");
|
||||
w.write(" return new long[] { \n");
|
||||
for (int i = from; i < to; i++) {
|
||||
|
||||
@@ -151,7 +151,7 @@ public class ExportFormatTest {
|
||||
// These words are known to be in the CSV and likely in the dictionary
|
||||
String[] testWords = { "EEN", "NAAR", "IEDEREEN" };
|
||||
for (String wStr : testWords) {
|
||||
long w = Lemma.pack(wStr);
|
||||
long w = Lemma.from(wStr);
|
||||
int L = wStr.length();
|
||||
var entry = DictData.DICT.index()[L];
|
||||
if (entry == null) continue;
|
||||
|
||||
@@ -48,15 +48,6 @@ public class MainTest {
|
||||
this.verbose = false;
|
||||
}};
|
||||
static final Dict dict = DictData.DICT;//loadDict(opts.wordsPath);
|
||||
public static Dict loadDict(String wordsPath) {
|
||||
var map = new LongArrayList(100_000);
|
||||
try (var lines = Files.lines(Path.of(wordsPath), StandardCharsets.UTF_8)) {
|
||||
lines.forEach(line -> CsvIndexService.lineToLemma(line, map::add));
|
||||
return Dicts.makeDict(map.toArray());
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Failed to load dictionary from " + wordsPath, e);
|
||||
}
|
||||
}
|
||||
@Test
|
||||
void testExtractSlots() {
|
||||
|
||||
|
||||
@@ -187,7 +187,7 @@ public class SwedishGeneratorTest {
|
||||
assertEquals(val1, rng2.nextU32());
|
||||
|
||||
for (var i = 0; i < 100; i++) {
|
||||
var r = rng.randint(5);
|
||||
var r = rng.randint(6);
|
||||
assertTrue(r >= 0 && r <= 5);
|
||||
var f = rng.nextFloat();
|
||||
assertTrue(f >= 0.0 && f <= 1.0);
|
||||
@@ -217,8 +217,8 @@ public class SwedishGeneratorTest {
|
||||
|
||||
@Test
|
||||
void testLemmaAndDict() {
|
||||
Assertions.assertEquals(Lemma.pack("APPLE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(l1));
|
||||
assertEquals(5, Lemma.length(l1));
|
||||
Assertions.assertEquals(Lemma.packShiftIn("APPLE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(l1));
|
||||
assertEquals(4, Lemma.unpackSize(l1));
|
||||
assertEquals(LETTER_A, Lemma.byteAt(l1, 0));
|
||||
|
||||
var dict = Dicts.makeDict(new long[]{ l1, l2, l2a, l4a, l6a, l7a, l8a });
|
||||
@@ -228,7 +228,7 @@ public class SwedishGeneratorTest {
|
||||
|
||||
var entry3 = dict.index()[3];
|
||||
assertEquals(1, entry3.words().length);
|
||||
assertEquals(Lemma.pack("AXE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(entry3.words()[0]));
|
||||
assertEquals(Lemma.packShiftIn("AXE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(entry3.words()[0]));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
Reference in New Issue
Block a user