introduce bitloops

This commit is contained in:
mike
2026-01-17 20:24:45 +01:00
parent 8ff9d661e3
commit 938d2ac66b
9 changed files with 46 additions and 53 deletions

View File

@@ -300,12 +300,16 @@ public record Export() {
var index = new DictEntryDTO[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE];
Arrays.setAll(index, DictEntryDTO::new);
for (var lemma : wordz) {
var L = Lemma.length(lemma);
var entry = index[L];
var idx = entry.words().size();
var L = Lemma.unpackSize(lemma) + 1;//Lemma.unpackSize(lemma) + 2;
val entry = index[L];
val idx = entry.words().size();
val pos = entry.pos();
entry.words().add(lemma);
for (var i = 0; i < L; i++) entry.pos()[i][Lemma.byteAt(lemma, i) - 1].add(idx);
int i = 0;
for (long w = lemma & Lemma.LETTER_MASK; w != 0; w >>>= 5, i++) {
pos[i][(int) ((w & 31) - 1)].add(idx);
}
// for (i = 0; i < L; i++) entry.pos()[i][Lemma.byteAt(lemma, i) - 1].add(idx);
}
for (int i = SwedishGenerator.MIN_LEN; i < index.length; i++) if (index[i].words().size() <= 0) throw new RuntimeException("No words for length " + i);
return new Dict(Arrays.stream(index).map(i -> {

View File

@@ -378,8 +378,8 @@ public record Masker(Rng rng, int[] stack, Clues cache) {
for (var k = 0; k < offspring; k++) {
if (Thread.currentThread().isInterrupted()) break;
var p1 = pop.get(rng.randint(pop.size() - 1));
var p2 = pop.get(rng.randint(pop.size() - 1));
var p1 = rng.rand(pop);
var p2 = rng.rand(pop);
var child = crossover(p1.grid, p2.grid);
children.add(new GridAndFit(hillclimb(child, clueSize, 70)));
}

View File

@@ -43,24 +43,16 @@ public class Meta {
buf.flip();
var string = StandardCharsets.UTF_8.decode(buf).toString();
val parts = string.split("\t", 3);
return new ShardLem(Lemma.pack(parts[0]), Integer.parseInt(parts[1]), GSON.fromJson(parts[2], String[].class));
return new ShardLem(Lemma.from(parts[0]), Integer.parseInt(parts[1]), GSON.fromJson(parts[2], String[].class));
} catch (Exception e) {
e.printStackTrace();
return new ShardLem(Lemma.pack("XXX"), -1, new String[0]);
return new ShardLem(Lemma.from("XXX"), -1, new String[0]);
}
}
static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("src/main/generated-sources/puzzle").resolve(sId + ".idx")).toArray(
Path[]::new);
static Path shardKey(long word) {
int L = Lemma.length(word);
return SHARDS[L];
}
static String shardKey(String word) {
int L = word.length();
char ch = word.charAt(0);
if (ch < 'A' || ch > 'Z') ch = '_';
///return "" + L + ch; // e.g. "6Z"
return "" + L; // e.g. "6Z"
return SHARDS[Lemma.unpackSize(word) + 1];
}
static int readIntAt(FileChannel ch, long pos) throws IOException {
ByteBuffer b = ByteBuffer.allocate(4);

View File

@@ -9,6 +9,7 @@ import lombok.experimental.Delegate;
import lombok.val;
import precomp.Neighbors9x8;
import precomp.Neighbors9x8.rci;
import java.util.List;
import java.util.Locale;
import static java.lang.Long.*;
import static java.lang.Long.numberOfTrailingZeros;
@@ -33,7 +34,6 @@ import static java.nio.charset.StandardCharsets.US_ASCII;
@SuppressWarnings("ALL")
public record SwedishGenerator() {
public static final long GT_1_OFFSET_53_BIT = 0x3E00000000000000L;
public static final long X = 0L;
public static final int LOG_EVERY_MS = 200;
public static final int BAR_LEN = 22;
@@ -55,13 +55,12 @@ public record SwedishGenerator() {
public static final byte DASH = (byte) C_DASH;
public static final long RANGE_0_SIZE = (long) SIZE_MIN_1 - 0L + 1L;
public static final long RANGE_0_624 = 624L - 0L + 1L;
public static final int CLUE_INDEX_MAX_SIZE = (288 | 3) + 1;
/**
 * Restricts {@code x} to the interval {@code [a, b]}.
 *
 * <p>The upper bound is applied first and the lower bound second, so when
 * {@code a > b} the result is always {@code a}.
 *
 * @param x value to restrict
 * @param a lower bound
 * @param b upper bound
 * @return {@code x} limited to the given bounds
 */
public static int clamp(int x, int a, int b) {
    final int capped = Math.min(b, x);
    return Math.max(a, capped);
}
/**
 * Static helpers that treat a {@code long[]} as a flat bit set.
 *
 * <p>Bit {@code i} lives in word {@code i >> 6}. The mask is written as
 * {@code 1L << i}: Java only uses the low six bits of a {@code long} shift
 * distance, so this selects bit {@code i mod 64} inside that word.
 */
interface Bit1029 {
    /** Allocates a zeroed backing array of 2048 words (2048 * 64 addressable bits). */
    static long[] bit1029() { return new long[2048]; }

    /** Returns the index of the 64-bit word that holds {@code bitIndex}. */
    private static int wordIndex(int bitIndex) { return bitIndex >> 6; }

    /**
     * Tests a single bit.
     *
     * @return {@code true} when the bit at {@code bitIndex} is set
     */
    static boolean get(long[] bits, int bitIndex) { return (bits[wordIndex(bitIndex)] & 1L << bitIndex) != 0L; }

    /** Sets the bit at {@code bitIndex}, leaving every other bit untouched. */
    static void set(long[] bits, int bitIndex) { bits[wordIndex(bitIndex)] |= 1L << bitIndex; }

    /** Clears the bit at {@code bitIndex}, leaving every other bit untouched. */
    static void clear(long[] bits, int bitIndex) { bits[wordIndex(bitIndex)] &= ~(1L << bitIndex); }
}
@@ -120,7 +119,9 @@ public record SwedishGenerator() {
}*/
return b;
}
public int randint(int max) { return (int) (((nextU32() & 0xFFFFFFFFL) % ((long) max - 0L + 1L))); }
/**
 * Picks one element of {@code p} using the next 32-bit value of this generator.
 *
 * <p>The raw value is treated as unsigned and reduced modulo {@code p.length},
 * so every index in {@code [0, p.length)} can be returned.
 *
 * @param p candidates to choose from
 * @return the selected element
 * @throws ArithmeticException if {@code p} is empty (remainder by zero)
 */
public <T> T rand(T[] p) {
    final long unsigned = nextU32() & 0xFFFFFFFFL;
    final int pick = (int) (unsigned % p.length);
    return p[pick];
}
/**
 * Picks one element of {@code p} using the next 32-bit value of this generator.
 *
 * <p>The raw value is treated as unsigned and reduced modulo {@code p.size()},
 * so every index in {@code [0, p.size())} can be returned.
 *
 * @param p candidates to choose from
 * @return the selected element
 * @throws ArithmeticException if {@code p} is empty (remainder by zero)
 */
public <T> T rand(List<T> p) {
    final long unsigned = nextU32() & 0xFFFFFFFFL;
    final int pick = (int) (unsigned % p.size());
    return p.get(pick);
}
/**
 * Draws the next 32-bit value, treats it as unsigned and reduces it modulo {@code max}.
 *
 * <p>The bound is exclusive: for {@code max > 0} the result lies in {@code [0, max)}.
 *
 * @param max exclusive upper bound
 * @return the reduced value
 * @throws ArithmeticException if {@code max} is zero (remainder by zero)
 */
public int randint(int max) {
    final long unsigned = nextU32() & 0xFFFFFFFFL;
    return (int) (unsigned % max);
}
public int randint0_SIZE() { return (int) (((nextU32() & 0xFFFFFFFFL) % RANGE_0_SIZE)); }
public int randint0_624() { return (int) (((nextU32() & 0xFFFFFFFFL) % RANGE_0_624)); }
public double nextFloat() { return (nextU32() & 0xFFFFFFFFL) / 4294967295.0; }
@@ -142,26 +143,34 @@ public record SwedishGenerator() {
static final long LETTER_MASK = (1L << 40) - 1; // low 40 bits
static final long INDEX_MASK = (1L << 24) - 1; // 24 bits
static long pack(String word) { return pack(word.getBytes(US_ASCII)); }
static long packW(byte[] b) { return pack(b) /*| ((long) index << 40)*/; }
static long pack(byte[] b) {
/**
 * Builds a packed lemma from raw letter bytes: the letters go through
 * {@code packShiftIn} and {@code word.length - 1} is stored starting at bit 40.
 *
 * @param word letter bytes, first letter first
 * @return packed letters combined with the size field
 */
static long from(byte[] word) {
    final long letters = packShiftIn(word);
    final long sizeField = (long) (word.length - 1) << 40;
    return letters | sizeField;
}
/**
 * Adds a shard index and a size field to an already packed word.
 *
 * <p>The shard index is placed from bit 43 upwards and {@code length0(w)} from
 * bit 40 upwards; both are OR-ed into {@code w}, so bits already set in
 * {@code w} are kept.
 *
 * @param w          packed word
 * @param shardIndex index to store above bit 43
 * @return {@code w} with both fields OR-ed in
 */
static long pack(long w, int shardIndex) {
    final long shardField = ((long) shardIndex) << 43;
    final long sizeField = ((long) length0(w)) << 40;
    return w | shardField | sizeField;
}
/* static long pack(byte[] b) {
long w = 0;
for (var i = 0; i < b.length; i++) w |= ((long) b[i] & 31) << (i * 5);
return w;
}*/
/**
 * Packs letter bytes into a single {@code long}, five bits per letter.
 *
 * <p>Only the low five bits of each byte are used. Letter {@code i} ends up at
 * bits {@code [5*i, 5*i + 5)}, so the first letter occupies the lowest bits.
 *
 * @param b letter bytes, first letter first
 * @return the packed letters; {@code 0} for an empty array
 */
static long packShiftIn(byte[] b) {
    long w = 0;
    // Walk from the last letter to the first: every step pushes the letters
    // collected so far up by five bits and drops the current one into the low bits.
    for (int i = b.length - 1; i >= 0; i--) w = (w << 5) | ((long) b[i] & 31);
    return w;
}
static public long from(String word) { return packW(word.getBytes(US_ASCII)); }
static byte byteAt(long word, int idx) { return (byte) ((word >>> (idx * 5)) & 0b11111); }
static int length(long word) { return ((63 - numberOfLeadingZeros(word & LETTER_MASK)) / 5) + 1; }
/**
 * Builds a packed lemma from a word: its US-ASCII bytes go through
 * {@code packShiftIn} and {@code word.length() - 1} is stored starting at bit 40.
 *
 * @param word the word to pack
 * @return packed letters combined with the size field
 */
public static long from(String word) {
    final byte[] ascii = word.getBytes(US_ASCII);
    final long sizeField = (long) (word.length() - 1) << 40;
    return packShiftIn(ascii) | sizeField;
}
/**
 * Reads the five-bit group at position {@code idx} of a packed word.
 *
 * @param word packed word
 * @param idx  zero-based group position; group 0 is the lowest five bits
 * @return the five-bit value at that position
 */
static byte byteAt(long word, int idx) {
    final long shift = (long) idx * 5L;
    final long letter = (word >>> shift) & 31L;
    return (byte) letter;
}
/**
 * Returns the zero-based position of the highest non-empty five-bit group
 * inside the letter field of {@code word}.
 *
 * <p>When no letter bit is set the intermediate value is {@code -1}, which
 * integer division truncates to {@code 0}.
 *
 * @param word packed word; bits outside {@code LETTER_MASK} are ignored
 * @return position of the highest used five-bit group
 */
static int length0(long word) {
    final long letters = word & LETTER_MASK;
    final int highestBit = 63 - numberOfLeadingZeros(letters);
    return highestBit / 5;
}
static ThreadLocal<byte[]> BYTES = ThreadLocal.withInitial(() -> new byte[MAX_WORD_LENGTH]);
/**
 * Decodes the letter field of a packed word back into text.
 *
 * <p>Five-bit groups are consumed from the lowest bits upwards until no set
 * bits remain; each group is OR-ed with 64, so the value 1 becomes {@code 'A'}.
 * The bytes are written into the per-thread {@code BYTES} scratch buffer to
 * avoid allocating an array on every call.
 *
 * @param word packed word; bits outside {@code LETTER_MASK} are ignored
 * @return the decoded text, empty when the letter field is zero
 */
public static String asWord(long word) {
    var b = BYTES.get();
    int bi = 0;
    for (long w = word & LETTER_MASK; w != 0; w >>>= 5) {
        b[bi++] = (byte) ((w & 31) | 64); // take the lowest five bits
    }
    return new String(b, 0, bi, US_ASCII);
}
/**
 * Returns everything stored above the 40-bit letter field, truncated to {@code int}.
 *
 * @param w packed word
 * @return bits 40 and up of {@code w}
 */
static int unpackIndex(long w) {
    final long upper = w >>> 40;
    return (int) upper;
}
/**
 * Returns the bits stored from bit 43 upwards, truncated to {@code int}.
 *
 * @param w packed word
 * @return bits 43 and up of {@code w}
 */
static int unpackShardIndex(long w) {
    final long upper = w >>> 43;
    return (int) upper;
}
/**
 * Returns the three-bit size field stored at bits 40..42.
 *
 * @param w packed word
 * @return a value in {@code [0, 7]}
 */
static int unpackSize(long w) {
    final int upper = (int) (w >>> 40);
    return upper & 0b111;
}
/**
 * Returns the letter field of a packed word, narrowed to {@code int}.
 *
 * NOTE(review): LETTER_MASK covers 40 bits but the cast keeps only the low 32.
 * With five bits per letter, a seventh letter starts at bit 30 and an eighth at
 * bit 35, so words longer than six letters lose data here. Returning
 * {@code long} would fix this but changes the signature - confirm callers first.
 */
static int unpackLetters(long w) { return (int) (w & LETTER_MASK); }
}

View File

@@ -33,6 +33,6 @@ public final class CsvIndexService {
if (Main.VERBOSE) System.err.println("Word too complex: " + line);
return;
}
ok.accept(Lemma.packW(word.getBytes(US_ASCII)));
ok.accept(Lemma.from(word.getBytes(US_ASCII)));
}
}

View File

@@ -46,8 +46,6 @@ public final class DictJavaGeneratorMulti {
try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) {
lines.forEach(line -> {
CsvIndexService.lineToLemma(line, w -> {
long len = Lemma.length0(w);
String word = Lemma.asWord(w);
String[] clues = CsvIndexService.lineToClue(line);
int simpel = CsvIndexService.lineToSimpel(line);
@@ -61,8 +59,7 @@ public final class DictJavaGeneratorMulti {
var key = Meta.shardKey(w);
ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder());
try {
long index = ((long) sb.addRecord(rec) << 3) | len;
map.add(w | (index << 40));
map.add(Lemma.pack(w, sb.addRecord(rec)));
} catch (IOException e) {
throw new UncheckedIOException(e);
}

View File

@@ -151,7 +151,7 @@ public class ExportFormatTest {
// These words are known to be in the CSV and likely in the dictionary
String[] testWords = { "EEN", "NAAR", "IEDEREEN" };
for (String wStr : testWords) {
long w = Lemma.pack(wStr);
long w = Lemma.from(wStr);
int L = wStr.length();
var entry = DictData.DICT.index()[L];
if (entry == null) continue;

View File

@@ -48,15 +48,6 @@ public class MainTest {
this.verbose = false;
}};
static final Dict dict = DictData.DICT;//loadDict(opts.wordsPath);
public static Dict loadDict(String wordsPath) {
var map = new LongArrayList(100_000);
try (var lines = Files.lines(Path.of(wordsPath), StandardCharsets.UTF_8)) {
lines.forEach(line -> CsvIndexService.lineToLemma(line, map::add));
return Dicts.makeDict(map.toArray());
} catch (IOException e) {
throw new RuntimeException("Failed to load dictionary from " + wordsPath, e);
}
}
@Test
void testExtractSlots() {

View File

@@ -187,7 +187,7 @@ public class SwedishGeneratorTest {
assertEquals(val1, rng2.nextU32());
for (var i = 0; i < 100; i++) {
var r = rng.randint(5);
var r = rng.randint(6);
assertTrue(r >= 0 && r <= 5);
var f = rng.nextFloat();
assertTrue(f >= 0.0 && f <= 1.0);
@@ -217,8 +217,8 @@ public class SwedishGeneratorTest {
@Test
void testLemmaAndDict() {
Assertions.assertEquals(Lemma.pack("APPLE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(l1));
assertEquals(5, Lemma.length(l1));
Assertions.assertEquals(Lemma.packShiftIn("APPLE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(l1));
assertEquals(4, Lemma.unpackSize(l1));
assertEquals(LETTER_A, Lemma.byteAt(l1, 0));
var dict = Dicts.makeDict(new long[]{ l1, l2, l2a, l4a, l6a, l7a, l8a });
@@ -228,7 +228,7 @@ public class SwedishGeneratorTest {
var entry3 = dict.index()[3];
assertEquals(1, entry3.words().length);
assertEquals(Lemma.pack("AXE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(entry3.words()[0]));
assertEquals(Lemma.packShiftIn("AXE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(entry3.words()[0]));
}
@Test