From 938d2ac66b3e39f0175c05ddae1625683fec3c54 Mon Sep 17 00:00:00 2001 From: mike Date: Sat, 17 Jan 2026 20:24:45 +0100 Subject: [PATCH] introduce bitloops --- src/main/java/puzzle/Export.java | 14 ++++--- src/main/java/puzzle/Masker.java | 4 +- src/main/java/puzzle/Meta.java | 14 ++----- src/main/java/puzzle/SwedishGenerator.java | 39 ++++++++++++------- src/test/java/puzzle/CsvIndexService.java | 2 +- .../java/puzzle/DictJavaGeneratorMulti.java | 7 +--- src/test/java/puzzle/ExportFormatTest.java | 2 +- src/test/java/puzzle/MainTest.java | 9 ----- .../java/puzzle/SwedishGeneratorTest.java | 8 ++-- 9 files changed, 46 insertions(+), 53 deletions(-) diff --git a/src/main/java/puzzle/Export.java b/src/main/java/puzzle/Export.java index 62f55e9..44690ef 100644 --- a/src/main/java/puzzle/Export.java +++ b/src/main/java/puzzle/Export.java @@ -300,12 +300,16 @@ public record Export() { var index = new DictEntryDTO[SwedishGenerator.MAX_WORD_LENGTH_PLUS_ONE]; Arrays.setAll(index, DictEntryDTO::new); for (var lemma : wordz) { - var L = Lemma.length(lemma); - - var entry = index[L]; - var idx = entry.words().size(); + var L = Lemma.unpackSize(lemma) + 1;//Lemma.unpackSize(lemma) + 2; + val entry = index[L]; + val idx = entry.words().size(); + val pos = entry.pos(); entry.words().add(lemma); - for (var i = 0; i < L; i++) entry.pos()[i][Lemma.byteAt(lemma, i) - 1].add(idx); + int i = 0; + for (long w = lemma & Lemma.LETTER_MASK; w != 0; w >>>= 5, i++) { + pos[i][(int) ((w & 31) - 1)].add(idx); + } + // for (i = 0; i < L; i++) entry.pos()[i][Lemma.byteAt(lemma, i) - 1].add(idx); } for (int i = SwedishGenerator.MIN_LEN; i < index.length; i++) if (index[i].words().size() <= 0) throw new RuntimeException("No words for length " + i); return new Dict(Arrays.stream(index).map(i -> { diff --git a/src/main/java/puzzle/Masker.java b/src/main/java/puzzle/Masker.java index 2a10589..ec94766 100644 --- a/src/main/java/puzzle/Masker.java +++ b/src/main/java/puzzle/Masker.java @@ -378,8 +378,8 @@ public record Masker(Rng rng, int[] stack, Clues cache) { for (var k = 0; k < offspring; k++) { if (Thread.currentThread().isInterrupted()) break; - var p1 = pop.get(rng.randint(pop.size() - 1)); - var p2 = pop.get(rng.randint(pop.size() - 1)); + var p1 = rng.rand(pop); + var p2 = rng.rand(pop); var child = crossover(p1.grid, p2.grid); children.add(new GridAndFit(hillclimb(child, clueSize, 70))); } diff --git a/src/main/java/puzzle/Meta.java b/src/main/java/puzzle/Meta.java index 1f29857..1323990 100644 --- a/src/main/java/puzzle/Meta.java +++ b/src/main/java/puzzle/Meta.java @@ -43,24 +43,16 @@ public class Meta { buf.flip(); var string = StandardCharsets.UTF_8.decode(buf).toString(); val parts = string.split("\t", 3); - return new ShardLem(Lemma.pack(parts[0]), Integer.parseInt(parts[1]), GSON.fromJson(parts[2], String[].class)); + return new ShardLem(Lemma.from(parts[0]), Integer.parseInt(parts[1]), GSON.fromJson(parts[2], String[].class)); } catch (Exception e) { e.printStackTrace(); - return new ShardLem(Lemma.pack("XXX"), -1, new String[0]); + return new ShardLem(Lemma.from("XXX"), -1, new String[0]); } } static final Path[] SHARDS = IntStream.range(0, 10).mapToObj(sId -> Path.of("src/main/generated-sources/puzzle").resolve(sId + ".idx")).toArray( Path[]::new); static Path shardKey(long word) { - int L = Lemma.length(word); - return SHARDS[L]; - } - static String shardKey(String word) { - int L = word.length(); - char ch = word.charAt(0); - if (ch < 'A' || ch > 'Z') ch = '_'; - ///return "" + L + ch; // e.g. "6Z" - return "" + L; // e.g. "6Z" + return SHARDS[Lemma.unpackSize(word) + 1]; } static int readIntAt(FileChannel ch, long pos) throws IOException { ByteBuffer b = ByteBuffer.allocate(4); diff --git a/src/main/java/puzzle/SwedishGenerator.java b/src/main/java/puzzle/SwedishGenerator.java index 89d1c42..8a5994b 100644 --- a/src/main/java/puzzle/SwedishGenerator.java +++ b/src/main/java/puzzle/SwedishGenerator.java @@ -9,6 +9,7 @@ import lombok.experimental.Delegate; import lombok.val; import precomp.Neighbors9x8; import precomp.Neighbors9x8.rci; +import java.util.List; import java.util.Locale; import static java.lang.Long.*; import static java.lang.Long.numberOfTrailingZeros; @@ -33,7 +34,6 @@ import static java.nio.charset.StandardCharsets.US_ASCII; @SuppressWarnings("ALL") public record SwedishGenerator() { - public static final long GT_1_OFFSET_53_BIT = 0x3E00000000000000L; public static final long X = 0L; public static final int LOG_EVERY_MS = 200; public static final int BAR_LEN = 22; @@ -55,13 +55,12 @@ public record SwedishGenerator() { public static final byte DASH = (byte) C_DASH; public static final long RANGE_0_SIZE = (long) SIZE_MIN_1 - 0L + 1L; public static final long RANGE_0_624 = 624L - 0L + 1L; - public static final int CLUE_INDEX_MAX_SIZE = (288 | 3) + 1; public static int clamp(int x, int a, int b) { return Math.max(a, Math.min(b, x)); } interface Bit1029 { static long[] bit1029() { return new long[2048]; } private static int wordIndex(int bitIndex) { return bitIndex >> 6; } - static boolean get(long[] bits, int bitIndex) { return (bits[wordIndex(bitIndex)] & 1L << bitIndex) != 0L; } + static boolean get(long[] bits, int bitIndex) { return (bits[wordIndex(bitIndex)] & 1L << bitIndex) != X; } static void set(long[] bits, int bitIndex) { bits[wordIndex(bitIndex)] |= 1L << bitIndex; } static void clear(long[] bits, int bitIndex) { bits[wordIndex(bitIndex)] &= ~(1L << bitIndex); } } @@ -120,7 +119,9 @@ public record SwedishGenerator() { }*/ return b; } - public int randint(int max) { return (int) (((nextU32() & 0xFFFFFFFFL) % ((long) max - 0L + 1L))); } + public T rand(T[] p) { return p[(int) (((nextU32() & 0xFFFFFFFFL) % ((long) p.length /*- 0L*/ /*+ 1L*/)))]; } + public T rand(List p) { return p.get((int) (((nextU32() & 0xFFFFFFFFL) % ((long) p.size() /*- 0L*/ /*+ 1L*/)))); } + public int randint(int max) { return (int) (((nextU32() & 0xFFFFFFFFL) % ((long) max /*- 0L*/ /*+ 1L*/))); } public int randint0_SIZE() { return (int) (((nextU32() & 0xFFFFFFFFL) % RANGE_0_SIZE)); } public int randint0_624() { return (int) (((nextU32() & 0xFFFFFFFFL) % RANGE_0_624)); } public double nextFloat() { return (nextU32() & 0xFFFFFFFFL) / 4294967295.0; } @@ -142,26 +143,34 @@ public record SwedishGenerator() { static final long LETTER_MASK = (1L << 40) - 1; // low 40 bits static final long INDEX_MASK = (1L << 24) - 1; // 24 bits - static long pack(String word) { return pack(word.getBytes(US_ASCII)); } - static long packW(byte[] b) { return pack(b) /*| ((long) index << 40)*/; } - static long pack(byte[] b) { + static long from(byte[] word) { return packShiftIn(word) | ((long) (word.length - 1) << 40); } + static long pack(long w, int shardIndex) { return w | (((long) shardIndex) << 43) | ((long) length0(w)) << 40; } + /* static long pack(byte[] b) { + long w = 0; + for (var i = 0; i < b.length; i++) w |= ((long) b[i] & 31) << (i * 5); + return w; + }*/ + static long packShiftIn(byte[] b) { long w = 0; - for (var i = 0; i < b.length; i++) w |= ((long) b[i] & 31) << (i * 5); + for (int i = b.length - 1; i >= 0; i--) w = (w << 5) | ((long) b[i] & 31); return w; } - static public long from(String word) { return packW(word.getBytes(US_ASCII)); } - static byte byteAt(long word, int idx) { return (byte) ((word >>> (idx * 5)) & 0b11111); } - static int length(long word) { return ((63 - numberOfLeadingZeros(word & LETTER_MASK)) / 5) + 1; } + static public long from(String word) { return packShiftIn(word.getBytes(US_ASCII)) | ((long) (word.length() - 1) << 40); } + static byte byteAt(long word, int idx) { return (byte) ((word >>> ((long) idx * 5L)) & 0b11111L); } static int length0(long word) { return ((63 - numberOfLeadingZeros(word & LETTER_MASK)) / 5); } static ThreadLocal BYTES = ThreadLocal.withInitial(() -> new byte[MAX_WORD_LENGTH]); public static String asWord(long word) { - val len = Lemma.length(word); - var b = BYTES.get();//new byte[Lemma.length(word)]; - for (int i = 0, bi = 0; i < len * 5; bi++, i += 5) b[bi] = (byte) (((word >>> i) & 31) | 64); - return new String(b, 0, 0, len); + var b = BYTES.get(); + int bi = 0; + + for (long w = word & LETTER_MASK; w != 0; w >>>= 5) { + b[bi++] = (byte) ((w & 31) | 64); // neem laagste 5 bits + } + return new String(b, 0, bi, US_ASCII); } static int unpackIndex(long w) { return (int) (w >>> 40); } static int unpackShardIndex(long w) { return (int) (w >>> 43); } + static int unpackSize(long w) { return (int) (w >>> 40) & 7; } static int unpackLetters(long w) { return (int) (w & LETTER_MASK); } } diff --git a/src/test/java/puzzle/CsvIndexService.java b/src/test/java/puzzle/CsvIndexService.java index 7f1978f..29dc059 100644 --- a/src/test/java/puzzle/CsvIndexService.java +++ b/src/test/java/puzzle/CsvIndexService.java @@ -33,6 +33,6 @@ public final class CsvIndexService { if (Main.VERBOSE) System.err.println("Word too complex: " + line); return; } - ok.accept(Lemma.packW(word.getBytes(US_ASCII))); + ok.accept(Lemma.from(word.getBytes(US_ASCII))); } } diff --git a/src/test/java/puzzle/DictJavaGeneratorMulti.java b/src/test/java/puzzle/DictJavaGeneratorMulti.java index 8683b41..00da990 100644 --- a/src/test/java/puzzle/DictJavaGeneratorMulti.java +++ b/src/test/java/puzzle/DictJavaGeneratorMulti.java @@ -46,8 +46,6 @@ public final class DictJavaGeneratorMulti { try (var lines = Files.lines(wordsPath, StandardCharsets.UTF_8)) { lines.forEach(line -> { CsvIndexService.lineToLemma(line, w -> { - long len = Lemma.length0(w); - String word = Lemma.asWord(w); String[] clues = CsvIndexService.lineToClue(line); int simpel = CsvIndexService.lineToSimpel(line); @@ -61,8 +59,7 @@ public final class DictJavaGeneratorMulti { var key = Meta.shardKey(w); ShardBuilder sb = builders.computeIfAbsent(key, k -> new ShardBuilder()); try { - long index = ((long) sb.addRecord(rec) << 3) | len; - map.add(w | (index << 40)); + map.add(Lemma.pack(w, sb.addRecord(rec))); } catch (IOException e) { throw new UncheckedIOException(e); } @@ -165,7 +162,7 @@ public final class DictJavaGeneratorMulti { w.write("package " + pkg + ";\n\n"); w.write("public final class " + prefix + ci + " {\n"); w.write(" private " + prefix + ci + "() {}\n"); - + w.write(" public static long[] get() {\n"); w.write(" return new long[] { \n"); for (int i = from; i < to; i++) { diff --git a/src/test/java/puzzle/ExportFormatTest.java b/src/test/java/puzzle/ExportFormatTest.java index 91faff8..45b6410 100644 --- a/src/test/java/puzzle/ExportFormatTest.java +++ b/src/test/java/puzzle/ExportFormatTest.java @@ -151,7 +151,7 @@ public class ExportFormatTest { // These words are known to be in the CSV and likely in the dictionary String[] testWords = { "EEN", "NAAR", "IEDEREEN" }; for (String wStr : testWords) { - long w = Lemma.pack(wStr); + long w = Lemma.from(wStr); int L = wStr.length(); var entry = DictData.DICT.index()[L]; if (entry == null) continue; diff --git a/src/test/java/puzzle/MainTest.java b/src/test/java/puzzle/MainTest.java index 0a08ac3..d8f2c2f 100644 --- a/src/test/java/puzzle/MainTest.java +++ b/src/test/java/puzzle/MainTest.java @@ -48,15 +48,6 @@ public class MainTest { this.verbose = false; }}; static final Dict dict = DictData.DICT;//loadDict(opts.wordsPath); - public static Dict loadDict(String wordsPath) { - var map = new LongArrayList(100_000); - try (var lines = Files.lines(Path.of(wordsPath), StandardCharsets.UTF_8)) { - lines.forEach(line -> CsvIndexService.lineToLemma(line, map::add)); - return Dicts.makeDict(map.toArray()); - } catch (IOException e) { - throw new RuntimeException("Failed to load dictionary from " + wordsPath, e); - } - } @Test void testExtractSlots() { diff --git a/src/test/java/puzzle/SwedishGeneratorTest.java b/src/test/java/puzzle/SwedishGeneratorTest.java index 38d0f4a..2855e0c 100644 --- a/src/test/java/puzzle/SwedishGeneratorTest.java +++ b/src/test/java/puzzle/SwedishGeneratorTest.java @@ -187,7 +187,7 @@ public class SwedishGeneratorTest { assertEquals(val1, rng2.nextU32()); for (var i = 0; i < 100; i++) { - var r = rng.randint(5); + var r = rng.randint(6); assertTrue(r >= 0 && r <= 5); var f = rng.nextFloat(); assertTrue(f >= 0.0 && f <= 1.0); @@ -217,8 +217,8 @@ public class SwedishGeneratorTest { @Test void testLemmaAndDict() { - Assertions.assertEquals(Lemma.pack("APPLE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(l1)); - assertEquals(5, Lemma.length(l1)); + Assertions.assertEquals(Lemma.packShiftIn("APPLE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(l1)); + assertEquals(4, Lemma.unpackSize(l1)); assertEquals(LETTER_A, Lemma.byteAt(l1, 0)); var dict = Dicts.makeDict(new long[]{ l1, l2, l2a, l4a, l6a, l7a, l8a }); @@ -228,7 +228,7 @@ public class SwedishGeneratorTest { var entry3 = dict.index()[3]; assertEquals(1, entry3.words().length); - assertEquals(Lemma.pack("AXE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(entry3.words()[0])); + assertEquals(Lemma.packShiftIn("AXE".getBytes(StandardCharsets.US_ASCII)), Lemma.unpackLetters(entry3.words()[0])); } @Test