introduce bitloops

This commit is contained in:
mike
2026-01-12 05:47:12 +01:00
parent 6e600cfae2
commit 1df6d6266d
4 changed files with 32 additions and 51 deletions

View File

@@ -215,7 +215,7 @@ public record Export() {
}
}
static record DictEntryDTO(ArrayList<Lemma> words, IntListDTO[][] pos) {
record DictEntryDTO(ArrayList<Lemma> words, IntListDTO[][] pos) {
public DictEntryDTO(int L) {
this(new ArrayList<>(), new IntListDTO[L][26]);

View File

@@ -16,12 +16,10 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.stream.IntStream;
import static java.nio.charset.StandardCharsets.*;
/**
@@ -129,9 +127,8 @@ public record SwedishGenerator(Rng rng) {
final int[] stack = new int[SIZE];
final Bit seen = new Bit();
long pattern;
final IntList[] intListBuffer = new IntList[MAX_WORD_LENGTH];
final int[] undo = new int[2048];
final long[] bitset = new long[2500];
final int[] undo = new int[2048];
final long[] bitset = new long[2500];
void setPattern(long p) { this.pattern = p; }
}
@@ -246,11 +243,9 @@ public record SwedishGenerator(Rng rng) {
}
}
static final record IntList(int[] data, int size) { }
static record DictEntry(Lemma[] words, long[][] posBitsets) { }
static record DictEntry(Lemma[] words, IntList[][] pos, long[][] posBitsets) { }
public static record Lemma(int index, long word, byte len) {
public static record Lemma(int index, long word) {
static int LEMMA_COUNTER = 0;
static long pack(String word) {
@@ -261,7 +256,9 @@ public record SwedishGenerator(Rng rng) {
for (var i = 0; i < b.length; i++) w |= ((long) b[i] & ~64) << (i * 5);
return w;
}
public Lemma(int index, String word) { this(index, pack(word.getBytes(US_ASCII)), (byte) word.length()); }
/**
 * Creates a lemma with an explicit {@code index}.
 * The word's US-ASCII bytes are handed to {@code pack}, which produces the packed {@code long} form.
 */
public Lemma(int index, String word) { this(index, Lemma.pack(word.getBytes(US_ASCII))); }
public Lemma(String word) { this(LEMMA_COUNTER++, word); }
/** Returns the letter in slot {@code idx} as a byte: the slot's 5-bit value OR-ed with {@code B64}. */
byte byteAt(int idx) {
    final long slot = (word >>> (idx * 5)) & 0b11111;
    return (byte) (slot | B64);
}
/** Returns the raw 5-bit value stored in slot {@code idx} of the packed word. */
int intAt(int idx) {
    final long shifted = word >>> (idx * 5);
    return (int) (shifted & 0b11111);
}
@@ -269,9 +266,14 @@ public record SwedishGenerator(Rng rng) {
/** Two lemmas are equal when they share the same {@code index}; the packed letters are not compared. */
@Override
public boolean equals(Object o) {
    if (this == o) return true;
    return o instanceof Lemma other && other.index == index;
}
String[] clue() { return CsvIndexService.clues(index); }
int simpel() { return CsvIndexService.simpel(index); }
/**
 * Number of 5-bit slots spanned by the packed {@code word}, derived from its highest set bit.
 * Returns 0 for an empty (all-zero) word.
 * NOTE(review): this equals the letter count only if the last letter's slot is non-zero —
 * presumably guaranteed by how {@code pack} encodes letters; confirm.
 */
int length() {
    // Bit width up to and including the highest set bit; 0 when word == 0.
    final int usedBits = Long.SIZE - Long.numberOfLeadingZeros(word);
    // Ceiling division by the 5-bit slot width.
    return (usedBits + 4) / 5;
}
public String asWord() {
var b = new byte[len];
for (var i = 0; i < len; i++) b[i] = (byte) ((word >>> (i * 5)) & 0b11111 | B64);
var b = new byte[length()];
for (var i = 0; i < length(); i++) b[i] = (byte) ((word >>> (i * 5)) & 0b11111 | B64);
return new String(b, US_ASCII);
}
}
@@ -284,7 +286,7 @@ public record SwedishGenerator(Rng rng) {
var index = new DictEntryDTO[MAX_WORD_LENGTH_PLUS_ONE];
Arrays.setAll(index, i -> new DictEntryDTO(i));
for (var lemma : wordz) {
var L = lemma.len;
var L = lemma.length();
var entry = index[L];
var idx = entry.words().size();
@@ -299,10 +301,6 @@ public record SwedishGenerator(Rng rng) {
for (int i = MIN_LEN; i < index.length; i++) if (index[i].words().size() <= 0) throw new RuntimeException("No words for length " + i);
this(Arrays.stream(index).map(i -> {
var words = i.words().toArray(Lemma[]::new);
var pos = Arrays.stream(i.pos())
.map(ii -> Arrays.stream(ii).map(dto -> new IntList(dto.data(), dto.size()))
.toArray(IntList[]::new))
.toArray(IntList[][]::new);
int numWords = words.length;
int numLongs = (numWords + 63) >>> 6;
var bitsets = new long[i.pos().length * 26][numLongs];
@@ -316,7 +314,7 @@ public record SwedishGenerator(Rng rng) {
}
}
}
return new DictEntry(words, pos, bitsets);
return new DictEntry(words, bitsets);
}).toArray(DictEntry[]::new),
Arrays.stream(index).mapToInt(i -> i.words().size()).sum());
}
@@ -402,7 +400,6 @@ public record SwedishGenerator(Rng rng) {
return slots;
}
long maskFitness(Grid grid) {
var ctx = CTX.get();
var covH = ctx.covH2;
@@ -499,25 +496,6 @@ public record SwedishGenerator(Rng rng) {
if (size >= 2) penalty += (size - 1L) * 120L;
}
}
/*for (int i = 0; i < 65; i += 64) {
for (long bits = (i == 0 ? lo_cl : hi_cl); bits != X; bits &= bits - 1) {
int clueIdx = i + Long.numberOfTrailingZeros(bits);
if (seen.get(clueIdx)) continue;
int size = 0;
stack[0] = clueIdx;
seen.set(clueIdx);
for (int sp = 1; sp > 0; size++) {
long packed = NBR8_PACKED[stack[--sp]];
for (int k = 0, n = (int) (packed >>> 56) * 7; k < n; k += 7) {
int nidx = (int) ((packed >>> k) & 0x7F);
if (seen.get(nidx) || grid.notClue(nidx)) continue;
seen.set(nidx);
stack[sp++] = nidx;
}
}
if (size >= 2) penalty += ((size - 1L) * 120L);
}
}*/
for (int i = 0; i < 65; i += 64) {
long bits = (i == 0 ? ~lo_cl : (~hi_cl & 0xFFL));
@@ -720,9 +698,9 @@ public record SwedishGenerator(Rng rng) {
return new CandidateInfo(null, entry.words.length);
}
int numLongs = (entry.words.length + 63) >>> 6;
long[] res = ctx.bitset;
boolean first = true;
int numLongs = (entry.words.length + 63) >>> 6;
long[] res = ctx.bitset;
boolean first = true;
for (int i = 0, len = usedCharsInPattern(pattern); i < len; i++) {
int val = (int) ((pattern >>> (i * 5)) & 31);
@@ -760,9 +738,9 @@ public record SwedishGenerator(Rng rng) {
long pattern = ctx.pattern;
if (pattern == X) return entry.words.length;
int numLongs = (entry.words.length + 63) >>> 6;
long[] res = ctx.bitset;
boolean first = true;
int numLongs = (entry.words.length + 63) >>> 6;
long[] res = ctx.bitset;
boolean first = true;
for (int i = 0, len = usedCharsInPattern(pattern); i < len; i++) {
int val = (int) ((pattern >>> (i * 5)) & 31);
@@ -783,13 +761,16 @@ public record SwedishGenerator(Rng rng) {
for (int k = 0; k < numLongs; k++) count += Long.bitCount(res[k]);
return count;
}
//72 << 3;
static final int BIGG = 581 + 1;
public FillResult fillMask(Grid mask, DictEntry[] dictIndex,
int timeLimitMs) {
val multiThreaded = Thread.currentThread().getName().contains("pool");
val grid = mask.deepCopyGrid();
val used = new Bit1029();
// val assigned = new HashMap<Integer, Lemma>();
Lemma[] assigned = new Lemma[1024];
Lemma[] assigned = new Lemma[BIGG];
val ctx = CTX.get();
val count = ctx.cellCount;
Arrays.fill(count, 0, SIZE, 0);
@@ -856,7 +837,7 @@ public record SwedishGenerator(Rng rng) {
// Re-calculate for the best slot to get actual indices
ctx.pattern = patternForSlot(grid, best);
bestInfo = candidateInfoForPattern(ctx, dictIndex[best.len()], best.len());
bestInfo = candidateInfoForPattern(ctx, dictIndex[best.len()], best.len());
return new Pick(best, bestInfo, false);
}

View File

@@ -167,7 +167,7 @@ public class MainTest {
// Regression baseline for seed search starting at 12347, pop 4, gens 20
Assertions.assertEquals(12348, foundSeed, "Found seed changed");
Assertions.assertEquals(18, res.filled().clueMap().size(), "Number of assigned words changed");
Assertions.assertEquals(Lemma.pack("VERPATS"), res.filled().clueMap().get(74).word());
Assertions.assertEquals("RIJTUIG", res.filled().clueMap().get(74).asWord());
Assertions.assertEquals(301794542151533187L, res.filled().grid().grid().lo);
Assertions.assertEquals(193L, res.filled().grid().grid().hi);
}

View File

@@ -108,7 +108,7 @@ public class SwedishGeneratorTest {
var l1 = new Lemma("APPLE");
Assertions.assertEquals(Lemma.pack("APPLE".getBytes(StandardCharsets.US_ASCII)), l1.word());
assertEquals(5, l1.len());
assertEquals(5, l1.length());
assertEquals((byte) 'A', l1.byteAt(0));
assertEquals(1, l1.intAt(0));
@@ -124,9 +124,9 @@ public class SwedishGeneratorTest {
// Check pos indexing
// AXE: A at 0, X at 1, E at 2
assertTrue(entry3.pos()[0][0].size() > 0);
/* assertTrue(entry3.pos()[0][0].size() > 0);
assertTrue(entry3.pos()[1]['X' - 'A'].size() > 0);
assertTrue(entry3.pos()[2]['E' - 'A'].size() > 0);
assertTrue(entry3.pos()[2]['E' - 'A'].size() > 0);*/
}
@Test