diff --git a/src/main/java/puzzle/SwedishGenerator.java b/src/main/java/puzzle/SwedishGenerator.java index 2fd57c2..9250cf4 100644 --- a/src/main/java/puzzle/SwedishGenerator.java +++ b/src/main/java/puzzle/SwedishGenerator.java @@ -131,8 +131,7 @@ public record SwedishGenerator(Rng rng) { long pattern; final IntList[] intListBuffer = new IntList[MAX_WORD_LENGTH]; final int[] undo = new int[2048]; - final int[] inter1 = new int[160000]; - final int[] inter2 = new int[160000]; + final long[] bitset = new long[2500]; void setPattern(long p) { this.pattern = p; } } @@ -249,7 +248,7 @@ public record SwedishGenerator(Rng rng) { static final record IntList(int[] data, int size) { } - static record DictEntry(Lemma[] words, IntList[][] pos) { } + static record DictEntry(Lemma[] words, IntList[][] pos, long[][] posBitsets) { } public static record Lemma(int index, long word, byte len) { @@ -298,12 +297,27 @@ public record SwedishGenerator(Rng rng) { } } for (int i = MIN_LEN; i < index.length; i++) if (index[i].words().size() <= 0) throw new RuntimeException("No words for length " + i); - this(Arrays.stream(index).map(i -> new DictEntry(i.words().toArray(Lemma[]::new), - Arrays.stream(i.pos()) - .map(ii -> Arrays.stream(ii).map(dto -> new IntList(dto.data(), dto.size())) - .toArray(IntList[]::new)) - .toArray(IntList[][]::new))) - .toArray(DictEntry[]::new), + this(Arrays.stream(index).map(i -> { + var words = i.words().toArray(Lemma[]::new); + var pos = Arrays.stream(i.pos()) + .map(ii -> Arrays.stream(ii).map(dto -> new IntList(dto.data(), dto.size())) + .toArray(IntList[]::new)) + .toArray(IntList[][]::new); + int numWords = words.length; + int numLongs = (numWords + 63) >>> 6; + var bitsets = new long[i.pos().length * 26][numLongs]; + for (int p = 0; p < i.pos().length; p++) { + for (int l = 0; l < 26; l++) { + var list = i.pos()[p][l]; + var bs = bitsets[p * 26 + l]; + for (int k = 0; k < list.size(); k++) { + int wordIdx = list.data()[k]; + bs[wordIdx >>> 6] |= (1L << (wordIdx & 63)); + } + } + } + return new DictEntry(words, pos, bitsets); + }).toArray(DictEntry[]::new), Arrays.stream(index).mapToInt(i -> i.words().size()).sum()); } static Dict loadDict(String wordsPath) { @@ -701,53 +715,73 @@ public record SwedishGenerator(Rng rng) { } static CandidateInfo candidateInfoForPattern(Context ctx, DictEntry entry, int lenb) { - var pattern = ctx.pattern; - var listBuffer = ctx.intListBuffer; - var listCount = 0; - IntList tmp; + var pattern = ctx.pattern; if (pattern == X) { return new CandidateInfo(null, entry.words.length); } - /*if (usedCharsInPattern(pattern) > len) { - var abc = usedCharsInPattern(pattern); - System.out.println(abc); - }*/ + + int numLongs = (entry.words.length + 63) >>> 6; + long[] res = ctx.bitset; + boolean first = true; + for (int i = 0, len = usedCharsInPattern(pattern); i < len; i++) { int val = (int) ((pattern >>> (i * 5)) & 31); if (val != 0) { - listBuffer[listCount++] = entry.pos[i][val - 1]; - } - } - - // Sort constraints by size to optimize intersection - for (var i = 0; i < listCount - 1; i++) { - for (var j = i + 1; j < listCount; j++) { - if (listBuffer[j].size() < listBuffer[i].size()) { - tmp = listBuffer[i]; - listBuffer[i] = listBuffer[j]; - listBuffer[j] = tmp; + long[] bs = entry.posBitsets[i * 26 + (val - 1)]; + if (first) { + System.arraycopy(bs, 0, res, 0, numLongs); + first = false; + } else { + for (int k = 0; k < numLongs; k++) res[k] &= bs[k]; } } } - var cur = listBuffer[0].data(); - var curLen = listBuffer[0].size(); - if (listCount == 1) return new CandidateInfo(cur, curLen); + int count = 0; + for (int k = 0; k < numLongs; k++) count += Long.bitCount(res[k]); - val b1 = ctx.inter1; - val b2 = ctx.inter2; - var in = cur; - var out = b1; + if (count == 0) return new CandidateInfo(null, 0); - for (var k = 1; k < listCount; k++) { - tmp = listBuffer[k]; - curLen = intersectSorted(in, curLen, tmp.data(), tmp.size(), out); - in = out; - out = (out == b1) ? b2 : b1; - if (curLen == 0) break; + int[] indices = new int[count]; + int ki = 0; + for (int k = 0; k < numLongs; k++) { + long w = res[k]; + while (w != 0) { + int t = Long.numberOfTrailingZeros(w); + indices[ki++] = (k << 6) | t; + w &= w - 1; + } } - return new CandidateInfo(in, curLen); + return new CandidateInfo(indices, count); + } + + static int candidateCountForPattern(Context ctx, DictEntry entry) { + long pattern = ctx.pattern; + if (pattern == X) return entry.words.length; + + int numLongs = (entry.words.length + 63) >>> 6; + long[] res = ctx.bitset; + boolean first = true; + + for (int i = 0, len = usedCharsInPattern(pattern); i < len; i++) { + int val = (int) ((pattern >>> (i * 5)) & 31); + if (val != 0) { + long[] bs = entry.posBitsets[i * 26 + (val - 1)]; + if (first) { + System.arraycopy(bs, 0, res, 0, numLongs); + first = false; + } else { + for (int k = 0; k < numLongs; k++) res[k] &= bs[k]; + } + } + } + + if (first) return entry.words.length; // should not happen if pattern != X + + int count = 0; + for (int k = 0; k < numLongs; k++) count += Long.bitCount(res[k]); + return count; } public FillResult fillMask(Grid mask, DictEntry[] dictIndex, int timeLimitMs) { @@ -805,24 +839,25 @@ public record SwedishGenerator(Rng rng) { var entry = dictIndex[s.len()]; if (entry == null) return PICK_NOT_DONE; ctx.pattern = patternForSlot(grid, s); - var info = candidateInfoForPattern(ctx, entry, s.len()); + int count = candidateCountForPattern(ctx, entry); - if (info.count == 0) return PICK_NOT_DONE; + if (count == 0) return PICK_NOT_DONE; if (best == null - || info.count < bestInfo.count - || (info.count == bestInfo.count && slotScores[i] > bestScore)) { + || count < bestInfo.count + || (count == bestInfo.count && slotScores[i] > bestScore)) { best = s; bestScore = slotScores[i]; - if (info.indices != null && (info.indices == ctx.inter1 || info.indices == ctx.inter2)) { - bestInfo = new CandidateInfo(Arrays.copyOf(info.indices, info.count), info.count); - } else { - bestInfo = info; - } - if (info.count <= 1) break; + bestInfo = new CandidateInfo(null, count); + if (count <= 1) break; } } if (best == null) return PICK_DONE; + + // Re-calculate for the best slot to get actual indices + ctx.pattern = patternForSlot(grid, best); + bestInfo = candidateInfoForPattern(ctx, dictIndex[best.len()], best.len()); + return new Pick(best, bestInfo, false); } boolean backtrack(int depth) {