Gather data
This commit is contained in:
931
src/main/java/puzzle/SwedishGenerator.java
Normal file
931
src/main/java/puzzle/SwedishGenerator.java
Normal file
@@ -0,0 +1,931 @@
|
||||
package puzzle;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* SwedishGenerator.java
|
||||
*
|
||||
* Usage:
|
||||
* javac SwedishGenerator.java
|
||||
* java SwedishGenerator [--seed N] [--pop N] [--gens N] [--tries N] [--words word-list.txt]
|
||||
*/
|
||||
@SuppressWarnings("ALL")
|
||||
public class SwedishGenerator {
|
||||
|
||||
static final int W = 9, H = 8,
|
||||
CLUE_SIZE = 4,
|
||||
SIMPLICITY_DEFAULT_SCORE = 2;
|
||||
static final int MIN_LEN = 2, MAX_LEN = 8;
|
||||
// Direction tables indexed by the arrow digit 1..6 (index 0 is unused and stays {0, 0}).
// OFFSETS[d] is the {row, col} delta from the clue cell to the first letter cell.
// STEPS[d] is the {row, col} delta between consecutive letter cells of the word.
static final int[][] OFFSETS = new int[7][2];
static final int[][] STEPS = new int[7][2];

static {
    final int[][] offsetTable = {
        { 0, 0 },   // 0: unused
        { -1, 0 },  // 1: up
        { 0, 1 },   // 2: right
        { 1, 0 },   // 3: down
        { 0, -1 },  // 4: left
        { 0, -1 },  // 5: vertical down, clue is on the right of the first letter
        { 0, 1 },   // 6: vertical down, clue is on the left of the first letter
    };
    final int[][] stepTable = {
        { 0, 0 },   // 0: unused
        { -1, 0 },  // 1: up
        { 0, 1 },   // 2: right
        { 1, 0 },   // 3: down
        { 0, -1 },  // 4: left
        { 1, 0 },   // 5: down
        { 1, 0 },   // 6: down
    };
    for (int d = 1; d <= 6; d++) {
        OFFSETS[d] = offsetTable[d];
        STEPS[d] = stepTable[d];
    }
}
|
||||
static final char FIRST_ABC = 'A';
|
||||
static final char LAST_ABC = 'Z';
|
||||
static final char FIRST_ARROW = '1', LAST_ARROW = '6', HOR_ARROW_1 = '2', HOR_ARROW_2 = '4';
|
||||
/** Returns true when {@code ch} is an arrow digit, i.e. lies in FIRST_ARROW..LAST_ARROW inclusive. */
static boolean isDigit(char ch) {
    return FIRST_ARROW <= ch && ch <= LAST_ARROW;
}
|
||||
/** Returns true when {@code ch} lies in FIRST_ABC..LAST_ABC inclusive. */
static boolean isLetter(char ch) {
    return FIRST_ABC <= ch && ch <= LAST_ABC;
}
|
||||
/** Returns true when {@code ch} marks a cell that holds, or can hold, a letter: '#' or a letter. */
static boolean isLetterCell(char ch) {
    if (ch == '#') {
        return true;
    }
    return isLetter(ch);
}
|
||||
|
||||
// ---------------- RNG (xorshift32) ----------------
|
||||
|
||||
static final class Rng {
|
||||
|
||||
private int x;
|
||||
Rng(int seed) {
|
||||
var s = seed;
|
||||
if (s == 0) s = 1;
|
||||
this.x = s;
|
||||
}
|
||||
int nextU32() {
|
||||
var y = x;
|
||||
y ^= (y << 13);
|
||||
y ^= (y >>> 17);
|
||||
y ^= (y << 5);
|
||||
x = y;
|
||||
return y;
|
||||
}
|
||||
int randint(int min, int max) { // inclusive
|
||||
var u = (nextU32() & 0xFFFFFFFFL);
|
||||
var range = (long) max - (long) min + 1L;
|
||||
return (int) (min + (u % range));
|
||||
}
|
||||
double nextFloat() { return (nextU32() & 0xFFFFFFFFL) / 4294967295.0; }
|
||||
}
|
||||
|
||||
/** Limits {@code x} to the range {@code [a, b]}; the lower bound {@code a} wins if the bounds cross. */
static int clamp(int x, int a, int b) {
    final int capped = Math.min(b, x);
    return Math.max(a, capped);
}
|
||||
|
||||
// ---------------- Grid helpers ----------------
|
||||
static char[][] makeEmptyGrid() {
|
||||
var g = new char[H][W];
|
||||
for (var r = 0; r < H; r++) Arrays.fill(g[r], '#');
|
||||
return g;
|
||||
}
|
||||
|
||||
static char[][] deepCopyGrid(char[][] g) {
|
||||
var out = new char[H][W];
|
||||
for (var r = 0; r < H; r++) out[r] = Arrays.copyOf(g[r], W);
|
||||
return out;
|
||||
}
|
||||
|
||||
static String gridToString(char[][] g) {
|
||||
var sb = new StringBuilder();
|
||||
for (var r = 0; r < H; r++) {
|
||||
if (r > 0) sb.append('\n');
|
||||
sb.append(g[r]);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
static String renderHuman(char[][] g) {
|
||||
var sb = new StringBuilder();
|
||||
for (var r = 0; r < H; r++) {
|
||||
if (r > 0) sb.append('\n');
|
||||
for (var c = 0; c < W; c++) {
|
||||
var ch = g[r][c];
|
||||
sb.append(isDigit(ch) ? ' ' : ch);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
// ---------------- Words / index ----------------
|
||||
|
||||
static final class IntList {
|
||||
|
||||
int[] a = new int[8];
|
||||
int n = 0;
|
||||
void add(int v) {
|
||||
if (n >= a.length) a = Arrays.copyOf(a, a.length * 2);
|
||||
a[n++] = v;
|
||||
}
|
||||
void replaceAll(int[] newData) {
|
||||
this.a = newData;
|
||||
this.n = newData.length;
|
||||
}
|
||||
int size() { return n; }
|
||||
int[] data() { return a; } // note: may have extra capacity
|
||||
}
|
||||
|
||||
static final class DictEntry {
|
||||
|
||||
final ArrayList<String> words = new ArrayList<>();
|
||||
final IntList[][] pos; // pos[i][letter] -> indices (sorted by insertion)
|
||||
DictEntry(int L) {
|
||||
pos = new IntList[L][26];
|
||||
for (var i = 0; i < L; i++) {
|
||||
for (var j = 0; j < 26; j++) pos[i][j] = new IntList();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross) {
|
||||
|
||||
public WordDifficulty(String word, int simpel, int score) {
|
||||
var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
|
||||
var crossScore = ThemePoolBuilderLength.crossabilityScore(word);
|
||||
this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15));
|
||||
|
||||
// Prioritize simple words (high lScore) and long words.
|
||||
// lScore (1-10) adds up to 1000 points (weight 100).
|
||||
// Length (2-8) adds up to 160 points (weight 20).
|
||||
// We want LONGER and SIMPLER words to be tried earlier (lower difficulty value).
|
||||
// word.length() is 2 to 8.
|
||||
// score is 1 to 10.
|
||||
// Base difficulty starts high and decreases with length and score.
|
||||
// Length impact: up to 8 * 10 = 80
|
||||
// Score impact: up to 10 * 15 = 150
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static record Dict(Map<String, WordDifficulty> words,
|
||||
HashMap<Integer, DictEntry> index,
|
||||
HashMap<Integer, Integer> lenCounts) { }
|
||||
static Dict loadWords(String wordsPath) {
|
||||
String raw;
|
||||
try {
|
||||
raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8);
|
||||
} catch (IOException e) {
|
||||
raw = "WOORD,level_1_to_10,hint\nEU,2,hint\nUUR,2,hint\nAUTO,2,hint\nBOOM,2,hint\nHUIS,2,hint\nKAT,2,hint\nZEE,2,hint\nRODE,2,hint\nDRAAD,2,hint\nKENNIS,2,hint\nNETWERK,2,hint\nPAKTE,2,hint\n";
|
||||
}
|
||||
|
||||
var map = new HashMap<String, WordDifficulty>();
|
||||
boolean first = true;
|
||||
for (var line : raw.split("\\R")) {
|
||||
if (line.isBlank()) continue;
|
||||
var parts = line.split(",", 4);
|
||||
var word = parts[0].trim();
|
||||
if (first && word.equalsIgnoreCase("WOORD")) {
|
||||
first = false;
|
||||
continue;
|
||||
}
|
||||
first = false;
|
||||
var s = word.toUpperCase(Locale.ROOT);
|
||||
if (s.matches("^[A-Z]{2,8}$")) {
|
||||
int score = SIMPLICITY_DEFAULT_SCORE;
|
||||
int simpel = 0;
|
||||
// CSV has level 1-10. llmScores use 10-level.
|
||||
score = 10 - Integer.parseInt(parts[1].trim());
|
||||
simpel = Integer.parseInt(parts[2].trim());
|
||||
if (score >= 1)
|
||||
map.put(s, new WordDifficulty(s, simpel, score));
|
||||
}
|
||||
}
|
||||
var words = map.values().stream().collect(Collectors.toCollection(ArrayList::new));
|
||||
// Sort words by difficulty in ascending order
|
||||
words.sort(Comparator.comparingInt(wd -> wd.simpel));
|
||||
|
||||
var index = new HashMap<Integer, DictEntry>();
|
||||
var lenCounts = new HashMap<Integer, Integer>();
|
||||
|
||||
for (var w : words) {
|
||||
var L = w.word.length();
|
||||
lenCounts.put(L, lenCounts.getOrDefault(L, 0) + 1);
|
||||
|
||||
var entry = index.get(L);
|
||||
if (entry == null) {
|
||||
entry = new DictEntry(L);
|
||||
index.put(L, entry);
|
||||
}
|
||||
|
||||
var idx = entry.words.size();
|
||||
entry.words.add(w.word);
|
||||
|
||||
for (var i = 0; i < L; i++) {
|
||||
var letter = w.word.charAt(i) - 'A';
|
||||
if (letter >= 0 && letter < 26) entry.pos[i][letter].add(idx);
|
||||
}
|
||||
}
|
||||
|
||||
return new Dict(map, index, lenCounts);
|
||||
}
|
||||
|
||||
static int[] intersectSorted(int[] a, int aLen, int[] b, int bLen) {
|
||||
var out = new int[Math.min(aLen, bLen)];
|
||||
int i = 0, j = 0, k = 0;
|
||||
while (i < aLen && j < bLen) {
|
||||
int x = a[i], y = b[j];
|
||||
if (x == y) {
|
||||
out[k++] = x;
|
||||
i++;
|
||||
j++;
|
||||
} else if (x < y) i++;
|
||||
else j++;
|
||||
}
|
||||
return Arrays.copyOf(out, k);
|
||||
}
|
||||
|
||||
static final record CandidateInfo(int[] indices, int count) {
|
||||
|
||||
}
|
||||
static CandidateInfo candidateInfoForPattern(DictEntry entry, char[] pattern /* 0 means null */) {
|
||||
var lists = new ArrayList<IntList>();
|
||||
for (var i = 0; i < pattern.length; i++) {
|
||||
var ch = pattern[i];
|
||||
if (ch != 0 && isLetter(ch)) {
|
||||
lists.add(entry.pos[i][ch - 'A']);
|
||||
}
|
||||
}
|
||||
|
||||
if (lists.isEmpty()) {
|
||||
return new CandidateInfo(null, entry.words.size());
|
||||
}
|
||||
|
||||
var first = lists.get(0);
|
||||
var cur = Arrays.copyOf(first.data(), first.size());
|
||||
var curLen = cur.length;
|
||||
|
||||
for (var k = 1; k < lists.size(); k++) {
|
||||
var nxt = lists.get(k);
|
||||
var nextArr = nxt.data();
|
||||
var nextLen = nxt.size();
|
||||
cur = intersectSorted(cur, curLen, nextArr, nextLen);
|
||||
curLen = cur.length;
|
||||
if (curLen == 0) break;
|
||||
}
|
||||
|
||||
return new CandidateInfo(cur, curLen);
|
||||
}
|
||||
// ---------------- Slots ----------------
|
||||
|
||||
static record Slot(int clueR, int clueC, char dir, int[] rs, int[] cs, int len) {
|
||||
|
||||
public Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) {
|
||||
this(clueR, clueC, dir, rs, cs, rs.length);
|
||||
}
|
||||
String key() { return clueR + "," + clueC + ":" + dir; }
|
||||
}
|
||||
|
||||
static ArrayList<Slot> extractSlots(char[][] grid) {
|
||||
var slots = new ArrayList<Slot>();
|
||||
for (var r = 0; r < H; r++) {
|
||||
for (var c = 0; c < W; c++) {
|
||||
var d = grid[r][c];
|
||||
if (!isDigit(d)) continue;
|
||||
var dir = d - '0';
|
||||
// Check all possible directions for clue placement
|
||||
// for (int dir = 1; dir <= 4; dir++) {
|
||||
int or = OFFSETS[dir][0], oc = OFFSETS[dir][1];
|
||||
int dr = STEPS[dir][0], dc = STEPS[dir][1];
|
||||
|
||||
int rr = r + or, cc = c + oc;
|
||||
if (rr < 0 || rr >= H || cc < 0 || cc >= W) continue;
|
||||
if (!isLetterCell(grid[rr][cc])) continue;
|
||||
|
||||
var rs = new int[MAX_LEN + 1];
|
||||
var cs = new int[MAX_LEN + 1];
|
||||
var n = 0;
|
||||
|
||||
while (rr >= 0 && rr < H && cc >= 0 && cc < W) {
|
||||
var ch = grid[rr][cc];
|
||||
if (!isLetterCell(ch)) break;
|
||||
rs[n] = rr;
|
||||
cs[n] = cc;
|
||||
n++;
|
||||
rr += dr;
|
||||
cc += dc;
|
||||
if (n > MAX_LEN) break;
|
||||
}
|
||||
|
||||
slots.add(new Slot(r, c, d, Arrays.copyOf(rs, n), Arrays.copyOf(cs, n)));
|
||||
// }
|
||||
}
|
||||
}
|
||||
return slots;
|
||||
}
|
||||
static boolean hasRoomForClue(char[][] grid, int r, int c, char d) {
|
||||
var di = d - '0';
|
||||
int or = OFFSETS[di][0], oc = OFFSETS[di][1];
|
||||
int dr = STEPS[di][0], dc = STEPS[di][1];
|
||||
int rr = r + or, cc = c + oc;
|
||||
var run = 0;
|
||||
while (rr >= 0 && rr < H && cc >= 0 && cc < W && isLetterCell(grid[rr][cc]) && run < MAX_LEN) {
|
||||
run++;
|
||||
rr += dr;
|
||||
cc += dc;
|
||||
}
|
||||
return run >= MIN_LEN;
|
||||
}
|
||||
|
||||
// ---------------- FAST mask fitness ----------------
|
||||
|
||||
static long maskFitness(char[][] grid, HashMap<Integer, Integer> lenCounts) {
|
||||
long penalty = 0;
|
||||
|
||||
var clueCount = 0;
|
||||
for (var r = 0; r < H; r++) for (var c = 0; c < W; c++) if (isDigit(grid[r][c])) clueCount++;
|
||||
|
||||
var targetClues = (int) Math.round(W * H * 0.25); // ~18
|
||||
penalty += 8L * Math.abs(clueCount - targetClues);
|
||||
|
||||
var slots = extractSlots(grid);
|
||||
if (slots.isEmpty()) return 1_000_000_000L;
|
||||
|
||||
var covH = new int[H][W];
|
||||
var covV = new int[H][W];
|
||||
|
||||
for (var s : slots) {
|
||||
var horiz = (s.dir == HOR_ARROW_1 || s.dir == HOR_ARROW_2);
|
||||
|
||||
if (s.len < MIN_LEN) penalty += 8000;
|
||||
if (s.len > MAX_LEN) penalty += 8000 + (long) (s.len - MAX_LEN) * 500L;
|
||||
|
||||
if (s.len >= MIN_LEN && s.len <= MAX_LEN) {
|
||||
if (!lenCounts.containsKey(s.len)) penalty += 12000;
|
||||
}
|
||||
|
||||
for (var i = 0; i < s.len; i++) {
|
||||
int r = s.rs[i], c = s.cs[i];
|
||||
if (horiz) covH[r][c] += 1;
|
||||
else covV[r][c] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (var r = 0; r < H; r++)
|
||||
for (var c = 0; c < W; c++) {
|
||||
if (!isLetterCell(grid[r][c])) continue;
|
||||
int h = covH[r][c], v = covV[r][c];
|
||||
if (h == 0 && v == 0) penalty += 1500;
|
||||
else if (h > 0 && v > 0) { /* ok */ } else if (h + v == 1) penalty += 200;
|
||||
else penalty += 600;
|
||||
}
|
||||
|
||||
// clue clustering (8-connected)
|
||||
var seen = new boolean[H][W];
|
||||
var stack = new int[W * H];
|
||||
int sp;
|
||||
var nbrs8 = new int[][]{
|
||||
{ -1, -1 }, { -1, 0 }, { -1, 1 },
|
||||
{ 0, -1 }, { 0, 1 },
|
||||
{ 1, -1 }, { 1, 0 }, { 1, 1 }
|
||||
};
|
||||
|
||||
for (var r = 0; r < H; r++)
|
||||
for (var c = 0; c < W; c++) {
|
||||
if (!isDigit(grid[r][c]) || seen[r][c]) continue;
|
||||
sp = 0;
|
||||
stack[sp++] = r * W + c;
|
||||
seen[r][c] = true;
|
||||
var size = 0;
|
||||
|
||||
while (sp > 0) {
|
||||
var p = stack[--sp];
|
||||
int x = p / W, y = p % W;
|
||||
size++;
|
||||
|
||||
for (var d : nbrs8) {
|
||||
int nx = x + d[0], ny = y + d[1];
|
||||
if (nx < 0 || nx >= H || ny < 0 || ny >= W) continue;
|
||||
if (seen[nx][ny]) continue;
|
||||
if (!isDigit(grid[nx][ny])) continue;
|
||||
seen[nx][ny] = true;
|
||||
stack[sp++] = nx * W + ny;
|
||||
}
|
||||
}
|
||||
|
||||
if (size >= 2) penalty += (long) (size - 1) * 120L;
|
||||
}
|
||||
|
||||
// dead-end-ish letter cell (3+ walls)
|
||||
var nbrs4 = new int[][]{ { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
|
||||
for (var r = 0; r < H; r++)
|
||||
for (var c = 0; c < W; c++) {
|
||||
if (!isLetterCell(grid[r][c])) continue;
|
||||
var walls = 0;
|
||||
for (var d : nbrs4) {
|
||||
int rr = r + d[0], cc = c + d[1];
|
||||
if (rr < 0 || rr >= H || cc < 0 || cc >= W) {
|
||||
walls++;
|
||||
continue;
|
||||
}
|
||||
if (!isLetterCell(grid[rr][cc])) walls++;
|
||||
}
|
||||
if (walls >= 3) penalty += 400;
|
||||
}
|
||||
|
||||
return penalty;
|
||||
}
|
||||
|
||||
// ---------------- Mask generation ----------------
|
||||
|
||||
static char[][] randomMask(Rng rng) {
|
||||
var g = makeEmptyGrid();
|
||||
var targetClues = (int) Math.round(W * H * 0.25);
|
||||
int placed = 0, guard = 0;
|
||||
|
||||
while (placed < targetClues && guard++ < 4000) {
|
||||
var r = rng.randint(0, H - 1);
|
||||
var c = rng.randint(0, W - 1);
|
||||
if (isDigit(g[r][c])) continue;
|
||||
|
||||
var d = (char) ('0' + rng.randint(1, c == 0 ? CLUE_SIZE : 4));
|
||||
g[r][c] = d;
|
||||
if (!hasRoomForClue(g, r, c, d)) {
|
||||
g[r][c] = '#';
|
||||
continue;
|
||||
}
|
||||
placed++;
|
||||
}
|
||||
return g;
|
||||
}
|
||||
|
||||
static char[][] mutate(Rng rng, char[][] grid) {
|
||||
var g = deepCopyGrid(grid);
|
||||
var cx = rng.randint(0, H - 1);
|
||||
var cy = rng.randint(0, W - 1);
|
||||
|
||||
var steps = 4;
|
||||
for (var k = 0; k < steps; k++) {
|
||||
var rr = clamp(cx + (rng.randint(-2, 2) + rng.randint(-2, 2)), 0, H - 1);
|
||||
var cc = clamp(cy + (rng.randint(-2, 2) + rng.randint(-2, 2)), 0, W - 1);
|
||||
|
||||
var cur = g[rr][cc];
|
||||
if (isDigit(cur)) {
|
||||
g[rr][cc] = '#';
|
||||
} else {
|
||||
var d = (char) ('0' + rng.randint(1, cc == 0 ? CLUE_SIZE : 4));
|
||||
g[rr][cc] = d;
|
||||
if (!hasRoomForClue(g, rr, cc, d)) g[rr][cc] = '#';
|
||||
}
|
||||
}
|
||||
return g;
|
||||
}
|
||||
|
||||
static char[][] crossover(Rng rng, char[][] a, char[][] b) {
|
||||
var out = makeEmptyGrid();
|
||||
var cx = (H - 1) / 2.0;
|
||||
var cy = (W - 1) / 2.0;
|
||||
var theta = rng.nextFloat() * Math.PI;
|
||||
var nx = Math.cos(theta);
|
||||
var ny = Math.sin(theta);
|
||||
|
||||
for (var r = 0; r < H; r++)
|
||||
for (var c = 0; c < W; c++) {
|
||||
double x = r - cx, y = c - cy;
|
||||
var side = x * nx + y * ny;
|
||||
out[r][c] = (side >= 0) ? a[r][c] : b[r][c];
|
||||
}
|
||||
|
||||
for (var r = 0; r < H; r++)
|
||||
for (var c = 0; c < W; c++) {
|
||||
var ch = out[r][c];
|
||||
if (isDigit(ch) && !hasRoomForClue(out, r, c, ch)) out[r][c] = '#';
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
static char[][] hillclimb(Rng rng, char[][] start, HashMap<Integer, Integer> lenCounts, int limit) {
|
||||
var best = deepCopyGrid(start);
|
||||
var bestF = maskFitness(best, lenCounts);
|
||||
var fails = 0;
|
||||
|
||||
while (fails < limit) {
|
||||
var cand = mutate(rng, best);
|
||||
var f = maskFitness(cand, lenCounts);
|
||||
if (f < bestF) {
|
||||
best = cand;
|
||||
bestF = f;
|
||||
fails = 0;
|
||||
} else {
|
||||
fails++;
|
||||
}
|
||||
}
|
||||
return best;
|
||||
}
|
||||
|
||||
static double similarity(char[][] a, char[][] b) {
|
||||
var same = 0;
|
||||
for (var r = 0; r < H; r++) for (var c = 0; c < W; c++) if (a[r][c] == b[r][c]) same++;
|
||||
return same / (double) (W * H);
|
||||
}
|
||||
|
||||
static char[][] generateMask(Rng rng, HashMap<Integer, Integer> lenCounts, int popSize, int gens, boolean verbose) {
|
||||
if (verbose) System.out.println("generateMask init pop: " + popSize);
|
||||
var pop = new ArrayList<char[][]>();
|
||||
|
||||
for (var i = 0; i < popSize; i++) {
|
||||
var g = randomMask(rng);
|
||||
pop.add(hillclimb(rng, g, lenCounts, 180));
|
||||
}
|
||||
|
||||
for (var gen = 0; gen < gens; gen++) {
|
||||
if (Thread.currentThread().isInterrupted()) break;
|
||||
var children = new ArrayList<char[][]>();
|
||||
var pairs = Math.max(popSize, (int) Math.floor(popSize * 1.5));
|
||||
|
||||
for (var k = 0; k < pairs; k++) {
|
||||
var p1 = pop.get(rng.randint(0, pop.size() - 1));
|
||||
var p2 = pop.get(rng.randint(0, pop.size() - 1));
|
||||
var child = crossover(rng, p1, p2);
|
||||
children.add(hillclimb(rng, child, lenCounts, 70));
|
||||
}
|
||||
|
||||
pop.addAll(children);
|
||||
pop.sort(Comparator.comparingLong(g -> maskFitness(g, lenCounts)));
|
||||
|
||||
var next = new ArrayList<char[][]>();
|
||||
for (var cand : pop) {
|
||||
if (next.size() >= popSize) break;
|
||||
var ok = true;
|
||||
for (var kept : next) {
|
||||
if (similarity(cand, kept) > 0.92) {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ok) next.add(cand);
|
||||
}
|
||||
pop = next;
|
||||
|
||||
if (verbose && gen % 10 == 0) {
|
||||
var bestF = maskFitness(pop.get(0), lenCounts);
|
||||
System.out.println(" gen " + gen + "/" + gens + " bestFitness=" + bestF);
|
||||
}
|
||||
}
|
||||
|
||||
pop.sort(Comparator.comparingLong(g -> maskFitness(g, lenCounts)));
|
||||
return pop.get(0);
|
||||
}
|
||||
|
||||
// ---------------- Fill (CSP) ----------------
|
||||
|
||||
public static final class FillStats {
|
||||
|
||||
public long nodes;
|
||||
public long backtracks;
|
||||
public double seconds;
|
||||
public int lastMRV;
|
||||
}
|
||||
|
||||
public static final class FillResult {
|
||||
|
||||
public boolean ok;
|
||||
public char[][] grid;
|
||||
public HashMap<String, String> clueMap;
|
||||
public FillStats stats;
|
||||
public double simplicity;
|
||||
}
|
||||
|
||||
record Undo(int[] rs, int[] cs, char[] prev, int n) {
|
||||
}
|
||||
|
||||
static char[] patternForSlot(char[][] grid, Slot s) {
|
||||
var pat = new char[s.len];
|
||||
for (var i = 0; i < s.len; i++) {
|
||||
var ch = grid[s.rs[i]][s.cs[i]];
|
||||
pat[i] = isLetter(ch) ? ch : 0;
|
||||
}
|
||||
return pat;
|
||||
}
|
||||
|
||||
static int slotScore(int[][] cellCount, Slot s) {
|
||||
var cross = 0;
|
||||
for (var i = 0; i < s.len; i++) cross += (cellCount[s.rs[i]][s.cs[i]] - 1);
|
||||
return cross * 10 + s.len;
|
||||
}
|
||||
|
||||
static Undo placeWord(char[][] grid, Slot s, String w) {
|
||||
var urs = new int[s.len];
|
||||
var ucs = new int[s.len];
|
||||
var up = new char[s.len];
|
||||
var n = 0;
|
||||
|
||||
for (var i = 0; i < s.len; i++) {
|
||||
int r = s.rs[i], c = s.cs[i];
|
||||
var prev = grid[r][c];
|
||||
var ch = w.charAt(i);
|
||||
if (prev == '#') {
|
||||
urs[n] = r;
|
||||
ucs[n] = c;
|
||||
up[n] = prev;
|
||||
n++;
|
||||
grid[r][c] = ch;
|
||||
} else if (prev != ch) {
|
||||
// rollback immediate changes
|
||||
for (var j = 0; j < n; j++) grid[urs[j]][ucs[j]] = up[j];
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return new Undo(urs, ucs, up, n);
|
||||
}
|
||||
|
||||
static void undoPlace(char[][] grid, Undo u) {
|
||||
for (var i = 0; i < u.n; i++) grid[u.rs[i]][u.cs[i]] = u.prev[i];
|
||||
}
|
||||
|
||||
static FillResult fillMask(Rng rng, char[][] mask, HashMap<Integer, DictEntry> dictIndex,
|
||||
Map<String, WordDifficulty> llmScores,
|
||||
int logEveryMs, int timeLimitMs, boolean verbose) {
|
||||
|
||||
var grid = deepCopyGrid(mask);
|
||||
var allSlots = extractSlots(grid);
|
||||
var slots = new ArrayList<Slot>();
|
||||
for (var s : allSlots) if (s.len >= MIN_LEN && s.len <= MAX_LEN) slots.add(s);
|
||||
|
||||
var used = new HashSet<String>();
|
||||
var assigned = new HashMap<String, String>();
|
||||
|
||||
var cellCount = new int[H][W];
|
||||
for (var s : slots) for (var i = 0; i < s.len; i++) cellCount[s.rs[i]][s.cs[i]]++;
|
||||
|
||||
var t0 = System.currentTimeMillis();
|
||||
final var lastLog = new java.util.concurrent.atomic.AtomicLong(t0);
|
||||
|
||||
var stats = new FillStats();
|
||||
final var TOTAL = slots.size();
|
||||
final var BAR_LEN = 22;
|
||||
|
||||
Runnable renderProgress = () -> {
|
||||
if (!verbose) return;
|
||||
var now = System.currentTimeMillis();
|
||||
if ((now - lastLog.get()) < logEveryMs) return;
|
||||
lastLog.set(now);
|
||||
|
||||
var done = assigned.size();
|
||||
var pct = (TOTAL == 0) ? 100 : (int) Math.floor((done / (double) TOTAL) * 100);
|
||||
var filled = Math.min(BAR_LEN, (int) Math.floor((pct / 100.0) * BAR_LEN));
|
||||
var bar = "[" + "#".repeat(filled) + "-".repeat(BAR_LEN - filled) + "]";
|
||||
var elapsed = String.format(Locale.ROOT, "%.1fs", (now - t0) / 1000.0);
|
||||
|
||||
var msg = String.format(
|
||||
Locale.ROOT,
|
||||
"%s %d/%d slots | nodes=%d | backtracks=%d | mrv=%d | %s",
|
||||
bar, done, TOTAL, stats.nodes, stats.backtracks, stats.lastMRV, elapsed
|
||||
);
|
||||
System.out.print("\r" + padRight(msg, 120));
|
||||
System.out.flush();
|
||||
};
|
||||
|
||||
class Pick {
|
||||
|
||||
Slot slot;
|
||||
CandidateInfo info;
|
||||
boolean done;
|
||||
}
|
||||
|
||||
java.util.function.Supplier<Pick> chooseMRV = () -> {
|
||||
Slot best = null;
|
||||
CandidateInfo bestInfo = null;
|
||||
|
||||
for (var s : slots) {
|
||||
var k = s.key();
|
||||
if (assigned.containsKey(k)) continue;
|
||||
|
||||
var entry = dictIndex.get(s.len);
|
||||
if (entry == null) {
|
||||
var p = new Pick();
|
||||
p.slot = null;
|
||||
p.info = null;
|
||||
p.done = false;
|
||||
return p;
|
||||
}
|
||||
|
||||
var pat = patternForSlot(grid, s);
|
||||
var info = candidateInfoForPattern(entry, pat);
|
||||
|
||||
if (info.count == 0) {
|
||||
var p = new Pick();
|
||||
p.slot = null;
|
||||
p.info = null;
|
||||
p.done = false;
|
||||
return p;
|
||||
}
|
||||
|
||||
if (best == null
|
||||
|| info.count < bestInfo.count
|
||||
|| (info.count == bestInfo.count && slotScore(cellCount, s) > slotScore(cellCount, best))) {
|
||||
best = s;
|
||||
bestInfo = info;
|
||||
if (info.count <= 1) break;
|
||||
}
|
||||
}
|
||||
|
||||
var p = new Pick();
|
||||
if (best == null) {
|
||||
p.slot = null;
|
||||
p.info = null;
|
||||
p.done = true;
|
||||
} else {
|
||||
p.slot = best;
|
||||
p.info = bestInfo;
|
||||
p.done = false;
|
||||
}
|
||||
return p;
|
||||
};
|
||||
|
||||
final var MAX_TRIES_PER_SLOT = 2000;
|
||||
|
||||
class Solver {
|
||||
|
||||
boolean backtrack() {
|
||||
if (Thread.currentThread().isInterrupted()) return false;
|
||||
stats.nodes++;
|
||||
|
||||
if (timeLimitMs > 0 && (System.currentTimeMillis() - t0) > timeLimitMs) return false;
|
||||
|
||||
var pick = chooseMRV.get();
|
||||
if (pick.done) return true;
|
||||
if (pick.slot == null) {
|
||||
stats.backtracks++;
|
||||
return false;
|
||||
}
|
||||
|
||||
stats.lastMRV = pick.info.count;
|
||||
renderProgress.run();
|
||||
|
||||
var s = pick.slot;
|
||||
var k = s.key();
|
||||
var entry = dictIndex.get(s.len);
|
||||
var pat = patternForSlot(grid, s);
|
||||
|
||||
java.util.function.Function<String, Boolean> tryWord = (String w) -> {
|
||||
if (w == null) return false;
|
||||
if (used.contains(w)) return false;
|
||||
|
||||
for (var i = 0; i < pat.length; i++) {
|
||||
if (pat[i] != 0 && pat[i] != w.charAt(i)) return false;
|
||||
}
|
||||
|
||||
var undo = placeWord(grid, s, w);
|
||||
if (undo == null) return false;
|
||||
|
||||
used.add(w);
|
||||
assigned.put(k, w);
|
||||
|
||||
if (backtrack()) return true;
|
||||
|
||||
assigned.remove(k);
|
||||
used.remove(w);
|
||||
undoPlace(grid, undo);
|
||||
return false;
|
||||
};
|
||||
|
||||
if (pick.info.indices != null && pick.info.indices.length > 0) {
|
||||
var idxs = pick.info.indices;
|
||||
var L = idxs.length;
|
||||
var tries = Math.min(MAX_TRIES_PER_SLOT, L);
|
||||
|
||||
// When picking words from sorted indices, we want to favor the beginning
|
||||
// (lower difficulty) but still have some randomness.
|
||||
for (var t = 0; t < tries; t++) {
|
||||
// Bias strongly towards lower indices (simpler words) using r^3
|
||||
double r = rng.nextFloat();
|
||||
int idxInArray = (int) (r * r * r * L);
|
||||
var idx = idxs[idxInArray];
|
||||
var w = entry.words.get(idx);
|
||||
if (tryWord.apply(w)) return true;
|
||||
}
|
||||
stats.backtracks++;
|
||||
return false;
|
||||
}
|
||||
|
||||
var N = entry.words.size();
|
||||
if (N == 0) {
|
||||
stats.backtracks++;
|
||||
return false;
|
||||
}
|
||||
|
||||
var tries = Math.min(MAX_TRIES_PER_SLOT, N);
|
||||
for (var t = 0; t < tries; t++) {
|
||||
double r = rng.nextFloat();
|
||||
int idxInArray = (int) (r * r * r * N);
|
||||
var w = entry.words.get(idxInArray);
|
||||
if (tryWord.apply(w)) return true;
|
||||
}
|
||||
|
||||
stats.backtracks++;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// initial render (same feel)
|
||||
renderProgress.run();
|
||||
var ok = new Solver().backtrack();
|
||||
// final progress line
|
||||
System.out.print("\r" + padRight("", 120) + "\r");
|
||||
System.out.flush();
|
||||
|
||||
var res = new FillResult();
|
||||
res.ok = ok;
|
||||
res.grid = grid;
|
||||
res.clueMap = assigned;
|
||||
stats.seconds = (System.currentTimeMillis() - t0) / 1000.0;
|
||||
res.stats = stats;
|
||||
|
||||
if (ok) {
|
||||
double totalSimplicity = 0;
|
||||
for (var w : assigned.values()) {
|
||||
totalSimplicity += llmScores.get(w).difficulty;
|
||||
}
|
||||
res.simplicity = assigned.isEmpty() ? 0 : totalSimplicity / assigned.size();
|
||||
}
|
||||
|
||||
// print a final progress line
|
||||
if (verbose) {
|
||||
System.out.println(
|
||||
String.format(Locale.ROOT,
|
||||
"[######################] %d/%d slots | nodes=%d | backtracks=%d | mrv=%d | %.1fs",
|
||||
assigned.size(), TOTAL, stats.nodes, stats.backtracks, stats.lastMRV, stats.seconds
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static String padRight(String s, int n) {
|
||||
if (s.length() >= n) return s;
|
||||
return s + " ".repeat(n - s.length());
|
||||
}
|
||||
|
||||
// ---------------- Top-level generatePuzzle ----------------
|
||||
/**
 * A generated puzzle.
 *
 * @param dict   dictionary the puzzle was built from
 * @param mask   clue mask before filling
 * @param filled result of filling the mask
 */
public record PuzzleResult(
        Dict dict,
        char[][] mask,
        FillResult filled) {
}
|
||||
|
||||
public static PuzzleResult generatePuzzle(Main.Opts opts) {
|
||||
var tLoad0 = System.nanoTime();
|
||||
var dict = loadWords(opts.wordsPath);
|
||||
var tLoad1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words%n", (tLoad1 - tLoad0) / 1e9, dict.words.size());
|
||||
|
||||
if (opts.threads > 1) {
|
||||
System.out.println("Running in multi-threaded mode with " + opts.threads + " threads...");
|
||||
var executor = Executors.newFixedThreadPool(opts.threads);
|
||||
try {
|
||||
var tasks = new ArrayList<Callable<PuzzleResult>>();
|
||||
for (int i = 1; i <= opts.tries; i++) {
|
||||
final int attempt = i;
|
||||
tasks.add(() -> {
|
||||
var threadRng = new Rng(opts.seed + attempt);
|
||||
var mask = generateMask(threadRng, dict.lenCounts, opts.pop, opts.gens, false);
|
||||
var filled = fillMask(threadRng, mask, dict.index, dict.words, 200, 60000, false);
|
||||
|
||||
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
||||
System.out.println("\nSolution found on attempt " + attempt);
|
||||
return new PuzzleResult(dict, mask, filled);
|
||||
}
|
||||
throw new RuntimeException("No solution found in attempt " + attempt);
|
||||
});
|
||||
}
|
||||
return executor.invokeAny(tasks);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
} catch (ExecutionException e) {
|
||||
// all failed
|
||||
} finally {
|
||||
executor.shutdownNow();
|
||||
}
|
||||
return null;
|
||||
} else {
|
||||
var rng = new Rng(opts.seed);
|
||||
for (var attempt = 1; attempt <= opts.tries; attempt++) {
|
||||
System.out.println("\nAttempt " + attempt + "/" + opts.tries);
|
||||
|
||||
var tMask0 = System.nanoTime();
|
||||
var mask = generateMask(rng, dict.lenCounts, opts.pop, opts.gens, true);
|
||||
var tMask1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "MASK: %.3fs%n", (tMask1 - tMask0) / 1e9);
|
||||
|
||||
var tFill0 = System.nanoTime();
|
||||
var filled = fillMask(rng, mask, dict.index, dict.words, 200, 60000, true);
|
||||
var tFill1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "FILL: %.3fms | Simplicity: %.2f%n", (tFill1 - tFill0) / 1e6, filled.simplicity);
|
||||
|
||||
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
||||
return new PuzzleResult(dict, mask, filled);
|
||||
}
|
||||
if (filled.ok) {
|
||||
System.out.printf(Locale.ROOT, "Puzzle simplicity %.2f is below min %.2f, retrying...%n",
|
||||
filled.simplicity, opts.minSimplicity);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user