Files
puzzle-generator/swedish_generator.js
2025-12-19 14:02:07 +01:00

654 lines
16 KiB
JavaScript

#!/usr/bin/env node
"use strict";
const fs = require("fs");
// Board size: W columns by H rows (grids are indexed grid[row][col]).
const W = 9, H = 8;
// Inclusive bounds on the number of letter cells in a usable word slot.
const MIN_LEN = 2, MAX_LEN = 8;
// Clue digit -> [row delta, column delta] giving the direction in which the
// clue's word runs away from the clue cell.
const DIRS = {
"1": [-1, 0], // up
"2": [0, 1], // right
"3": [1, 0], // down
"4": [0, -1], // left
};
// True when ch is a clue character, i.e. one of the keys of DIRS ("1".."4").
const IS_DIGIT = (ch) => ch >= "1" && ch <= "4";
// True when ch is an uppercase letter "A".."Z" (plain string comparison).
const IS_LETTER = (ch) => ch >= "A" && ch <= "Z";
// True when ch is a cell that holds a letter or can still receive one:
// "#" marks a not-yet-filled letter cell.
const IS_LETTER_CELL = (ch) => ch === "#" || IS_LETTER(ch);
/**
 * Print the command-line synopsis and the default option values with a
 * single console.log call.
 */
function usage() {
  const helpLines = [
    "Usage:",
    "node swedish_generator.js [--seed N] [--pop N] [--gens N] [--tries N] [--words word-list.txt]",
    "Defaults:",
    "--seed 1",
    "--pop 18",
    "--gens 100",
    "--tries 50",
    "--words ./word-list.txt",
    "", // keeps the trailing newline of the help text
  ];
  console.log(helpLines.join("\n"));
}
/**
 * Parse command-line arguments into an options object.
 *
 * Recognised flags: --seed, --pop, --gens, --tries (integers) and --words
 * (path). --help / -h prints the usage text and exits the process with code 0.
 *
 * @param {string[]} argv - Full argument vector; parsing starts at index 2.
 * @returns {{seed: number, pop: number, gens: number, tries: number, wordsPath: string}}
 *   Options, with defaults for every flag that was not given.
 * @throws {Error} On an unknown flag, a flag without a value, a value that
 *   does not parse as an integer, or an integer below the flag's minimum
 *   (pop >= 1, gens >= 0, tries >= 0).
 */
function parseArgs(argv) {
  const out = {seed: 1, pop: 18, gens: 100, tries: 50, wordsPath: "./word-list.txt"};
  // Integer flags, the option they set, and the smallest accepted value.
  const intOptions = new Map([
    ["--seed", {key: "seed", min: -Infinity}],
    ["--pop", {key: "pop", min: 1}],
    ["--gens", {key: "gens", min: 0}],
    ["--tries", {key: "tries", min: 0}],
  ]);
  for (let i = 2; i < argv.length; i++) {
    const flag = argv[i];
    if (flag === "--help" || flag === "-h") {
      usage();
      process.exit(0);
    }
    const intOption = intOptions.get(flag);
    if (!intOption && flag !== "--words") {
      throw new Error(`Unknown arg: ${flag}`);
    }
    // Every remaining flag consumes the following argument as its value.
    i += 1;
    const value = argv[i];
    if (value === undefined) {
      throw new Error(`Missing value for ${flag}`);
    }
    if (!intOption) {
      out.wordsPath = value;
      continue;
    }
    const parsed = Number.parseInt(value, 10);
    // Reject NaN here: it would otherwise surface much later as an obscure crash.
    if (Number.isNaN(parsed) || parsed < intOption.min) {
      throw new Error(`Invalid value for ${flag}: ${value}`);
    }
    out[intOption.key] = parsed;
  }
  return out;
}
/**
 * Seeded pseudo-random generator using xorshift32 (shifts 13, 17, 5).
 *
 * @param {number} seed - Coerced to an unsigned 32-bit value; a result of 0
 *   is replaced by 1.
 * @returns {{nextU32: function(): number, int: function(number, number): number, float: function(): number}}
 *   nextU32() advances the state and returns it as an unsigned 32-bit number;
 *   int(min, max) returns min + (nextU32() % (max - min + 1));
 *   float() returns nextU32() / 0xFFFFFFFF.
 */
function makeRng(seed) {
  let state = (seed >>> 0) || 1;
  return {
    nextU32() {
      // The bitwise operators work on the 32-bit pattern, so one unsigned
      // normalisation at the end is enough.
      state ^= state << 13;
      state ^= state >>> 17;
      state ^= state << 5;
      state >>>= 0;
      return state;
    },
    int(min, max) {
      const span = max - min + 1;
      return min + (this.nextU32() % span);
    },
    float() {
      return this.nextU32() / 0xFFFFFFFF;
    },
  };
}
/**
 * Limit x to the range [a, b]: the upper bound b is applied first, then the
 * lower bound a.
 */
function clamp(x, a, b) {
  const capped = Math.min(b, x);
  return Math.max(a, capped);
}
/**
 * Create a fresh H-row by W-column grid in which every cell is "#".
 *
 * @returns {string[][]} Grid whose rows are independent arrays.
 */
function makeEmptyGrid() {
  const grid = [];
  for (let r = 0; r < H; r++) {
    grid.push(new Array(W).fill("#"));
  }
  return grid;
}
/**
 * Copy a grid so that each row of the result is a new array.
 *
 * @param {string[][]} g - Grid to copy.
 * @returns {string[][]} Copy with the same cell values.
 */
function deepCopyGrid(g) {
  const copy = [];
  for (const row of g) {
    copy.push(row.slice());
  }
  return copy;
}
/**
 * Serialise a grid: the cells of each row are concatenated and the rows are
 * separated by "\n".
 *
 * @param {string[][]} g - Grid to serialise.
 * @returns {string} One line per row, with no trailing newline.
 */
function gridToString(g) {
  const lines = [];
  for (const row of g) {
    lines.push(row.join(""));
  }
  return lines.join("\n");
}
/**
 * Serialise a grid like gridToString, but with every clue digit replaced by
 * a single space.
 *
 * @param {string[][]} g - Grid to render.
 * @returns {string} One line per row, joined with "\n".
 */
function renderHuman(g) {
  const hideClue = (ch) => (IS_DIGIT(ch) ? " " : ch);
  const lines = [];
  for (const row of g) {
    lines.push(row.map((ch) => hideClue(ch)).join(""));
  }
  return lines.join("\n");
}
/** --- Words / index --- */
/**
 * Load the word list and build the per-length lookup index.
 *
 * Each line is trimmed and upper-cased. A word is kept only if it consists of
 * MIN_LEN..MAX_LEN letters A-Z and has not been seen before, so duplicates
 * are indexed once. If the file cannot be read, a warning is written with
 * console.warn and a small built-in list is used instead.
 *
 * @param {string} wordsPath - Path of a text file with one word per line.
 * @returns {{words: string[], index: Map<number, {words: string[], pos: number[][][]}>, lenCounts: Map<number, number>}}
 *   words: accepted words in file order;
 *   index: word length -> {words, pos}, where pos[i][k] lists, in ascending
 *     order, the positions in that entry's words whose letter i is
 *     String.fromCharCode(65 + k);
 *   lenCounts: word length -> number of accepted words of that length.
 */
function loadWords(wordsPath) {
  let raw;
  try {
    raw = fs.readFileSync(wordsPath, "utf8");
  } catch (err) {
    // Best-effort fallback, but never silently: a 12-word dictionary changes
    // the generator's output drastically and the user should know why.
    const reason = (err && (err.code || err.message)) || String(err);
    console.warn(`Could not read word list "${wordsPath}" (${reason}); using built-in fallback words.`);
    raw = "EU\nUUR\nAUTO\nBOOM\nHUIS\nKAT\nZEE\nRODE\nDRAAD\nKENNIS\nNETWERK\nPAKTE\n";
  }
  // Keep the accepted lengths in step with the slot length limits.
  const usableWord = new RegExp(`^[A-Z]{${MIN_LEN},${MAX_LEN}}$`);
  const seen = new Set();
  const words = [];
  for (const line of raw.split(/\r?\n/)) {
    const word = line.trim().toUpperCase();
    if (!usableWord.test(word) || seen.has(word)) continue;
    seen.add(word);
    words.push(word);
  }
  // index[len] = { words: string[], pos: Array(len) of [26 arrays of indices] }
  const index = new Map();
  const lenCounts = new Map();
  for (const word of words) {
    const len = word.length;
    lenCounts.set(len, (lenCounts.get(len) || 0) + 1);
    let entry = index.get(len);
    if (!entry) {
      const pos = Array.from({length: len}, () => Array.from({length: 26}, () => []));
      entry = {words: [], pos};
      index.set(len, entry);
    }
    const idx = entry.words.length;
    entry.words.push(word);
    for (let i = 0; i < len; i++) {
      entry.pos[i][word.charCodeAt(i) - 65].push(idx);
    }
  }
  return {words, index, lenCounts};
}
/**
 * Intersect two ascending arrays with a two-pointer merge walk.
 *
 * @param {number[]} a - Ascending values.
 * @param {number[]} b - Ascending values.
 * @returns {number[]} New array with the values found in both, ascending.
 */
function intersectSorted(a, b) {
  const common = [];
  let ai = 0;
  let bi = 0;
  while (ai < a.length && bi < b.length) {
    const left = a[ai];
    const right = b[bi];
    if (left !== right) {
      // Advance whichever side is behind.
      if (left < right) ai += 1;
      else bi += 1;
      continue;
    }
    common.push(left);
    ai += 1;
    bi += 1;
  }
  return common;
}
/**
 * Count the words of one length entry that match a partially filled pattern.
 *
 * @param {{words: string[], pos: number[][][]}} entry - Index entry for the
 *   pattern's length.
 * @param {Array<string|null>} pattern - One element per position; only
 *   elements that are letters A-Z constrain the match.
 * @returns {{indices: number[]|null, count: number}} With no letter in the
 *   pattern, indices is null and count is the entry's word total, so no index
 *   list is built. Otherwise indices holds the matching positions in
 *   entry.words; for a single constraint this is the entry's own posting
 *   array.
 */
function candidateInfoForPattern(entry, pattern /* array char|null */) {
  const postings = [];
  pattern.forEach((ch, i) => {
    if (ch && IS_LETTER(ch)) {
      postings.push(entry.pos[i][ch.charCodeAt(0) - 65]);
    }
  });
  if (postings.length === 0) {
    return {indices: null, count: entry.words.length}; // unconstrained
  }
  // Shortest list first keeps the running intersection small.
  postings.sort((x, y) => x.length - y.length);
  let survivors = postings[0];
  for (const list of postings.slice(1)) {
    survivors = intersectSorted(survivors, list);
    if (survivors.length === 0) break;
  }
  return {indices: survivors, count: survivors.length};
}
/** --- Slots --- */
/**
 * Collect the word slot that starts next to each clue digit.
 *
 * The grid is scanned row by row. From each clue cell the run of letter cells
 * ("#" or A-Z) in the clue's direction is followed until the board edge or a
 * non-letter cell. At most MAX_LEN + 1 cells are recorded, so a longer run is
 * still reported as over-long. A clue whose first neighbour is off the board
 * or not a letter cell yields no slot.
 *
 * @param {string[][]} grid - Board of H rows by W columns.
 * @returns {Array<{clue: Array, dir: string, cells: number[][], len: number}>}
 *   clue is [row, col, digit]; cells are [row, col] pairs in reading order.
 */
function extractSlots(grid) {
  const onBoard = (r, c) => r >= 0 && r < H && c >= 0 && c < W;
  const found = [];
  for (let row = 0; row < H; row++) {
    for (let col = 0; col < W; col++) {
      const digit = grid[row][col];
      if (!IS_DIGIT(digit)) continue;
      const [stepR, stepC] = DIRS[digit];
      const run = [];
      let r = row + stepR;
      let c = col + stepC;
      while (run.length <= MAX_LEN && onBoard(r, c) && IS_LETTER_CELL(grid[r][c])) {
        run.push([r, c]);
        r += stepR;
        c += stepC;
      }
      if (run.length === 0) continue;
      found.push({clue: [row, col, digit], dir: digit, cells: run, len: run.length});
    }
  }
  return found;
}
/**
 * Check whether a clue at (r, c) pointing in direction d is followed by at
 * least MIN_LEN consecutive letter cells. Counting stops at MAX_LEN. The cell
 * (r, c) itself is not inspected.
 *
 * @param {string[][]} grid - Board of H rows by W columns.
 * @param {number} r - Clue row.
 * @param {number} c - Clue column.
 * @param {string} d - Direction digit, a key of DIRS.
 * @returns {boolean} True when the run is long enough for a word.
 */
function hasRoomForClue(grid, r, c, d) {
  const [stepR, stepC] = DIRS[d];
  let rr = r + stepR;
  let cc = c + stepC;
  let free = 0;
  while (free < MAX_LEN) {
    const onBoard = rr >= 0 && rr < H && cc >= 0 && cc < W;
    if (!onBoard || !IS_LETTER_CELL(grid[rr][cc])) break;
    free += 1;
    rr += stepR;
    cc += stepC;
  }
  return free >= MIN_LEN;
}
/** --- FAST mask fitness (structural only) --- */
/**
 * Structural penalty of a clue mask; lower is better. No words are placed;
 * the dictionary is consulted only through lenCounts.
 *
 * Penalty terms (all integers, summed):
 *  - 8 per clue of distance from the target clue count round(W * H * 0.25);
 *  - per slot: 8000 if shorter than MIN_LEN, 8000 + 500 per extra cell if
 *    longer than MAX_LEN, otherwise 12000 when lenCounts has no words of
 *    that length;
 *  - per letter cell: 1500 if no slot covers it, 200 if exactly one slot
 *    covers it, 600 if several slots cover it in one orientation only, and
 *    nothing if it is covered both horizontally and vertically;
 *  - per letter cell with at most one orthogonal letter-cell neighbour: 400;
 *  - per 8-connected group of clue cells: 120 for each member beyond the first.
 *
 * @param {string[][]} grid - Mask of H rows by W columns.
 * @param {Map<number, number>} lenCounts - Word length -> number of words.
 * @returns {number} Total penalty, or 1e9 when the mask has no slot at all.
 */
function maskFitness(grid, lenCounts) {
  const slots = extractSlots(grid);
  if (slots.length === 0) return 1e9;

  const onBoard = (r, c) => r >= 0 && r < H && c >= 0 && c < W;
  const newLayer = (fill) => Array.from({length: H}, () => Array(W).fill(fill));
  let total = 0;

  // Clue density (avoid all digits).
  let clues = 0;
  for (let r = 0; r < H; r++) {
    for (let c = 0; c < W; c++) {
      if (IS_DIGIT(grid[r][c])) clues += 1;
    }
  }
  total += 8 * Math.abs(clues - Math.round(W * H * 0.25));

  // Slot length checks, plus how often each cell is covered per orientation.
  const across = newLayer(0);
  const down = newLayer(0);
  for (const {dir, len, cells} of slots) {
    if (len < MIN_LEN) total += 8000;
    else if (len > MAX_LEN) total += 8000 + (len - MAX_LEN) * 500;
    else if (!lenCounts.get(len)) total += 12000; // no dictionary word fits
    const layer = (dir === "2" || dir === "4") ? across : down;
    for (const [r, c] of cells) layer[r][c] += 1;
  }

  // Per letter cell: coverage quality and dead ends.
  const orthogonal = [[-1, 0], [1, 0], [0, -1], [0, 1]];
  for (let r = 0; r < H; r++) {
    for (let c = 0; c < W; c++) {
      if (!IS_LETTER_CELL(grid[r][c])) continue;
      const h = across[r][c];
      const v = down[r][c];
      if (h === 0 || v === 0) {
        const covering = h + v;
        if (covering === 0) total += 1500;
        else if (covering === 1) total += 200;
        else total += 600;
      }
      let openNeighbours = 0;
      for (const [dr, dc] of orthogonal) {
        const rr = r + dr;
        const cc = c + dc;
        if (onBoard(rr, cc) && IS_LETTER_CELL(grid[rr][cc])) openNeighbours += 1;
      }
      // Three or more of the four sides are walls (board edge or non-letter).
      if (openNeighbours <= 1) total += 400;
    }
  }

  // Clue clustering: flood-fill each 8-connected group of clue cells.
  const visited = newLayer(false);
  for (let r = 0; r < H; r++) {
    for (let c = 0; c < W; c++) {
      if (visited[r][c] || !IS_DIGIT(grid[r][c])) continue;
      visited[r][c] = true;
      const frontier = [[r, c]];
      let members = 0;
      while (frontier.length > 0) {
        const [pr, pc] = frontier.pop();
        members += 1;
        for (let dr = -1; dr <= 1; dr++) {
          for (let dc = -1; dc <= 1; dc++) {
            const nr = pr + dr;
            const nc = pc + dc;
            // The centre cell is already visited, so it is skipped here too.
            if (!onBoard(nr, nc) || visited[nr][nc] || !IS_DIGIT(grid[nr][nc])) continue;
            visited[nr][nc] = true;
            frontier.push([nr, nc]);
          }
        }
      }
      total += (members - 1) * 120;
    }
  }
  return total;
}
/** --- Mask generation (memetic-ish + hillclimb) --- */
/**
 * Build a random clue mask: starting from an empty grid, drop clue digits on
 * random non-clue cells until round(W * H * 0.25) clues are placed or 4000
 * attempts have been made. A clue is kept only if hasRoomForClue accepts it
 * at the moment of placement.
 *
 * @param {{int: function(number, number): number}} rng - Random source.
 * @returns {string[][]} The generated mask.
 */
function randomMask(rng) {
  const mask = makeEmptyGrid();
  const wanted = Math.round(W * H * 0.25); // ~18
  let placed = 0;
  for (let attempt = 0; attempt < 4000 && placed < wanted; attempt++) {
    const r = rng.int(0, H - 1);
    const c = rng.int(0, W - 1);
    if (IS_DIGIT(mask[r][c])) continue;
    const d = String(rng.int(1, 4));
    // The room check never reads (r, c) itself, so test before writing.
    if (hasRoomForClue(mask, r, c, d)) {
      mask[r][c] = d;
      placed += 1;
    }
  }
  return mask;
}
/**
 * Return a mutated copy of a mask: four cells clustered around one random
 * centre are toggled. A clue cell becomes "#". Any other cell receives a
 * random clue digit if hasRoomForClue accepts it there, and "#" otherwise.
 *
 * @param {{int: function(number, number): number}} rng - Random source.
 * @param {string[][]} grid - Mask to mutate; it is not modified.
 * @returns {string[][]} The mutated copy.
 */
function mutate(rng, grid) {
  const next = deepCopyGrid(grid);
  const centerRow = rng.int(0, H - 1);
  const centerCol = rng.int(0, W - 1);
  for (let edit = 0; edit < 4; edit++) {
    // Sum of two draws from -2..2: offsets near zero are more likely.
    const rowJitter = rng.int(-2, 2) + rng.int(-2, 2);
    const colJitter = rng.int(-2, 2) + rng.int(-2, 2);
    const r = clamp(centerRow + rowJitter, 0, H - 1);
    const c = clamp(centerCol + colJitter, 0, W - 1);
    if (IS_DIGIT(next[r][c])) {
      next[r][c] = "#";
      continue;
    }
    const d = String(rng.int(1, 4));
    next[r][c] = hasRoomForClue(next, r, c, d) ? d : "#";
  }
  return next;
}
/**
 * Combine two masks along a random straight line through the board centre:
 * cells on the non-negative side of the line come from a, the rest from b.
 * Afterwards, scanning row by row, every clue that hasRoomForClue rejects is
 * reset to "#".
 *
 * @param {{float: function(): number}} rng - Random source.
 * @param {string[][]} a - First parent mask.
 * @param {string[][]} b - Second parent mask.
 * @returns {string[][]} New child mask; the parents are not modified.
 */
function crossover(rng, a, b) {
  const cx = (H - 1) / 2;
  const cy = (W - 1) / 2;
  const theta = rng.float() * Math.PI;
  const nx = Math.cos(theta);
  const ny = Math.sin(theta);
  const child = Array.from({length: H}, (_row, r) =>
    Array.from({length: W}, (_cell, c) => {
      // Signed distance of the cell from the cutting line (up to scale).
      const side = (r - cx) * nx + (c - cy) * ny;
      return side >= 0 ? a[r][c] : b[r][c];
    })
  );
  // Cleanup invalid clues; an earlier reset can free room for a later clue,
  // so this has to be a separate, ordered pass.
  for (let r = 0; r < H; r++) {
    for (let c = 0; c < W; c++) {
      const ch = child[r][c];
      if (IS_DIGIT(ch) && !hasRoomForClue(child, r, c, ch)) {
        child[r][c] = "#";
      }
    }
  }
  return child;
}
/**
 * Greedy local search over masks: keep mutating the best mask so far and
 * accept a mutation only when its fitness is strictly lower. The search ends
 * after `limit` consecutive rejected mutations.
 *
 * @param {object} rng - Random source handed on to mutate.
 * @param {string[][]} start - Starting mask; a copy is made, it is not modified.
 * @param {Map<number, number>} lenCounts - Passed through to maskFitness.
 * @param {number} limit - Consecutive failures tolerated before stopping.
 * @returns {string[][]} The best mask found.
 */
function hillclimb(rng, start, lenCounts, limit) {
  let incumbent = deepCopyGrid(start);
  let incumbentScore = maskFitness(incumbent, lenCounts);
  for (let stale = 0; stale < limit;) {
    const trial = mutate(rng, incumbent);
    const trialScore = maskFitness(trial, lenCounts);
    if (!(trialScore < incumbentScore)) {
      stale += 1;
      continue;
    }
    incumbent = trial;
    incumbentScore = trialScore;
    stale = 0; // an improvement restarts the patience counter
  }
  return incumbent;
}
/**
 * Fraction of board cells at which two grids hold the same character.
 *
 * @param {string[][]} a - Grid of H rows by W columns.
 * @param {string[][]} b - Grid of H rows by W columns.
 * @returns {number} Value between 0 and 1; 1 when all W * H cells match.
 */
function similarity(a, b) {
  const cellTotal = W * H;
  let matches = 0;
  for (let k = 0; k < cellTotal; k++) {
    const r = Math.floor(k / W);
    const c = k % W;
    if (a[r][c] === b[r][c]) matches += 1;
  }
  return matches / cellTotal;
}
/**
 * Evolve a clue mask with a small memetic algorithm.
 *
 * The population starts as hill-climbed random masks. Each generation adds
 * hill-climbed crossover children, sorts everything by fitness (lower is
 * better) and keeps up to popSize masks, skipping any that is more than 92%
 * identical to one already kept. Progress is logged every tenth generation.
 *
 * Each mask's fitness is computed once and carried with it. Evaluating
 * maskFitness inside the sort comparator would repeat that work on every
 * comparison without changing the resulting order.
 *
 * @param {object} rng - Random source; its int(min, max) picks the parents.
 * @param {Map<number, number>} lenCounts - Word length -> number of words.
 * @param {number} popSize - Population size; must be at least 1.
 * @param {number} gens - Number of generations to run.
 * @returns {string[][]} The fittest mask of the final population.
 * @throws {RangeError} When popSize is not a number >= 1.
 */
function generateMask(rng, lenCounts, popSize, gens) {
  // An empty population has no best mask to return; fail with a clear message.
  if (!(popSize >= 1)) {
    throw new RangeError(`generateMask: popSize must be >= 1 (got ${popSize})`);
  }
  console.log(`generateMask init pop: ${popSize}`);
  const scored = (grid) => ({grid, fit: maskFitness(grid, lenCounts)});
  const byFitness = (x, y) => x.fit - y.fit;

  let pop = [];
  for (let i = 0; i < popSize; i++) {
    const seedMask = randomMask(rng);
    pop.push(scored(hillclimb(rng, seedMask, lenCounts, 180))); // faster init
  }
  for (let gen = 0; gen < gens; gen++) {
    const children = [];
    const pairs = Math.max(popSize, Math.floor(popSize * 1.5));
    for (let k = 0; k < pairs; k++) {
      const p1 = pop[rng.int(0, pop.length - 1)].grid;
      const p2 = pop[rng.int(0, pop.length - 1)].grid;
      const child = crossover(rng, p1, p2);
      children.push(scored(hillclimb(rng, child, lenCounts, 70))); // light repair
    }
    pop = pop.concat(children);
    pop.sort(byFitness);
    // Similarity cull: walk best-first and drop near-duplicates of kept masks.
    const next = [];
    for (const cand of pop) {
      if (next.length >= popSize) break;
      const isDuplicate = next.some((kept) => similarity(cand.grid, kept.grid) > 0.92);
      if (!isDuplicate) next.push(cand);
    }
    pop = next;
    if ((gen % 10) === 0) {
      console.log(` gen ${gen}/${gens} bestFitness=${pop[0].fit}`);
    }
  }
  // Needed when gens is 0: the initial population was never sorted.
  pop.sort(byFitness);
  return pop[0].grid;
}
/** --- Fill (CSP) with NO huge candidate arrays --- */
/**
 * Fill a clue mask with dictionary words by backtracking search.
 *
 * At every step the unassigned slot with the fewest matching words is chosen;
 * ties go to the slot with the higher crossing score. Its candidates are
 * tried in a pseudo-random order, at most 500 per visit. A word may be used
 * only once per grid. A progress line is written to stdout while searching.
 *
 * Candidate order: indices are visited as (start + t * stride) % count. The
 * stride is forced coprime with count so that the walk is a true permutation;
 * a stride sharing a factor with count would revisit a few candidates and
 * never reach the rest.
 *
 * @param {{int: function(number, number): number}} rng - Random source.
 * @param {string[][]} mask - Clue mask; it is copied, not modified.
 * @param {Map<number, {words: string[], pos: number[][][]}>} dictIndex -
 *   Per-length dictionary index as built by loadWords.
 * @param {{logEveryMs?: number, timeLimitMs?: number}} [opts] - logEveryMs:
 *   minimum delay between progress lines (default 250); timeLimitMs: abort
 *   the search after this long, 0 meaning no limit (default 0).
 * @returns {{ok: boolean, grid: string[][], clueMap: Object<string, string>,
 *   stats: {nodes: number, backtracks: number, seconds: number, timedOut: boolean}}}
 *   ok tells whether every slot was filled; clueMap maps "row,col:digit" of a
 *   clue to its word.
 */
function fillMask(rng, mask, dictIndex, opts = {}) {
  const grid = deepCopyGrid(mask);
  const slots = extractSlots(grid).filter((s) => s.len >= MIN_LEN && s.len <= MAX_LEN);
  const used = new Set();
  const assigned = new Map();
  // progress options
  const logEveryMs = opts.logEveryMs ?? 250;
  const timeLimitMs = opts.timeLimitMs ?? 0; // 0 = no limit
  const MAX_TRIES_PER_SLOT = 500;

  // crossing weight precompute: how many slots pass through each cell
  const cellCount = Array.from({length: H}, () => Array(W).fill(0));
  for (const s of slots) {
    for (const [r, c] of s.cells) cellCount[r][c]++;
  }

  function slotKey(s) {
    return `${s.clue[0]},${s.clue[1]}:${s.clue[2]}`;
  }

  function patternForSlot(s) {
    return s.cells.map(([r, c]) => {
      const ch = grid[r][c];
      return IS_LETTER(ch) ? ch : null;
    });
  }

  function slotScore(s) {
    let cross = 0;
    for (const [r, c] of s.cells) cross += (cellCount[r][c] - 1);
    return cross * 10 + s.len;
  }

  function undoPlace(undo) {
    for (const [r, c, prev] of undo) grid[r][c] = prev;
  }

  // Write w into the slot. Returns the undo list, or null on a letter
  // conflict, in which case the grid is left exactly as it was.
  function placeWord(s, w) {
    const undo = [];
    for (let i = 0; i < s.cells.length; i++) {
      const [r, c] = s.cells[i];
      const prev = grid[r][c];
      const ch = w[i];
      if (prev === "#") {
        undo.push([r, c, prev]);
        grid[r][c] = ch;
      } else if (prev !== ch) {
        undoPlace(undo); // roll back the letters written so far
        return null;
      }
    }
    return undo;
  }

  function gcd(a, b) {
    let x = a;
    let y = b;
    while (y !== 0) {
      const rest = x % y;
      x = y;
      y = rest;
    }
    return x;
  }

  // Nearest stride >= step (wrapping inside 1..n-1) that is coprime with n.
  // Terminates because 1 is always coprime.
  function coprimeStride(step, n) {
    let stride = step;
    while (gcd(stride, n) !== 1) stride = (stride % (n - 1)) + 1;
    return stride;
  }

  // ---- progress bar ----
  const t0 = Date.now();
  let lastLog = t0;
  let nodes = 0;
  let backtracks = 0;
  let lastMRV = 0;
  let timedOut = false;

  function renderProgress(final = false) {
    const now = Date.now();
    if (!final && (now - lastLog) < logEveryMs) return;
    lastLog = now;
    const done = assigned.size;
    const total = slots.length;
    const pct = total ? Math.floor((done / total) * 100) : 100;
    const barLen = 22;
    const filled = Math.min(barLen, Math.floor((pct / 100) * barLen));
    const bar = `[${"#".repeat(filled)}${"-".repeat(barLen - filled)}]`;
    const elapsed = ((now - t0) / 1000).toFixed(1);
    const msg =
      `${bar} ${done}/${total} slots | nodes=${nodes} | backtracks=${backtracks} | mrv=${lastMRV} | ${elapsed}s`;
    process.stdout.write("\r" + msg.padEnd(120));
    if (final) process.stdout.write("\n");
  }

  // Pick the unassigned slot with the fewest candidates.
  // {slot: null, info: null} signals a dead end, {slot: null, info: {done: true}} success.
  function chooseMRV() {
    let best = null;
    let bestInfo = null;
    for (const s of slots) {
      if (assigned.has(slotKey(s))) continue;
      const entry = dictIndex.get(s.len);
      if (!entry) return {slot: null, info: null};
      const info = candidateInfoForPattern(entry, patternForSlot(s));
      if (info.count === 0) return {slot: null, info: null};
      if (
        !best ||
        info.count < bestInfo.count ||
        (info.count === bestInfo.count && slotScore(s) > slotScore(best))
      ) {
        best = s;
        bestInfo = info;
        if (info.count <= 1) break; // cannot get more constrained than this
      }
    }
    if (!best) return {slot: null, info: {done: true}};
    return {slot: best, info: bestInfo};
  }

  function backtrack() {
    if (timedOut) return false;
    nodes++;
    if (timeLimitMs && (Date.now() - t0) > timeLimitMs) {
      timedOut = true;
      return false;
    }
    const pick = chooseMRV();
    if (!pick.slot && pick.info && pick.info.done) return true;
    if (!pick.slot) {
      backtracks++;
      return false;
    }
    lastMRV = pick.info.count;
    renderProgress(false);
    const s = pick.slot;
    const k = slotKey(s);
    const entry = dictIndex.get(s.len);

    const tryWord = (w) => {
      if (!w || used.has(w)) return false;
      const undo = placeWord(s, w);
      if (!undo) return false;
      used.add(w);
      assigned.set(k, w);
      if (backtrack()) return true;
      assigned.delete(k);
      used.delete(w);
      undoPlace(undo);
      return false;
    };

    // Constrained slots walk the matching index list; unconstrained slots
    // (indices === null) walk the whole word list without building an array.
    const idxs = pick.info.indices;
    const total = idxs ? idxs.length : entry.words.length;
    if (total > 0) {
      const tries = Math.min(MAX_TRIES_PER_SLOT, total);
      const start = (total === 1) ? 0 : rng.int(0, total - 1);
      const stride = (total === 1) ? 1 : coprimeStride(rng.int(1, total - 1), total);
      for (let t = 0; t < tries; t++) {
        const at = (start + t * stride) % total;
        const w = entry.words[idxs ? idxs[at] : at];
        if (tryWord(w)) return true;
        if (timedOut) break; // no point in trying more words after the deadline
      }
    }
    backtracks++;
    return false;
  }

  renderProgress(false);
  const ok = backtrack();
  renderProgress(true);
  const clueMap = {};
  for (const [k, v] of assigned.entries()) clueMap[k] = v;
  return {ok, grid, clueMap, stats: {nodes, backtracks, seconds: (Date.now() - t0) / 1000, timedOut}};
}
/** --- Top-level: try mask+fill until success --- */
/**
 * Generate a filled puzzle: load the dictionary once, then repeatedly evolve
 * a mask and try to fill it, until a fill succeeds or opts.tries attempts
 * have been made. Each fill is limited to 30 seconds. Timings are reported
 * through console.time / console.timeEnd.
 *
 * @param {{seed: number, pop: number, gens: number, tries: number, wordsPath: string}} opts
 *   Options as produced by parseArgs.
 * @returns {{mask: string[][], filled: object}|null} The successful mask with
 *   the fillMask result, or null when every attempt failed.
 */
function generatePuzzle(opts) {
  const rng = makeRng(opts.seed);
  console.time("LOAD_WORDS");
  const dict = loadWords(opts.wordsPath);
  console.timeEnd("LOAD_WORDS");

  const fillOptions = {logEveryMs: 200, timeLimitMs: 30000};
  let attempt = 0;
  while (attempt < opts.tries) {
    attempt += 1;
    console.log(`\nAttempt ${attempt}/${opts.tries}`);

    console.time("MASK");
    const mask = generateMask(rng, dict.lenCounts, opts.pop, opts.gens);
    console.timeEnd("MASK");

    console.time("FILL");
    const filled = fillMask(rng, mask, dict.index, fillOptions);
    console.timeEnd("FILL");

    if (filled.ok) {
      return {mask, filled};
    }
  }
  return null;
}
// Names made available to modules that require() this file.
module.exports = {parseArgs, generatePuzzle, gridToString};