Gather data

This commit is contained in:
mike
2026-01-04 01:04:56 +01:00
parent 29ed7fe254
commit 795067472f
11 changed files with 124 additions and 76 deletions

View File

@@ -0,0 +1,222 @@
package puzzle;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import static puzzle.ExportFormat.*;
public class ClueGenerator {
private static final String OLLAMA_URL = "http://localhost:11434/api/chat";
private static final String MODEL = "qwen2.5:14b";
private static final String HINTS_FILE = "/home/mike/dev/puzzle-generator/nl_score_hints.csv";
private static Map<String, String> prebuiltClues = null;
/**
 * Lazily loads the prebuilt clue table from {@code HINTS_FILE} on first use.
 *
 * <p>Each line is split on its first three commas: column 0 is the word (stored upper-cased
 * with {@link Locale#ROOT}) and column 3 is the clue. A clue wrapped in double quotes is
 * unwrapped and {@code ""} is turned back into a single quote. Lines with fewer than four
 * columns, or with an empty word or clue, are skipped. If the file cannot be read a warning
 * is printed and the table stays empty.
 */
private static synchronized void ensurePrebuiltCluesLoaded() {
    if (prebuiltClues != null) return;
    prebuiltClues = new HashMap<>();
    try {
        for (var line : Files.readAllLines(Path.of(HINTS_FILE), StandardCharsets.UTF_8)) {
            var parts = line.split(",", 4);
            if (parts.length < 4) continue;

            var word = parts[0].trim().toUpperCase(Locale.ROOT);
            var rawClue = parts[3].trim();
            // The length check matters: a clue that is a single '"' both starts and ends
            // with a quote, and substring(1, 0) would throw.
            if (rawClue.length() >= 2 && rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
                rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
            }
            if (!word.isEmpty() && !rawClue.isEmpty()) {
                prebuiltClues.put(word, rawClue);
            }
        }
    } catch (IOException e) {
        System.err.println("Warning: " + HINTS_FILE + " not found or could not be read.");
    }
}
/**
 * Returns a copy of {@code puzzle} in which every word carries a clue.
 *
 * <p>Clues come from the prebuilt table first (looked up by upper-cased word); words not found
 * there are sent to {@link #generateClues(List)} in one request. A word that still has no clue
 * afterwards keeps the word itself as its clue.
 *
 * @param puzzle puzzle to enrich; {@code null} or a puzzle without words is returned as-is
 * @return a new puzzle with the same grid, difficulty and rewards, and clue-bearing words
 */
public static ExportedPuzzle applyClues(ExportedPuzzle puzzle) {
    if (puzzle == null || puzzle.words().isEmpty()) {
        return puzzle;
    }
    ensurePrebuiltCluesLoaded();

    var cluesByWord = new HashMap<String, String>();
    var unresolved = new ArrayList<String>();
    for (var entry : puzzle.words()) {
        var known = prebuiltClues.get(entry.word().toUpperCase(Locale.ROOT));
        if (known == null) {
            unresolved.add(entry.word());
        } else {
            cluesByWord.put(entry.word(), known);
        }
    }
    if (!unresolved.isEmpty()) {
        cluesByWord.putAll(generateClues(unresolved));
    }

    var enriched = new ArrayList<WordOut>();
    for (var entry : puzzle.words()) {
        // Fall back to the word itself when no clue could be found or generated.
        var clue = cluesByWord.getOrDefault(entry.word(), entry.word());
        var withClue = new WordOut(
                entry.word(), clue,
                entry.startRow(), entry.startCol(),
                entry.direction(), entry.answer(),
                entry.arrowRow(), entry.arrowCol(),
                entry.isReversed(), entry.complex());
        enriched.add(withClue);
    }
    return new ExportedPuzzle(puzzle.gridv2(), enriched, puzzle.difficulty(), puzzle.rewards());
}
/**
 * Asks the local Ollama chat endpoint for a clue for each of the given words.
 *
 * <p>This is best-effort: any failure (curl error, empty reply, unparsable reply) is reported
 * on stderr and yields an empty map, so callers can fall back to placeholder clues.
 *
 * @param words words that need a clue; {@code null} or empty yields an empty map
 * @return map from the original word (as passed in) to its clue; words the model skipped are absent
 */
public static Map<String, String> generateClues(List<String> words) {
    if (words == null || words.isEmpty()) {
        return Collections.emptyMap();
    }
    var prompt = createCluePrompt(words);
    try {
        // Ollama reads sampling parameters from "options"; a top-level "temperature" is ignored.
        var jsonRequest = String.format(
                "{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"stream\":false,\"options\":{\"temperature\":0.7}}",
                MODEL, escapeJson(prompt)
        );
        var responseBody = curlPostJson(OLLAMA_URL, jsonRequest, 120);
        var content = extractChatContent(responseBody);
        if (content == null || content.isEmpty()) {
            return Collections.emptyMap();
        }
        return parseCluesFromReply(words, content);
    } catch (InterruptedException e) {
        // Keep the interrupt visible to the caller instead of swallowing it.
        Thread.currentThread().interrupt();
        System.err.println("Failed to generate clues: interrupted");
        return Collections.emptyMap();
    } catch (Exception e) {
        System.err.println("Failed to generate clues: " + e.getMessage());
        return Collections.emptyMap();
    }
}
/**
 * Builds the Dutch instruction prompt that asks the model for one {@code word:clue} line per
 * word, followed by the word list (one word per line).
 */
private static String createCluePrompt(List<String> words) {
    var prompt = new StringBuilder();
    prompt.append("Je bent een expert in het maken van kruiswoordpuzzels. Geef voor elk van de onderstaande woorden een korte, uitdagende maar duidelijke cryptische of beschrijvende aanwijzing in het Nederlands.\n\n");
    prompt.append("Output ALLEEN in dit formaat:\n");
    prompt.append("woord1:aanwijzing\n");
    prompt.append("woord2:aanwijzing\n\n");
    prompt.append("GEEN andere tekst of uitleg. Sla GEEN woorden over.\n\n");
    prompt.append("Lijst:\n");
    prompt.append(String.join("\n", words));
    return prompt.toString();
}
/**
 * Parses a model reply of {@code word:clue} lines and matches it against the expected words.
 *
 * <p>Each line is split at its first colon. Leading list decoration on the word part (digits,
 * {@code . ) * -} and whitespace) is removed. Matching is case-insensitive and uses
 * {@link Locale#ROOT}, so it does not depend on the JVM's default locale.
 *
 * @param expectedWords words a clue was requested for
 * @param reply         raw text returned by the model
 * @return map from each expected word (original spelling) to its clue; unmatched words are absent
 */
private static Map<String, String> parseCluesFromReply(List<String> expectedWords, String reply) {
    Map<String, String> clueByLowerWord = new HashMap<>();
    for (var rawLine : reply.split("\n")) {
        var line = rawLine.trim();
        var colon = line.indexOf(':');
        if (colon < 0) continue;

        var wordPart = line.substring(0, colon).trim()
                .replaceAll("^[\\d+.)*\\-\\s]+", "")
                .toLowerCase(Locale.ROOT);
        var clue = line.substring(colon + 1).trim();
        if (!clue.isEmpty()) {
            clueByLowerWord.put(wordPart, clue);
        }
    }

    Map<String, String> results = new HashMap<>();
    for (var word : expectedWords) {
        var clue = clueByLowerWord.get(word.toLowerCase(Locale.ROOT));
        if (clue != null) {
            results.put(word, clue);
        }
    }
    return results;
}
/**
 * POSTs a JSON body to {@code url} by running the {@code curl} command-line tool.
 *
 * <p>The body is written to a temporary file, which is deleted again before returning.
 * stderr is merged into stdout, so a failure message contains curl's own diagnostics.
 *
 * @param url            target URL
 * @param jsonBody       request body, sent with {@code Content-Type: application/json}
 * @param timeoutSeconds value passed to curl's {@code --max-time}
 * @return the response body decoded as UTF-8
 * @throws IOException          if curl exits with a non-zero status or the temp file cannot be used
 * @throws InterruptedException if the thread is interrupted while waiting for curl
 */
private static String curlPostJson(String url, String jsonBody, int timeoutSeconds) throws Exception {
    var tempFile = Files.createTempFile("clue-request-", ".json");
    try {
        Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8);
        var cmd = List.of(
                "curl", "-fsSL",
                "--connect-timeout", "10",
                "--max-time", String.valueOf(timeoutSeconds),
                "-H", "Content-Type: application/json",
                // --data-binary sends the file untouched; plain -d strips CR/LF from @file input.
                "--data-binary", "@" + tempFile,
                url);
        var p = new ProcessBuilder(cmd)
                .redirectErrorStream(true)
                .start();
        try {
            var output = new String(p.getInputStream().readAllBytes(), StandardCharsets.UTF_8);
            var code = p.waitFor();
            if (code != 0) {
                throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" + output);
            }
            return output;
        } finally {
            // No-op when curl already exited; stops it if we were interrupted while waiting.
            p.destroy();
        }
    } finally {
        Files.deleteIfExists(tempFile);
    }
}
/**
 * Extracts the first {@code "content"} string value from a chat-completion style JSON reply
 * without a JSON parser.
 *
 * <p>If the reply has a {@code "choices"} key the search starts there (OpenAI-style replies);
 * otherwise, or if nothing is found after it, the whole document is searched (Ollama's
 * {@code {"message":{"content":...}}} shape). JSON string escapes are decoded, including
 * {@code \\uXXXX}; a malformed {@code \\u} sequence is kept literally.
 *
 * @param json raw response body, may be {@code null}
 * @return the decoded content, or {@code null} if no content string is present
 */
private static String extractChatContent(String json) {
    if (json == null) return null;
    var choices = json.indexOf("\"choices\"");
    var key = json.indexOf("\"content\"", Math.max(choices, 0));
    if (key < 0) {
        key = json.indexOf("\"content\"");
        if (key < 0) return null;
    }
    var colon = json.indexOf(':', key);
    if (colon < 0) return null;
    var open = json.indexOf('"', colon + 1);
    if (open < 0) return null;

    var out = new StringBuilder();
    for (var k = open + 1; k < json.length(); k++) {
        var ch = json.charAt(k);
        if (ch == '"') break;            // unescaped quote closes the string
        if (ch != '\\') {
            out.append(ch);
            continue;
        }
        if (++k >= json.length()) break; // dangling backslash at end of input
        var esc = json.charAt(k);
        switch (esc) {
            case 'n' -> out.append('\n');
            case 't' -> out.append('\t');
            case 'r' -> out.append('\r');
            case 'b' -> out.append('\b');
            case 'f' -> out.append('\f');
            case 'u' -> {
                var code = 0;
                var digits = 0;
                while (digits < 4 && k + 1 + digits < json.length()) {
                    var d = Character.digit(json.charAt(k + 1 + digits), 16);
                    if (d < 0) break;
                    code = (code << 4) | d;
                    digits++;
                }
                if (digits == 4) {
                    out.append((char) code);
                    k += 4;
                } else {
                    out.append('u');
                }
            }
            default -> out.append(esc);  // covers \" \\ and \/
        }
    }
    return out.toString();
}
/**
 * Escapes {@code str} so it can be placed between double quotes in a JSON document.
 *
 * <p>Backslash, double quote and every control character below U+0020 are escaped; JSON
 * does not allow raw control characters inside strings.
 */
private static String escapeJson(String str) {
    var out = new StringBuilder(str.length() + 16);
    for (var i = 0; i < str.length(); i++) {
        var ch = str.charAt(i);
        switch (ch) {
            case '\\' -> out.append("\\\\");
            case '"' -> out.append("\\\"");
            case '\n' -> out.append("\\n");
            case '\r' -> out.append("\\r");
            case '\t' -> out.append("\\t");
            case '\b' -> out.append("\\b");
            case '\f' -> out.append("\\f");
            default -> {
                if (ch < 0x20) {
                    out.append(String.format("\\u%04x", (int) ch));
                } else {
                    out.append(ch);
                }
            }
        }
    }
    return out.toString();
}
}

View File

@@ -0,0 +1,532 @@
package puzzle;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.*;
import java.util.concurrent.*;
import java.io.*;
import java.time.*;
import java.util.concurrent.atomic.*;
/**
* CONCURRENT MULTI-ENDPOINT Dutch Wordlist Scorer
* Distributes batches across Ollama, LM-Studio, and a third endpoint simultaneously
*/
public class ConcurrentWordScorer {
// ===== CONFIGURATION =====
private static final String INPUT_WORDLIST = "word-list.txt";
private static final String OUTPUT_SCORES = "word_scores.csv";
private static final int BATCH_SIZE = 10; // Even smaller for the difficult remaining words
private static final int MAX_RETRIES = 3;
// Define all three endpoints
private static final LLMEndpoint[] ENDPOINTS = {
new OllamaEndpoint(),
new LMStudioEndpoint(),
new LMStudioEndpoint("LM-Studio", "http://192.168.1.74:1234/v1/chat/completions",
"mistralai/mistral-nemo-instruct-2407", 1)
// new CustomEndpoint()
};
// ===== ENDPOINT CLASSES =====
/**
 * One LLM HTTP endpoint that can score a batch of words.
 *
 * <p>Subclasses supply the request JSON and the way to pull the reply text out of the
 * response body. {@link #execute(List)} caps the number of in-flight requests per endpoint
 * with a semaphore sized by {@code maxConcurrent}.
 */
abstract static class LLMEndpoint {
    String name;
    String baseUrl;
    String model;
    Semaphore rateLimiter; // limits concurrent requests to this endpoint
    int maxConcurrent;

    LLMEndpoint(String name, String baseUrl, String model, int maxConcurrent) {
        this.name = name;
        this.baseUrl = baseUrl;
        this.model = model;
        this.maxConcurrent = maxConcurrent;
        this.rateLimiter = new Semaphore(maxConcurrent);
    }

    /** Builds the JSON request body for the given prompt. */
    abstract String buildRequestJson(String prompt);

    /** Extracts the assistant's reply text from the raw response body. */
    abstract String extractResponseContent(String responseBody);

    /**
     * Scores {@code batch} on this endpoint, waiting for a free slot first.
     *
     * @throws Exception if the request fails or the reply has no content
     */
    List<WordScore> execute(List<String> batch) throws Exception {
        rateLimiter.acquire();
        try {
            return executeInternal(batch);
        } finally {
            rateLimiter.release();
        }
    }

    /** Sends one scoring request (120 s curl time limit) and parses the reply. */
    private List<WordScore> executeInternal(List<String> batch) throws Exception {
        var requestJson = buildRequestJson(createScoringPrompt(batch));
        var content = extractResponseContent(curlPostJson(baseUrl, requestJson, 120));
        if (content != null && !content.isEmpty()) {
            return parseScoresFromReply(batch, content, name);
        }
        throw new IOException("[" + name + "] Empty response content");
    }
}
/**
 * Local Ollama server, addressed through its {@code /api/chat} endpoint with streaming off.
 */
static class OllamaEndpoint
        extends LLMEndpoint {
    OllamaEndpoint() {
        super("Ollama", "http://localhost:11434/api/chat",
              "qwen2.5:14b", 1); // one request at a time
    }

    @Override String buildRequestJson(String prompt) {
        // Ollama reads sampling parameters from "options"; a top-level "temperature" is ignored.
        return String.format("{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"stream\":false,\"options\":{\"temperature\":0.1}}",
                             model, escapeJson(prompt));
    }

    /**
     * Returns the reply text from {@code {"message":{"content":"..."}}}, or an empty string if
     * there is none. Uses the shared extractor so escaped quotes inside the reply do not
     * truncate it and all escapes are decoded, not just {@code \n}.
     */
    @Override String extractResponseContent(String responseBody) {
        var content = extractChatContent(responseBody);
        return content == null ? "" : content;
    }
}
/**
 * LM-Studio server speaking the OpenAI-style {@code /v1/chat/completions} format.
 */
static class LMStudioEndpoint
        extends LLMEndpoint {

    /** Default instance: the LM-Studio host at 192.168.1.159, one request at a time. */
    LMStudioEndpoint() {
        super("LM-Studio",
              "http://192.168.1.159:1234/v1/chat/completions",
              "mistralai/mistral-nemo-instruct-2407",
              1);
    }

    /**
     * @param s   display name used in log output
     * @param url full chat-completions URL
     * @param s1  model identifier sent in the request
     * @param i   maximum number of concurrent requests
     */
    public LMStudioEndpoint(String s, String url, String s1, int i) {
        super(s, url, s1, i);
    }

    @Override String buildRequestJson(String prompt) {
        var template = "{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"temperature\":0.1,\"max_tokens\":2048}";
        return template.formatted(model, escapeJson(prompt));
    }

    @Override String extractResponseContent(String responseBody) {
        return extractChatContent(responseBody);
    }
}
/**
 * Third endpoint, using the same OpenAI-style request and response format as LM-Studio.
 */
static class CustomEndpoint
        extends LLMEndpoint {
    CustomEndpoint() {
        super("Custom", "http://192.168.1.74:1234/v1/chat/completions",
              "qwen2.5-vl-7b-abliterated-caption-it_gguf", 2);
    }

    @Override String buildRequestJson(String prompt) {
        // Build the body with this endpoint's own model. Delegating to a fresh
        // LMStudioEndpoint would send LM-Studio's model name instead.
        return String.format("{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"temperature\":0.1,\"max_tokens\":2048}",
                             model, escapeJson(prompt));
    }

    @Override String extractResponseContent(String responseBody) {
        return extractChatContent(responseBody);
    }
}
// ===== MAIN COORDINATOR =====
/**
 * Entry point: scores every not-yet-scored word of the input list across all endpoints.
 *
 * <p>Steps: clean the output CSV, build the work queue from words without an OK score, start
 * one writer thread plus {@code maxConcurrent} workers per endpoint, wait for the workers,
 * then send the writer its stop marker and wait for it.
 */
static void main(String[] args) throws Exception {
    System.out.println("=== CONCURRENT 3-Endpoint Scorer ===");
    for (var ep : ENDPOINTS) {
        System.out.printf("- %s: %s%n", ep.name, ep.baseUrl);
    }
    System.out.println();
    cleanupOutputFile();

    // Load work queue
    var allWords = Files.readAllLines(Paths.get(INPUT_WORDLIST));
    var scoredWords = loadAlreadyScoredWords();
    var workQueue = createWorkQueue(allWords, scoredWords);
    // Report words, not batches: the queue holds one item per batch.
    var remainingWords = workQueue.stream().mapToInt(item -> item.batch().size()).sum();
    System.out.printf("Total words: %d | Already scored: %d | Remaining: %d%n%n",
                      allWords.size(), scoredWords.size(), remainingWords);
    if (workQueue.isEmpty()) {
        System.out.println("All done!");
        return;
    }

    // Start result writer thread
    BlockingQueue<List<WordScore>> resultQueue = new LinkedBlockingQueue<>();
    var writerThread = startResultWriter(resultQueue);

    // One worker per allowed concurrent request on each endpoint
    var totalThreads = 0;
    for (var ep : ENDPOINTS) totalThreads += ep.maxConcurrent;
    var executor = Executors.newFixedThreadPool(totalThreads);
    var totalProcessed = new AtomicInteger(scoredWords.size());
    var totalWords = allWords.size();
    for (var endpoint : ENDPOINTS) {
        for (var i = 0; i < endpoint.maxConcurrent; i++) {
            executor.submit(() -> processBatches(endpoint, workQueue, resultQueue, totalProcessed, totalWords));
        }
    }

    // Wait for completion
    executor.shutdown();
    executor.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);

    // Signal writer to stop
    resultQueue.put(Collections.singletonList(new WordScore(null, 0, "STOP")));
    writerThread.join();
    System.out.println("\n✓ All endpoints finished!");
}
// ===== WORKER THREAD LOGIC =====
/**
 * Worker loop for one endpoint slot: takes batches off the work queue, scores them and hands
 * the results to the writer queue.
 *
 * <p>The loop ends when a one-second poll returns nothing and the queue is empty, when the
 * thread is interrupted, or on any other exception.
 *
 * @param endpoint       endpoint this worker sends its requests to
 * @param workQueue      shared queue of pending batches
 * @param resultQueue    queue consumed by the result writer
 * @param totalProcessed shared counter used for progress output
 * @param totalWords     size of the full word list, for the progress percentage
 */
private static void processBatches(LLMEndpoint endpoint,
                                   BlockingQueue<WorkItem> workQueue,
                                   BlockingQueue<List<WordScore>> resultQueue,
                                   AtomicInteger totalProcessed,
                                   int totalWords) {
    System.out.printf("[%s] Worker started%n", endpoint.name);
    var self = Thread.currentThread();
    while (!self.isInterrupted()) {
        try {
            var item = workQueue.poll(1, TimeUnit.SECONDS);
            if (item == null) {
                if (workQueue.isEmpty()) {
                    break; // nothing left to do
                }
                continue;
            }

            var scores = processWithRetry(endpoint, item.batch());
            // Tag every score with its origin
            for (var score : scores) {
                score.endpoint = endpoint.name;
                score.batchId = item.batchId();
            }
            resultQueue.put(scores);

            // Print progress only around every 100th word to keep the console quiet
            var done = totalProcessed.addAndGet(scores.size());
            if (done % 100 < BATCH_SIZE) {
                System.out.printf("Progress: %d/%d (%.1f%%)%n",
                                  done, totalWords, (done * 100.0 / totalWords));
            }
        } catch (InterruptedException e) {
            self.interrupt();
            break;
        } catch (Exception e) {
            System.err.printf("[%s] Fatal error: %s%n", endpoint.name, e.getMessage());
            break;
        }
    }
    System.out.printf("[%s] Worker stopped%n", endpoint.name);
}
/**
 * Scores a batch, retrying up to {@code MAX_RETRIES} times with a growing pause
 * (2 s, 4 s, ...) between attempts.
 *
 * <p>An interrupt, whether it arrives during the request or during the pause, stops the
 * retries at once and restores the thread's interrupt flag.
 *
 * @return the scores, or FAILED placeholder scores for every word if all attempts failed
 *         or the thread was interrupted
 */
private static List<WordScore> processWithRetry(LLMEndpoint endpoint, List<String> batch) {
    for (var attempt = 1; attempt <= MAX_RETRIES; attempt++) {
        try {
            return endpoint.execute(batch);
        } catch (InterruptedException e) {
            // Not a retryable failure: pass the interrupt on so the worker loop can exit.
            Thread.currentThread().interrupt();
            break;
        } catch (Exception e) {
            System.err.printf("[%s] Attempt %d/%d failed: %s%n",
                              endpoint.name, attempt, MAX_RETRIES, e.getMessage());
            if (attempt >= MAX_RETRIES) {
                break;
            }
            try {
                Thread.sleep(2000L * attempt);
            } catch (InterruptedException ie) {
                Thread.currentThread().interrupt();
                break;
            }
        }
    }
    return createFailedScores(batch, endpoint.name);
}
// ===== RESULT WRITER THREAD =====
/**
 * Opens the output CSV for appending (writing the header if the file is empty) and starts a
 * thread that writes every batch taken from {@code resultQueue}.
 *
 * <p>The thread stops when it receives a single-element batch whose status is {@code "STOP"}.
 * The writer is closed on every exit path, and the file is written as UTF-8 to match
 * {@code cleanupOutputFile}.
 *
 * @return the started writer thread, so the caller can {@code join()} it
 */
private static Thread startResultWriter(BlockingQueue<List<WordScore>> resultQueue) throws Exception {
    var path = Paths.get(OUTPUT_SCORES);
    var writer = Files.newBufferedWriter(path, StandardCharsets.UTF_8,
                                         StandardOpenOption.CREATE, StandardOpenOption.APPEND);
    try {
        if (Files.size(path) == 0) {
            writer.write("word,score,status,endpoint,batch_id,timestamp\n");
            writer.flush();
        }
    } catch (IOException e) {
        writer.close(); // do not leak the handle if the header cannot be written
        throw e;
    }

    var thread = new Thread(() -> {
        try (writer) {
            while (true) {
                var scores = resultQueue.take();
                // Stop signal; constant-first equals because status may be null
                if (scores.size() == 1 && "STOP".equals(scores.get(0).status)) {
                    break;
                }
                writeBatch(writer, scores);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            System.err.println("Writer thread error: interrupted");
        } catch (Exception e) {
            System.err.println("Writer thread error: " + e.getMessage());
        }
    });
    thread.start();
    return thread;
}
/**
 * Appends one CSV row per score ({@code word,score,status,endpoint,batch_id,timestamp}) and
 * flushes. All rows of a batch share the same timestamp, taken when the call starts.
 */
private static synchronized void writeBatch(BufferedWriter writer, List<WordScore> scores) throws Exception {
    final var writtenAt = Instant.now().toString();
    final var rowFormat = "%s,%d,%s,%s,%d,%s\n";
    for (var entry : scores) {
        var row = rowFormat.formatted(
                entry.word, entry.score, entry.status, entry.endpoint, entry.batchId, writtenAt);
        writer.write(row);
    }
    writer.flush();
}
// ===== QUEUE & DATA STRUCTURES =====
/**
 * One unit of work for a scoring worker.
 *
 * @param batchId sequence number assigned when the work queue is built
 * @param batch   words to score together in a single request
 */
record WorkItem(int batchId, List<String> batch) {
}
/**
 * Builds the queue of batches that still need scoring.
 *
 * <p>Words are filtered first and grouped afterwards, so every batch except possibly the last
 * holds exactly {@code BATCH_SIZE} words even when resuming a partly scored list. Blank lines
 * are dropped. Surrounding whitespace is removed so a word matches its trimmed entry in
 * {@code scored}.
 *
 * @param allWords every line of the input word list
 * @param scored   lower-cased words that already have an OK score
 * @return queue of batches with consecutive ids starting at 0
 */
private static BlockingQueue<WorkItem> createWorkQueue(List<String> allWords, Set<String> scored) {
    List<String> pending = new ArrayList<>();
    for (var raw : allWords) {
        var word = raw.strip();
        if (!word.isEmpty() && !scored.contains(word.toLowerCase())) {
            pending.add(word);
        }
    }

    BlockingQueue<WorkItem> queue = new LinkedBlockingQueue<>();
    var batchId = 0;
    for (var from = 0; from < pending.size(); from += BATCH_SIZE) {
        var to = Math.min(from + BATCH_SIZE, pending.size());
        // Copy: subList is only a view of the pending list
        queue.add(new WorkItem(batchId++, new ArrayList<>(pending.subList(from, to))));
    }
    return queue;
}
// ===== LOADING & PARSING =====
/**
 * Reads the output CSV and collects the lower-cased words whose status column is OK.
 *
 * <p>The first line is always treated as the header and skipped. A missing file yields an
 * empty set.
 */
private static Set<String> loadAlreadyScoredWords() throws Exception {
    Set<String> scored = new HashSet<>();
    var path = Paths.get(OUTPUT_SCORES);
    if (!Files.exists(path)) {
        return scored;
    }
    var rows = Files.readAllLines(path);
    for (var idx = 1; idx < rows.size(); idx++) {
        var cols = rows.get(idx).split(",");
        if (cols.length < 3) {
            continue;
        }
        if ("OK".equalsIgnoreCase(cols[2].trim())) {
            scored.add(cols[0].trim().toLowerCase());
        }
    }
    return scored;
}
/**
 * Rewrites the output CSV so it holds the header plus one OK row per word.
 *
 * <p>Rows whose status is not OK are dropped. When a word occurs more than once, its last OK
 * row wins but keeps the position of its first occurrence. Does nothing if the file is
 * missing or empty.
 */
private static void cleanupOutputFile() throws IOException {
    var path = Paths.get(OUTPUT_SCORES);
    if (!Files.exists(path)) return;
    System.out.println("Cleaning up " + OUTPUT_SCORES + "...");

    var lines = Files.readAllLines(path);
    if (lines.isEmpty()) return;

    Map<String, String> latestOk = new LinkedHashMap<>();
    for (var row : lines.subList(1, lines.size())) {
        var cols = row.split(",");
        if (cols.length < 3 || !"OK".equalsIgnoreCase(cols[2].trim())) {
            continue;
        }
        latestOk.put(cols[0].trim().toLowerCase(), row);
    }

    var cleaned = new ArrayList<String>(latestOk.size() + 1);
    cleaned.add(lines.get(0)); // header
    cleaned.addAll(latestOk.values());
    Files.write(path, cleaned, StandardCharsets.UTF_8);

    var removed = lines.size() - cleaned.size();
    System.out.printf("Cleanup complete. Kept %d unique OK entries. Removed %d non-OK or duplicate entries.%n",
                      latestOk.size(), removed);
}
/**
 * Builds a placeholder result for a batch that could not be scored: every word gets score
 * {@code -1}, status {@code FAILED}, the given endpoint name and batch id {@code -1}.
 */
private static List<WordScore> createFailedScores(List<String> words, String endpoint) {
    var placeholders = new ArrayList<WordScore>();
    words.forEach(word -> placeholders.add(new WordScore(word, -1, "FAILED", endpoint, -1)));
    return placeholders;
}
// Parsing logic
/**
 * Parses a model reply into one score per expected word.
 *
 * <p>Accepted line shapes include {@code word:score}, {@code 1. word: score} and
 * {@code word - score}. A line with a colon is split at its first colon. A line without one
 * is split at its <em>last</em> hyphen, so hyphenated words ({@code e-mail - 5}) and bulleted
 * lines ({@code - word - 5}) keep the whole word. Scores are clamped to 1..10. Matching is
 * case-insensitive and independent of the default locale.
 *
 * @param expectedWords words that were sent to the model, in request order
 * @param reply         raw reply text
 * @param endpointName  name of the answering endpoint (currently unused)
 * @return one entry per expected word, in the same order: status OK with the score, or
 *         status MISSING with score -1 when the reply had no usable line for it
 */
private static List<WordScore> parseScoresFromReply(List<String> expectedWords, String reply, String endpointName) {
    Map<String, Integer> scoreByLowerWord = new HashMap<>();
    for (var rawLine : reply.split("\n")) {
        var line = rawLine.trim();
        var sepAt = line.indexOf(':');
        if (sepAt < 0) {
            sepAt = line.lastIndexOf('-');
        }
        if (sepAt < 0) continue;

        // Remove leading numbering like "1. " or bullets like "* ", "- "
        var word = line.substring(0, sepAt).trim()
                .replaceAll("^[\\d+.)*\\-\\s]+", "")
                .toLowerCase(Locale.ROOT);
        // Keep only the leading digits; anything after the number is ignored
        var digits = line.substring(sepAt + 1).trim().replaceAll("[^0-9].*", "");
        if (digits.isEmpty()) continue;
        try {
            var score = Integer.parseInt(digits);
            scoreByLowerWord.put(word, Math.max(1, Math.min(10, score)));
        } catch (NumberFormatException ignored) {
            // Digit run too long for an int: treat the line as unusable
        }
    }

    // Match scores to original words (maintaining order)
    List<WordScore> results = new ArrayList<>();
    for (var word : expectedWords) {
        var score = scoreByLowerWord.get(word.toLowerCase(Locale.ROOT));
        if (score != null) {
            results.add(new WordScore(word, score, "OK"));
        } else {
            results.add(new WordScore(word, -1, "MISSING"));
        }
    }
    return results;
}
// Prompt creation
/**
 * Builds the Dutch instruction prompt that asks for a popularity score from 1 to 10 per word,
 * one {@code word:score} line each, followed by the word list (one word per line).
 */
private static String createScoringPrompt(List<String> words) {
    var prompt = new StringBuilder();
    prompt.append("Je bent een Nederlandse taalexpert. Geef elk van de ")
          .append(words.size())
          .append(" onderstaande woorden een populariteitsscore van 1 (zeer zeldzaam) tot 10 (zeer algemeen).\n\n");
    prompt.append("Output ALLEEN in dit formaat:\n");
    prompt.append("woord1:score\n");
    prompt.append("woord2:score\n\n");
    prompt.append("GEEN andere tekst of uitleg. Sla GEEN woorden over.\n\n");
    prompt.append("Lijst:\n");
    prompt.append(String.join("\n", words));
    return prompt.toString();
}
// Utility methods
/**
 * Escapes {@code str} so it can be placed between double quotes in a JSON document.
 *
 * <p>Backslash, double quote and every control character below U+0020 are escaped; JSON
 * does not allow raw control characters inside strings.
 */
private static String escapeJson(String str) {
    var out = new StringBuilder(str.length() + 16);
    for (var i = 0; i < str.length(); i++) {
        var ch = str.charAt(i);
        switch (ch) {
            case '\\' -> out.append("\\\\");
            case '"' -> out.append("\\\"");
            case '\n' -> out.append("\\n");
            case '\r' -> out.append("\\r");
            case '\t' -> out.append("\\t");
            case '\b' -> out.append("\\b");
            case '\f' -> out.append("\\f");
            default -> {
                if (ch < 0x20) {
                    out.append(String.format("\\u%04x", (int) ch));
                } else {
                    out.append(ch);
                }
            }
        }
    }
    return out.toString();
}
/**
 * POSTs a JSON body to {@code url} by running the {@code curl} command-line tool.
 *
 * <p>The body is written to a temporary file (this avoids argument-escaping issues), which is
 * deleted again before returning. stderr is merged into stdout, so a failure message
 * contains curl's own diagnostics.
 *
 * @param url            target URL
 * @param jsonBody       request body, sent with {@code Content-Type: application/json}
 * @param timeoutSeconds value passed to curl's {@code --max-time}
 * @return the response body decoded as UTF-8
 * @throws IOException          if curl exits with a non-zero status or the temp file cannot be used
 * @throws InterruptedException if the thread is interrupted while waiting for curl
 */
private static String curlPostJson(String url, String jsonBody, int timeoutSeconds) throws Exception {
    var tempFile = Files.createTempFile("lm-request-", ".json");
    try {
        Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8);
        var cmd = List.of(
                "curl", "-fsSL",
                "--connect-timeout", "10",
                "--max-time", String.valueOf(timeoutSeconds),
                "-H", "Content-Type: application/json",
                // --data-binary sends the file untouched; plain -d strips CR/LF from @file input.
                "--data-binary", "@" + tempFile,
                url);
        var p = new ProcessBuilder(cmd)
                .redirectErrorStream(true)
                .start();
        try {
            var output = new String(p.getInputStream().readAllBytes(), StandardCharsets.UTF_8);
            var code = p.waitFor();
            if (code != 0) {
                throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" + output);
            }
            return output;
        } finally {
            // No-op when curl already exited; stops it if we were interrupted while waiting.
            p.destroy();
        }
    } finally {
        Files.deleteIfExists(tempFile);
    }
}
/**
 * Extracts the first {@code "content"} string value from a chat-completion style JSON reply
 * without a JSON parser.
 *
 * <p>If the reply has a {@code "choices"} key the search starts there; otherwise it starts at
 * the beginning of the document. JSON string escapes are decoded, including
 * {@code \\uXXXX}; a malformed {@code \\u} sequence is kept literally.
 *
 * @param json raw response body, may be {@code null}
 * @return the decoded content, or {@code null} if no content string is present
 */
private static String extractChatContent(String json) {
    if (json == null) return null;
    var choices = json.indexOf("\"choices\"");
    var key = json.indexOf("\"content\"", Math.max(choices, 0));
    if (key < 0) return null;
    var colon = json.indexOf(':', key);
    if (colon < 0) return null;
    var open = json.indexOf('"', colon + 1);
    if (open < 0) return null;

    var out = new StringBuilder();
    for (var k = open + 1; k < json.length(); k++) {
        var ch = json.charAt(k);
        if (ch == '"') break;            // unescaped quote closes the string
        if (ch != '\\') {
            out.append(ch);
            continue;
        }
        if (++k >= json.length()) break; // dangling backslash at end of input
        var esc = json.charAt(k);
        switch (esc) {
            case 'n' -> out.append('\n');
            case 't' -> out.append('\t');
            case 'r' -> out.append('\r');
            case 'b' -> out.append('\b');
            case 'f' -> out.append('\f');
            case 'u' -> {
                var code = 0;
                var digits = 0;
                while (digits < 4 && k + 1 + digits < json.length()) {
                    var d = Character.digit(json.charAt(k + 1 + digits), 16);
                    if (d < 0) break;
                    code = (code << 4) | d;
                    digits++;
                }
                if (digits == 4) {
                    out.append((char) code);
                    k += 4;
                } else {
                    out.append('u');
                }
            }
            default -> out.append(esc);  // covers \" \\ and \/
        }
    }
    return out.toString();
}
}

View File

@@ -0,0 +1,205 @@
package puzzle;
import java.util.*;
import static puzzle.SwedishGenerator.*;
/**
* ExportFormat.java
*
* Direct port of export_format.js:
* - scans filled grid for clue digits '1'..'4'
* - extracts placed words in canonical direction (horizontal=right, vertical=down)
* - crops to bounding box (words + arrow cells) with 1-cell margin
* - outputs gridv2 + words[] (+ difficulty, rewards)
*/
public final class ExportFormat {
private ExportFormat() { }
/** Returns whether {@code ch} is an ASCII upper-case letter {@code A}..{@code Z}. */
private static boolean isLetter(char ch) {
    return 'A' <= ch && ch <= 'Z';
}
/** Returns whether cell ({@code r}, {@code c}) lies inside a grid of {@code H} rows and {@code W} columns. */
private static boolean inBounds(int H, int W, int r, int c) {
    var rowOk = 0 <= r && r < H;
    var colOk = 0 <= c && c < W;
    return rowOk && colOk;
}
// ---------- Public API ----------
/**
 * Converts a filled puzzle into the export shape: a cropped letter grid plus the word list.
 *
 * <p>Every slot that has an entry in the filled grid's clue map becomes one exported word.
 * The grid is cropped to the bounding box of all word cells and clue (arrow) cells; cells
 * that are not part of an exported word are rendered as {@code '#'}. Word and arrow
 * coordinates are shifted so they are relative to the cropped grid.
 *
 * @param puz        generator result; must not be {@code null}
 * @param difficulty copied unchanged into the result
 * @param rewards    copied unchanged into the result
 * @return the exported puzzle; if no word could be extracted, the full grid (letters kept,
 *         everything else {@code '#'}) with an empty word list
 */
public static ExportedPuzzle exportFormatFromFilled(PuzzleResult puz, int difficulty, Rewards rewards) {
Objects.requireNonNull(puz, "puz");
var g = puz.filled().grid;
var H = g.length;
// NOTE(review): reads row 0 unconditionally, so this assumes the grid has at least one row.
var W = g[0].length;
// 1) extract "placed" list from all clue digits in the filled grid
List<Placed> placed = new ArrayList<>();
var allSlots = extractSlots(g);
var clueMap = puz.filled().clueMap;
for (var s : allSlots) {
var word = clueMap.get(s.key());
if (word == null) continue;
var p = extractPlacedFromSlot(s, word);
if (p == null) continue;
placed.add(p);
}
// If nothing placed: return full grid mapped to letters/# only
if (placed.isEmpty()) {
List<String> gridv2 = new ArrayList<>(H);
for (var chars : g) {
var sb = new StringBuilder(W);
for (var c = 0; c < W; c++) {
var ch = chars[c];
sb.append(isLetter(ch) ? ch : '#');
}
gridv2.add(sb.toString());
}
return new ExportedPuzzle(gridv2, List.of(), difficulty, rewards);
}
// 2) bounding box around all word cells + arrow cells
// NOTE(review): the class comment mentions a 1-cell margin, but the min/max values below
// are used as-is, so no margin is added here.
List<int[]> allCells = new ArrayList<>();
for (var p : placed) {
allCells.addAll(p.cells);
allCells.add(p.arrow);
}
int minR = Integer.MAX_VALUE, minC = Integer.MAX_VALUE;
int maxR = Integer.MIN_VALUE, maxC = Integer.MIN_VALUE;
for (var rc : allCells) {
int rr = rc[0], cc = rc[1];
minR = Math.min(minR, rr);
minC = Math.min(minC, cc);
maxR = Math.max(maxR, rr);
maxC = Math.max(maxC, cc);
}
// 3) map of only used letter cells (everything else becomes '#')
Map<Long, Character> letterAt = new HashMap<>();
for (var p : placed) {
for (var rc : p.cells) {
int rr = rc[0], cc = rc[1];
if (inBounds(H, W, rr, cc) && isLetter(g[rr][cc])) {
letterAt.put(pack(rr, cc), g[rr][cc]);
}
}
}
// 4) render gridv2 over cropped bounds (cells without a recorded letter become '#')
List<String> gridv2 = new ArrayList<>(Math.max(0, maxR - minR + 1));
for (var r = minR; r <= maxR; r++) {
var row = new StringBuilder(Math.max(0, maxC - minC + 1));
for (var c = minC; c <= maxC; c++) {
var ch = letterAt.get(pack(r, c));
row.append(ch != null ? ch : '#');
}
gridv2.add(row.toString());
}
// 5) words output with cropped coordinates
List<WordOut> wordsOut = new ArrayList<>(placed.size());
for (var p : placed) {
wordsOut.add(new WordOut(
p.word,
p.clue, // placeholder = word (same as JS)
p.startRow - minR,
p.startCol - minC,
p.direction,
p.word, // answer
p.arrowRow - minR,
p.arrowCol - minC,
p.isReversed,
// NOTE(review): assumes every placed word has a dictionary entry; a missing entry would throw here.
puz.dict().words().get(p.word).cross()
));
}
return new ExportedPuzzle(gridv2, wordsOut, difficulty, rewards);
}
static final String HORIZONTAL = "h", VERTICAL = "v";
/**
 * Turns a generator slot and the word assigned to it into a {@code Placed} export record.
 *
 * <p>The slot's direction digit selects the output direction: {@code '2'} is horizontal,
 * {@code '3'} and {@code '5'} are vertical, {@code '4'} is horizontal reversed and
 * {@code '1'} is vertical reversed. The start position is always the slot's first cell and
 * the arrow position is the clue cell.
 *
 * @return the placed word, or {@code null} if the direction digit is not one of the above
 */
private static Placed extractPlacedFromSlot(Slot s, String word) {
    final int clueRow = s.clueR();
    final int clueCol = s.clueC();
    final char dirDigit = s.dir();

    List<int[]> cells = new ArrayList<>();
    for (int idx = 0; idx < s.len(); idx++) {
        cells.add(new int[]{ s.rs()[idx], s.cs()[idx] });
    }

    // All supported directions share start and arrow; only orientation and reversal differ.
    final String direction;
    final boolean isReversed;
    switch (dirDigit) {
        case '2' -> { direction = HORIZONTAL; isReversed = false; } // right
        case '3', '5' -> { direction = VERTICAL; isReversed = false; } // down or down-bent
        case '4' -> { direction = HORIZONTAL; isReversed = true; } // left
        case '1' -> { direction = VERTICAL; isReversed = true; } // up
        default -> { return null; }
    }

    var firstCell = cells.get(0);
    return new Placed(
            word,
            word, // clue placeholder
            firstCell[0],
            firstCell[1],
            direction,
            word, // answer
            clueRow,
            clueCol,
            cells,
            new int[]{ clueRow, clueCol },
            isReversed
    );
}
/**
 * Packs a (row, column) pair into a single {@code long} map key: the row fills the upper
 * 32 bits and the column, taken as an unsigned 32-bit value, the lower 32 bits. Negative
 * coordinates are handled as well.
 */
private static long pack(int r, int c) {
    long high = ((long) r) << 32;
    long low = c & 0xFFFFFFFFL;
    return high | low; // the halves never overlap, so OR equals XOR here
}
// ---------- Data models ----------
/**
 * Internal view of one word placed in the grid, in original (uncropped) grid coordinates.
 *
 * @param word       the placed word
 * @param clue       clue text; set to the word itself as a placeholder when created
 * @param startRow   row of the word's first cell
 * @param startCol   column of the word's first cell
 * @param direction  {@code "h"} (horizontal) or {@code "v"} (vertical)
 * @param answer     the answer; same as {@code word} when created
 * @param arrowRow   row of the clue cell
 * @param arrowCol   column of the clue cell
 * @param cells      word cells as {@code [row, col]} pairs, in slot order
 * @param arrow      {@code [arrowRow, arrowCol]}
 * @param isReversed whether the slot direction was left or up
 */
private record Placed(String word, String clue, int startRow, int startCol, String direction, String answer, int arrowRow, int arrowCol, List<int[]> cells, int[] arrow,
boolean isReversed) { }
/** Rewards attached to an exported puzzle: coins, stars and hints. */
public record Rewards(int coins, int stars, int hints) { }
/**
 * One exported word, with coordinates relative to the cropped {@code gridv2}.
 *
 * @param word       the word
 * @param clue       clue text; equals the word until a real clue is applied
 * @param startRow   row of the word's first cell
 * @param startCol   column of the word's first cell
 * @param direction  {@code "h"} (horizontal) or {@code "v"} (vertical)
 * @param answer     the answer
 * @param arrowRow   row of the clue cell
 * @param arrowCol   column of the clue cell
 * @param isReversed whether the slot direction was left or up
 * @param complex    the dictionary entry's {@code cross()} value for this word
 */
public record WordOut(String word, String clue, int startRow, int startCol, String direction, String answer, int arrowRow, int arrowCol, boolean isReversed, int complex) { }
/**
 * The complete export: grid rows (letters and {@code '#'}), the word list, difficulty and rewards.
 */
public record ExportedPuzzle(List<String> gridv2, List<WordOut> words, int difficulty, Rewards rewards) { }
}

View File

@@ -0,0 +1,92 @@
package puzzle;
import java.sql.*;
import java.util.function.ToIntFunction;
public final class HintScores {
/**
 * Recomputes {@code cross_score} for every hint in the local SQLite database, in batches
 * of 1000 rows.
 */
public static void main(String[] args) throws Exception {
    Class.forName("org.sqlite.JDBC");
    final var jdbcUrl = "jdbc:sqlite:/home/mike/dev/puzzle-generator/tools/hint/hint.sqlite";
    try (var conn = DriverManager.getConnection(jdbcUrl)) {
        updateCrossScores(conn, word -> exampleScore(word), 1000);
    }
}
/**
 * Updates {@code hints.cross_score} for every row whose {@code puzzle_norm} is not null,
 * using a score computed from {@code puzzle_norm}.
 *
 * <p>Updates are sent in JDBC batches and committed after each batch. Rows for which
 * {@code scoreFn} throws a {@link RuntimeException} are skipped; the number of skipped rows
 * is reported on stderr. On an {@link SQLException} the uncommitted batch is rolled back.
 * The connection's auto-commit setting is restored before returning.
 *
 * @param conn      open JDBC connection
 * @param scoreFn   callback: {@code scoreFn.applyAsInt(puzzleNorm)}
 * @param batchSize rows per batch and fetch-size hint, e.g. 1000
 * @throws SQLException if reading, updating or committing fails; failures during rollback or
 *                      while restoring auto-commit are attached as suppressed exceptions
 */
public static void updateCrossScores(
        Connection conn,
        ToIntFunction<String> scoreFn,
        int batchSize
) throws SQLException {
    // Use a transaction for speed + consistency
    final boolean prevAutoCommit = conn.getAutoCommit();
    conn.setAutoCommit(false);
    final String selectSql =
            "SELECT id, puzzle_norm " +
            "FROM hints " +
            "WHERE puzzle_norm IS NOT NULL"; // optionally add: " AND cross_score IS NULL"
    final String updateSql =
            "UPDATE hints SET cross_score = ? WHERE id = ?";

    SQLException failure = null;
    try (PreparedStatement psSel = conn.prepareStatement(selectSql);
         PreparedStatement psUpd = conn.prepareStatement(updateSql)) {
        psSel.setFetchSize(batchSize);
        int pending = 0;
        int skipped = 0;
        try (ResultSet rs = psSel.executeQuery()) {
            while (rs.next()) {
                long id = rs.getLong("id");
                String word = rs.getString("puzzle_norm");
                int score;
                try {
                    score = scoreFn.applyAsInt(word);
                } catch (RuntimeException scoringFailure) {
                    // Policy: a row that cannot be scored keeps its old cross_score.
                    skipped++;
                    continue;
                }
                psUpd.setInt(1, score);
                psUpd.setLong(2, id);
                psUpd.addBatch();
                pending++;
                if (pending >= batchSize) {
                    psUpd.executeBatch();
                    conn.commit();
                    pending = 0;
                }
            }
        }
        if (pending > 0) {
            psUpd.executeBatch();
            conn.commit();
        }
        if (skipped > 0) {
            System.err.println("updateCrossScores: skipped " + skipped + " row(s) whose score could not be computed");
        }
    } catch (SQLException e) {
        failure = e;
        try {
            conn.rollback();
        } catch (SQLException rollbackFailure) {
            e.addSuppressed(rollbackFailure); // keep the original error as the primary one
        }
        throw e;
    } finally {
        try {
            conn.setAutoCommit(prevAutoCommit);
        } catch (SQLException restoreFailure) {
            if (failure == null) {
                throw restoreFailure;
            }
            failure.addSuppressed(restoreFailure);
        }
    }
}
/**
 * Example scoring callback: delegates to {@code ThemePoolBuilderLength.crossabilityScore}.
 *
 * @param word the value to score
 * @return the crossability score for {@code word}
 */
public static int exampleScore(String word) {
return ThemePoolBuilderLength.crossabilityScore(word);
}
}

View File

@@ -0,0 +1,453 @@
package puzzle;
import puzzle.SwedishGenerator.PuzzleResult;
import puzzle.SwedishGenerator.Rng;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.concurrent.*;
import static puzzle.SwedishGenerator.fillMask;
import static puzzle.SwedishGenerator.generateMask;
import static puzzle.SwedishGenerator.loadWords;
public class Main {
final static String OUT_DIR = envOrDefault("OUT_DIR", "/data/puzzle");
final static Path PUZZLE_DIR = Paths.get(OUT_DIR, "puzzles");
static final Path INDEX_FILE = PUZZLE_DIR.resolve("index.json");
static final OffsetDateTime now = OffsetDateTime.now(ZoneOffset.UTC);
static final String CREATED_AT = now.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"));
static final String FILE_ID = CREATED_AT.replace(":", "-") + "_" + (System.currentTimeMillis() / 1000);
static final String FILE_NAME = FILE_ID + ".json";
static final Path OUTPUT_PATH = PUZZLE_DIR.resolve(FILE_NAME);
static final String DATE_STRING = now.toLocalDate().toString();
/** Command-line options with their default values. */
public static class Opts {
// Default seed mixes the nanosecond timer with the wall clock, truncated to an int.
public int seed = (int) (System.nanoTime() ^ System.currentTimeMillis());
// Shown as "population" in the settings output.
public int pop = 18;
// Shown as "generations" in the settings output.
public int gens = 500;
public String wordsPath = "nl_score_hints.csv";
public double minSimplicity = 0; // 0 means no limit
// Defaults to the number of available processors, at least 1.
public int threads = Math.max(1, Runtime.getRuntime().availableProcessors());
// Shown as "maxTries" in the settings output; defaults to the thread count.
public int tries = threads;
// When set, main only rebuilds the puzzle index and exits.
public boolean reindex = false;
}
/**
 * Generates one puzzle, prints it, applies clues and writes it to the puzzle directory.
 *
 * <p>With the reindex option set, only the puzzle index is rebuilt. Exit codes: 1 when no
 * puzzle could be generated, 2 when writing the output fails.
 */
public void main(String[] args) {
var opts = parseArgs(args);
// Reindex mode: rebuild the index and stop.
if (opts.reindex) {
section("Reindex");
info("OutputDir : " + OUT_DIR);
rebuildIndex();
return;
}
section("Puzzle Generator");
info("OutputDir : " + OUT_DIR);
info("WordsFile : " + opts.wordsPath);
section("Settings");
printSettings(opts);
var res = generatePuzzle(opts);
if (res == null) {
err("Search status : UNSOLVED");
err("Reason : No solution found within tries.");
System.exit(1);
return;
}
section("Result");
info(String.format(Locale.ROOT, "simplicity : %.2f", res.filled().simplicity));
section("Mask");
System.out.print(indentLines(SwedishGenerator.gridToString(res.mask()), " "));
section("Grid (raw)");
System.out.print(indentLines(SwedishGenerator.gridToString(res.filled().grid), " "));
section("Grid (human)");
System.out.print(indentLines(SwedishGenerator.renderHuman(res.filled().grid), " "));
// Export with fixed difficulty 1 and rewards of 50 coins, 2 stars, 1 hint.
var exported = ExportFormat.exportFormatFromFilled(res, 1, new ExportFormat.Rewards(50, 2, 1));
section("Clues");
info("status : generating...");
info("generatedFor : " + exported.words().size());
exported = ClueGenerator.applyClues(exported);
info("status : done");
section("Words");
printWordsTable(exported.words());
section("Gridv2");
for (var row : exported.gridv2()) System.out.println(" " + row);
// Export to JSON file
var theme = "algemeen";
section("Export");
info("file : " + OUTPUT_PATH);
try {
Files.createDirectories(PUZZLE_DIR);
var json = toJson(exported, DATE_STRING, theme);
Files.writeString(OUTPUT_PATH, json, StandardCharsets.UTF_8);
// Update index.json
var pathInIndex = "/puzzles/" + FILE_NAME;
var indexRecord = toIndexRecordJson(FILE_ID, pathInIndex, DATE_STRING, theme, exported.difficulty(), CREATED_AT);
// The condition is always false, so the index is always rebuilt in full;
// the incremental updateIndex path is effectively switched off.
if (1 != 1) updateIndex(PUZZLE_DIR.toString(), indexRecord);
else rebuildIndex();
info("indexUpdated : " + INDEX_FILE);
} catch (IOException e) {
err("Failed to write: " + FILE_NAME);
err("Reason : " + e.getMessage());
System.exit(2);
}
}
// ---------------- Output helpers ----------------
/** Prints an informational line to stdout. */
private static void info(String msg) { System.out.println("[INFO ] " + msg); }
/** Prints a warning line to stdout. */
private static void warn(String msg) { System.out.println("[WARN ] " + msg); }
/** Prints an error line to stderr. */
private static void err(String msg) { System.err.println("[ERROR] " + msg); }
/** Starts a new report section: prints an empty line, then the title on a line of its own. */
private static void section(String title) {
    var out = System.out;
    out.println();
    out.println(title);
}
/**
 * Reads an environment variable, falling back to a default.
 *
 * @param key name of the environment variable
 * @param def value returned when the variable is unset or blank
 * @return the variable's value, or {@code def} when it is missing or contains only whitespace
 */
private static String envOrDefault(String key, String def) {
    var value = System.getenv(key);
    if (value != null && !value.isBlank()) {
        return value;
    }
    return def;
}
/**
 * Prints the effective generator settings, one "name: value" row per option,
 * with the option name left-aligned in a 14-character column.
 *
 * @param o the options to print
 */
private static void printSettings(Opts o) {
    final var intRow = " %-14s: %d%n";
    var out = System.out;
    out.printf(Locale.ROOT, intRow, "seed", o.seed);
    out.printf(Locale.ROOT, intRow, "population", o.pop);
    out.printf(Locale.ROOT, intRow, "generations", o.gens);
    out.printf(Locale.ROOT, " %-14s: %s%n", "wordsPath", o.wordsPath);
    out.printf(Locale.ROOT, " %-14s: %.2f%n", "minSimplicity", o.minSimplicity);
    out.printf(Locale.ROOT, intRow, "threads", o.threads);
    out.printf(Locale.ROOT, intRow, "maxTries", o.tries);
}
/** Formats a grid coordinate as "(row,col)" using locale-independent digits. */
private static String fmtPoint(int r, int c) {
    final var template = "(%d,%d)";
    return String.format(Locale.ROOT, template, r, c);
}
/**
 * Prints the exported words as a fixed-width table: a header row followed by one
 * numbered row per word. Over-long cell values are shortened by {@code safe};
 * a missing clue is printed as an empty string.
 *
 * @param words the words to list, in output order
 */
private static void printWordsTable(List<ExportFormat.WordOut> words) {
    System.out.println(" # WORD CX DIR START ARROW CLUE");
    final var rowFormat = " %-2d %-12s %-3s %-3s %-9s %-9s %s%n";
    var rowNumber = 0;
    for (var entry : words) {
        rowNumber++;
        System.out.printf(
            Locale.ROOT,
            rowFormat,
            rowNumber,
            safe(entry.word(), 12),
            safe("" + entry.complex(), 3),
            safe(entry.direction(), 3),
            fmtPoint(entry.startRow(), entry.startCol()),
            fmtPoint(entry.arrowRow(), entry.arrowCol()),
            entry.clue() == null ? "" : entry.clue()
        );
    }
}
/**
 * Shortens a table cell value so it fits in {@code max} characters.
 *
 * <p>Values that already fit are returned unchanged. Longer values are cut to
 * {@code max - 1} characters and terminated with an ellipsis, so the result is
 * exactly {@code max} characters wide and the truncation is visible. (The code
 * previously appended an empty string, leaving the result one character short
 * with no truncation marker.)
 *
 * @param s   the value to fit; {@code null} is treated as empty
 * @param max the maximum width in characters
 * @return the value, possibly truncated; empty when {@code s} is null or {@code max <= 0}
 */
private static String safe(String s, int max) {
    if (s == null || max <= 0) return "";
    if (s.length() <= max) return s;
    return s.substring(0, max - 1) + "\u2026";
}
/**
 * Prefixes every line of a text with the given indent and terminates each line with '\n'.
 *
 * <p>Lines are split on any line break; trailing empty lines are kept, so text ending
 * in a line break yields one extra line containing only the indent.
 *
 * @param s      the text to indent
 * @param indent the prefix for each line
 * @return the indented text, or an empty string when {@code s} is null or empty
 */
private static String indentLines(String s, String indent) {
    if (s == null || s.isEmpty()) {
        return "";
    }
    var rows = s.split("\\R", -1);
    // Joining with "\n" + indent puts the indent in front of every row but the first.
    return indent + String.join("\n" + indent, rows) + "\n";
}
/**
 * Prints the command-line synopsis and the default option values to standard output.
 * The thread default shown is the number of available processors, but at least 1.
 */
static void usage() {
System.out.println("""
Usage:
java puzzle.Main [--seed N] [--pop N] [--gens N] [--tries N] [--words FILE] [--min-simplicity N.N] [--threads N] [--reindex]
Defaults:
--pop 18
--gens 600
--tries = threads
--words nl_score_hints.csv
--min-simplicity 0 (no limit)
--threads %d
""".formatted(Math.max(1, Runtime.getRuntime().availableProcessors())));
}
/**
 * Parses command-line arguments into an {@link Opts} instance.
 *
 * <p>{@code --help} / {@code -h} print the usage text and exit the JVM with status 0.
 * Every option except {@code --reindex} consumes the following argument as its value.
 *
 * @param argv the raw command-line arguments
 * @return the populated options
 * @throws IllegalArgumentException if an argument is unknown or a value-taking option is the
 *                                  last argument; numeric values that do not parse raise
 *                                  {@link NumberFormatException}, a subclass
 */
static Opts parseArgs(String[] argv) {
    var out = new Opts();
    for (var i = 0; i < argv.length; i++) {
        var a = argv[i];
        if (a.equals("--help") || a.equals("-h")) {
            usage();
            System.exit(0);
        }
        switch (a) {
            case "--seed" -> out.seed = Integer.parseInt(requireOptionValue(argv, ++i, a));
            case "--pop" -> out.pop = Integer.parseInt(requireOptionValue(argv, ++i, a));
            case "--gens" -> out.gens = Integer.parseInt(requireOptionValue(argv, ++i, a));
            case "--tries" -> out.tries = Integer.parseInt(requireOptionValue(argv, ++i, a));
            case "--words" -> out.wordsPath = requireOptionValue(argv, ++i, a);
            case "--min-simplicity" -> out.minSimplicity = Double.parseDouble(requireOptionValue(argv, ++i, a));
            case "--threads" -> out.threads = Integer.parseInt(requireOptionValue(argv, ++i, a));
            case "--reindex" -> out.reindex = true;
            default -> throw new IllegalArgumentException("Unknown arg: " + a);
        }
    }
    return out;
}

/**
 * Returns the value at {@code index}, or fails with a message naming the option.
 * Without this check a trailing option ended in a NumberFormatException or
 * NullPointerException, or silently set the words path to null.
 */
private static String requireOptionValue(String[] argv, int index, String option) {
    if (index >= argv.length) {
        throw new IllegalArgumentException("Missing value for " + option);
    }
    return argv[index];
}
// ---------------- Generation ----------------
// Package-private method for testing
/**
 * Loads the dictionary and searches for a mask that can be completely filled.
 *
 * <p>With {@code opts.threads > 1}, one task per try is submitted to a fixed thread pool;
 * each task uses its own RNG seeded with {@code opts.seed + attempt} and the first
 * successful task wins. Otherwise the tries run sequentially on one RNG seeded with
 * {@code opts.seed}. A fill is accepted when it succeeded and, if
 * {@code opts.minSimplicity > 0}, its simplicity is at least that value.
 *
 * @param opts generator options
 * @return the first accepted result, or {@code null} when no try succeeded, no tries were
 *         requested, or the calling thread was interrupted
 */
PuzzleResult generatePuzzle(Opts opts) {
    var tLoad0 = System.nanoTime();
    var dict = loadWords(opts.wordsPath);
    var tLoad1 = System.nanoTime();
    section("Load");
    info(String.format(Locale.ROOT, "words : %,d", dict.words().size()));
    info(String.format(Locale.ROOT, "loadTime : %.3f s", (tLoad1 - tLoad0) / 1e9));
    section("Search");
    if (opts.threads > 1) {
        info("mode : multi-threaded (" + opts.threads + ")");
        if (opts.tries <= 0) {
            // invokeAny rejects an empty task list with IllegalArgumentException;
            // report "unsolved" instead, as the single-threaded path does.
            warn("status : UNSOLVED");
            return null;
        }
        var executor = Executors.newFixedThreadPool(opts.threads);
        try {
            var tasks = new ArrayList<Callable<PuzzleResult>>();
            for (var i = 1; i <= opts.tries; i++) {
                final var attempt = i;
                tasks.add(() -> {
                    var threadRng = new Rng(opts.seed + attempt);
                    var mask = generateMask(threadRng, dict.lenCounts(), opts.pop, opts.gens, false);
                    var filled = fillMask(threadRng, mask, dict.index(), dict.words(), 200, 30000, false);
                    if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
                        info("status : SOLVED");
                        info("foundAtTry : " + attempt);
                        return new PuzzleResult(dict, mask, filled);
                    }
                    // A failed try must throw so invokeAny keeps waiting for another task.
                    throw new RuntimeException("No solution in try " + attempt);
                });
            }
            return executor.invokeAny(tasks);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            warn("status : INTERRUPTED");
        } catch (ExecutionException e) {
            // all failed
            warn("status : UNSOLVED");
        } finally {
            // Cancels tries still running once a result (or failure) is known.
            executor.shutdownNow();
        }
        return null;
    } else {
        info("mode : single-threaded");
        var rng = new Rng(opts.seed);
        for (var attempt = 1; attempt <= opts.tries; attempt++) {
            info("try : " + attempt + "/" + opts.tries);
            var mask = generateMask(rng, dict.lenCounts(), opts.pop, opts.gens, true);
            var filled = fillMask(rng, mask, dict.index(), dict.words(), 200, 30000, true);
            if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
                info("status : SOLVED");
                info("foundAtTry : " + attempt);
                return new PuzzleResult(dict, mask, filled);
            }
            if (filled.ok) {
                // Filled, but rejected by the minimum-simplicity threshold.
                warn(String.format(Locale.ROOT,
                    "simplicity : %.2f (below min %.2f)",
                    filled.simplicity, opts.minSimplicity
                ));
            }
        }
        info("status : UNSOLVED");
        return null;
    }
}
// ---------------- Export (unchanged logic) ----------------
/**
 * Serializes an exported puzzle to the JSON document written to the puzzle file.
 *
 * <p>The document holds the date, theme, difficulty, a rewards object, the
 * {@code gridv2} rows and one object per word. String values go through
 * {@code escapeJson}; numeric and boolean values are appended as-is.
 *
 * @param puzzle the exported puzzle
 * @param date   value of the "date" field
 * @param theme  value of the "theme" field
 * @return the JSON text, ending with a newline
 */
private static String toJson(ExportFormat.ExportedPuzzle puzzle, String date, String theme) {
    var json = new StringBuilder();
    json.append("{\n")
        .append(" \"date\": \"").append(escapeJson(date)).append("\",\n")
        .append(" \"theme\": \"").append(escapeJson(theme)).append("\",\n")
        .append(" \"difficulty\": ").append(puzzle.difficulty()).append(",\n");

    json.append(" \"rewards\": {\n")
        .append(" \"coins\": ").append(puzzle.rewards().coins()).append(",\n")
        .append(" \"stars\": ").append(puzzle.rewards().stars()).append(",\n")
        .append(" \"hints\": ").append(puzzle.rewards().hints()).append("\n")
        .append(" },\n");

    json.append(" \"gridv2\": [\n");
    var rowCount = puzzle.gridv2().size();
    for (var rowIndex = 0; rowIndex < rowCount; rowIndex++) {
        json.append(" \"").append(escapeJson(puzzle.gridv2().get(rowIndex))).append("\"");
        // Every element but the last is followed by a comma.
        json.append(rowIndex + 1 < rowCount ? ",\n" : "\n");
    }
    json.append(" ],\n");

    json.append(" \"words\": [\n");
    var wordCount = puzzle.words().size();
    for (var wordIndex = 0; wordIndex < wordCount; wordIndex++) {
        var entry = puzzle.words().get(wordIndex);
        json.append(" {\n")
            .append(" \"word\": \"").append(escapeJson(entry.word())).append("\",\n")
            .append(" \"clue\": \"").append(escapeJson(entry.clue())).append("\",\n")
            .append(" \"startRow\": ").append(entry.startRow()).append(",\n")
            .append(" \"startCol\": ").append(entry.startCol()).append(",\n")
            .append(" \"direction\": \"").append(escapeJson(entry.direction())).append("\",\n")
            .append(" \"answer\": \"").append(escapeJson(entry.answer())).append("\",\n")
            .append(" \"arrowRow\": ").append(entry.arrowRow()).append(",\n")
            .append(" \"arrowCol\": ").append(entry.arrowCol()).append(",\n")
            .append(" \"isReversed\": ").append(entry.isReversed()).append("\n")
            .append(" }");
        json.append(wordIndex + 1 < wordCount ? ",\n" : "\n");
    }
    json.append(" ]\n");

    json.append("}\n");
    return json.toString();
}
/**
 * Escapes a string for embedding between double quotes in a JSON document.
 *
 * <p>Backslash, double quote and the named control characters get their two-character
 * escapes; any other character below U+0020 is written as a {@code \}{@code u00XX}
 * escape, since raw control characters are not allowed inside JSON strings.
 *
 * @param s the text to escape; {@code null} is treated as the empty string (a word's
 *          clue may be null, and this used to throw a NullPointerException)
 * @return the escaped text, without surrounding quotes
 */
private static String escapeJson(String s) {
    if (s == null) return "";
    var sb = new StringBuilder(s.length() + 16);
    for (var i = 0; i < s.length(); i++) {
        var ch = s.charAt(i);
        switch (ch) {
            case '\\' -> sb.append("\\\\");
            case '"' -> sb.append("\\\"");
            case '\n' -> sb.append("\\n");
            case '\r' -> sb.append("\\r");
            case '\t' -> sb.append("\\t");
            case '\b' -> sb.append("\\b");
            case '\f' -> sb.append("\\f");
            default -> {
                if (ch < 0x20) {
                    // ch < 0x20, so the escape always starts with "00".
                    sb.append("\\u00")
                      .append(Character.forDigit(ch >> 4, 16))
                      .append(Character.forDigit(ch & 0xF, 16));
                } else {
                    sb.append(ch);
                }
            }
        }
    }
    return sb.toString();
}
/**
 * Builds the single-line JSON object that describes one puzzle in index.json.
 * All string fields are escaped with {@code escapeJson}; difficulty is written as a number.
 *
 * @return the record as compact JSON text
 */
private static String toIndexRecordJson(String id, String path, String date, String theme, int difficulty, String createdAt) {
    // Escape in parameter order, matching the original argument evaluation order.
    var safeId = escapeJson(id);
    var safePath = escapeJson(path);
    var safeDate = escapeJson(date);
    var safeTheme = escapeJson(theme);
    var safeCreatedAt = escapeJson(createdAt);
    final var template =
        "{\"id\":\"%s\",\"path\":\"%s\",\"date\":\"%s\",\"theme\":\"%s\",\"difficulty\":%d,\"createdAt\":\"%s\"}";
    return String.format(Locale.ROOT, template, safeId, safePath, safeDate, safeTheme, difficulty, safeCreatedAt);
}
/**
 * Inserts a record at the front of {@code index.json} in the given directory.
 *
 * <p>A missing, empty or "[]" index (or one without any '[') is replaced by a new
 * one-element array; otherwise the record and a comma are spliced in right after the
 * first '['. I/O failures are reported through {@code err} and not rethrown.
 *
 * @param outDir        directory containing index.json
 * @param newRecordJson the record to insert, already serialized as JSON
 */
private static void updateIndex(String outDir, String newRecordJson) {
    var indexPath = Paths.get(outDir, "index.json");
    var freshIndex = "[\n " + newRecordJson + "\n]";
    try {
        var existing = "";
        if (Files.exists(indexPath)) {
            existing = Files.readString(indexPath, StandardCharsets.UTF_8).trim();
        }
        String updated;
        if (existing.isEmpty() || existing.equals("[]")) {
            updated = freshIndex;
        } else {
            var open = existing.indexOf('[');
            updated = (open == -1)
                ? freshIndex
                : existing.substring(0, open + 1) + "\n " + newRecordJson + "," + existing.substring(open + 1);
        }
        Files.writeString(indexPath, updated, StandardCharsets.UTF_8);
        info("indexUpdated : " + indexPath);
    } catch (IOException e) {
        err("Failed to update index.json: " + e.getMessage());
    }
}
/**
 * Regenerates index.json from the puzzle files found in {@code PUZZLE_DIR}.
 *
 * <p>Every {@code *.json} file except index.json contributes one record, ordered by
 * file name descending. Date, theme and difficulty are pulled out of the file text with
 * {@code extractValue}; difficulty falls back to 1 when it does not parse. The creation
 * timestamp is derived from the file name when it looks like
 * {@code 2025-12-24T04-25-06Z_...}, otherwise the id itself is used. Files that cannot
 * be read are reported and skipped.
 */
private void rebuildIndex() {
    if (!Files.exists(PUZZLE_DIR)) {
        err("Puzzles directory does not exist: " + PUZZLE_DIR);
        return;
    }
    info("Rebuilding index from: " + PUZZLE_DIR);
    List<String> records = new ArrayList<>();
    try (var listing = Files.list(PUZZLE_DIR)) {
        var puzzleFiles = listing
            .filter(p -> p.toString().endsWith(".json") && !p.getFileName().toString().equals("index.json"))
            .sorted(Comparator.comparing(Path::getFileName).reversed())
            .toList();
        for (var puzzleFile : puzzleFiles) {
            try {
                var fileName = puzzleFile.getFileName().toString();
                // Strip the ".json" suffix to get the puzzle id.
                var id = fileName.substring(0, fileName.length() - 5);
                var text = Files.readString(puzzleFile, StandardCharsets.UTF_8);
                var date = extractValue(text, "date");
                var theme = extractValue(text, "theme");
                var difficulty = 1;
                try {
                    difficulty = Integer.parseInt(extractValue(text, "difficulty"));
                } catch (Exception ignored) {
                    // keep the default difficulty of 1
                }
                var createdAt = id;
                if (id.length() >= 20 && id.charAt(10) == 'T') {
                    var stamp = id.split("_")[0]; // 2025-12-24T04-25-06Z
                    if (stamp.length() >= 19) {
                        // Turn the '-' separators of the time part back into ':'.
                        createdAt = stamp.substring(0, 13) + ":" + stamp.substring(14, 16) + ":" + stamp.substring(17);
                    }
                }
                records.add(toIndexRecordJson(id, "/puzzles/" + fileName, date, theme, difficulty, createdAt));
            } catch (IOException e) {
                err("Failed to read " + puzzleFile + ": " + e.getMessage());
            }
        }
    } catch (IOException e) {
        err("Failed to list puzzles: " + e.getMessage());
        return;
    }
    var indexPath = PUZZLE_DIR.resolve("index.json");
    var indexJson = "[\n " + String.join(",\n ", records) + "\n]";
    try {
        Files.writeString(indexPath, indexJson, StandardCharsets.UTF_8);
        info("Successfully rebuilt index.json with " + records.size() + " records.");
    } catch (IOException e) {
        err("Failed to write index.json: " + e.getMessage());
    }
}
/**
 * Extracts the first value stored under a top-level-looking {@code "key":} in JSON text
 * using a regular expression (no real JSON parsing).
 *
 * <p>The value is read up to the next double quote, comma, line break or closing brace,
 * so it works for simple string and number fields only. The key is matched literally;
 * regex metacharacters in it have no special meaning.
 *
 * @param json the JSON text to search
 * @param key  the field name, without quotes
 * @return the trimmed value, or an empty string when the key is not found
 */
private static String extractValue(String json, String key) {
    var pattern = java.util.regex.Pattern.compile(
        "\"" + java.util.regex.Pattern.quote(key) + "\":\\s*\"?([^\",\\n\\r}]*)\"?");
    var matcher = pattern.matcher(json);
    if (matcher.find()) return matcher.group(1).trim();
    return "";
}
}

View File

@@ -0,0 +1,32 @@
package puzzle;
//import org.junit.jupiter.api.Test;
//import static org.junit.jupiter.api.Assertions.*;
/**
 * Framework-free smoke test for {@code Main.generatePuzzle}: runs one single-threaded
 * generation attempt against the test word pool. The JUnit assertions are currently
 * disabled, so a run only fails if generation throws.
 */
public class MainTest {
    /** Entry point so the smoke test can be started without a test runner. */
    static void main() {
        var test = new MainTest();
        test.testGeneratePuzzle();
    }

    // @Test
    public void testGeneratePuzzle() {
        // Arrange: a small, deterministic, single-threaded configuration.
        var options = new Main.Opts();
        options.wordsPath = "src/test/resources/puzzle/pool.txt";
        options.seed = 1234;
        options.pop = 18;
        options.gens = 300;
        options.minSimplicity = 0;
        options.threads = 1;
        options.tries = 1;

        // Act
        var generator = new Main();
        var result = generator.generatePuzzle(options);

        // Assert (disabled until JUnit is enabled again):
        /* assertNotNull(result);
        assertNotNull(result.mask());
        assertNotNull(result.filled());
        assertTrue(result.filled().ok);*/
    }
}

View File

@@ -0,0 +1,931 @@
package puzzle;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.concurrent.*;
import java.util.stream.Collectors;
/**
* SwedishGenerator.java
*
* Usage:
* javac SwedishGenerator.java
* java SwedishGenerator [--seed N] [--pop N] [--gens N] [--tries N] [--words word-list.txt]
*/
@SuppressWarnings("ALL")
public class SwedishGenerator {
// Grid size: grids are allocated as char[H][W], i.e. H rows of W columns.
// CLUE_SIZE is the highest arrow digit randomMask/mutate may place in column 0;
// SIMPLICITY_DEFAULT_SCORE is the initial score value used by loadWords.
static final int W = 9, H = 8,
CLUE_SIZE = 4,
SIMPLICITY_DEFAULT_SCORE = 2;
// Inclusive bounds on the slot/word lengths the generator works with.
static final int MIN_LEN = 2, MAX_LEN = 8;
// Directions for '1'..'6'
static final int[][] OFFSETS = new int[7][2];
static final int[][] STEPS = new int[7][2];
static {
// 1: up
OFFSETS[1] = new int[]{ -1, 0 };
STEPS[1] = new int[]{ -1, 0 };
// 2: right
OFFSETS[2] = new int[]{ 0, 1 };
STEPS[2] = new int[]{ 0, 1 };
// 3: down
OFFSETS[3] = new int[]{ 1, 0 };
STEPS[3] = new int[]{ 1, 0 };
// 4: left
OFFSETS[4] = new int[]{ 0, -1 };
STEPS[4] = new int[]{ 0, -1 };
// 5: vertical down, clue is on the right of the first letter
OFFSETS[5] = new int[]{ 0, -1 };
STEPS[5] = new int[]{ 1, 0 };
// 6: vertical down, clue is on the left of the first letter
OFFSETS[6] = new int[]{ 0, 1 };
STEPS[6] = new int[]{ 1, 0 };
}
// Cell alphabet: 'A'..'Z' are letters, '1'..'6' are clue arrows, '#' is an empty letter cell.
static final char FIRST_ABC = 'A';
static final char LAST_ABC = 'Z';
static final char FIRST_ARROW = '1';
static final char LAST_ARROW = '6';
// The two arrow digits that denote horizontal words.
static final char HOR_ARROW_1 = '2';
static final char HOR_ARROW_2 = '4';

/** Returns true when {@code ch} is one of the arrow digits '1'..'6' (not any decimal digit). */
static boolean isDigit(char ch) {
    return !(ch < FIRST_ARROW || ch > LAST_ARROW);
}

/** Returns true when {@code ch} is an upper-case letter 'A'..'Z'. */
static boolean isLetter(char ch) {
    return !(ch < FIRST_ABC || ch > LAST_ABC);
}

/** Returns true when {@code ch} can hold a letter: a placed letter or the '#' placeholder. */
static boolean isLetterCell(char ch) {
    return isLetter(ch) || ch == '#';
}
// ---------------- RNG (xorshift32) ----------------
/**
 * Small deterministic pseudo-random generator (xorshift32: shifts 13, 17, 5).
 * Not thread-safe; callers in this file create one instance per search thread.
 */
static final class Rng {
    private int x;

    /** Creates a generator; a seed of 0 is replaced by 1 because xorshift stays at 0 forever. */
    Rng(int seed) {
        var s = seed;
        if (s == 0) s = 1;
        this.x = s;
    }

    /** Advances the state and returns it; the int holds an unsigned 32-bit value. */
    int nextU32() {
        var y = x;
        y ^= (y << 13);
        y ^= (y >>> 17);
        y ^= (y << 5);
        x = y;
        return y;
    }

    /**
     * Returns an integer in {@code [min, max]}, both inclusive.
     * Assumes {@code min <= max}; the range is computed in long arithmetic to avoid overflow.
     */
    int randint(int min, int max) { // inclusive
        var u = (nextU32() & 0xFFFFFFFFL);
        var range = (long) max - (long) min + 1L;
        return (int) (min + (u % range));
    }

    /**
     * Returns a double in {@code [0, 1)}.
     *
     * <p>The raw quotient reaches exactly 1.0 when the state is 0xFFFFFFFF; callers that
     * scale the result to an array index would then index one past the end, so that
     * single value is mapped to the largest double below 1.0. All other values are unchanged.
     */
    double nextFloat() {
        var f = (nextU32() & 0xFFFFFFFFL) / 4294967295.0;
        return f < 1.0 ? f : Math.nextDown(1.0);
    }
}
/** Limits {@code x} to the range {@code [a, b]}: first capped at {@code b}, then raised to at least {@code a}. */
static int clamp(int x, int a, int b) {
    var capped = (x > b) ? b : x;
    return (capped < a) ? a : capped;
}
// ---------------- Grid helpers ----------------
/** Creates an H-by-W grid in which every cell is the empty letter placeholder '#'. */
static char[][] makeEmptyGrid() {
    var grid = new char[H][W];
    for (var row : grid) {
        Arrays.fill(row, '#');
    }
    return grid;
}
/**
 * Returns an independent copy of the first H rows of a grid; each row is copied to
 * length W, so later changes to either grid do not affect the other.
 */
static char[][] deepCopyGrid(char[][] g) {
    var copy = new char[H][];
    var r = 0;
    while (r < H) {
        copy[r] = Arrays.copyOf(g[r], W);
        r++;
    }
    return copy;
}
/** Renders the raw grid: one line per row, rows separated by '\n', no trailing newline. */
static String gridToString(char[][] g) {
    var rows = new String[H];
    for (var r = 0; r < H; r++) {
        rows[r] = new String(g[r]);
    }
    return String.join("\n", rows);
}
/**
 * Renders the grid for reading: like {@code gridToString}, but every arrow digit
 * is shown as a space so only letters and '#' placeholders remain visible.
 */
static String renderHuman(char[][] g) {
    var text = new StringBuilder(H * (W + 1));
    for (var r = 0; r < H; r++) {
        if (r != 0) {
            text.append('\n');
        }
        for (var c = 0; c < W; c++) {
            var cell = g[r][c];
            if (isDigit(cell)) {
                text.append(' ');
            } else {
                text.append(cell);
            }
        }
    }
    return text.toString();
}
// ---------------- Words / index ----------------
/** Minimal growable list of primitive ints, backed by an array that may be larger than the list. */
static final class IntList {
    int[] a = new int[8];
    int n = 0;

    /** Appends a value, growing the backing array when it is full. */
    void add(int v) {
        // Doubling a zero-length array (possible after replaceAll(new int[0])) would
        // stay at length 0 and make the store below fail, so never grow to less than 8.
        if (n >= a.length) a = Arrays.copyOf(a, Math.max(8, a.length * 2));
        a[n++] = v;
    }

    /** Replaces the contents with the given array, which is adopted without copying. */
    void replaceAll(int[] newData) {
        this.a = newData;
        this.n = newData.length;
    }

    /** Number of valid elements. */
    int size() { return n; }

    /** The backing array; only the first {@link #size()} entries are valid. */
    int[] data() { return a; } // note: may have extra capacity
}
/**
 * All dictionary words of one length, plus a positional index:
 * {@code pos[i][letter]} lists the indices (into {@code words}) of the words that
 * have that letter ('A' = 0 .. 'Z' = 25) at position {@code i}, in insertion order.
 */
static final class DictEntry {
    final ArrayList<String> words = new ArrayList<>();
    final IntList[][] pos; // pos[i][letter] -> indices (sorted by insertion)

    /** Creates an empty entry for words of length {@code L}. */
    DictEntry(int L) {
        pos = new IntList[L][26];
        for (var byLetter : pos) {
            Arrays.setAll(byLetter, letter -> new IntList());
        }
    }
}
/**
 * Scoring data for one dictionary word.
 *
 * @param word       the word
 * @param difficulty derived value; see the three-argument constructor
 * @param simpel     value taken from the word list (its third column)
 * @param score      value derived from the word list level (10 - level)
 * @param cross      derived value; see the three-argument constructor
 */
static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross) {
    /**
     * Derives {@code difficulty} and {@code cross} from the word and its score.
     *
     * <p>difficulty = (8 - length) * 30 + (10 - score) * 15, so longer words and higher
     * scores give a lower difficulty.
     * cross = crossability * 7 + score * 30 + length * 15, where crossability comes from
     * {@code ThemePoolBuilderLength.crossabilityScore}.
     */
    public WordDifficulty(String word, int simpel, int score) {
        this(
            word,
            ((8 - word.length()) * 30) + ((10 - score) * 15),
            simpel,
            score,
            (ThemePoolBuilderLength.crossabilityScore(word) * 7) + (score * 30) + (word.length() * 15)
        );
    }
}
/**
 * The loaded dictionary as built by loadWords.
 *
 * @param words     scoring data per upper-case word
 * @param index     per word length, the words of that length and their positional letter index
 * @param lenCounts per word length, the number of words of that length
 */
public static record Dict(Map<String, WordDifficulty> words,
HashMap<Integer, DictEntry> index,
HashMap<Integer, Integer> lenCounts) { }
/**
 * Loads the word list and builds the dictionary index.
 *
 * <p>Each non-blank line is a comma-separated row: word, level, simpel, (rest). A first
 * row whose word is "WOORD" is treated as a header. Words are upper-cased and kept only
 * when they consist of 2 to 8 letters A-Z and their score (10 - level) is at least 1.
 * A missing or non-numeric level leaves the score at {@code SIMPLICITY_DEFAULT_SCORE};
 * a missing or non-numeric simpel column leaves simpel at 0. This also makes the built-in
 * fallback list usable, which has a text hint in the third column.
 *
 * <p>When the file cannot be read, a small built-in word list is used instead.
 *
 * @param wordsPath path of the CSV word list
 * @return the dictionary: per-word scores, per-length index and per-length counts
 */
static Dict loadWords(String wordsPath) {
    String raw;
    try {
        raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8);
    } catch (IOException e) {
        raw = "WOORD,level_1_to_10,hint\nEU,2,hint\nUUR,2,hint\nAUTO,2,hint\nBOOM,2,hint\nHUIS,2,hint\nKAT,2,hint\nZEE,2,hint\nRODE,2,hint\nDRAAD,2,hint\nKENNIS,2,hint\nNETWERK,2,hint\nPAKTE,2,hint\n";
    }
    // Compiled once instead of once per line via String.matches.
    var validWord = java.util.regex.Pattern.compile("^[A-Z]{2,8}$");
    var map = new HashMap<String, WordDifficulty>();
    boolean first = true;
    for (var line : raw.split("\\R")) {
        if (line.isBlank()) continue;
        var parts = line.split(",", 4);
        var word = parts[0].trim();
        var isHeader = first && word.equalsIgnoreCase("WOORD");
        first = false;
        if (isHeader) continue;
        var s = word.toUpperCase(Locale.ROOT);
        if (!validWord.matcher(s).matches()) continue;
        int score = SIMPLICITY_DEFAULT_SCORE;
        int simpel = 0;
        // CSV has level 1-10. llmScores use 10-level.
        if (parts.length > 1) {
            try {
                score = 10 - Integer.parseInt(parts[1].trim());
            } catch (NumberFormatException ignored) {
                // keep the default score for rows without a numeric level
            }
        }
        if (parts.length > 2) {
            try {
                simpel = Integer.parseInt(parts[2].trim());
            } catch (NumberFormatException ignored) {
                // keep simpel = 0 for rows whose third column is not a number
            }
        }
        if (score >= 1) {
            map.put(s, new WordDifficulty(s, simpel, score));
        }
    }
    var words = new ArrayList<>(map.values());
    // Sort by the simpel column, ascending; fillMask favours low indices when picking words.
    words.sort(Comparator.comparingInt(wd -> wd.simpel));
    var index = new HashMap<Integer, DictEntry>();
    var lenCounts = new HashMap<Integer, Integer>();
    for (var w : words) {
        var L = w.word.length();
        lenCounts.merge(L, 1, Integer::sum);
        var entry = index.computeIfAbsent(L, DictEntry::new);
        var idx = entry.words.size();
        entry.words.add(w.word);
        for (var i = 0; i < L; i++) {
            var letter = w.word.charAt(i) - 'A';
            if (letter >= 0 && letter < 26) entry.pos[i][letter].add(idx);
        }
    }
    return new Dict(map, index, lenCounts);
}
/**
 * Intersects two ascending int sequences with a single merge pass.
 *
 * @param a    first array, ascending in its first {@code aLen} entries
 * @param aLen number of valid entries in {@code a}
 * @param b    second array, ascending in its first {@code bLen} entries
 * @param bLen number of valid entries in {@code b}
 * @return a new, exactly-sized array of the values present in both inputs, ascending
 */
static int[] intersectSorted(int[] a, int aLen, int[] b, int bLen) {
    var common = new int[Math.min(aLen, bLen)];
    var ai = 0;
    var bi = 0;
    var found = 0;
    while (ai < aLen && bi < bLen) {
        var order = Integer.compare(a[ai], b[bi]);
        if (order < 0) {
            ai++;
        } else if (order > 0) {
            bi++;
        } else {
            common[found++] = a[ai];
            ai++;
            bi++;
        }
    }
    return Arrays.copyOf(common, found);
}
/**
 * Result of matching a slot pattern against a dictionary entry.
 *
 * @param indices indices of the matching words, or null when the pattern had no fixed
 *                letters (then every word of that length is a candidate)
 * @param count   number of candidates
 */
static final record CandidateInfo(int[] indices, int count) {
}
/**
 * Finds the words of one length that agree with the letters already fixed in a pattern.
 *
 * @param entry   dictionary entry for the pattern's length
 * @param pattern one char per position; 0 (or any non-letter) means "not fixed"
 * @return the matching word indices and their count; when nothing is fixed, indices is
 *         null and count is the number of words in the entry
 */
static CandidateInfo candidateInfoForPattern(DictEntry entry, char[] pattern /* 0 means null */) {
    // Collect the index list of every fixed position.
    var constraints = new ArrayList<IntList>();
    var position = 0;
    for (var ch : pattern) {
        if (ch != 0 && isLetter(ch)) {
            constraints.add(entry.pos[position][ch - 'A']);
        }
        position++;
    }
    if (constraints.isEmpty()) {
        return new CandidateInfo(null, entry.words.size());
    }
    // Start from the first list (trimmed to its size) and narrow it with each further list.
    var seed = constraints.get(0);
    var matches = Arrays.copyOf(seed.data(), seed.size());
    for (var other : constraints.subList(1, constraints.size())) {
        matches = intersectSorted(matches, matches.length, other.data(), other.size());
        if (matches.length == 0) {
            break;
        }
    }
    return new CandidateInfo(matches, matches.length);
}
// ---------------- Slots ----------------
/**
 * A word slot: the clue cell it belongs to, its arrow digit and its letter cells in order.
 *
 * @param clueR row of the clue cell
 * @param clueC column of the clue cell
 * @param dir   the arrow digit character stored in the clue cell
 * @param rs    rows of the letter cells, first letter first
 * @param cs    columns of the letter cells, first letter first
 * @param len   number of letter cells
 */
static record Slot(int clueR, int clueC, char dir, int[] rs, int[] cs, int len) {
// Convenience constructor: the length is taken from the row array.
public Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) {
this(clueR, clueC, dir, rs, cs, rs.length);
}
// Identifier built from the clue position and direction, e.g. "3,4:2".
String key() { return clueR + "," + clueC + ":" + dir; }
}
/**
 * Finds the word slot started by every arrow cell in the grid.
 *
 * <p>For each arrow digit the first letter cell is located with {@code OFFSETS}; the slot
 * then follows {@code STEPS} for as long as the cells can hold letters, stopping after
 * {@code MAX_LEN + 1} cells so over-long runs stay detectable by their length. Arrows whose
 * first cell is outside the grid or not a letter cell produce no slot.
 *
 * @param grid the mask or partially filled grid
 * @return the slots in row-major order of their clue cells
 */
static ArrayList<Slot> extractSlots(char[][] grid) {
    var found = new ArrayList<Slot>();
    for (var r = 0; r < H; r++) {
        for (var c = 0; c < W; c++) {
            var arrow = grid[r][c];
            if (!isDigit(arrow)) continue;
            var dirIndex = arrow - '0';
            var offset = OFFSETS[dirIndex];
            var step = STEPS[dirIndex];
            var row = r + offset[0];
            var col = c + offset[1];
            var startsInside = row >= 0 && row < H && col >= 0 && col < W;
            if (!startsInside || !isLetterCell(grid[row][col])) continue;
            var rows = new int[MAX_LEN + 1];
            var cols = new int[MAX_LEN + 1];
            var count = 0;
            while (count <= MAX_LEN
                    && row >= 0 && row < H && col >= 0 && col < W
                    && isLetterCell(grid[row][col])) {
                rows[count] = row;
                cols[count] = col;
                count++;
                row += step[0];
                col += step[1];
            }
            found.add(new Slot(r, c, arrow, Arrays.copyOf(rows, count), Arrays.copyOf(cols, count)));
        }
    }
    return found;
}
/**
 * Checks whether an arrow placed at (r, c) would point at a run of at least
 * {@code MIN_LEN} letter cells. Counting stops at {@code MAX_LEN} cells.
 *
 * @param grid the grid to inspect
 * @param r    row of the clue cell
 * @param c    column of the clue cell
 * @param d    arrow digit character
 * @return true when the run is long enough for a word
 */
static boolean hasRoomForClue(char[][] grid, int r, int c, char d) {
    var dirIndex = d - '0';
    var offset = OFFSETS[dirIndex];
    var step = STEPS[dirIndex];
    var row = r + offset[0];
    var col = c + offset[1];
    var length = 0;
    for (; length < MAX_LEN; length++) {
        var inside = row >= 0 && row < H && col >= 0 && col < W;
        if (!inside || !isLetterCell(grid[row][col])) break;
        row += step[0];
        col += step[1];
    }
    return length >= MIN_LEN;
}
// ---------------- FAST mask fitness ----------------
/**
 * Scores a mask (arrow layout) with a penalty sum; lower is better (hillclimb only
 * accepts candidates with a strictly smaller value).
 *
 * Penalties are added for: deviation of the clue count from 25% of the cells, slots that
 * are too short or too long, slot lengths with no words in lenCounts, letter cells not
 * covered in both directions, clusters of adjacent clue cells, and letter cells walled in
 * on three or more sides. A mask without any slot gets a fixed, very large penalty.
 *
 * @param grid      the mask to score
 * @param lenCounts available word count per word length
 * @return the total penalty
 */
static long maskFitness(char[][] grid, HashMap<Integer, Integer> lenCounts) {
long penalty = 0;
// Clue count: 8 points per clue above or below the target.
var clueCount = 0;
for (var r = 0; r < H; r++) for (var c = 0; c < W; c++) if (isDigit(grid[r][c])) clueCount++;
var targetClues = (int) Math.round(W * H * 0.25); // ~18
penalty += 8L * Math.abs(clueCount - targetClues);
var slots = extractSlots(grid);
if (slots.isEmpty()) return 1_000_000_000L;
// Per cell: how many horizontal / vertical slots pass through it.
var covH = new int[H][W];
var covV = new int[H][W];
for (var s : slots) {
var horiz = (s.dir == HOR_ARROW_1 || s.dir == HOR_ARROW_2);
if (s.len < MIN_LEN) penalty += 8000;
if (s.len > MAX_LEN) penalty += 8000 + (long) (s.len - MAX_LEN) * 500L;
if (s.len >= MIN_LEN && s.len <= MAX_LEN) {
// A valid length is still useless when the dictionary has no word of that length.
if (!lenCounts.containsKey(s.len)) penalty += 12000;
}
for (var i = 0; i < s.len; i++) {
int r = s.rs[i], c = s.cs[i];
if (horiz) covH[r][c] += 1;
else covV[r][c] += 1;
}
}
// Coverage: uncovered cells cost most, cells crossed in both directions cost nothing.
for (var r = 0; r < H; r++)
for (var c = 0; c < W; c++) {
if (!isLetterCell(grid[r][c])) continue;
int h = covH[r][c], v = covV[r][c];
if (h == 0 && v == 0) penalty += 1500;
else if (h > 0 && v > 0) { /* ok */ } else if (h + v == 1) penalty += 200;
else penalty += 600;
}
// clue clustering (8-connected)
// Flood fill over clue cells with an explicit stack; cells are encoded as r * W + c.
var seen = new boolean[H][W];
var stack = new int[W * H];
int sp;
var nbrs8 = new int[][]{
{ -1, -1 }, { -1, 0 }, { -1, 1 },
{ 0, -1 }, { 0, 1 },
{ 1, -1 }, { 1, 0 }, { 1, 1 }
};
for (var r = 0; r < H; r++)
for (var c = 0; c < W; c++) {
if (!isDigit(grid[r][c]) || seen[r][c]) continue;
sp = 0;
stack[sp++] = r * W + c;
seen[r][c] = true;
var size = 0;
while (sp > 0) {
var p = stack[--sp];
int x = p / W, y = p % W;
size++;
for (var d : nbrs8) {
int nx = x + d[0], ny = y + d[1];
if (nx < 0 || nx >= H || ny < 0 || ny >= W) continue;
if (seen[nx][ny]) continue;
if (!isDigit(grid[nx][ny])) continue;
seen[nx][ny] = true;
stack[sp++] = nx * W + ny;
}
}
// Each clue beyond the first in a cluster costs 120.
if (size >= 2) penalty += (long) (size - 1) * 120L;
}
// dead-end-ish letter cell (3+ walls)
// A "wall" is a grid edge or a neighbouring cell that cannot hold a letter.
var nbrs4 = new int[][]{ { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
for (var r = 0; r < H; r++)
for (var c = 0; c < W; c++) {
if (!isLetterCell(grid[r][c])) continue;
var walls = 0;
for (var d : nbrs4) {
int rr = r + d[0], cc = c + d[1];
if (rr < 0 || rr >= H || cc < 0 || cc >= W) {
walls++;
continue;
}
if (!isLetterCell(grid[rr][cc])) walls++;
}
if (walls >= 3) penalty += 400;
}
return penalty;
}
// ---------------- Mask generation ----------------
/**
 * Builds a random mask by dropping arrows on an empty grid until about 25% of the cells
 * are clues, or 4000 placement attempts have been made. An arrow is kept only when it
 * has room for a word (see {@code hasRoomForClue}).
 *
 * @param rng random source
 * @return the new mask
 */
static char[][] randomMask(Rng rng) {
    var grid = makeEmptyGrid();
    var wanted = (int) Math.round(W * H * 0.25);
    var placed = 0;
    for (var attempts = 0; placed < wanted && attempts < 4000; attempts++) {
        var row = rng.randint(0, H - 1);
        var col = rng.randint(0, W - 1);
        if (isDigit(grid[row][col])) continue;
        var maxArrow = (col == 0) ? CLUE_SIZE : 4;
        var arrow = (char) ('0' + rng.randint(1, maxArrow));
        // Place tentatively: the room check looks at the grid with the arrow in it.
        grid[row][col] = arrow;
        if (hasRoomForClue(grid, row, col, arrow)) {
            placed++;
        } else {
            grid[row][col] = '#';
        }
    }
    return grid;
}
/**
 * Returns a mutated copy of a mask: four cells near a random centre are toggled.
 * A clue cell becomes an empty letter cell; any other cell receives a random arrow,
 * which is removed again when it has no room for a word.
 *
 * @param rng  random source
 * @param grid the mask to mutate; it is not modified
 * @return the mutated copy
 */
static char[][] mutate(Rng rng, char[][] grid) {
    var copy = deepCopyGrid(grid);
    var centerRow = rng.randint(0, H - 1);
    var centerCol = rng.randint(0, W - 1);
    for (var remaining = 4; remaining > 0; remaining--) {
        // Sum of two draws from [-2, 2]: offsets cluster around the centre.
        var rowJitter = rng.randint(-2, 2) + rng.randint(-2, 2);
        var row = clamp(centerRow + rowJitter, 0, H - 1);
        var colJitter = rng.randint(-2, 2) + rng.randint(-2, 2);
        var col = clamp(centerCol + colJitter, 0, W - 1);
        if (isDigit(copy[row][col])) {
            copy[row][col] = '#';
            continue;
        }
        var arrow = (char) ('0' + rng.randint(1, col == 0 ? CLUE_SIZE : 4));
        copy[row][col] = arrow;
        if (!hasRoomForClue(copy, row, col, arrow)) {
            copy[row][col] = '#';
        }
    }
    return copy;
}
/**
 * Combines two masks along a random line through the grid centre: cells on one side
 * come from {@code a}, cells on the other side from {@code b}. Arrows that no longer
 * have room for a word in the combined grid are removed afterwards.
 *
 * @param rng random source (one draw, for the line's angle)
 * @param a   first parent
 * @param b   second parent
 * @return the child mask
 */
static char[][] crossover(Rng rng, char[][] a, char[][] b) {
    var child = makeEmptyGrid();
    var midRow = (H - 1) / 2.0;
    var midCol = (W - 1) / 2.0;
    var angle = rng.nextFloat() * Math.PI;
    var normalRow = Math.cos(angle);
    var normalCol = Math.sin(angle);
    // Pass 1: pick each cell from the parent on its side of the line.
    for (var r = 0; r < H; r++) {
        for (var c = 0; c < W; c++) {
            var side = (r - midRow) * normalRow + (c - midCol) * normalCol;
            if (side >= 0) {
                child[r][c] = a[r][c];
            } else {
                child[r][c] = b[r][c];
            }
        }
    }
    // Pass 2: repair arrows whose run was cut by the other parent's cells.
    for (var r = 0; r < H; r++) {
        for (var c = 0; c < W; c++) {
            var cell = child[r][c];
            if (isDigit(cell) && !hasRoomForClue(child, r, c, cell)) {
                child[r][c] = '#';
            }
        }
    }
    return child;
}
/**
 * Improves a mask by repeated mutation, keeping a mutant only when its fitness is
 * strictly lower. Stops after {@code limit} consecutive mutations without improvement.
 *
 * @param rng       random source
 * @param start     starting mask; it is not modified
 * @param lenCounts available word count per word length, passed to {@code maskFitness}
 * @param limit     number of consecutive failures that ends the search
 * @return the best mask found
 */
static char[][] hillclimb(Rng rng, char[][] start, HashMap<Integer, Integer> lenCounts, int limit) {
    var current = deepCopyGrid(start);
    var currentScore = maskFitness(current, lenCounts);
    for (var misses = 0; misses < limit; ) {
        var candidate = mutate(rng, current);
        var score = maskFitness(candidate, lenCounts);
        if (score >= currentScore) {
            misses++;
            continue;
        }
        current = candidate;
        currentScore = score;
        misses = 0;
    }
    return current;
}
/** Returns the fraction (0.0 to 1.0) of grid cells that hold the same character in both grids. */
static double similarity(char[][] a, char[][] b) {
    var matches = 0;
    for (var r = 0; r < H; r++) {
        for (var c = 0; c < W; c++) {
            if (a[r][c] == b[r][c]) {
                matches++;
            }
        }
    }
    return matches / (double) (W * H);
}
/**
 * Evolves a mask with a small genetic algorithm and returns the fittest one.
 *
 * The initial population consists of hill-climbed random masks. Each generation creates
 * children by crossover of two randomly chosen parents followed by a short hill climb,
 * merges them into the population, sorts by fitness and keeps up to popSize masks that
 * are not more than 92% similar to one already kept. The loop ends early when the
 * current thread is interrupted.
 *
 * @param rng       random source
 * @param lenCounts available word count per word length, passed to maskFitness
 * @param popSize   population size
 * @param gens      number of generations
 * @param verbose   when true, progress is printed every 10 generations
 * @return the mask with the lowest fitness value
 */
static char[][] generateMask(Rng rng, HashMap<Integer, Integer> lenCounts, int popSize, int gens, boolean verbose) {
if (verbose) System.out.println("generateMask init pop: " + popSize);
var pop = new ArrayList<char[][]>();
for (var i = 0; i < popSize; i++) {
var g = randomMask(rng);
pop.add(hillclimb(rng, g, lenCounts, 180));
}
for (var gen = 0; gen < gens; gen++) {
if (Thread.currentThread().isInterrupted()) break;
var children = new ArrayList<char[][]>();
// Number of children per generation: 1.5 times the population, at least popSize.
var pairs = Math.max(popSize, (int) Math.floor(popSize * 1.5));
for (var k = 0; k < pairs; k++) {
var p1 = pop.get(rng.randint(0, pop.size() - 1));
var p2 = pop.get(rng.randint(0, pop.size() - 1));
var child = crossover(rng, p1, p2);
children.add(hillclimb(rng, child, lenCounts, 70));
}
pop.addAll(children);
pop.sort(Comparator.comparingLong(g -> maskFitness(g, lenCounts)));
// Survivor selection: best first, skipping near-duplicates to keep diversity.
var next = new ArrayList<char[][]>();
for (var cand : pop) {
if (next.size() >= popSize) break;
var ok = true;
for (var kept : next) {
if (similarity(cand, kept) > 0.92) {
ok = false;
break;
}
}
if (ok) next.add(cand);
}
pop = next;
if (verbose && gen % 10 == 0) {
var bestF = maskFitness(pop.get(0), lenCounts);
System.out.println(" gen " + gen + "/" + gens + " bestFitness=" + bestF);
}
}
pop.sort(Comparator.comparingLong(g -> maskFitness(g, lenCounts)));
return pop.get(0);
}
// ---------------- Fill (CSP) ----------------
/** Counters collected by fillMask while searching. */
public static final class FillStats {
// Number of backtrack() invocations.
public long nodes;
// Number of times the search had to give up on a branch.
public long backtracks;
// Wall-clock duration of the fill, in seconds.
public double seconds;
// Candidate count of the most recently chosen slot.
public int lastMRV;
}
/** Outcome of fillMask. */
public static final class FillResult {
// True when every slot received a word.
public boolean ok;
// The working grid; completely filled only when ok is true.
public char[][] grid;
// Assigned word per slot key (see Slot.key()).
public HashMap<String, String> clueMap;
public FillStats stats;
// Average WordDifficulty.difficulty of the assigned words; set only when ok is true.
public double simplicity;
}
/**
 * Undo log of one placeWord call: the cells that were written and their previous content.
 *
 * @param rs   rows of the written cells
 * @param cs   columns of the written cells
 * @param prev previous character of each written cell
 * @param n    number of valid entries in the arrays
 */
record Undo(int[] rs, int[] cs, char[] prev, int n) {
}
/**
 * Reads the current content of a slot as a pattern: placed letters are copied,
 * every other cell is represented by the char value 0.
 */
static char[] patternForSlot(char[][] grid, Slot s) {
    // A new char array is zero-filled, so only the letters need to be written.
    var pattern = new char[s.len];
    for (var i = 0; i < s.len; i++) {
        var cell = grid[s.rs[i]][s.cs[i]];
        if (isLetter(cell)) {
            pattern[i] = cell;
        }
    }
    return pattern;
}
/**
 * Tie-break score for slot selection: ten points per crossing with another slot
 * (cell usage count minus one, summed over the slot's cells) plus the slot length.
 */
static int slotScore(int[][] cellCount, Slot s) {
    var usage = 0;
    for (var i = 0; i < s.len; i++) {
        usage += cellCount[s.rs[i]][s.cs[i]];
    }
    // Subtracting len removes this slot's own contribution to every cell.
    var crossings = usage - s.len;
    return crossings * 10 + s.len;
}
/**
 * Writes a word into a slot.
 *
 * <p>Empty ('#') cells receive the word's letter and are recorded for undo; cells that
 * already hold the same character are left alone. On the first conflicting cell all
 * cells written so far are restored and the placement fails.
 *
 * @param grid the grid to modify
 * @param s    the target slot
 * @param w    the word; must have at least {@code s.len} characters
 * @return the undo log, or {@code null} when the word conflicts with existing letters
 */
static Undo placeWord(char[][] grid, Slot s, String w) {
    var touchedRows = new int[s.len];
    var touchedCols = new int[s.len];
    var previous = new char[s.len];
    var touched = 0;
    for (var i = 0; i < s.len; i++) {
        var row = s.rs[i];
        var col = s.cs[i];
        var existing = grid[row][col];
        var letter = w.charAt(i);
        if (existing != '#') {
            if (existing != letter) {
                // rollback immediate changes
                for (var j = 0; j < touched; j++) {
                    grid[touchedRows[j]][touchedCols[j]] = previous[j];
                }
                return null;
            }
            continue;
        }
        touchedRows[touched] = row;
        touchedCols[touched] = col;
        previous[touched] = existing;
        touched++;
        grid[row][col] = letter;
    }
    return new Undo(touchedRows, touchedCols, previous, touched);
}
/** Reverts a placement by restoring every cell recorded in the undo log. */
static void undoPlace(char[][] grid, Undo u) {
    var i = 0;
    while (i < u.n) {
        grid[u.rs[i]][u.cs[i]] = u.prev[i];
        i++;
    }
}
/**
 * Fills the slots of a mask with dictionary words using randomized backtracking.
 *
 * <p>Only slots with a length in {@code [MIN_LEN, MAX_LEN]} are filled. At each step the
 * unassigned slot with the fewest candidate words is chosen (ties go to the slot with
 * more crossings), and up to 2000 randomly drawn candidates are tried, biased towards
 * low indices. A word is used at most once per grid. The search stops when the time
 * limit is exceeded or the thread is interrupted.
 *
 * <p>Nothing is written to standard output unless {@code verbose} is true. The
 * carriage-return line clear after the search used to be printed unconditionally,
 * which polluted the output of silent (multi-threaded) attempts.
 *
 * @param rng         random source
 * @param mask        the arrow layout; it is copied, not modified
 * @param dictIndex   dictionary entries by word length
 * @param llmScores   scoring data per word, used to compute the result's simplicity
 * @param logEveryMs  minimum interval between progress updates, in milliseconds
 * @param timeLimitMs time budget in milliseconds; values {@code <= 0} disable the limit
 * @param verbose     whether to print progress
 * @return the fill result; {@code ok} tells whether every slot was filled
 */
static FillResult fillMask(Rng rng, char[][] mask, HashMap<Integer, DictEntry> dictIndex,
                           Map<String, WordDifficulty> llmScores,
                           int logEveryMs, int timeLimitMs, boolean verbose) {
    var grid = deepCopyGrid(mask);
    var allSlots = extractSlots(grid);
    var slots = new ArrayList<Slot>();
    for (var s : allSlots) if (s.len >= MIN_LEN && s.len <= MAX_LEN) slots.add(s);
    var used = new HashSet<String>();
    var assigned = new HashMap<String, String>();
    // How many slots pass through each cell; feeds the crossing-based tie-break.
    var cellCount = new int[H][W];
    for (var s : slots) for (var i = 0; i < s.len; i++) cellCount[s.rs[i]][s.cs[i]]++;
    var t0 = System.currentTimeMillis();
    final var lastLog = new java.util.concurrent.atomic.AtomicLong(t0);
    var stats = new FillStats();
    final var TOTAL = slots.size();
    final var BAR_LEN = 22;
    // Redraws the single-line progress bar, at most once per logEveryMs.
    Runnable renderProgress = () -> {
        if (!verbose) return;
        var now = System.currentTimeMillis();
        if ((now - lastLog.get()) < logEveryMs) return;
        lastLog.set(now);
        var done = assigned.size();
        var pct = (TOTAL == 0) ? 100 : (int) Math.floor((done / (double) TOTAL) * 100);
        var filled = Math.min(BAR_LEN, (int) Math.floor((pct / 100.0) * BAR_LEN));
        var bar = "[" + "#".repeat(filled) + "-".repeat(BAR_LEN - filled) + "]";
        var elapsed = String.format(Locale.ROOT, "%.1fs", (now - t0) / 1000.0);
        var msg = String.format(
            Locale.ROOT,
            "%s %d/%d slots | nodes=%d | backtracks=%d | mrv=%d | %s",
            bar, done, TOTAL, stats.nodes, stats.backtracks, stats.lastMRV, elapsed
        );
        System.out.print("\r" + padRight(msg, 120));
        System.out.flush();
    };
    // Result of slot selection: done = all slots assigned; slot == null (and not done) = dead end.
    class Pick {
        Slot slot;
        CandidateInfo info;
        boolean done;
    }
    // Minimum-remaining-values heuristic: pick the unassigned slot with the fewest candidates.
    java.util.function.Supplier<Pick> chooseMRV = () -> {
        Slot best = null;
        CandidateInfo bestInfo = null;
        for (var s : slots) {
            var k = s.key();
            if (assigned.containsKey(k)) continue;
            var entry = dictIndex.get(s.len);
            if (entry == null) {
                var p = new Pick();
                p.slot = null;
                p.info = null;
                p.done = false;
                return p;
            }
            var pat = patternForSlot(grid, s);
            var info = candidateInfoForPattern(entry, pat);
            if (info.count == 0) {
                // Some slot can no longer be filled: report a dead end immediately.
                var p = new Pick();
                p.slot = null;
                p.info = null;
                p.done = false;
                return p;
            }
            if (best == null
                || info.count < bestInfo.count
                || (info.count == bestInfo.count && slotScore(cellCount, s) > slotScore(cellCount, best))) {
                best = s;
                bestInfo = info;
                if (info.count <= 1) break;
            }
        }
        var p = new Pick();
        if (best == null) {
            p.slot = null;
            p.info = null;
            p.done = true;
        } else {
            p.slot = best;
            p.info = bestInfo;
            p.done = false;
        }
        return p;
    };
    final var MAX_TRIES_PER_SLOT = 2000;
    class Solver {
        boolean backtrack() {
            if (Thread.currentThread().isInterrupted()) return false;
            stats.nodes++;
            if (timeLimitMs > 0 && (System.currentTimeMillis() - t0) > timeLimitMs) return false;
            var pick = chooseMRV.get();
            if (pick.done) return true;
            if (pick.slot == null) {
                stats.backtracks++;
                return false;
            }
            stats.lastMRV = pick.info.count;
            renderProgress.run();
            var s = pick.slot;
            var k = s.key();
            var entry = dictIndex.get(s.len);
            var pat = patternForSlot(grid, s);
            // Places one word, recurses, and reverts everything when the recursion fails.
            java.util.function.Function<String, Boolean> tryWord = (String w) -> {
                if (w == null) return false;
                if (used.contains(w)) return false;
                for (var i = 0; i < pat.length; i++) {
                    if (pat[i] != 0 && pat[i] != w.charAt(i)) return false;
                }
                var undo = placeWord(grid, s, w);
                if (undo == null) return false;
                used.add(w);
                assigned.put(k, w);
                if (backtrack()) return true;
                assigned.remove(k);
                used.remove(w);
                undoPlace(grid, undo);
                return false;
            };
            if (pick.info.indices != null && pick.info.indices.length > 0) {
                var idxs = pick.info.indices;
                var L = idxs.length;
                var tries = Math.min(MAX_TRIES_PER_SLOT, L);
                // When picking words from sorted indices, we want to favor the beginning
                // but still have some randomness.
                for (var t = 0; t < tries; t++) {
                    // Bias strongly towards lower indices using r^3
                    double r = rng.nextFloat();
                    int idxInArray = (int) (r * r * r * L);
                    var idx = idxs[idxInArray];
                    var w = entry.words.get(idx);
                    if (tryWord.apply(w)) return true;
                }
                stats.backtracks++;
                return false;
            }
            // No fixed letters in the slot: draw from all words of this length.
            var N = entry.words.size();
            if (N == 0) {
                stats.backtracks++;
                return false;
            }
            var tries = Math.min(MAX_TRIES_PER_SLOT, N);
            for (var t = 0; t < tries; t++) {
                double r = rng.nextFloat();
                int idxInArray = (int) (r * r * r * N);
                var w = entry.words.get(idxInArray);
                if (tryWord.apply(w)) return true;
            }
            stats.backtracks++;
            return false;
        }
    }
    // initial render (same feel)
    renderProgress.run();
    var ok = new Solver().backtrack();
    // Clear the in-place progress line; only needed (and only wanted) when it was drawn.
    if (verbose) {
        System.out.print("\r" + padRight("", 120) + "\r");
        System.out.flush();
    }
    var res = new FillResult();
    res.ok = ok;
    res.grid = grid;
    res.clueMap = assigned;
    stats.seconds = (System.currentTimeMillis() - t0) / 1000.0;
    res.stats = stats;
    if (ok) {
        // Simplicity is the mean "difficulty" value of the words that were placed.
        double totalSimplicity = 0;
        for (var w : assigned.values()) {
            totalSimplicity += llmScores.get(w).difficulty;
        }
        res.simplicity = assigned.isEmpty() ? 0 : totalSimplicity / assigned.size();
    }
    // print a final progress line
    if (verbose) {
        System.out.println(
            String.format(Locale.ROOT,
                "[######################] %d/%d slots | nodes=%d | backtracks=%d | mrv=%d | %.1fs",
                assigned.size(), TOTAL, stats.nodes, stats.backtracks, stats.lastMRV, stats.seconds
            )
        );
    }
    return res;
}
/**
 * Right-pads {@code s} with spaces until it is {@code n} characters wide.
 * A string that is already at least {@code n} long is returned unchanged.
 */
static String padRight(String s, int n) {
    var width = s.length();
    if (width < n) {
        return s.concat(" ".repeat(n - width));
    }
    return s;
}
// ---------------- Top-level generatePuzzle ----------------
/** Result bundle returned by generatePuzzle: the loaded dictionary, the generated mask and the fill result for that mask. */
public record PuzzleResult(Dict dict, char[][] mask, FillResult filled) { }
/**
 * Loads the word list and makes up to {@code opts.tries} attempts to generate a mask and fill it.
 *
 * <p>With {@code opts.threads > 1} every attempt runs as its own task (seeded with
 * {@code opts.seed + attempt}) and the first task that succeeds wins. Otherwise the
 * attempts run sequentially on one shared generator seeded with {@code opts.seed}.
 * An attempt is accepted when the fill succeeded and, if {@code opts.minSimplicity > 0},
 * its simplicity is at least that value.
 *
 * @param opts generation options
 * @return the first accepted result, or {@code null} when no attempt was accepted
 */
public static PuzzleResult generatePuzzle(Main.Opts opts) {
    var tLoad0 = System.nanoTime();
    var dict = loadWords(opts.wordsPath);
    var tLoad1 = System.nanoTime();
    System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words%n", (tLoad1 - tLoad0) / 1e9, dict.words.size());
    if (opts.threads > 1) {
        System.out.println("Running in multi-threaded mode with " + opts.threads + " threads...");
        var tasks = new ArrayList<Callable<PuzzleResult>>();
        for (int i = 1; i <= opts.tries; i++) {
            final int attempt = i;
            tasks.add(() -> {
                var threadRng = new Rng(opts.seed + attempt);
                var mask = generateMask(threadRng, dict.lenCounts, opts.pop, opts.gens, false);
                var filled = fillMask(threadRng, mask, dict.index, dict.words, 200, 60000, false);
                if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
                    System.out.println("\nSolution found on attempt " + attempt);
                    return new PuzzleResult(dict, mask, filled);
                }
                // invokeAny treats an exception as "this task failed, try another".
                throw new RuntimeException("No solution found in attempt " + attempt);
            });
        }
        // invokeAny rejects an empty task list with IllegalArgumentException; with
        // tries <= 0 behave like the sequential path and report "no result".
        if (tasks.isEmpty()) {
            return null;
        }
        var executor = Executors.newFixedThreadPool(opts.threads);
        try {
            return executor.invokeAny(tasks);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        } catch (ExecutionException e) {
            // every attempt failed; fall through and return null
        } finally {
            // cancels the attempts that are still running once a winner is known
            executor.shutdownNow();
        }
        return null;
    }
    var rng = new Rng(opts.seed);
    for (var attempt = 1; attempt <= opts.tries; attempt++) {
        System.out.println("\nAttempt " + attempt + "/" + opts.tries);
        var tMask0 = System.nanoTime();
        var mask = generateMask(rng, dict.lenCounts, opts.pop, opts.gens, true);
        var tMask1 = System.nanoTime();
        System.out.printf(Locale.ROOT, "MASK: %.3fs%n", (tMask1 - tMask0) / 1e9);
        var tFill0 = System.nanoTime();
        var filled = fillMask(rng, mask, dict.index, dict.words, 200, 60000, true);
        var tFill1 = System.nanoTime();
        System.out.printf(Locale.ROOT, "FILL: %.3fms | Simplicity: %.2f%n", (tFill1 - tFill0) / 1e6, filled.simplicity);
        if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
            return new PuzzleResult(dict, mask, filled);
        }
        if (filled.ok) {
            System.out.printf(Locale.ROOT, "Puzzle simplicity %.2f is below min %.2f, retrying...%n",
                    filled.simplicity, opts.minSimplicity);
        }
    }
    return null;
}
}

View File

@@ -0,0 +1,26 @@
package puzzle;
import puzzle.ThemePoolBuilderLength.Lexicon;
import java.nio.file.*;
import java.util.*;
/**
 * Manual smoke test for {@link ThemePoolBuilderLength#writeWordList}: writes three words
 * with distinct scores and checks that they come back ordered by score, highest first.
 * Exits with status 1 on failure.
 */
public class TestSort {
    public static void main(String[] args) throws Exception {
        // writeWordList drops every word whose score is below MIN_SIMPLICITY, so the
        // fixture scores are offset to stay above that threshold; otherwise the
        // output file is empty and the check below cannot succeed.
        int base = ThemePoolBuilderLength.MIN_SIMPLICITY;
        Lexicon lex = new Lexicon(
                Arrays.asList("A", "B", "C"),
                new HashMap<>(),
                new int[]{base + 10, base + 30, base + 20},
                new BitSet[9]
        );
        BitSet bs = new BitSet();
        bs.set(0, 3);
        // Use a temp file and remove it afterwards instead of leaving
        // test_pool.txt behind in the working directory.
        Path p = Files.createTempFile("test_pool", ".txt");
        List<String> lines;
        try {
            ThemePoolBuilderLength.writeWordList(p, lex, bs);
            lines = Files.readAllLines(p);
        } finally {
            Files.deleteIfExists(p);
        }
        System.out.println("Sorted words: " + lines);
        if (lines.equals(List.of("B", "C", "A"))) {
            System.out.println("SUCCESS");
        } else {
            System.out.println("FAILURE");
            System.exit(1);
        }
    }
}

View File

@@ -0,0 +1,903 @@
package puzzle;
import org.w3c.dom.*;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.text.Normalizer;
import java.time.LocalDate;
import java.util.*;
public class ThemePoolBuilderLength {
// RSS feeds fetched when --feeds is not given (initial value of Opts.feeds).
private static final List<String> DEFAULT_FEEDS = List.of(
"https://feeds.nos.nl/nosnieuwsalgemeen",
"https://feeds.nos.nl/nosnieuwstech");
// JDBC connection settings passed to DriverManager.getConnection in main.
// NOTE(review): host and credentials are hard-coded in source; consider reading them
// from the environment or a config file instead.
static final String url = "jdbc:postgresql://192.168.1.159:5432/postgres";
static final String user = "puzzle";
static final String pass = "heel-goed-wachtwoord";
// Short words and abbreviations that main adds to the pool when they exist in the lexicon.
// NOTE: normalizeDutchToken strips non A-Z. Keep entries 2-8 after normalization.
private static final List<String> DEFAULT_SHORTS = List.of(
"EU", "VS", "UK", "NAVO", "NOS", "NS", "ANP", "VN", "NPO", "RTL",
"UUR", "MIN", "TV", "GPS", "AI", "IT", "CPU", "GPU",
"ING", "KPN", "KVK", "RIVM", "GGD", "AIVD", "MIVD", "CEO", "CFO", "HR",
"NL", "BE", "BRU", "EUR", "EURO", "WET", "ART", "BTW", "DI", "MA",
"PVV", "VVD", "CDA", "FNV",
"EN", "IN", "OP", "OM", "TE", "ER", "DE", "HET", "EEN", "VAN", "MET", "NOG", "OOK", "MAAR", "WEL", "NIET",
"HOE", "ALS",
"ZO", "DO", "WO", "VR", "MO", "WA", "WE", "TAAL",
"LAND", "GEMEENTE", "STAAT", "BUREAU", "HUIS", "SCHOOL", "STR", "BAAN",
"WERK", "KLUS",
"FONDS", "RAAD", "CONGRESS", "GROEP", "STRAAT", "BRUG", "PARK",
"BUURT",
"BOUW", "HOTEL", "CAFE", "BAR",
"BIJBAAN", "STUDENT", "DOCENT",
"WINKEL", "MARKT", "KIOSK", "AUTO", "MOBILE", "FIETS", "SCOOTER",
// abbreviations
"DHR", "MEVR", "DR", "ST", "CA", "IVM", "MBT", "TAV", "TOV", "DWZ", "MAW", "OA", "TM",
"ANWB", "BRP", "CBS",
"AL", "NU", "TO", "NA", "BIJ", "TOT", "DAN", "WAT", "DAT",
"IK", "JE", "WE", "WIJ", "JIJ", "ZIJ", "HIJ", "HEN", "ONS", "JOU",
// Roman numerals (2-8)
"II", "III", "IV", "VI", "VII", "VIII", "IX",
"XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX", "XX"
);
// User-Agent header value sent with every curl request made by this class.
private static final String BROWSER_UA =
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36";
// MIN_SIMPLICITY: writeWordList skips words whose score is below this value.
// MAX_WORD_LENGTH: decides the default of Opts.minLen8 and appears in main's log output.
static int MIN_SIMPLICITY = 520,
MAX_WORD_LENGTH = 7;
/** Mutable run options; the defaults below can be overridden by parseArgs. */
static final class Opts {
// Base URL of the chat/model API; apiUrl appends the request path to it.
String endpoint = "https://jarvis-lan.appmodel.nl/api/ollama/";
// RSS feed URLs fetched by main.
List<String> feeds = new ArrayList<>(DEFAULT_FEEDS);
// Output directory: the OUT_DIR environment variable when set, otherwise /data/puzzle.
String outDir = System.getenv("OUT_DIR") != null ? System.getenv("OUT_DIR") : "/data/puzzle";
// Number of top-scoring lexicon words taken by buildBridgeBitmap.
int bridgeN = 30000;
// Word counts requested in the llmThemeWords prompt (theme words, then related words).
int themeN = 800;
int relatedN = 2200;
// Maximum number of items read from each feed.
int rssItemsPerFeed = 10;
// Model id; main only queries the models endpoint when this is null.
String model = "/models/Hadiseh-Mhd/Mixtral-8x7B-Instruct-v0.1-Q4_K_M-GGUF/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf";
// Passed to curl as --max-time.
int timeoutSeconds = 180;
// Number of attempts made by curlGetJson / curlPostJson.
int retries = 2;
// Minimum number of pool words per word length, applied by enforceMinima (0 or less disables).
int minLen2 = 1000;
int minLen3 = 1000;
int minLen4 = 1000;
int minLen5 = 1000; // set if you also want to force 5-letter words, etc.
int minLen6 = 1000;
int minLen7 = 1000;
// Only requested when 8-letter words are allowed by MAX_WORD_LENGTH.
int minLen8 = MAX_WORD_LENGTH >= 8 ? 1000 : 0;
}
/**
 * Command-line entry point: builds the word pool and writes rss.txt, theme.txt,
 * report.txt and pool.txt into the output directory.
 *
 * Steps, in order: parse the arguments, load the lexicon over JDBC, fetch the RSS
 * feeds, resolve the model id, collect theme words, union the theme/bridge/short
 * bitmaps into the pool, top the pool up to the per-length minima, then write the
 * report and the pool list.
 */
public static void main(String[] args) throws Exception {
var o = parseArgs(args);
var outDir = Path.of(o.outDir);
Files.createDirectories(outDir);
System.out.println("Loading lexicon...");
Lexicon lex;
// Load the PostgreSQL driver class explicitly before opening the connection.
Class.forName("org.postgresql.Driver");
try (var c = DriverManager.getConnection(url, user, pass);) {
lex = loadLexicon(c);
}
System.out.println("Master words (2-" + MAX_WORD_LENGTH + ", A-Z): " + lex.words.size());
// RSS via curl (browser-like)
var all = new ArrayList<RssItem>();
for (var feed : o.feeds) {
var f = feed.trim();
if (f.isEmpty()) continue;
System.out.println("Fetching RSS: " + f);
all.addAll(fetchRssViaCurlBrowser(f, o.rssItemsPerFeed, o.timeoutSeconds));
}
// Flatten all items into a numbered text list (title, then description when present).
var rssText = new StringBuilder();
var k = 0;
for (var it : all) {
k++;
rssText.append(k).append(". ").append(it.title).append("\n");
if (!it.desc.isBlank()) rssText.append(" ").append(it.desc).append("\n");
}
Files.writeString(outDir.resolve("rss.txt"), rssText.toString(), StandardCharsets.UTF_8);
// LM Studio via curl
// The model id is only looked up remotely when o.model is null.
var modelId = o.model;
if (modelId == null) {
var modelsUrl = apiUrl(o.endpoint, "/models");
System.out.println("Ollama GET: " + modelsUrl);
var modelsJson = curlGetJson(o, modelsUrl);
modelId = pickModelId(modelsJson);
if (modelId == null) {
throw new IOException("Could not auto-pick model id from /v1/models. Use --model <id>.\n--- /models ---\n" + modelsJson);
}
}
System.out.println("Using model: " + modelId);
System.out.println("Generating theme words via LM Studio...");
// NOTE(review): the llmThemeWords call is commented out, so llmWords is always
// empty here and theme.txt is written without any words.
var llmWords = List.<String>of();//llmThemeWords(o, modelId, rssText.toString());
// Keep only normalized words that exist in the lexicon, in first-seen order.
var themeKept = new LinkedHashSet<String>();
for (var wRaw : llmWords) {
var w = normalizeDutchToken(wRaw);
if (w == null) continue;
if (lex.idOf.containsKey(w)) themeKept.add(w);
}
Files.write(outDir.resolve("theme.txt"), themeKept, StandardCharsets.UTF_8);
// BitSets
var themeBs = bitmapFromWords(lex, themeKept);
var bridgeBs = buildBridgeBitmap(lex, o.bridgeN);
var shortBs = bitmapFromWords(lex, DEFAULT_SHORTS);
// The pool is the union of the three bitmaps.
var pool = new BitSet(lex.words.size());
pool.or(themeBs);
pool.or(bridgeBs);
pool.or(shortBs);
// ---- NEW: enforce minimum counts per length ----
enforceMinima(o, lex, pool);
// Report
var themeCounts = countsPerLen(lex, themeBs);
var poolCounts = countsPerLen(lex, pool);
var report = """
Date: %s
Feeds: %s
Model: %s
Master size: %d
Theme kept (in master): %d
Bridge size: %d
Shorts kept: %d
Pool total: %d
Enforced minima:
2: %d
3: %d
4: %d
5: %d
6: %d
7: %d
8: %d
Counts per length (theme):
%s
Counts per length (pool):
%s
""".formatted(
LocalDate.now(),
String.join(", ", o.feeds),
modelId,
lex.words.size(),
themeBs.cardinality(),
bridgeBs.cardinality(),
shortBs.cardinality(),
pool.cardinality(),
o.minLen2, o.minLen3, o.minLen4, o.minLen5, o.minLen6, o.minLen7, o.minLen8,
mapToLines(themeCounts),
mapToLines(poolCounts)
);
Files.writeString(outDir.resolve("report.txt"), report, StandardCharsets.UTF_8);
System.out.println(report);
// Output pool list
var poolFile = outDir.resolve("pool.txt");
writeWordList(poolFile, lex, pool);
System.out.println("Wrote: " + poolFile.toAbsolutePath());
}
/**
 * Parses the command-line flags into an {@link Opts} instance. Every flag except
 * {@code -h}/{@code --help} takes exactly one value, which is the next argument.
 * {@code --help} prints the usage text and exits the JVM with status 0.
 *
 * @param args raw command-line arguments
 * @return the options, with defaults for every flag that was not given
 * @throws IllegalArgumentException for an unknown flag, a flag without a value, or
 *         (as {@link NumberFormatException}) a non-numeric value for a numeric flag
 */
static Opts parseArgs(String[] args) {
    var o = new Opts();
    for (var i = 0; i < args.length; i++) {
        var a = args[i];
        // Each value-taking case consumes args[i + 1] via ++i; flagValue fails
        // with a clear message when the flag is the last argument.
        switch (a) {
            case "--endpoint" -> o.endpoint = flagValue(args, ++i, a);
            case "--feeds" -> o.feeds = Arrays.asList(flagValue(args, ++i, a).split(","));
            case "--out" -> o.outDir = flagValue(args, ++i, a);
            case "--bridge" -> o.bridgeN = Integer.parseInt(flagValue(args, ++i, a));
            case "--theme" -> o.themeN = Integer.parseInt(flagValue(args, ++i, a));
            case "--related" -> o.relatedN = Integer.parseInt(flagValue(args, ++i, a));
            case "--items" -> o.rssItemsPerFeed = Integer.parseInt(flagValue(args, ++i, a));
            case "--model" -> o.model = flagValue(args, ++i, a);
            case "--timeout" -> o.timeoutSeconds = Integer.parseInt(flagValue(args, ++i, a));
            case "--retries" -> o.retries = Integer.parseInt(flagValue(args, ++i, a));
            // minima per word length
            case "--min2" -> o.minLen2 = Integer.parseInt(flagValue(args, ++i, a));
            case "--min3" -> o.minLen3 = Integer.parseInt(flagValue(args, ++i, a));
            case "--min4" -> o.minLen4 = Integer.parseInt(flagValue(args, ++i, a));
            case "--min5" -> o.minLen5 = Integer.parseInt(flagValue(args, ++i, a));
            case "--min6" -> o.minLen6 = Integer.parseInt(flagValue(args, ++i, a));
            case "--min7" -> o.minLen7 = Integer.parseInt(flagValue(args, ++i, a));
            case "--min8" -> o.minLen8 = Integer.parseInt(flagValue(args, ++i, a));
            case "-h", "--help" -> {
                // The usage line names this class and omits --words, which is not an accepted flag.
                System.out.println("""
                        Usage:
                          java puzzle.ThemePoolBuilderLength [options]
                        Options:
                          --endpoint http://HOST:1234/v1 (LM Studio)
                          --feeds url1,url2
                          --out ./out
                          --bridge 5000
                          --theme 300
                          --related 1200
                          --items 20 (per feed)
                          --model <id> (recommended; skips /v1/models)
                          --timeout 60 (seconds)
                          --retries 4
                          # enforce minima per length in final pool
                          --min2 4000
                          --min3 7000
                          --min4 9000
                          --min5 0
                          --min6 0
                          --min7 0
                          --min8 0
                        """);
                System.exit(0);
            }
            default -> throw new IllegalArgumentException("Unknown arg: " + a);
        }
    }
    return o;
}

/** Returns {@code args[valueIndex]}, or fails when {@code flag} was the last argument. */
private static String flagValue(String[] args, int valueIndex, String flag) {
    if (valueIndex >= args.length) {
        throw new IllegalArgumentException("Missing value for " + flag);
    }
    return args[valueIndex];
}
/** Tells whether every character of {@code s} is an uppercase ASCII letter A-Z; the empty string qualifies. */
static boolean isAZ(String s) {
    return s.chars().allMatch(c -> c >= 'A' && c <= 'Z');
}
// Compiled once: this method runs for every lexicon row and every candidate word.
private static final java.util.regex.Pattern COMBINING_MARKS = java.util.regex.Pattern.compile("\\p{M}+");
private static final java.util.regex.Pattern NOT_UPPER_AZ = java.util.regex.Pattern.compile("[^A-Z]");

/**
 * Normalizes a raw token to an uppercase A-Z word: trims it, removes diacritics
 * (NFD decomposition followed by dropping combining marks), upper-cases it and
 * strips every remaining character that is not A-Z.
 *
 * @param raw token to normalize, may be {@code null}
 * @return the normalized word, or {@code null} when the input is null/blank or the
 *         result is shorter than 2 or longer than 8 letters
 */
static String normalizeDutchToken(String raw) {
    if (raw == null) return null;
    var s = raw.trim();
    if (s.isEmpty()) return null;
    s = COMBINING_MARKS.matcher(Normalizer.normalize(s, Normalizer.Form.NFD)).replaceAll("");
    // After this strip only A-Z can remain, so no separate A-Z validation is needed.
    s = NOT_UPPER_AZ.matcher(s.toUpperCase(Locale.ROOT)).replaceAll("");
    if (s.length() < 2 || s.length() > 8) return null;
    return s;
}
/**
 * Converts an HTML fragment to plain text: tags become spaces, a small set of
 * character entities is decoded, and whitespace runs collapse to single spaces.
 *
 * @param s HTML fragment, may be {@code null}
 * @return the plain text, or an empty string for {@code null}
 */
static String stripHtml(String s) {
    if (s == null) return "";
    var x = s.replaceAll("<[^>]+>", " ");
    // "&amp;" is decoded last so that an escaped entity such as "&amp;lt;" yields
    // the literal text "&lt;" instead of being decoded a second time.
    x = x.replace("&nbsp;", " ")
            .replace("&quot;", "\"")
            .replace("&#39;", "'")
            .replace("&apos;", "'")
            .replace("&lt;", "<")
            .replace("&gt;", ">")
            .replace("&amp;", "&");
    return x.replaceAll("\\s+", " ").trim();
}
/** Points awarded per letter by {@link #crossabilityScore}; letters missing from the map count as 2. */
static final Map<Character, Integer> LETTER_WEIGHT = Map.ofEntries(
        Map.entry('E', 10),
        Map.entry('N', 9), Map.entry('A', 9),
        Map.entry('R', 8), Map.entry('I', 8),
        Map.entry('O', 7), Map.entry('S', 7), Map.entry('T', 7),
        Map.entry('D', 6), Map.entry('L', 6),
        Map.entry('K', 5), Map.entry('M', 5), Map.entry('U', 5),
        Map.entry('P', 4), Map.entry('G', 4), Map.entry('H', 4), Map.entry('V', 4),
        Map.entry('B', 3), Map.entry('W', 3),
        Map.entry('C', 2), Map.entry('F', 2), Map.entry('Z', 2),
        Map.entry('J', 1), Map.entry('Y', 1),
        Map.entry('Q', 0), Map.entry('X', 0)
);

/** Tells whether {@code ch} is one of the uppercase vowels A, E, I, O, U. */
static boolean isVowel(char ch) {
    return "AEIOU".indexOf(ch) >= 0;
}

/**
 * Scores a word by summing its letter weights, then adds 8 when the vowel share
 * lies in [0.35, 0.65], subtracts 6 when the word contains Q or X, and subtracts
 * 2 when it contains Y or J.
 */
static int crossabilityScore(String w) {
    var total = 0;
    var vowelCount = 0;
    for (char letter : w.toCharArray()) {
        total += LETTER_WEIGHT.getOrDefault(letter, 2);
        if (isVowel(letter)) {
            vowelCount++;
        }
    }
    var vowelShare = vowelCount / (double) w.length();
    var balanced = vowelShare >= 0.35 && vowelShare <= 0.65;
    if (balanced) {
        total += 8;
    }
    if (w.contains("Q") || w.contains("X")) {
        total -= 6;
    }
    if (w.contains("Y") || w.contains("J")) {
        total -= 2;
    }
    return total;
}
/**
 * In-memory word list in which every word has a dense integer id (its index in {@code words}).
 *
 * @param words id -> word
 * @param idOf word -> id
 * @param score id -> ranking score; the pool builders sort on it, highest first
 * @param byLen byLen[L] for L 0..8: the ids of all words of length L
 */
record Lexicon(List<String> words, Map<String, Integer> idOf, int[] score, BitSet[] byLen) { }
/**
 * Loads the lexicon from the PostgreSQL relation {@code export_real_words_with_hints}.
 * Selected columns: {@code woord}, {@code 10 - level_1_to_10} and {@code hint} (unused).
 *
 * <ul>
 *   <li>Only words of at most {@code MAX_WORD_LENGTH} characters are queried.</li>
 *   <li>Words are normalized via {@link #normalizeDutchToken}; rows that normalize
 *       to {@code null} are skipped.</li>
 *   <li>Duplicates (after normalization) are dropped; the first row wins, and rows
 *       arrive ordered by {@code level_1_to_10} ascending.</li>
 *   <li>The second column is used as the level score, falling back to 5 when it is
 *       NULL.</li>
 * </ul>
 *
 * @param conn open database connection; not closed by this method
 * @return the loaded lexicon
 * @throws SQLException declared for callers; note that query failures are currently
 *         rethrown wrapped in a {@link RuntimeException}
 */
static Lexicon loadLexicon(Connection conn) throws SQLException {
    var out = new ArrayList<String>(200_000);
    var idOf = new HashMap<String, Integer>(400_000);
    // Level per normalized word, kept so the scores can be computed after loading.
    var levelOf = new HashMap<String, Integer>(400_000);
    // The length limit is bound from MAX_WORD_LENGTH so the query cannot drift from the constant.
    final var sql = """
            SELECT woord, 10-level_1_to_10, hint
            FROM export_real_words_with_hints
            WHERE length(woord) <= ?
            ORDER BY level_1_to_10 ASC
            """;
    try (var ps = conn.prepareStatement(sql)) {
        ps.setInt(1, MAX_WORD_LENGTH);
        try (var rs = ps.executeQuery()) {
            while (rs.next()) {
                var rawWord = rs.getString(1);
                // The driver may map the computed column to Short/Integer/Long/BigDecimal
                // depending on the column type, so accept any Number instead of casting to Integer.
                var lvlObj = rs.getObject(2); // nullable
                var w = normalizeDutchToken(rawWord);
                if (w == null) continue;
                if (idOf.containsKey(w)) continue;
                idOf.put(w, out.size());
                out.add(w);
                var lvl = (lvlObj instanceof Number num) ? num.intValue() : 5;
                levelOf.put(w, lvl);
            }
        }
    } catch (SQLException e) {
        // Kept as in the original: query failures surface as an unchecked exception.
        throw new RuntimeException(e);
    }
    var n = out.size();
    var score = new int[n];
    var byLen = new BitSet[9];
    for (var L = 0; L <= 8; L++) byLen[L] = new BitSet(n);
    for (var i = 0; i < n; i++) {
        var w = out.get(i);
        var crossScore = crossabilityScore(w);
        var lScore = levelOf.getOrDefault(w, 5);
        // Simple words (high lScore) and long words rank first:
        // lScore has weight 100 and word length has weight 40; the crossability
        // score only breaks ties between words of similar level and length.
        score[i] = crossScore + (lScore * 100) + (w.length() * 40);
        byLen[w.length()].set(i);
    }
    return new Lexicon(out, idOf, score, byLen);
}
// ---------------- RSS via curl (browser-like) ----------------
/** One RSS item; fetchRssViaCurlBrowser stores both fields after running them through stripHtml (never null there). */
record RssItem(String title, String desc) { }
/**
 * Returns the text content of the first descendant element named {@code tag},
 * or {@code null} when {@code parent} has no such descendant.
 */
static String textOfFirst(Element parent, String tag) {
    var matches = parent.getElementsByTagName(tag);
    return matches.getLength() > 0 ? matches.item(0).getTextContent() : null;
}
/**
 * Downloads an RSS feed with {@code curl}, sending browser-like request headers, and
 * returns the first {@code limit} items with HTML stripped from title and description.
 *
 * @param url feed URL
 * @param limit maximum number of items to return
 * @param timeoutSeconds value for curl's {@code --max-time}
 * @return the parsed items, in document order
 * @throws IOException when curl exits with a non-zero status
 * @throws Exception when the process cannot be started or the XML cannot be parsed
 */
static List<RssItem> fetchRssViaCurlBrowser(String url, int limit, int timeoutSeconds) throws Exception {
    var cmd = new ArrayList<>(List.of(
            "curl", "-fsSL", "-L", "--compressed",
            "--connect-timeout", "10",
            "--max-time", String.valueOf(timeoutSeconds),
            "--retry", "5", "--retry-all-errors", "--retry-delay", "1",
            "-H", "User-Agent: " + BROWSER_UA,
            "-H", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "-H", "Accept-Language: nl-NL,nl;q=0.9,en;q=0.7",
            "-H", "Cache-Control: no-cache",
            "-H", "Pragma: no-cache",
            "-H", "Sec-Fetch-Dest: document",
            "-H", "Sec-Fetch-Mode: navigate",
            "-H", "Sec-Fetch-Site: none",
            "-H", "Sec-Fetch-User: ?1"));
    cmd.add(url);
    // stderr is merged into stdout so that curl's error text ends up in the exception message.
    var p = new ProcessBuilder(cmd)
            .redirectErrorStream(true)
            .start();
    var bytes = p.getInputStream().readAllBytes();
    var code = p.waitFor();
    if (code != 0) {
        throw new IOException("curl RSS failed (" + code + ") url=" + url + " output=" +
                new String(bytes, StandardCharsets.UTF_8));
    }
    try (InputStream is = new ByteArrayInputStream(bytes)) {
        var dbf = DocumentBuilderFactory.newInstance();
        // The feed is remote, untrusted XML: never resolve external entities or
        // external DTDs (XXE), and keep the parser's processing limits enabled.
        dbf.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
        dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
        dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        dbf.setXIncludeAware(false);
        var doc = dbf.newDocumentBuilder().parse(is);
        var items = doc.getElementsByTagName("item");
        var out = new ArrayList<RssItem>();
        for (var i = 0; i < items.getLength() && out.size() < limit; i++) {
            var item = (Element) items.item(i);
            var title = textOfFirst(item, "title");
            var desc = textOfFirst(item, "description");
            if (title == null) title = "";
            if (desc == null) desc = "";
            out.add(new RssItem(stripHtml(title), stripHtml(desc)));
        }
        return out;
    }
}
// ---------------- LM Studio (OpenAI-compatible) ----------------
/**
 * Builds the URL of an OpenAI-compatible API route. The endpoint may be given with or
 * without a trailing {@code /} or {@code /v1}, and the path with or without the
 * {@code /v1} prefix; the result always has exactly one {@code /v1} between them.
 *
 * @param endpointArg base endpoint, e.g. {@code http://host:1234} or {@code http://host:1234/v1/}
 * @param path route such as {@code /models} or {@code chat/completions}
 * @return the combined URL, e.g. {@code http://host:1234/v1/models}
 */
static String apiUrl(String endpointArg, String path) {
    var base = endpointArg.trim();
    if (base.endsWith("/")) base = base.substring(0, base.length() - 1);
    if (base.endsWith("/v1")) base = base.substring(0, base.length() - 3);
    if (!path.startsWith("/")) path = "/" + path;
    // Re-add the version prefix that was stripped from the base; prefixing only "/"
    // here produced URLs such as "http://host//models".
    if (!path.startsWith("/v1/")) path = "/v1" + path;
    return base + path;
}
/**
 * Sleeps before a retry with exponential backoff: 300 ms for attempt 1, doubling
 * with every further attempt and capped at 3000 ms. When the thread is interrupted
 * the sleep ends early and the interrupt flag is left set for the caller.
 *
 * @param attempt 1-based number of the attempt that just failed
 */
static void sleepBackoff(int attempt) {
    var ms = (long) (300L * Math.pow(2, attempt - 1)); // 300, 600, 1200, ...
    try {
        Thread.sleep(Math.min(ms, 3000));
    } catch (InterruptedException e) {
        // Restore the flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
    }
}
/**
 * Performs an HTTP GET with {@code curl} and returns the response body as a string.
 * Makes up to {@code o.retries} attempts (at least one), pausing with
 * {@code sleepBackoff} between attempts.
 *
 * @param o options supplying {@code timeoutSeconds} and {@code retries}
 * @param url URL to fetch
 * @return the response body decoded as UTF-8
 * @throws InterruptedException when the thread is interrupted while waiting for curl
 * @throws Exception the failure of the last attempt when every attempt failed
 */
static String curlGetJson(Opts o, String url) throws Exception {
    // At least one attempt: with retries <= 0 the loop would never run and
    // "throw last" would raise a NullPointerException instead of a real error.
    var attempts = Math.max(1, o.retries);
    Exception last = null;
    for (var attempt = 1; attempt <= attempts; attempt++) {
        try {
            var cmd = List.of(
                    "curl", "-fsSL",
                    "--connect-timeout", "10",
                    "--max-time", String.valueOf(o.timeoutSeconds),
                    "--retry", "3", "--retry-all-errors", "--retry-delay", "1",
                    "-H", "Accept: application/json",
                    "-H", "User-Agent: " + BROWSER_UA,
                    url);
            // stderr is merged into stdout so curl's error text ends up in the exception message.
            var p = new ProcessBuilder(cmd)
                    .redirectErrorStream(true)
                    .start();
            var bytes = p.getInputStream().readAllBytes();
            var code = p.waitFor();
            if (code != 0) {
                throw new IOException("curl GET failed (" + code + ") url=" + url + "\nOutput:\n" +
                        new String(bytes, StandardCharsets.UTF_8));
            }
            return new String(bytes, StandardCharsets.UTF_8);
        } catch (InterruptedException e) {
            // An interruption is a request to stop, not a transient failure to retry.
            Thread.currentThread().interrupt();
            throw e;
        } catch (Exception e) {
            last = e;
            if (attempt < attempts) sleepBackoff(attempt);
        }
    }
    // Reached only after every attempt failed, so last is non-null here.
    throw last;
}
/**
 * POSTs a JSON body with {@code curl} and returns the response body as a string.
 * The body is written to a temporary file that is passed to curl as {@code -d @file}
 * and deleted afterwards. Makes up to {@code o.retries} attempts (at least one),
 * pausing with {@code sleepBackoff} between attempts.
 *
 * @param o options supplying {@code timeoutSeconds} and {@code retries}
 * @param url URL to post to
 * @param jsonBody request body
 * @return the response body decoded as UTF-8
 * @throws InterruptedException when the thread is interrupted while waiting for curl
 * @throws Exception the failure of the last attempt when every attempt failed
 */
static String curlPostJson(Opts o, String url, String jsonBody) throws Exception {
    // At least one attempt: with retries <= 0 the loop would never run and
    // "throw last" would raise a NullPointerException instead of a real error.
    var attempts = Math.max(1, o.retries);
    Exception last = null;
    for (var attempt = 1; attempt <= attempts; attempt++) {
        try {
            System.out.println(" Attempt " + attempt + "/" + attempts + " via curl...");
            var tempFile = Files.createTempFile("lm-request-", ".json");
            try {
                Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8);
                var cmd = List.of(
                        "curl", "-fsSL",
                        "--connect-timeout", "10",
                        "--max-time", String.valueOf(o.timeoutSeconds),
                        "--retry", "3", "--retry-all-errors", "--retry-delay", "1",
                        "-H", "Content-Type: application/json",
                        "-H", "Accept: application/json",
                        "-H", "User-Agent: " + BROWSER_UA,
                        "-d", "@" + tempFile,
                        url);
                // stderr is merged into stdout so curl's error text ends up in the exception message.
                var p = new ProcessBuilder(cmd)
                        .redirectErrorStream(true)
                        .start();
                var bytes = p.getInputStream().readAllBytes();
                var code = p.waitFor();
                if (code != 0) {
                    throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" +
                            new String(bytes, StandardCharsets.UTF_8));
                }
                return new String(bytes, StandardCharsets.UTF_8);
            } finally {
                Files.deleteIfExists(tempFile);
            }
        } catch (InterruptedException e) {
            // An interruption is a request to stop, not a transient failure to retry.
            Thread.currentThread().interrupt();
            throw e;
        } catch (Exception e) {
            System.err.println(" Error: " + e.getClass().getName() + ": " + e.getMessage());
            last = e;
            if (attempt < attempts) sleepBackoff(attempt);
        }
    }
    // Reached only after every attempt failed, so last is non-null here.
    throw last;
}
/**
 * Extracts the first model id from a models-list JSON response by plain text
 * scanning: the first {@code "id"} key after {@code "data"}, then the next quoted
 * string after that key's colon. No JSON parser is involved and escapes inside the
 * id are not decoded.
 *
 * @param modelsJson response text, may be {@code null}
 * @return the id, or {@code null} when the expected structure is not found
 */
static String pickModelId(String modelsJson) {
    if (modelsJson == null) return null;
    var data = modelsJson.indexOf("\"data\"");
    if (data < 0) return null;
    var id = modelsJson.indexOf("\"id\"", data);
    if (id < 0) return null;
    // Without this check a missing colon (-1) would restart the quote search at
    // index 0 and return an unrelated string from the start of the document.
    var colon = modelsJson.indexOf(':', id);
    if (colon < 0) return null;
    var q1 = modelsJson.indexOf('"', colon + 1);
    if (q1 < 0) return null;
    var q2 = modelsJson.indexOf('"', q1 + 1);
    if (q2 < 0) return null;
    return modelsJson.substring(q1 + 1, q2);
}
/**
 * Extracts the first {@code "content"} string of a chat-completion response by plain
 * text scanning (starting at {@code "choices"} when present) and decodes its JSON
 * escapes, including {@code \\uXXXX}.
 *
 * @param json response text, may be {@code null}
 * @return the decoded content, or {@code null} when there is no content string or the
 *         content value is JSON {@code null}
 */
static String extractChatContent(String json) {
    if (json == null) return null;
    var choices = json.indexOf("\"choices\"");
    var key = json.indexOf("\"content\"", Math.max(choices, 0));
    if (key < 0) return null;
    var colon = json.indexOf(':', key);
    if (colon < 0) return null;
    var valueStart = colon + 1;
    while (valueStart < json.length() && Character.isWhitespace(json.charAt(valueStart))) valueStart++;
    // "content": null must not be answered with the next quoted string in the document.
    if (json.startsWith("null", valueStart)) return null;
    var q = json.indexOf('"', valueStart);
    if (q < 0) return null;
    var sb = new StringBuilder();
    for (var k = q + 1; k < json.length(); k++) {
        var ch = json.charAt(k);
        if (ch == '"') break;
        if (ch != '\\') {
            sb.append(ch);
            continue;
        }
        if (++k >= json.length()) break; // dangling backslash at the end of input
        var esc = json.charAt(k);
        switch (esc) {
            case 'n' -> sb.append('\n');
            case 't' -> sb.append('\t');
            case 'r' -> sb.append('\r');
            case 'b' -> sb.append('\b');
            case 'f' -> sb.append('\f');
            case 'u' -> {
                var code = hex4(json, k + 1);
                if (code >= 0) {
                    sb.append((char) code);
                    k += 4;
                } else {
                    sb.append('u'); // malformed escape: keep the character as-is
                }
            }
            default -> sb.append(esc); // covers \" \\ and \/
        }
    }
    return sb.toString();
}

/** Parses the four hex digits at {@code s[start..start+4)}; returns -1 when they are missing or invalid. */
private static int hex4(String s, int start) {
    if (start + 4 > s.length()) return -1;
    var value = 0;
    for (var i = start; i < start + 4; i++) {
        var digit = Character.digit(s.charAt(i), 16);
        if (digit < 0) return -1;
        value = value * 16 + digit;
    }
    return value;
}
/**
 * Leniently extracts string items from the text between the first {@code [} and the
 * last {@code ]} of {@code s}.
 *
 * <p>When that text contains no double quote it is split on commas and the non-empty
 * trimmed parts are returned. Otherwise every double-quoted run is returned, where a
 * backslash makes the following character literal (escapes such as {@code \n} are not
 * translated). Unterminated strings are dropped.
 *
 * @param s text containing an array, may be {@code null}
 * @return the items; empty when there is no bracket pair or nothing could be extracted
 */
static List<String> parseStringArray(String s) {
    if (s == null) return List.of();
    var open = s.indexOf('[');
    var close = s.lastIndexOf(']');
    if (open < 0 || close <= open) return List.of();
    var inner = s.substring(open + 1, close);
    var items = new ArrayList<String>();
    if (inner.indexOf('"') < 0) {
        // Unquoted list such as [a, b, c].
        for (var piece : inner.split(",")) {
            var word = piece.trim();
            if (!word.isEmpty()) items.add(word);
        }
        return items;
    }
    var token = new StringBuilder();
    var pos = 0;
    while (pos < inner.length()) {
        // Skip ahead to the next opening quote.
        if (inner.charAt(pos++) != '"') continue;
        token.setLength(0);
        var closed = false;
        while (pos < inner.length() && !closed) {
            var c = inner.charAt(pos++);
            if (c == '\\') {
                if (pos < inner.length()) token.append(inner.charAt(pos++));
            } else if (c == '"') {
                items.add(token.toString());
                closed = true;
            } else {
                token.append(c);
            }
        }
    }
    return items;
}
/**
 * Renders {@code s} as a JSON string literal, including the surrounding quotes.
 * Backslash, double quote and all control characters below U+0020 are escaped, as
 * JSON requires.
 *
 * @param s text to quote, may be {@code null}
 * @return the JSON literal, or the JSON token {@code null} for a {@code null} input
 */
static String jsonQuote(String s) {
    if (s == null) return "null";
    var sb = new StringBuilder(s.length() + 2).append('"');
    for (var i = 0; i < s.length(); i++) {
        var ch = s.charAt(i);
        switch (ch) {
            case '\\', '"' -> sb.append('\\').append(ch);
            case '\n' -> sb.append("\\n");
            case '\r' -> sb.append("\\r");
            case '\t' -> sb.append("\\t");
            case '\b' -> sb.append("\\b");
            case '\f' -> sb.append("\\f");
            default -> {
                if (ch < 0x20) {
                    // Remaining control characters are not allowed raw in JSON strings.
                    sb.append("\\u00")
                            .append(Character.forDigit(ch >> 4, 16))
                            .append(Character.forDigit(ch & 0xF, 16));
                } else {
                    sb.append(ch);
                }
            }
        }
    }
    return sb.append('"').toString();
}
/**
 * Asks the chat-completions endpoint for theme words based on the RSS text and returns
 * the strings of the JSON array found in the reply. The returned words are raw model
 * output; they are not normalized or checked against the lexicon here.
 *
 * @param o options supplying the endpoint, timeout, retries and the requested word counts
 * @param modelId model id placed in the request body
 * @param rssText news text; only the first 8000 characters are put into the prompt
 * @return the strings parsed from the model's reply (possibly empty)
 * @throws IOException when no chat content can be extracted from the response
 * @throws Exception when the HTTP call fails
 */
static List<String> llmThemeWords(Opts o, String modelId, String rssText) throws Exception {
// The prompt is in Dutch because the generated words are for a Dutch crossword.
var prompt = """
Je genereert woorden voor een Nederlandse kruiswoordpuzzel.
Regels:
- Output MOET exact één JSON array zijn: ["WOORD", ...]
- Alleen A-Z, 2-8 letters woorden
- Geen spaties, streepjes, cijfers, accenten, apostrofs, punten
- Geen duplicaten
- Focus op zelfstandige naamwoorden/termen uit het nieuws en relevante Zweedse kruiswoordpuzzel koppelwoorden in het thema.
- Lever %d THEMA-woorden en daarna %d GERELATEERDE woorden (totaal %d).
- Voeg ook wat korte woorden/afkortingen toe (2-4 letters), maar houd het totaal gelijk.
Nieuws (koppen/samenvattingen):
%s
""".formatted(o.themeN, o.relatedN, (o.themeN + o.relatedN), rssText.substring(0, Math.min(rssText.length(), 8000)));
// Model id and prompt are inserted as JSON string literals via jsonQuote.
var body = """
{
"model": %s,
"messages": [
{"role":"system","content":"Je bent een strikte JSON generator. Antwoord ALLEEN met een JSON array van strings."},
{"role":"user","content": %s}
],
"temperature": 0.35,
"max_tokens": 20000
}
""".formatted(jsonQuote(modelId), jsonQuote(prompt));
var url = apiUrl(o.endpoint, "/chat/completions");
System.out.println("LM Studio POST: " + url);
// NOTE: body.length() counts UTF-16 chars, which can differ from the byte count the message mentions.
System.out.println("Request body length: " + body.length() + " bytes");
var resp = curlPostJson(o, url, body);
var content = extractChatContent(resp);
if (content == null) {
throw new IOException("Could not extract chat content from LM Studio response.\n--- response ---\n" + resp);
}
return parseStringArray(content);
}
// ---------------- Pool building ----------------
/**
 * Selects the {@code bridgeN} highest-scoring words of the lexicon.
 * Words with equal scores keep their id order.
 *
 * @param lex lexicon to pick from
 * @param bridgeN number of words wanted; fewer are returned when the lexicon is smaller
 * @return bitmap over word ids with the selected words set
 */
static BitSet buildBridgeBitmap(Lexicon lex, int bridgeN) {
    var total = lex.words.size();
    var ranked = new ArrayList<Integer>(total);
    for (var id = 0; id < total; id++) {
        ranked.add(id);
    }
    // Highest score first; the sort is stable, so ties stay in id order.
    ranked.sort(Comparator.comparingInt((Integer id) -> lex.score[id]).reversed());
    var bridge = new BitSet(total);
    var wanted = Math.max(0, Math.min(bridgeN, ranked.size()));
    for (var id : ranked.subList(0, wanted)) {
        bridge.set(id);
    }
    return bridge;
}
/**
 * Builds a bitmap of the lexicon ids of the given words. Each word is normalized
 * first; words that do not normalize or are not in the lexicon are ignored.
 */
static BitSet bitmapFromWords(Lexicon lex, Collection<String> words) {
    var bits = new BitSet(lex.words.size());
    for (var candidate : words) {
        var normalized = normalizeDutchToken(candidate);
        if (normalized != null) {
            var id = lex.idOf.get(normalized);
            if (id != null) {
                bits.set(id);
            }
        }
    }
    return bits;
}
/**
 * Counts, for every word length 2..8, how many of the ids set in {@code bs}
 * belong to words of that length.
 *
 * @return map from word length to count, with an entry for each length 2..8
 */
static Map<Integer, Integer> countsPerLen(Lexicon lex, BitSet bs) {
    var perLength = new HashMap<Integer, Integer>();
    var len = 2;
    while (len <= 8) {
        var selected = (BitSet) bs.clone();
        selected.and(lex.byLen[len]);
        perLength.put(len, selected.cardinality());
        len++;
    }
    return perLength;
}
/**
 * Writes the words selected in {@code bs} to {@code path}, one per line in UTF-8,
 * ordered by score from highest to lowest. Words scoring below
 * {@code MIN_SIMPLICITY} are left out. An existing file is overwritten.
 */
static void writeWordList(Path path, Lexicon lex, BitSet bs) throws IOException {
    var ranked = bs.stream()
            .filter(id -> lex.score[id] >= MIN_SIMPLICITY)
            .boxed()
            // stable sort: equal scores keep ascending id order
            .sorted((a, b) -> Integer.compare(lex.score[b], lex.score[a]))
            .map(lex.words::get)
            .toList();
    Files.write(path, ranked, StandardCharsets.UTF_8,
            StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
}
/**
 * Formats the counts for word lengths 2..8 as one {@code " L: count"} line per
 * length; lengths missing from the map are shown as 0.
 */
static String mapToLines(Map<Integer, Integer> m) {
    var lines = new StringBuilder();
    var len = 2;
    while (len <= 8) {
        lines.append(" " + len + ": " + m.getOrDefault(len, 0) + "\n");
        len++;
    }
    return lines.toString();
}
// ---------------- NEW: enforce minima per length ----------------
/** Counts the ids set in {@code bs} that belong to words of length {@code L}. */
static int countLen(Lexicon lex, BitSet bs, int L) {
    var ofLength = (BitSet) lex.byLen[L].clone();
    ofLength.and(bs);
    return ofLength.cardinality();
}
/**
 * Tops up {@code pool} so that it holds at least {@code minWanted} words of length
 * {@code L}, adding the highest-scoring words of that length that are not yet in
 * the pool. Does nothing when {@code minWanted <= 0} or the pool already has
 * enough; adds as many as exist when the lexicon cannot supply the full amount.
 */
static void ensureMinLen(Lexicon lex, BitSet pool, int L, int minWanted) {
    if (minWanted <= 0) return;
    var missing = minWanted - countLen(lex, pool, L);
    if (missing <= 0) return;
    // Ids of length-L words that are not in the pool yet.
    var absent = (BitSet) lex.byLen[L].clone();
    absent.andNot(pool);
    var ranked = new ArrayList<Integer>(absent.cardinality());
    for (var id = absent.nextSetBit(0); id >= 0; id = absent.nextSetBit(id + 1)) {
        ranked.add(id);
    }
    // Highest score first; ties stay in id order.
    ranked.sort((a, b) -> Integer.compare(lex.score[b], lex.score[a]));
    for (var id : ranked.subList(0, Math.min(missing, ranked.size()))) {
        pool.set(id);
    }
}
/** Applies {@code ensureMinLen} for every word length 2..8 using the matching {@code minLenN} option. */
static void enforceMinima(Opts o, Lexicon lex, BitSet pool) {
    int[] minima = {o.minLen2, o.minLen3, o.minLen4, o.minLen5, o.minLen6, o.minLen7, o.minLen8};
    for (var k = 0; k < minima.length; k++) {
        // minima[0] belongs to length 2
        ensureMinLen(lex, pool, k + 2, minima[k]);
    }
}
}

View File

@@ -0,0 +1,24 @@
package puzzle;
// ===== DATA CLASS =====
/**
 * Mutable holder for one scored word. The three-argument constructor leaves
 * {@code endpoint} as {@code null} and {@code batchId} as 0.
 */
class WordScore {
    String word;
    int score;
    String status;
    String endpoint;
    int batchId;

    WordScore(String word, int score, String status, String endpoint, int batchId) {
        this.word = word;
        this.score = score;
        this.status = status;
        this.endpoint = endpoint;
        this.batchId = batchId;
    }

    WordScore(String word, int score, String status) {
        this(word, score, status, null, 0);
    }
}

Binary file not shown.