Gather data
This commit is contained in:
222
src/main/java/puzzle/ClueGenerator.java
Normal file
222
src/main/java/puzzle/ClueGenerator.java
Normal file
@@ -0,0 +1,222 @@
|
||||
package puzzle;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
import static puzzle.ExportFormat.*;
|
||||
|
||||
public class ClueGenerator {
|
||||
|
||||
private static final String OLLAMA_URL = "http://localhost:11434/api/chat";
|
||||
private static final String MODEL = "qwen2.5:14b";
|
||||
private static final String HINTS_FILE = "/home/mike/dev/puzzle-generator/nl_score_hints.csv";
|
||||
private static Map<String, String> prebuiltClues = null;
|
||||
|
||||
private static synchronized void ensurePrebuiltCluesLoaded() {
|
||||
if (prebuiltClues != null) return;
|
||||
prebuiltClues = new HashMap<>();
|
||||
try {
|
||||
var lines = Files.readAllLines(Path.of(HINTS_FILE), StandardCharsets.UTF_8);
|
||||
for (var line : lines) {
|
||||
var parts = line.split(",", 4);
|
||||
if (parts.length >= 4) {
|
||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
var rawClue = parts[3].trim();
|
||||
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
||||
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
||||
}
|
||||
if (!word.isEmpty() && !rawClue.isEmpty()) {
|
||||
prebuiltClues.put(word, rawClue);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.err.println("Warning: " + HINTS_FILE + " not found or could not be read.");
|
||||
}
|
||||
}
|
||||
|
||||
public static ExportedPuzzle applyClues(ExportedPuzzle puzzle) {
|
||||
if (puzzle == null || puzzle.words().isEmpty()) {
|
||||
return puzzle;
|
||||
}
|
||||
|
||||
ensurePrebuiltCluesLoaded();
|
||||
|
||||
Map<String, String> finalClueMap = new HashMap<>();
|
||||
List<String> wordsMissingClues = new ArrayList<>();
|
||||
|
||||
for (var w : puzzle.words()) {
|
||||
var wordUpper = w.word().toUpperCase(Locale.ROOT);
|
||||
if (prebuiltClues.containsKey(wordUpper)) {
|
||||
finalClueMap.put(w.word(), prebuiltClues.get(wordUpper));
|
||||
} else {
|
||||
wordsMissingClues.add(w.word());
|
||||
}
|
||||
}
|
||||
|
||||
if (!wordsMissingClues.isEmpty()) {
|
||||
var generatedClues = generateClues(wordsMissingClues);
|
||||
finalClueMap.putAll(generatedClues);
|
||||
}
|
||||
|
||||
List<WordOut> wordsWithClues = new ArrayList<>();
|
||||
for (var w : puzzle.words()) {
|
||||
var clue = finalClueMap.getOrDefault(w.word(), w.word());
|
||||
wordsWithClues.add(new WordOut(
|
||||
w.word(),
|
||||
clue,
|
||||
w.startRow(),
|
||||
w.startCol(),
|
||||
w.direction(),
|
||||
w.answer(),
|
||||
w.arrowRow(),
|
||||
w.arrowCol(),
|
||||
w.isReversed(),
|
||||
w.complex()
|
||||
));
|
||||
}
|
||||
|
||||
return new ExportedPuzzle(puzzle.gridv2(), wordsWithClues, puzzle.difficulty(), puzzle.rewards());
|
||||
}
|
||||
|
||||
public static Map<String, String> generateClues(List<String> words) {
|
||||
if (words == null || words.isEmpty()) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
var prompt = createCluePrompt(words);
|
||||
try {
|
||||
var jsonRequest = String.format(
|
||||
"{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"stream\":false,\"temperature\":0.7}",
|
||||
MODEL, escapeJson(prompt)
|
||||
);
|
||||
|
||||
var responseBody = curlPostJson(OLLAMA_URL, jsonRequest, 120);
|
||||
var content = extractChatContent(responseBody);
|
||||
|
||||
if (content == null || content.isEmpty()) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
return parseCluesFromReply(words, content);
|
||||
} catch (Exception e) {
|
||||
System.err.println("Failed to generate clues: " + e.getMessage());
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
}
|
||||
|
||||
private static String createCluePrompt(List<String> words) {
|
||||
return "Je bent een expert in het maken van kruiswoordpuzzels. Geef voor elk van de onderstaande woorden een korte, uitdagende maar duidelijke cryptische of beschrijvende aanwijzing in het Nederlands.\n\n" +
|
||||
"Output ALLEEN in dit formaat:\n" +
|
||||
"woord1:aanwijzing\n" +
|
||||
"woord2:aanwijzing\n\n" +
|
||||
"GEEN andere tekst of uitleg. Sla GEEN woorden over.\n\n" +
|
||||
"Lijst:\n" +
|
||||
String.join("\n", words);
|
||||
}
|
||||
|
||||
private static Map<String, String> parseCluesFromReply(List<String> expectedWords, String reply) {
|
||||
Map<String, String> wordClueMap = new HashMap<>();
|
||||
var lines = reply.split("\n");
|
||||
|
||||
for (var line : lines) {
|
||||
line = line.trim();
|
||||
if (line.contains(":")) {
|
||||
var parts = line.split(":", 2);
|
||||
if (parts.length == 2) {
|
||||
var wordPart = parts[0].trim().replaceAll("^[\\d+.)*\\-\\s]+", "").toLowerCase();
|
||||
var clue = parts[1].trim();
|
||||
if (!clue.isEmpty()) {
|
||||
wordClueMap.put(wordPart, clue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, String> results = new HashMap<>();
|
||||
for (var word : expectedWords) {
|
||||
var clue = wordClueMap.get(word.toLowerCase());
|
||||
if (clue != null) {
|
||||
results.put(word, clue);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
private static String curlPostJson(String url, String jsonBody, int timeoutSeconds) throws Exception {
|
||||
var tempFile = Files.createTempFile("clue-request-", ".json");
|
||||
try {
|
||||
Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8);
|
||||
List<String> cmd = new ArrayList<>();
|
||||
cmd.add("curl");
|
||||
cmd.add("-fsSL");
|
||||
cmd.add("--connect-timeout");
|
||||
cmd.add("10");
|
||||
cmd.add("--max-time");
|
||||
cmd.add(String.valueOf(timeoutSeconds));
|
||||
cmd.add("-H");
|
||||
cmd.add("Content-Type: application/json");
|
||||
cmd.add("-d");
|
||||
cmd.add("@" + tempFile);
|
||||
cmd.add(url);
|
||||
|
||||
var p = new ProcessBuilder(cmd)
|
||||
.redirectErrorStream(true)
|
||||
.start();
|
||||
|
||||
var bytes = p.getInputStream().readAllBytes();
|
||||
var code = p.waitFor();
|
||||
|
||||
if (code != 0) {
|
||||
throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" +
|
||||
new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
return new String(bytes, StandardCharsets.UTF_8);
|
||||
} finally {
|
||||
Files.deleteIfExists(tempFile);
|
||||
}
|
||||
}
|
||||
|
||||
private static String extractChatContent(String json) {
|
||||
if (json == null) return null;
|
||||
var choices = json.indexOf("\"choices\"");
|
||||
var p = (choices >= 0) ? choices : 0;
|
||||
var i = json.indexOf("\"content\"", p);
|
||||
if (i < 0) {
|
||||
// Fallback for Ollama non-chat format if needed, but we used /api/chat
|
||||
// Ollama /api/chat returns {"model":"...","message":{"role":"assistant","content":"..."}}
|
||||
i = json.indexOf("\"content\"");
|
||||
if (i < 0) return null;
|
||||
}
|
||||
var colon = json.indexOf(':', i);
|
||||
if (colon < 0) return null;
|
||||
var q = json.indexOf('"', colon + 1);
|
||||
if (q < 0) return null;
|
||||
var sb = new StringBuilder();
|
||||
var esc = false;
|
||||
for (var k = q + 1; k < json.length(); k++) {
|
||||
var ch = json.charAt(k);
|
||||
if (esc) {
|
||||
if (ch == 'n') sb.append('\n');
|
||||
else if (ch == 't') sb.append('\t');
|
||||
else if (ch == 'r') sb.append('\r');
|
||||
else sb.append(ch);
|
||||
esc = false;
|
||||
} else {
|
||||
if (ch == '\\') esc = true;
|
||||
else if (ch == '"') break;
|
||||
else sb.append(ch);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private static String escapeJson(String str) {
|
||||
return str.replace("\\", "\\\\")
|
||||
.replace("\"", "\\\"")
|
||||
.replace("\n", "\\n");
|
||||
}
|
||||
}
|
||||
532
src/main/java/puzzle/ConcurrentWordScorer.java
Normal file
532
src/main/java/puzzle/ConcurrentWordScorer.java
Normal file
@@ -0,0 +1,532 @@
|
||||
package puzzle;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.*;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.*;
|
||||
import java.io.*;
|
||||
import java.time.*;
|
||||
import java.util.concurrent.atomic.*;
|
||||
|
||||
/**
|
||||
* CONCURRENT MULTI-ENDPOINT Dutch Wordlist Scorer
|
||||
* Distributes batches across Ollama, LM-Studio, and a third endpoint simultaneously
|
||||
*/
|
||||
public class ConcurrentWordScorer {
|
||||
|
||||
// ===== CONFIGURATION =====
|
||||
private static final String INPUT_WORDLIST = "word-list.txt";
|
||||
private static final String OUTPUT_SCORES = "word_scores.csv";
|
||||
private static final int BATCH_SIZE = 10; // Even smaller for the difficult remaining words
|
||||
private static final int MAX_RETRIES = 3;
|
||||
|
||||
// Define all three endpoints
|
||||
private static final LLMEndpoint[] ENDPOINTS = {
|
||||
new OllamaEndpoint(),
|
||||
new LMStudioEndpoint(),
|
||||
new LMStudioEndpoint("LM-Studio", "http://192.168.1.74:1234/v1/chat/completions",
|
||||
"mistralai/mistral-nemo-instruct-2407", 1)
|
||||
// new CustomEndpoint()
|
||||
};
|
||||
|
||||
// ===== ENDPOINT CLASSES =====
|
||||
abstract static class LLMEndpoint {
|
||||
|
||||
String name;
|
||||
String baseUrl;
|
||||
String model;
|
||||
Semaphore rateLimiter; // Per-endpoint rate limiting
|
||||
|
||||
int maxConcurrent;
|
||||
|
||||
LLMEndpoint(String name, String baseUrl, String model, int maxConcurrent) {
|
||||
this.name = name;
|
||||
this.baseUrl = baseUrl;
|
||||
this.model = model;
|
||||
this.maxConcurrent = maxConcurrent;
|
||||
this.rateLimiter = new Semaphore(maxConcurrent);
|
||||
}
|
||||
|
||||
abstract String buildRequestJson(String prompt);
|
||||
abstract String extractResponseContent(String responseBody);
|
||||
|
||||
// Rate-limited request execution
|
||||
List<WordScore> execute(List<String> batch) throws Exception {
|
||||
rateLimiter.acquire(); // Wait for slot
|
||||
try {
|
||||
return executeInternal(batch);
|
||||
} finally {
|
||||
rateLimiter.release();
|
||||
}
|
||||
}
|
||||
|
||||
private List<WordScore> executeInternal(List<String> batch) throws Exception {
|
||||
var prompt = createScoringPrompt(batch);
|
||||
var jsonRequest = buildRequestJson(prompt);
|
||||
|
||||
var responseBody = curlPostJson(baseUrl, jsonRequest, 120);
|
||||
var content = extractResponseContent(responseBody);
|
||||
|
||||
if (content == null || content.isEmpty()) {
|
||||
throw new IOException("[" + name + "] Empty response content");
|
||||
}
|
||||
|
||||
return parseScoresFromReply(batch, content, name);
|
||||
}
|
||||
}
|
||||
|
||||
static class OllamaEndpoint
|
||||
extends LLMEndpoint {
|
||||
|
||||
OllamaEndpoint() {
|
||||
super("Ollama", "http://localhost:11434/api/chat",
|
||||
"qwen2.5:14b", 1); // 2 concurrent requests
|
||||
}
|
||||
|
||||
@Override String buildRequestJson(String prompt) {
|
||||
return String.format("{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"stream\":false,\"temperature\":0.1}",
|
||||
model, escapeJson(prompt));
|
||||
}
|
||||
|
||||
@Override String extractResponseContent(String responseBody) {
|
||||
// Ollama uses "message" -> "content"
|
||||
var start = responseBody.indexOf("\"content\":\"") + 11;
|
||||
var end = responseBody.indexOf("\"", start);
|
||||
if (start < 11 || end < 0) return "";
|
||||
return responseBody.substring(start, end).replace("\\n", "\n");
|
||||
}
|
||||
}
|
||||
|
||||
static class LMStudioEndpoint
|
||||
extends LLMEndpoint {
|
||||
|
||||
LMStudioEndpoint() {
|
||||
super("LM-Studio", "http://192.168.1.159:1234/v1/chat/completions",
|
||||
"mistralai/mistral-nemo-instruct-2407", 1); // LM-Studio can handle more
|
||||
}
|
||||
public LMStudioEndpoint(String s, String url, String s1, int i) {
|
||||
super(
|
||||
s, url, s1, i
|
||||
);
|
||||
}
|
||||
|
||||
@Override String buildRequestJson(String prompt) {
|
||||
return String.format("{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"temperature\":0.1,\"max_tokens\":2048}",
|
||||
model, escapeJson(prompt));
|
||||
}
|
||||
|
||||
@Override String extractResponseContent(String responseBody) {
|
||||
return extractChatContent(responseBody);
|
||||
}
|
||||
}
|
||||
|
||||
static class CustomEndpoint
|
||||
extends LLMEndpoint {
|
||||
|
||||
CustomEndpoint() {
|
||||
super("Custom", "http://192.168.1.74:1234/v1/chat/completions",
|
||||
"qwen2.5-vl-7b-abliterated-caption-it_gguf", 2);
|
||||
}
|
||||
|
||||
@Override String buildRequestJson(String prompt) {
|
||||
// Adapt to your third endpoint's format
|
||||
return new LMStudioEndpoint().buildRequestJson(prompt);
|
||||
}
|
||||
|
||||
@Override String extractResponseContent(String responseBody) {
|
||||
return new LMStudioEndpoint().extractResponseContent(responseBody);
|
||||
}
|
||||
}
|
||||
|
||||
// ===== MAIN COORDINATOR =====
|
||||
static void main(String[] args) throws Exception {
|
||||
System.out.println("=== CONCURRENT 3-Endpoint Scorer ===");
|
||||
for (var ep : ENDPOINTS) {
|
||||
System.out.printf("- %s: %s%n", ep.name, ep.baseUrl);
|
||||
}
|
||||
System.out.println();
|
||||
|
||||
cleanupOutputFile();
|
||||
|
||||
// Load work queue
|
||||
var allWords = Files.readAllLines(Paths.get(INPUT_WORDLIST));
|
||||
var scoredWords = loadAlreadyScoredWords();
|
||||
var workQueue = createWorkQueue(allWords, scoredWords);
|
||||
|
||||
System.out.printf("Total words: %d | Already scored: %d | Remaining: %d%n%n",
|
||||
allWords.size(), scoredWords.size(), workQueue.size());
|
||||
|
||||
if (workQueue.isEmpty()) {
|
||||
System.out.println("All done!");
|
||||
return;
|
||||
}
|
||||
|
||||
// Start result writer thread
|
||||
BlockingQueue<List<WordScore>> resultQueue = new LinkedBlockingQueue<>();
|
||||
var writerThread = startResultWriter(resultQueue);
|
||||
|
||||
// Start worker threads
|
||||
var totalThreads = 0;
|
||||
for (var ep : ENDPOINTS) totalThreads += ep.maxConcurrent;
|
||||
|
||||
var executor = Executors.newFixedThreadPool(totalThreads);
|
||||
var totalProcessed = new AtomicInteger(scoredWords.size());
|
||||
|
||||
for (var endpoint : ENDPOINTS) {
|
||||
for (var i = 0; i < endpoint.maxConcurrent; i++) {
|
||||
executor.submit(() -> {
|
||||
processBatches(endpoint, workQueue, resultQueue, totalProcessed, allWords.size());
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for completion
|
||||
executor.shutdown();
|
||||
executor.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
|
||||
|
||||
// Signal writer to stop
|
||||
resultQueue.put(Collections.singletonList(new WordScore(null, 0, "STOP")));
|
||||
writerThread.join();
|
||||
|
||||
// Update hints in the database
|
||||
|
||||
System.out.println("\n✓ All endpoints finished!");
|
||||
}
|
||||
|
||||
// ===== WORKER THREAD LOGIC =====
|
||||
private static void processBatches(LLMEndpoint endpoint,
|
||||
BlockingQueue<WorkItem> workQueue,
|
||||
BlockingQueue<List<WordScore>> resultQueue,
|
||||
AtomicInteger totalProcessed,
|
||||
int totalWords) {
|
||||
|
||||
System.out.printf("[%s] Worker started%n", endpoint.name);
|
||||
|
||||
while (!Thread.currentThread().isInterrupted()) {
|
||||
try {
|
||||
var work = workQueue.poll(1, TimeUnit.SECONDS);
|
||||
if (work == null) {
|
||||
if (workQueue.isEmpty()) break; // No more work in queue
|
||||
continue;
|
||||
}
|
||||
|
||||
var scores = processWithRetry(endpoint, work.batch);
|
||||
|
||||
// Add metadata
|
||||
scores.forEach(s -> {
|
||||
s.endpoint = endpoint.name;
|
||||
s.batchId = work.batchId;
|
||||
});
|
||||
|
||||
resultQueue.put(scores);
|
||||
|
||||
// Progress update
|
||||
var processed = totalProcessed.addAndGet(scores.size());
|
||||
if (processed % 100 < BATCH_SIZE) { // Reduce console spam
|
||||
System.out.printf("Progress: %d/%d (%.1f%%)%n",
|
||||
processed, totalWords, (processed * 100.0 / totalWords));
|
||||
}
|
||||
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
break;
|
||||
} catch (Exception e) {
|
||||
System.err.printf("[%s] Fatal error: %s%n", endpoint.name, e.getMessage());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
System.out.printf("[%s] Worker stopped%n", endpoint.name);
|
||||
}
|
||||
|
||||
private static List<WordScore> processWithRetry(LLMEndpoint endpoint, List<String> batch) {
|
||||
var retries = 0;
|
||||
|
||||
while (retries < MAX_RETRIES) {
|
||||
try {
|
||||
return endpoint.execute(batch);
|
||||
} catch (Exception e) {
|
||||
retries++;
|
||||
System.err.printf("[%s] Attempt %d/%d failed: %s%n",
|
||||
endpoint.name, retries, MAX_RETRIES, e.getMessage());
|
||||
|
||||
if (retries >= MAX_RETRIES) {
|
||||
return createFailedScores(batch, endpoint.name);
|
||||
}
|
||||
|
||||
try {
|
||||
Thread.sleep(2000L * retries);
|
||||
} catch (InterruptedException ie) {
|
||||
Thread.currentThread().interrupt();
|
||||
return createFailedScores(batch, endpoint.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
return createFailedScores(batch, endpoint.name);
|
||||
}
|
||||
|
||||
// ===== RESULT WRITER THREAD =====
|
||||
private static Thread startResultWriter(BlockingQueue<List<WordScore>> resultQueue) throws Exception {
|
||||
var writer = new BufferedWriter(new FileWriter(OUTPUT_SCORES, true));
|
||||
var isNew = Files.size(Paths.get(OUTPUT_SCORES)) == 0;
|
||||
|
||||
if (isNew) {
|
||||
writer.write("word,score,status,endpoint,batch_id,timestamp\n");
|
||||
writer.flush();
|
||||
}
|
||||
|
||||
var thread = new Thread(() -> {
|
||||
try {
|
||||
while (true) {
|
||||
var scores = resultQueue.take();
|
||||
|
||||
// Stop signal
|
||||
if (scores.size() == 1 && scores.get(0).status.equals("STOP")) {
|
||||
break;
|
||||
}
|
||||
|
||||
writeBatch(writer, scores);
|
||||
}
|
||||
writer.close();
|
||||
} catch (Exception e) {
|
||||
System.err.println("Writer thread error: " + e.getMessage());
|
||||
}
|
||||
});
|
||||
|
||||
thread.start();
|
||||
return thread;
|
||||
}
|
||||
|
||||
private static synchronized void writeBatch(BufferedWriter writer, List<WordScore> scores) throws Exception {
|
||||
var timestamp = Instant.now().toString();
|
||||
for (var ws : scores) {
|
||||
writer.write(String.format("%s,%d,%s,%s,%d,%s\n",
|
||||
ws.word, ws.score, ws.status, ws.endpoint, ws.batchId, timestamp));
|
||||
}
|
||||
writer.flush();
|
||||
}
|
||||
|
||||
// ===== QUEUE & DATA STRUCTURES =====
|
||||
record WorkItem(int batchId, List<String> batch) {
|
||||
|
||||
}
|
||||
|
||||
private static BlockingQueue<WorkItem> createWorkQueue(List<String> allWords, Set<String> scored) {
|
||||
BlockingQueue<WorkItem> queue = new LinkedBlockingQueue<>();
|
||||
var batchId = 0;
|
||||
|
||||
for (var i = 0; i < allWords.size(); i += BATCH_SIZE) {
|
||||
List<String> batch = new ArrayList<>();
|
||||
for (var j = i; j < Math.min(i + BATCH_SIZE, allWords.size()); j++) {
|
||||
var word = allWords.get(j);
|
||||
if (!scored.contains(word.toLowerCase())) {
|
||||
batch.add(word);
|
||||
}
|
||||
}
|
||||
|
||||
if (!batch.isEmpty()) {
|
||||
queue.add(new WorkItem(batchId++, batch));
|
||||
}
|
||||
}
|
||||
|
||||
return queue;
|
||||
}
|
||||
|
||||
// ===== LOADING & PARSING =====
|
||||
private static Set<String> loadAlreadyScoredWords() throws Exception {
|
||||
Set<String> scored = new HashSet<>();
|
||||
var file = new File(OUTPUT_SCORES);
|
||||
if (!file.exists()) return scored;
|
||||
|
||||
var lines = Files.readAllLines(file.toPath());
|
||||
var first = true;
|
||||
for (var line : lines) {
|
||||
if (first) {
|
||||
first = false;
|
||||
continue;
|
||||
}
|
||||
var parts = line.split(",");
|
||||
if (parts.length >= 3) {
|
||||
var word = parts[0].trim().toLowerCase();
|
||||
var status = parts[2].trim();
|
||||
if ("OK".equalsIgnoreCase(status)) {
|
||||
scored.add(word);
|
||||
}
|
||||
}
|
||||
}
|
||||
return scored;
|
||||
}
|
||||
|
||||
private static void cleanupOutputFile() throws IOException {
|
||||
var path = Paths.get(OUTPUT_SCORES);
|
||||
if (!Files.exists(path)) return;
|
||||
|
||||
System.out.println("Cleaning up " + OUTPUT_SCORES + "...");
|
||||
var lines = Files.readAllLines(path);
|
||||
if (lines.isEmpty()) return;
|
||||
|
||||
var header = lines.get(0);
|
||||
Map<String, String> latestOkEntries = new LinkedHashMap<>();
|
||||
|
||||
for (int i = 1; i < lines.size(); i++) {
|
||||
var line = lines.get(i);
|
||||
var parts = line.split(",");
|
||||
if (parts.length >= 3) {
|
||||
var word = parts[0].trim().toLowerCase();
|
||||
var status = parts[2].trim();
|
||||
if ("OK".equalsIgnoreCase(status)) {
|
||||
latestOkEntries.put(word, line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var cleanedLines = new ArrayList<String>();
|
||||
cleanedLines.add(header);
|
||||
cleanedLines.addAll(latestOkEntries.values());
|
||||
|
||||
Files.write(path, cleanedLines, StandardCharsets.UTF_8);
|
||||
System.out.printf("Cleanup complete. Kept %d unique OK entries. Removed %d non-OK or duplicate entries.%n",
|
||||
latestOkEntries.size(), lines.size() - cleanedLines.size());
|
||||
}
|
||||
|
||||
private static List<WordScore> createFailedScores(List<String> words, String endpoint) {
|
||||
List<WordScore> failed = new ArrayList<>();
|
||||
for (var word : words) {
|
||||
failed.add(new WordScore(word, -1, "FAILED", endpoint, -1));
|
||||
}
|
||||
return failed;
|
||||
}
|
||||
|
||||
// Parsing logic
|
||||
private static List<WordScore> parseScoresFromReply(List<String> expectedWords, String reply, String endpointName) {
|
||||
Map<String, Integer> wordScoreMap = new HashMap<>();
|
||||
var lines = reply.split("\n");
|
||||
|
||||
for (var line : lines) {
|
||||
line = line.trim();
|
||||
// Handle formats like "1. word:score", "word: score", "word - score"
|
||||
String sep = null;
|
||||
if (line.contains(":")) sep = ":";
|
||||
else if (line.contains("-")) sep = "-";
|
||||
|
||||
if (sep != null) {
|
||||
var parts = line.split(sep, 2);
|
||||
if (parts.length == 2) {
|
||||
var wordPart = parts[0].trim();
|
||||
// Remove leading numbering like "1. " or bullets like "* ", "- "
|
||||
wordPart = wordPart.replaceAll("^[\\d+.)*\\-\\s]+", "");
|
||||
var word = wordPart.toLowerCase();
|
||||
|
||||
try {
|
||||
var scoreStr = parts[1].trim();
|
||||
// Handle potential non-numeric junk after the number
|
||||
scoreStr = scoreStr.replaceAll("[^0-9].*", "");
|
||||
if (!scoreStr.isEmpty()) {
|
||||
var score = Integer.parseInt(scoreStr);
|
||||
wordScoreMap.put(word, Math.max(1, Math.min(10, score)));
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
// Skip invalid lines
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Match scores to original words (maintaining order)
|
||||
List<WordScore> results = new ArrayList<>();
|
||||
for (var word : expectedWords) {
|
||||
var score = wordScoreMap.get(word.toLowerCase());
|
||||
if (score != null) {
|
||||
results.add(new WordScore(word, score, "OK"));
|
||||
} else {
|
||||
results.add(new WordScore(word, -1, "MISSING"));
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
// Prompt creation
|
||||
private static String createScoringPrompt(List<String> words) {
|
||||
return "Je bent een Nederlandse taalexpert. Geef elk van de " + words.size() + " onderstaande woorden een populariteitsscore van 1 (zeer zeldzaam) tot 10 (zeer algemeen).\n\n" +
|
||||
"Output ALLEEN in dit formaat:\n" +
|
||||
"woord1:score\n" +
|
||||
"woord2:score\n\n" +
|
||||
"GEEN andere tekst of uitleg. Sla GEEN woorden over.\n\n" +
|
||||
"Lijst:\n" +
|
||||
String.join("\n", words);
|
||||
}
|
||||
|
||||
// Utility methods
|
||||
private static String escapeJson(String str) {
|
||||
return str.replace("\\", "\\\\")
|
||||
.replace("\"", "\\\"")
|
||||
.replace("\n", "\\n");
|
||||
}
|
||||
|
||||
private static String curlPostJson(String url, String jsonBody, int timeoutSeconds) throws Exception {
|
||||
// Write JSON body to temp file to avoid shell escaping issues
|
||||
var tempFile = Files.createTempFile("lm-request-", ".json");
|
||||
try {
|
||||
Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8);
|
||||
|
||||
List<String> cmd = new ArrayList<>();
|
||||
cmd.add("curl");
|
||||
cmd.add("-fsSL");
|
||||
cmd.add("--connect-timeout");
|
||||
cmd.add("10");
|
||||
cmd.add("--max-time");
|
||||
cmd.add(String.valueOf(timeoutSeconds));
|
||||
cmd.add("-H");
|
||||
cmd.add("Content-Type: application/json");
|
||||
cmd.add("-d");
|
||||
cmd.add("@" + tempFile);
|
||||
cmd.add(url);
|
||||
|
||||
var p = new ProcessBuilder(cmd)
|
||||
.redirectErrorStream(true)
|
||||
.start();
|
||||
|
||||
var bytes = p.getInputStream().readAllBytes();
|
||||
var code = p.waitFor();
|
||||
|
||||
if (code != 0) {
|
||||
throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" +
|
||||
new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
return new String(bytes, StandardCharsets.UTF_8);
|
||||
} finally {
|
||||
Files.deleteIfExists(tempFile);
|
||||
}
|
||||
}
|
||||
|
||||
private static String extractChatContent(String json) {
|
||||
if (json == null) return null;
|
||||
var choices = json.indexOf("\"choices\"");
|
||||
var p = (choices >= 0) ? choices : 0;
|
||||
var i = json.indexOf("\"content\"", p);
|
||||
if (i < 0) return null;
|
||||
var colon = json.indexOf(':', i);
|
||||
if (colon < 0) return null;
|
||||
var q = json.indexOf('"', colon + 1);
|
||||
if (q < 0) return null;
|
||||
var sb = new StringBuilder();
|
||||
var esc = false;
|
||||
for (var k = q + 1; k < json.length(); k++) {
|
||||
var ch = json.charAt(k);
|
||||
if (esc) {
|
||||
if (ch == 'n') sb.append('\n');
|
||||
else if (ch == 't') sb.append('\t');
|
||||
else if (ch == 'r') sb.append('\r');
|
||||
else sb.append(ch);
|
||||
esc = false;
|
||||
} else {
|
||||
if (ch == '\\') esc = true;
|
||||
else if (ch == '"') break;
|
||||
else sb.append(ch);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
205
src/main/java/puzzle/ExportFormat.java
Normal file
205
src/main/java/puzzle/ExportFormat.java
Normal file
@@ -0,0 +1,205 @@
|
||||
package puzzle;
|
||||
|
||||
import java.util.*;
|
||||
import static puzzle.SwedishGenerator.*;
|
||||
|
||||
/**
|
||||
* ExportFormat.java
|
||||
*
|
||||
* Direct port of export_format.js:
|
||||
* - scans filled grid for clue digits '1'..'4'
|
||||
* - extracts placed words in canonical direction (horizontal=right, vertical=down)
|
||||
* - crops to bounding box (words + arrow cells) with 1-cell margin
|
||||
* - outputs gridv2 + words[] (+ difficulty, rewards)
|
||||
*/
|
||||
public final class ExportFormat {
|
||||
|
||||
private ExportFormat() { }
|
||||
|
||||
/** True only for the ASCII capitals {@code 'A'} through {@code 'Z'}. */
private static boolean isLetter(char ch) {
    return 'A' <= ch && ch <= 'Z';
}
|
||||
|
||||
private static boolean inBounds(int H, int W, int r, int c) {
|
||||
return r >= 0 && r < H && c >= 0 && c < W;
|
||||
}
|
||||
|
||||
// ---------- Public API ----------
|
||||
|
||||
public static ExportedPuzzle exportFormatFromFilled(PuzzleResult puz, int difficulty, Rewards rewards) {
|
||||
Objects.requireNonNull(puz, "puz");
|
||||
var g = puz.filled().grid;
|
||||
var H = g.length;
|
||||
var W = g[0].length;
|
||||
|
||||
// 1) extract "placed" list from all clue digits in the filled grid
|
||||
List<Placed> placed = new ArrayList<>();
|
||||
var allSlots = extractSlots(g);
|
||||
var clueMap = puz.filled().clueMap;
|
||||
|
||||
for (var s : allSlots) {
|
||||
var word = clueMap.get(s.key());
|
||||
if (word == null) continue;
|
||||
|
||||
var p = extractPlacedFromSlot(s, word);
|
||||
if (p == null) continue;
|
||||
placed.add(p);
|
||||
}
|
||||
|
||||
// If nothing placed: return full grid mapped to letters/# only
|
||||
if (placed.isEmpty()) {
|
||||
List<String> gridv2 = new ArrayList<>(H);
|
||||
for (var chars : g) {
|
||||
var sb = new StringBuilder(W);
|
||||
for (var c = 0; c < W; c++) {
|
||||
var ch = chars[c];
|
||||
sb.append(isLetter(ch) ? ch : '#');
|
||||
}
|
||||
gridv2.add(sb.toString());
|
||||
}
|
||||
return new ExportedPuzzle(gridv2, List.of(), difficulty, rewards);
|
||||
}
|
||||
|
||||
// 2) bounding box around all word cells + arrow cells, with 1-cell margin
|
||||
List<int[]> allCells = new ArrayList<>();
|
||||
for (var p : placed) {
|
||||
allCells.addAll(p.cells);
|
||||
allCells.add(p.arrow);
|
||||
}
|
||||
|
||||
int minR = Integer.MAX_VALUE, minC = Integer.MAX_VALUE;
|
||||
int maxR = Integer.MIN_VALUE, maxC = Integer.MIN_VALUE;
|
||||
|
||||
for (var rc : allCells) {
|
||||
int rr = rc[0], cc = rc[1];
|
||||
minR = Math.min(minR, rr);
|
||||
minC = Math.min(minC, cc);
|
||||
maxR = Math.max(maxR, rr);
|
||||
maxC = Math.max(maxC, cc);
|
||||
}
|
||||
|
||||
// 3) map of only used letter cells (everything else becomes '#')
|
||||
Map<Long, Character> letterAt = new HashMap<>();
|
||||
for (var p : placed) {
|
||||
for (var rc : p.cells) {
|
||||
int rr = rc[0], cc = rc[1];
|
||||
if (inBounds(H, W, rr, cc) && isLetter(g[rr][cc])) {
|
||||
letterAt.put(pack(rr, cc), g[rr][cc]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4) render gridv2 over cropped bounds (out-of-bounds become '#')
|
||||
List<String> gridv2 = new ArrayList<>(Math.max(0, maxR - minR + 1));
|
||||
for (var r = minR; r <= maxR; r++) {
|
||||
var row = new StringBuilder(Math.max(0, maxC - minC + 1));
|
||||
for (var c = minC; c <= maxC; c++) {
|
||||
var ch = letterAt.get(pack(r, c));
|
||||
row.append(ch != null ? ch : '#');
|
||||
}
|
||||
gridv2.add(row.toString());
|
||||
}
|
||||
|
||||
// 5) words output with cropped coordinates
|
||||
List<WordOut> wordsOut = new ArrayList<>(placed.size());
|
||||
for (var p : placed) {
|
||||
wordsOut.add(new WordOut(
|
||||
p.word,
|
||||
p.clue, // placeholder = word (same as JS)
|
||||
p.startRow - minR,
|
||||
p.startCol - minC,
|
||||
p.direction,
|
||||
p.word, // answer
|
||||
p.arrowRow - minR,
|
||||
p.arrowCol - minC,
|
||||
p.isReversed,
|
||||
puz.dict().words().get(p.word).cross()
|
||||
));
|
||||
}
|
||||
|
||||
return new ExportedPuzzle(gridv2, wordsOut, difficulty, rewards);
|
||||
}
|
||||
static final String HORIZONTAL = "h", VERTICAL = "v";
|
||||
/**
|
||||
* Convert a generator Slot + assigned word into a Placed object for export.
|
||||
*/
|
||||
private static Placed extractPlacedFromSlot(Slot s, String word) {
|
||||
int r = s.clueR();
|
||||
int c = s.clueC();
|
||||
char d = s.dir();
|
||||
|
||||
List<int[]> cells = new ArrayList<>();
|
||||
for (int i = 0; i < s.len(); i++) {
|
||||
cells.add(new int[]{ s.rs()[i], s.cs()[i] });
|
||||
}
|
||||
|
||||
// Canonicalize: always output right/down
|
||||
int startRow, startCol, arrowRow, arrowCol;
|
||||
String direction;
|
||||
boolean isReversed = false;
|
||||
|
||||
if (d == '2') { // right -> horizontal
|
||||
direction = HORIZONTAL;
|
||||
startRow = cells.get(0)[0];
|
||||
startCol = cells.get(0)[1];
|
||||
arrowRow = r;
|
||||
arrowCol = c;
|
||||
} else if (d == '3' || d == '5') { // down or down-bent -> vertical
|
||||
direction = VERTICAL;
|
||||
startRow = cells.get(0)[0];
|
||||
startCol = cells.get(0)[1];
|
||||
arrowRow = r;
|
||||
arrowCol = c;
|
||||
} else if (d == '4') { // left -> horizontal (REVERSED)
|
||||
direction = HORIZONTAL;
|
||||
isReversed = true;
|
||||
startRow = cells.get(0)[0];
|
||||
startCol = cells.get(0)[1];
|
||||
arrowRow = r;
|
||||
arrowCol = c;
|
||||
} else if (d == '1') { // up -> vertical (REVERSED)
|
||||
direction = VERTICAL;
|
||||
isReversed = true;
|
||||
startRow = cells.get(0)[0];
|
||||
startCol = cells.get(0)[1];
|
||||
arrowRow = r;
|
||||
arrowCol = c;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
||||
return new Placed(
|
||||
word,
|
||||
word, // clue placeholder
|
||||
startRow,
|
||||
startCol,
|
||||
direction,
|
||||
word, // answer
|
||||
arrowRow,
|
||||
arrowCol,
|
||||
cells,
|
||||
new int[]{ arrowRow, arrowCol },
|
||||
isReversed
|
||||
);
|
||||
}
|
||||
|
||||
// pack (r,c) into one long key (handles negatives too)
|
||||
private static long pack(int r, int c) {
|
||||
return (((long) r) << 32) ^ (c & 0xFFFFFFFFL);
|
||||
}
|
||||
|
||||
// ---------- Data models ----------
|
||||
|
||||
/**
|
||||
* @param direction "horizontal" | "vertical"
|
||||
* @param cells word cells
|
||||
* @param arrow [arrowRow, arrowCol] */
|
||||
private record Placed(String word, String clue, int startRow, int startCol, String direction, String answer, int arrowRow, int arrowCol, List<int[]> cells, int[] arrow,
|
||||
boolean isReversed) { }
|
||||
|
||||
/** Reward amounts attached to an exported puzzle. */
public record Rewards(
        int coins,
        int stars,
        int hints
) { }
|
||||
|
||||
/**
 * A word as it appears in the exported puzzle.
 *
 * @param direction direction code of the word
 *                  (NOTE(review): presumably "h" / "v" as in HORIZONTAL / VERTICAL — confirm against the exporter)
 */
public record WordOut(
        String word,
        String clue,
        int startRow,
        int startCol,
        String direction,
        String answer,
        int arrowRow,
        int arrowCol,
        boolean isReversed,
        int complex
) { }
|
||||
|
||||
/** Complete export payload: grid rows, the word list, a difficulty value and the rewards. */
public record ExportedPuzzle(
        List<String> gridv2,
        List<WordOut> words,
        int difficulty,
        Rewards rewards
) { }
|
||||
}
|
||||
92
src/main/java/puzzle/HintScores.java
Normal file
92
src/main/java/puzzle/HintScores.java
Normal file
@@ -0,0 +1,92 @@
|
||||
package puzzle;
|
||||
|
||||
import java.sql.*;
|
||||
import java.util.function.ToIntFunction;
|
||||
|
||||
public final class HintScores {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Class.forName("org.sqlite.JDBC");
|
||||
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:/home/mike/dev/puzzle-generator/tools/hint/hint.sqlite")) {
|
||||
updateCrossScores(conn, HintScores::exampleScore, 1000);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Updates hints.cross_score by computing a score from hints.word.
|
||||
*
|
||||
* @param conn open JDBC connection (PostgreSQL)
|
||||
* @param scoreFn callback: scoreFn.applyAsInt(word)
|
||||
* @param batchSize e.g. 1000
|
||||
*/
|
||||
public static void updateCrossScores(
|
||||
Connection conn,
|
||||
ToIntFunction<String> scoreFn,
|
||||
int batchSize
|
||||
) throws SQLException {
|
||||
|
||||
// Use a transaction for speed + consistency
|
||||
final boolean prevAutoCommit = conn.getAutoCommit();
|
||||
conn.setAutoCommit(false);
|
||||
|
||||
// Server-side cursor behavior in pgjdbc requires autoCommit=false + fetchSize>0
|
||||
final String selectSql =
|
||||
"SELECT id, puzzle_norm " +
|
||||
"FROM hints " +
|
||||
"WHERE puzzle_norm IS NOT NULL"; // optionally add: " AND cross_score IS NULL"
|
||||
|
||||
final String updateSql =
|
||||
"UPDATE hints SET cross_score = ? WHERE id = ?";
|
||||
|
||||
try (PreparedStatement psSel = conn.prepareStatement(selectSql);
|
||||
PreparedStatement psUpd = conn.prepareStatement(updateSql)) {
|
||||
|
||||
psSel.setFetchSize(batchSize);
|
||||
|
||||
int pending = 0;
|
||||
|
||||
try (ResultSet rs = psSel.executeQuery()) {
|
||||
while (rs.next()) {
|
||||
long id = rs.getLong("id");
|
||||
String word = rs.getString("puzzle_norm");
|
||||
|
||||
int score;
|
||||
try {
|
||||
score = scoreFn.applyAsInt(word);
|
||||
} catch (RuntimeException ex) {
|
||||
// If scoring fails, decide your policy: skip or set 0.
|
||||
// Here: skip row.
|
||||
continue;
|
||||
}
|
||||
|
||||
psUpd.setInt(1, score);
|
||||
psUpd.setLong(2, id);
|
||||
psUpd.addBatch();
|
||||
pending++;
|
||||
|
||||
if (pending >= batchSize) {
|
||||
psUpd.executeBatch();
|
||||
conn.commit();
|
||||
pending = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (pending > 0) {
|
||||
psUpd.executeBatch();
|
||||
conn.commit();
|
||||
}
|
||||
|
||||
} catch (SQLException e) {
|
||||
conn.rollback();
|
||||
throw e;
|
||||
} finally {
|
||||
conn.setAutoCommit(prevAutoCommit);
|
||||
}
|
||||
}
|
||||
|
||||
// Example scoring callback
|
||||
public static int exampleScore(String word) {
|
||||
return ThemePoolBuilderLength.crossabilityScore(word);
|
||||
}
|
||||
|
||||
}
|
||||
453
src/main/java/puzzle/Main.java
Normal file
453
src/main/java/puzzle/Main.java
Normal file
@@ -0,0 +1,453 @@
|
||||
package puzzle;
|
||||
|
||||
import puzzle.SwedishGenerator.PuzzleResult;
|
||||
import puzzle.SwedishGenerator.Rng;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.*;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.*;
|
||||
|
||||
import static puzzle.SwedishGenerator.fillMask;
|
||||
import static puzzle.SwedishGenerator.generateMask;
|
||||
import static puzzle.SwedishGenerator.loadWords;
|
||||
|
||||
public class Main {
|
||||
|
||||
final static String OUT_DIR = envOrDefault("OUT_DIR", "/data/puzzle");
|
||||
final static Path PUZZLE_DIR = Paths.get(OUT_DIR, "puzzles");
|
||||
static final Path INDEX_FILE = PUZZLE_DIR.resolve("index.json");
|
||||
static final OffsetDateTime now = OffsetDateTime.now(ZoneOffset.UTC);
|
||||
static final String CREATED_AT = now.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"));
|
||||
static final String FILE_ID = CREATED_AT.replace(":", "-") + "_" + (System.currentTimeMillis() / 1000);
|
||||
static final String FILE_NAME = FILE_ID + ".json";
|
||||
static final Path OUTPUT_PATH = PUZZLE_DIR.resolve(FILE_NAME);
|
||||
static final String DATE_STRING = now.toLocalDate().toString();
|
||||
|
||||
public static class Opts {
|
||||
|
||||
public int seed = (int) (System.nanoTime() ^ System.currentTimeMillis());
|
||||
public int pop = 18;
|
||||
public int gens = 500;
|
||||
public String wordsPath = "nl_score_hints.csv";
|
||||
public double minSimplicity = 0; // 0 means no limit
|
||||
public int threads = Math.max(1, Runtime.getRuntime().availableProcessors());
|
||||
public int tries = threads;
|
||||
public boolean reindex = false;
|
||||
}
|
||||
|
||||
public void main(String[] args) {
|
||||
var opts = parseArgs(args);
|
||||
|
||||
if (opts.reindex) {
|
||||
|
||||
section("Reindex");
|
||||
info("OutputDir : " + OUT_DIR);
|
||||
rebuildIndex();
|
||||
return;
|
||||
}
|
||||
|
||||
section("Puzzle Generator");
|
||||
info("OutputDir : " + OUT_DIR);
|
||||
info("WordsFile : " + opts.wordsPath);
|
||||
|
||||
section("Settings");
|
||||
printSettings(opts);
|
||||
|
||||
var res = generatePuzzle(opts);
|
||||
if (res == null) {
|
||||
err("Search status : UNSOLVED");
|
||||
err("Reason : No solution found within tries.");
|
||||
System.exit(1);
|
||||
return;
|
||||
}
|
||||
|
||||
section("Result");
|
||||
info(String.format(Locale.ROOT, "simplicity : %.2f", res.filled().simplicity));
|
||||
|
||||
section("Mask");
|
||||
System.out.print(indentLines(SwedishGenerator.gridToString(res.mask()), " "));
|
||||
|
||||
section("Grid (raw)");
|
||||
System.out.print(indentLines(SwedishGenerator.gridToString(res.filled().grid), " "));
|
||||
|
||||
section("Grid (human)");
|
||||
System.out.print(indentLines(SwedishGenerator.renderHuman(res.filled().grid), " "));
|
||||
|
||||
var exported = ExportFormat.exportFormatFromFilled(res, 1, new ExportFormat.Rewards(50, 2, 1));
|
||||
|
||||
section("Clues");
|
||||
info("status : generating...");
|
||||
info("generatedFor : " + exported.words().size());
|
||||
exported = ClueGenerator.applyClues(exported);
|
||||
info("status : done");
|
||||
|
||||
section("Words");
|
||||
printWordsTable(exported.words());
|
||||
|
||||
section("Gridv2");
|
||||
for (var row : exported.gridv2()) System.out.println(" " + row);
|
||||
|
||||
// Export to JSON file
|
||||
|
||||
var theme = "algemeen";
|
||||
|
||||
section("Export");
|
||||
info("file : " + OUTPUT_PATH);
|
||||
|
||||
try {
|
||||
Files.createDirectories(PUZZLE_DIR);
|
||||
var json = toJson(exported, DATE_STRING, theme);
|
||||
Files.writeString(OUTPUT_PATH, json, StandardCharsets.UTF_8);
|
||||
|
||||
// Update index.json
|
||||
var pathInIndex = "/puzzles/" + FILE_NAME;
|
||||
var indexRecord = toIndexRecordJson(FILE_ID, pathInIndex, DATE_STRING, theme, exported.difficulty(), CREATED_AT);
|
||||
if (1 != 1) updateIndex(PUZZLE_DIR.toString(), indexRecord);
|
||||
else rebuildIndex();
|
||||
info("indexUpdated : " + INDEX_FILE);
|
||||
} catch (IOException e) {
|
||||
err("Failed to write: " + FILE_NAME);
|
||||
err("Reason : " + e.getMessage());
|
||||
System.exit(2);
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------- Output helpers ----------------
|
||||
|
||||
private static void info(String msg) { System.out.println("[INFO ] " + msg); }
|
||||
private static void warn(String msg) { System.out.println("[WARN ] " + msg); }
|
||||
private static void err(String msg) { System.err.println("[ERROR] " + msg); }
|
||||
|
||||
private static void section(String title) {
|
||||
System.out.println();
|
||||
System.out.println(title);
|
||||
}
|
||||
|
||||
private static String envOrDefault(String key, String def) {
|
||||
var v = System.getenv(key);
|
||||
return (v == null || v.isBlank()) ? def : v;
|
||||
}
|
||||
|
||||
private static void printSettings(Opts o) {
|
||||
System.out.printf(Locale.ROOT, " %-14s: %d%n", "seed", o.seed);
|
||||
System.out.printf(Locale.ROOT, " %-14s: %d%n", "population", o.pop);
|
||||
System.out.printf(Locale.ROOT, " %-14s: %d%n", "generations", o.gens);
|
||||
System.out.printf(Locale.ROOT, " %-14s: %s%n", "wordsPath", o.wordsPath);
|
||||
System.out.printf(Locale.ROOT, " %-14s: %.2f%n", "minSimplicity", o.minSimplicity);
|
||||
System.out.printf(Locale.ROOT, " %-14s: %d%n", "threads", o.threads);
|
||||
System.out.printf(Locale.ROOT, " %-14s: %d%n", "maxTries", o.tries);
|
||||
}
|
||||
|
||||
/** Formats a grid position as {@code (row,col)}. */
private static String fmtPoint(int r, int c) {
    return "(" + r + "," + c + ")";
}
|
||||
|
||||
private static void printWordsTable(List<ExportFormat.WordOut> words) {
|
||||
System.out.println(" # WORD CX DIR START ARROW CLUE");
|
||||
var i = 1;
|
||||
for (var w : words) {
|
||||
System.out.printf(
|
||||
Locale.ROOT,
|
||||
" %-2d %-12s %-3s %-3s %-9s %-9s %s%n",
|
||||
i++,
|
||||
safe(w.word(), 12),
|
||||
safe("" + w.complex(), 3),
|
||||
safe(w.direction(), 3),
|
||||
fmtPoint(w.startRow(), w.startCol()),
|
||||
fmtPoint(w.arrowRow(), w.arrowCol()),
|
||||
w.clue() == null ? "" : w.clue()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
private static String safe(String s, int max) {
|
||||
if (s == null) return "";
|
||||
if (s.length() <= max) return s;
|
||||
return s.substring(0, Math.max(0, max - 1)) + "…";
|
||||
}
|
||||
|
||||
private static String indentLines(String s, String indent) {
|
||||
if (s == null || s.isEmpty()) return "";
|
||||
var lines = s.split("\\R", -1);
|
||||
var sb = new StringBuilder();
|
||||
for (var line : lines) sb.append(indent).append(line).append('\n');
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
static void usage() {
|
||||
System.out.println("""
|
||||
Usage:
|
||||
java puzzle.Main [--seed N] [--pop N] [--gens N] [--tries N] [--words FILE] [--min-simplicity N.N] [--threads N] [--reindex]
|
||||
|
||||
Defaults:
|
||||
--pop 18
|
||||
--gens 600
|
||||
--tries = threads
|
||||
--words nl_score_hints.csv
|
||||
--min-simplicity 0 (no limit)
|
||||
--threads %d
|
||||
""".formatted(Math.max(1, Runtime.getRuntime().availableProcessors())));
|
||||
}
|
||||
|
||||
static Opts parseArgs(String[] argv) {
|
||||
var out = new Opts();
|
||||
for (var i = 0; i < argv.length; i++) {
|
||||
var a = argv[i];
|
||||
var v = (i + 1 < argv.length) ? argv[i + 1] : null;
|
||||
|
||||
if (a.equals("--help") || a.equals("-h")) {
|
||||
usage();
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
if (a.equals("--seed")) {
|
||||
out.seed = Integer.parseInt(v);
|
||||
i++;
|
||||
} else if (a.equals("--pop")) {
|
||||
out.pop = Integer.parseInt(v);
|
||||
i++;
|
||||
} else if (a.equals("--gens")) {
|
||||
out.gens = Integer.parseInt(v);
|
||||
i++;
|
||||
} else if (a.equals("--tries")) {
|
||||
out.tries = Integer.parseInt(v);
|
||||
i++;
|
||||
} else if (a.equals("--words")) {
|
||||
out.wordsPath = v;
|
||||
i++;
|
||||
} else if (a.equals("--min-simplicity")) {
|
||||
out.minSimplicity = Double.parseDouble(v);
|
||||
i++;
|
||||
} else if (a.equals("--threads")) {
|
||||
out.threads = Integer.parseInt(v);
|
||||
i++;
|
||||
} else if (a.equals("--reindex")) {
|
||||
out.reindex = true;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Unknown arg: " + a);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
// ---------------- Generation ----------------
|
||||
|
||||
// Package-private method for testing
|
||||
PuzzleResult generatePuzzle(Opts opts) {
|
||||
|
||||
var tLoad0 = System.nanoTime();
|
||||
var dict = loadWords(opts.wordsPath);
|
||||
var tLoad1 = System.nanoTime();
|
||||
|
||||
section("Load");
|
||||
info(String.format(Locale.ROOT, "words : %,d", dict.words().size()));
|
||||
info(String.format(Locale.ROOT, "loadTime : %.3f s", (tLoad1 - tLoad0) / 1e9));
|
||||
|
||||
section("Search");
|
||||
|
||||
if (opts.threads > 1) {
|
||||
info("mode : multi-threaded (" + opts.threads + ")");
|
||||
var executor = Executors.newFixedThreadPool(opts.threads);
|
||||
try {
|
||||
var tasks = new ArrayList<Callable<PuzzleResult>>();
|
||||
for (var i = 1; i <= opts.tries; i++) {
|
||||
final var attempt = i;
|
||||
tasks.add(() -> {
|
||||
var threadRng = new Rng(opts.seed + attempt);
|
||||
var mask = generateMask(threadRng, dict.lenCounts(), opts.pop, opts.gens, false);
|
||||
var filled = fillMask(threadRng, mask, dict.index(), dict.words(), 200, 30000, false);
|
||||
|
||||
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
||||
info("status : SOLVED");
|
||||
info("foundAtTry : " + attempt);
|
||||
return new PuzzleResult(dict, mask, filled);
|
||||
}
|
||||
throw new RuntimeException("No solution in try " + attempt);
|
||||
});
|
||||
}
|
||||
return executor.invokeAny(tasks);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
warn("status : INTERRUPTED");
|
||||
} catch (ExecutionException e) {
|
||||
// all failed
|
||||
warn("status : UNSOLVED");
|
||||
} finally {
|
||||
executor.shutdownNow();
|
||||
}
|
||||
return null;
|
||||
|
||||
} else {
|
||||
info("mode : single-threaded");
|
||||
var rng = new Rng(opts.seed);
|
||||
|
||||
for (var attempt = 1; attempt <= opts.tries; attempt++) {
|
||||
info("try : " + attempt + "/" + opts.tries);
|
||||
|
||||
var mask = generateMask(rng, dict.lenCounts(), opts.pop, opts.gens, true);
|
||||
var filled = fillMask(rng, mask, dict.index(), dict.words(), 200, 30000, true);
|
||||
|
||||
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
||||
info("status : SOLVED");
|
||||
info("foundAtTry : " + attempt);
|
||||
return new PuzzleResult(dict, mask, filled);
|
||||
}
|
||||
|
||||
if (filled.ok) {
|
||||
warn(String.format(Locale.ROOT,
|
||||
"simplicity : %.2f (below min %.2f)",
|
||||
filled.simplicity, opts.minSimplicity
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
info("status : UNSOLVED");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------- Export (unchanged logic) ----------------
|
||||
|
||||
private static String toJson(ExportFormat.ExportedPuzzle puzzle, String date, String theme) {
|
||||
var sb = new StringBuilder();
|
||||
sb.append("{\n");
|
||||
sb.append(" \"date\": \"").append(escapeJson(date)).append("\",\n");
|
||||
sb.append(" \"theme\": \"").append(escapeJson(theme)).append("\",\n");
|
||||
sb.append(" \"difficulty\": ").append(puzzle.difficulty()).append(",\n");
|
||||
sb.append(" \"rewards\": {\n");
|
||||
sb.append(" \"coins\": ").append(puzzle.rewards().coins()).append(",\n");
|
||||
sb.append(" \"stars\": ").append(puzzle.rewards().stars()).append(",\n");
|
||||
sb.append(" \"hints\": ").append(puzzle.rewards().hints()).append("\n");
|
||||
sb.append(" },\n");
|
||||
sb.append(" \"gridv2\": [\n");
|
||||
for (var i = 0; i < puzzle.gridv2().size(); i++) {
|
||||
sb.append(" \"").append(escapeJson(puzzle.gridv2().get(i))).append("\"");
|
||||
if (i < puzzle.gridv2().size() - 1) sb.append(",");
|
||||
sb.append("\n");
|
||||
}
|
||||
sb.append(" ],\n");
|
||||
sb.append(" \"words\": [\n");
|
||||
for (var i = 0; i < puzzle.words().size(); i++) {
|
||||
var w = puzzle.words().get(i);
|
||||
sb.append(" {\n");
|
||||
sb.append(" \"word\": \"").append(escapeJson(w.word())).append("\",\n");
|
||||
sb.append(" \"clue\": \"").append(escapeJson(w.clue())).append("\",\n");
|
||||
sb.append(" \"startRow\": ").append(w.startRow()).append(",\n");
|
||||
sb.append(" \"startCol\": ").append(w.startCol()).append(",\n");
|
||||
sb.append(" \"direction\": \"").append(escapeJson(w.direction())).append("\",\n");
|
||||
sb.append(" \"answer\": \"").append(escapeJson(w.answer())).append("\",\n");
|
||||
sb.append(" \"arrowRow\": ").append(w.arrowRow()).append(",\n");
|
||||
sb.append(" \"arrowCol\": ").append(w.arrowCol()).append(",\n");
|
||||
sb.append(" \"isReversed\": ").append(w.isReversed()).append("\n");
|
||||
sb.append(" }");
|
||||
if (i < puzzle.words().size() - 1) sb.append(",");
|
||||
sb.append("\n");
|
||||
}
|
||||
sb.append(" ]\n");
|
||||
sb.append("}\n");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private static String escapeJson(String s) {
|
||||
return s.replace("\\", "\\\\")
|
||||
.replace("\"", "\\\"")
|
||||
.replace("\n", "\\n")
|
||||
.replace("\r", "\\r")
|
||||
.replace("\t", "\\t");
|
||||
}
|
||||
|
||||
private static String toIndexRecordJson(String id, String path, String date, String theme, int difficulty, String createdAt) {
|
||||
return String.format(
|
||||
Locale.ROOT,
|
||||
"{\"id\":\"%s\",\"path\":\"%s\",\"date\":\"%s\",\"theme\":\"%s\",\"difficulty\":%d,\"createdAt\":\"%s\"}",
|
||||
escapeJson(id), escapeJson(path), escapeJson(date), escapeJson(theme), difficulty, escapeJson(createdAt)
|
||||
);
|
||||
}
|
||||
|
||||
private static void updateIndex(String outDir, String newRecordJson) {
|
||||
var indexPath = Paths.get(outDir, "index.json");
|
||||
try {
|
||||
var content = Files.exists(indexPath) ? Files.readString(indexPath, StandardCharsets.UTF_8).trim() : "";
|
||||
|
||||
if (content.isEmpty() || content.equals("[]")) {
|
||||
content = "[\n " + newRecordJson + "\n]";
|
||||
} else {
|
||||
var firstBracket = content.indexOf('[');
|
||||
if (firstBracket != -1) {
|
||||
content = content.substring(0, firstBracket + 1) + "\n " + newRecordJson + "," + content.substring(firstBracket + 1);
|
||||
} else {
|
||||
content = "[\n " + newRecordJson + "\n]";
|
||||
}
|
||||
}
|
||||
|
||||
Files.writeString(indexPath, content, StandardCharsets.UTF_8);
|
||||
info("indexUpdated : " + indexPath);
|
||||
} catch (IOException e) {
|
||||
err("Failed to update index.json: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private void rebuildIndex() {
|
||||
|
||||
if (!Files.exists(PUZZLE_DIR)) {
|
||||
err("Puzzles directory does not exist: " + PUZZLE_DIR);
|
||||
return;
|
||||
}
|
||||
|
||||
info("Rebuilding index from: " + PUZZLE_DIR);
|
||||
|
||||
List<String> records = new ArrayList<>();
|
||||
try (var stream = Files.list(PUZZLE_DIR)) {
|
||||
stream.filter(p -> p.toString().endsWith(".json") && !p.getFileName().toString().equals("index.json"))
|
||||
.sorted(Comparator.comparing(Path::getFileName).reversed())
|
||||
.forEach(path -> {
|
||||
try {
|
||||
var filename = path.getFileName().toString();
|
||||
var id = filename.substring(0, filename.length() - 5);
|
||||
var content = Files.readString(path, StandardCharsets.UTF_8);
|
||||
|
||||
var date = extractValue(content, "date");
|
||||
var theme = extractValue(content, "theme");
|
||||
var difficulty = 1;
|
||||
try { difficulty = Integer.parseInt(extractValue(content, "difficulty")); } catch (Exception ignored) { }
|
||||
|
||||
var createdAt = id;
|
||||
if (id.length() >= 20 && id.charAt(10) == 'T') {
|
||||
var parts = id.split("_");
|
||||
var dtPart = parts[0]; // 2025-12-24T04-25-06Z
|
||||
if (dtPart.length() >= 19) {
|
||||
createdAt = dtPart.substring(0, 13) + ":" + dtPart.substring(14, 16) + ":" + dtPart.substring(17);
|
||||
}
|
||||
}
|
||||
|
||||
var pathInIndex = "/puzzles/" + filename;
|
||||
records.add(toIndexRecordJson(id, pathInIndex, date, theme, difficulty, createdAt));
|
||||
} catch (IOException e) {
|
||||
err("Failed to read " + path + ": " + e.getMessage());
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
err("Failed to list puzzles: " + e.getMessage());
|
||||
return;
|
||||
}
|
||||
|
||||
var indexPath = PUZZLE_DIR.resolve("index.json");
|
||||
var content = "[\n " + String.join(",\n ", records) + "\n]";
|
||||
try {
|
||||
Files.writeString(indexPath, content, StandardCharsets.UTF_8);
|
||||
info("Successfully rebuilt index.json with " + records.size() + " records.");
|
||||
} catch (IOException e) {
|
||||
err("Failed to write index.json: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private static String extractValue(String json, String key) {
|
||||
var pattern = java.util.regex.Pattern.compile("\"" + key + "\":\\s*\"?([^\",\\n\\r}]*)\"?");
|
||||
var matcher = pattern.matcher(json);
|
||||
if (matcher.find()) return matcher.group(1).trim();
|
||||
return "";
|
||||
}
|
||||
}
|
||||
32
src/main/java/puzzle/MainTest.java
Normal file
32
src/main/java/puzzle/MainTest.java
Normal file
@@ -0,0 +1,32 @@
|
||||
package puzzle;
|
||||
|
||||
//import org.junit.jupiter.api.Test;
|
||||
//import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
public class MainTest {
|
||||
|
||||
static void main() {
|
||||
new MainTest().testGeneratePuzzle();
|
||||
}
|
||||
// @Test
|
||||
public void testGeneratePuzzle() {
|
||||
// Arrange
|
||||
var opts = new Main.Opts();
|
||||
opts.seed = 1234;
|
||||
opts.pop = 18;
|
||||
opts.gens = 300;
|
||||
opts.wordsPath = "src/test/resources/puzzle/pool.txt";
|
||||
opts.minSimplicity = 0;
|
||||
opts.threads = 1;
|
||||
opts.tries = 1;
|
||||
|
||||
// Act
|
||||
var result = new Main().generatePuzzle(opts);
|
||||
|
||||
// Assert
|
||||
/* assertNotNull(result);
|
||||
assertNotNull(result.mask());
|
||||
assertNotNull(result.filled());
|
||||
assertTrue(result.filled().ok);*/
|
||||
}
|
||||
}
|
||||
931
src/main/java/puzzle/SwedishGenerator.java
Normal file
931
src/main/java/puzzle/SwedishGenerator.java
Normal file
@@ -0,0 +1,931 @@
|
||||
package puzzle;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* SwedishGenerator.java
|
||||
*
|
||||
* Usage:
|
||||
* javac SwedishGenerator.java
|
||||
* java SwedishGenerator [--seed N] [--pop N] [--gens N] [--tries N] [--words word-list.txt]
|
||||
*/
|
||||
@SuppressWarnings("ALL")
|
||||
public class SwedishGenerator {
|
||||
|
||||
static final int W = 9, H = 8,
|
||||
CLUE_SIZE = 4,
|
||||
SIMPLICITY_DEFAULT_SCORE = 2;
|
||||
static final int MIN_LEN = 2, MAX_LEN = 8;
|
||||
// Directions for '1'..'6'
|
||||
static final int[][] OFFSETS = new int[7][2];
|
||||
static final int[][] STEPS = new int[7][2];
|
||||
static {
|
||||
// 1: up
|
||||
OFFSETS[1] = new int[]{ -1, 0 };
|
||||
STEPS[1] = new int[]{ -1, 0 };
|
||||
// 2: right
|
||||
OFFSETS[2] = new int[]{ 0, 1 };
|
||||
STEPS[2] = new int[]{ 0, 1 };
|
||||
// 3: down
|
||||
OFFSETS[3] = new int[]{ 1, 0 };
|
||||
STEPS[3] = new int[]{ 1, 0 };
|
||||
// 4: left
|
||||
OFFSETS[4] = new int[]{ 0, -1 };
|
||||
STEPS[4] = new int[]{ 0, -1 };
|
||||
// 5: vertical down, clue is on the right of the first letter
|
||||
OFFSETS[5] = new int[]{ 0, -1 };
|
||||
STEPS[5] = new int[]{ 1, 0 };
|
||||
// 6: vertical down, clue is on the left of the first letter
|
||||
OFFSETS[6] = new int[]{ 0, 1 };
|
||||
STEPS[6] = new int[]{ 1, 0 };
|
||||
}
|
||||
static final char FIRST_ABC = 'A';
|
||||
static final char LAST_ABC = 'Z';
|
||||
static final char FIRST_ARROW = '1', LAST_ARROW = '6', HOR_ARROW_1 = '2', HOR_ARROW_2 = '4';
|
||||
static boolean isDigit(char ch) { return ch >= FIRST_ARROW && ch <= LAST_ARROW; }
|
||||
static boolean isLetter(char ch) { return ch >= FIRST_ABC && ch <= LAST_ABC; }
|
||||
static boolean isLetterCell(char ch) { return ch == '#' || isLetter(ch); }
|
||||
|
||||
// ---------------- RNG (xorshift32) ----------------
|
||||
|
||||
static final class Rng {
|
||||
|
||||
private int x;
|
||||
Rng(int seed) {
|
||||
var s = seed;
|
||||
if (s == 0) s = 1;
|
||||
this.x = s;
|
||||
}
|
||||
int nextU32() {
|
||||
var y = x;
|
||||
y ^= (y << 13);
|
||||
y ^= (y >>> 17);
|
||||
y ^= (y << 5);
|
||||
x = y;
|
||||
return y;
|
||||
}
|
||||
int randint(int min, int max) { // inclusive
|
||||
var u = (nextU32() & 0xFFFFFFFFL);
|
||||
var range = (long) max - (long) min + 1L;
|
||||
return (int) (min + (u % range));
|
||||
}
|
||||
double nextFloat() { return (nextU32() & 0xFFFFFFFFL) / 4294967295.0; }
|
||||
}
|
||||
|
||||
/** Limits {@code x} to at most {@code b}, then raises the result to at least {@code a}. */
static int clamp(int x, int a, int b) {
    final int capped = (x < b) ? x : b;
    return (capped > a) ? capped : a;
}
|
||||
|
||||
// ---------------- Grid helpers ----------------
|
||||
static char[][] makeEmptyGrid() {
|
||||
var g = new char[H][W];
|
||||
for (var r = 0; r < H; r++) Arrays.fill(g[r], '#');
|
||||
return g;
|
||||
}
|
||||
|
||||
static char[][] deepCopyGrid(char[][] g) {
|
||||
var out = new char[H][W];
|
||||
for (var r = 0; r < H; r++) out[r] = Arrays.copyOf(g[r], W);
|
||||
return out;
|
||||
}
|
||||
|
||||
static String gridToString(char[][] g) {
|
||||
var sb = new StringBuilder();
|
||||
for (var r = 0; r < H; r++) {
|
||||
if (r > 0) sb.append('\n');
|
||||
sb.append(g[r]);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
static String renderHuman(char[][] g) {
|
||||
var sb = new StringBuilder();
|
||||
for (var r = 0; r < H; r++) {
|
||||
if (r > 0) sb.append('\n');
|
||||
for (var c = 0; c < W; c++) {
|
||||
var ch = g[r][c];
|
||||
sb.append(isDigit(ch) ? ' ' : ch);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
// ---------------- Words / index ----------------
|
||||
|
||||
static final class IntList {
|
||||
|
||||
int[] a = new int[8];
|
||||
int n = 0;
|
||||
void add(int v) {
|
||||
if (n >= a.length) a = Arrays.copyOf(a, a.length * 2);
|
||||
a[n++] = v;
|
||||
}
|
||||
void replaceAll(int[] newData) {
|
||||
this.a = newData;
|
||||
this.n = newData.length;
|
||||
}
|
||||
int size() { return n; }
|
||||
int[] data() { return a; } // note: may have extra capacity
|
||||
}
|
||||
|
||||
static final class DictEntry {
|
||||
|
||||
final ArrayList<String> words = new ArrayList<>();
|
||||
final IntList[][] pos; // pos[i][letter] -> indices (sorted by insertion)
|
||||
DictEntry(int L) {
|
||||
pos = new IntList[L][26];
|
||||
for (var i = 0; i < L; i++) {
|
||||
for (var j = 0; j < 26; j++) pos[i][j] = new IntList();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross) {
|
||||
|
||||
public WordDifficulty(String word, int simpel, int score) {
|
||||
var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
|
||||
var crossScore = ThemePoolBuilderLength.crossabilityScore(word);
|
||||
this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15));
|
||||
|
||||
// Prioritize simple words (high lScore) and long words.
|
||||
// lScore (1-10) adds up to 1000 points (weight 100).
|
||||
// Length (2-8) adds up to 160 points (weight 20).
|
||||
// We want LONGER and SIMPLER words to be tried earlier (lower difficulty value).
|
||||
// word.length() is 2 to 8.
|
||||
// score is 1 to 10.
|
||||
// Base difficulty starts high and decreases with length and score.
|
||||
// Length impact: up to 8 * 10 = 80
|
||||
// Score impact: up to 10 * 15 = 150
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static record Dict(Map<String, WordDifficulty> words,
|
||||
HashMap<Integer, DictEntry> index,
|
||||
HashMap<Integer, Integer> lenCounts) { }
|
||||
static Dict loadWords(String wordsPath) {
|
||||
String raw;
|
||||
try {
|
||||
raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8);
|
||||
} catch (IOException e) {
|
||||
raw = "WOORD,level_1_to_10,hint\nEU,2,hint\nUUR,2,hint\nAUTO,2,hint\nBOOM,2,hint\nHUIS,2,hint\nKAT,2,hint\nZEE,2,hint\nRODE,2,hint\nDRAAD,2,hint\nKENNIS,2,hint\nNETWERK,2,hint\nPAKTE,2,hint\n";
|
||||
}
|
||||
|
||||
var map = new HashMap<String, WordDifficulty>();
|
||||
boolean first = true;
|
||||
for (var line : raw.split("\\R")) {
|
||||
if (line.isBlank()) continue;
|
||||
var parts = line.split(",", 4);
|
||||
var word = parts[0].trim();
|
||||
if (first && word.equalsIgnoreCase("WOORD")) {
|
||||
first = false;
|
||||
continue;
|
||||
}
|
||||
first = false;
|
||||
var s = word.toUpperCase(Locale.ROOT);
|
||||
if (s.matches("^[A-Z]{2,8}$")) {
|
||||
int score = SIMPLICITY_DEFAULT_SCORE;
|
||||
int simpel = 0;
|
||||
// CSV has level 1-10. llmScores use 10-level.
|
||||
score = 10 - Integer.parseInt(parts[1].trim());
|
||||
simpel = Integer.parseInt(parts[2].trim());
|
||||
if (score >= 1)
|
||||
map.put(s, new WordDifficulty(s, simpel, score));
|
||||
}
|
||||
}
|
||||
var words = map.values().stream().collect(Collectors.toCollection(ArrayList::new));
|
||||
// Sort words by difficulty in ascending order
|
||||
words.sort(Comparator.comparingInt(wd -> wd.simpel));
|
||||
|
||||
var index = new HashMap<Integer, DictEntry>();
|
||||
var lenCounts = new HashMap<Integer, Integer>();
|
||||
|
||||
for (var w : words) {
|
||||
var L = w.word.length();
|
||||
lenCounts.put(L, lenCounts.getOrDefault(L, 0) + 1);
|
||||
|
||||
var entry = index.get(L);
|
||||
if (entry == null) {
|
||||
entry = new DictEntry(L);
|
||||
index.put(L, entry);
|
||||
}
|
||||
|
||||
var idx = entry.words.size();
|
||||
entry.words.add(w.word);
|
||||
|
||||
for (var i = 0; i < L; i++) {
|
||||
var letter = w.word.charAt(i) - 'A';
|
||||
if (letter >= 0 && letter < 26) entry.pos[i][letter].add(idx);
|
||||
}
|
||||
}
|
||||
|
||||
return new Dict(map, index, lenCounts);
|
||||
}
|
||||
|
||||
static int[] intersectSorted(int[] a, int aLen, int[] b, int bLen) {
|
||||
var out = new int[Math.min(aLen, bLen)];
|
||||
int i = 0, j = 0, k = 0;
|
||||
while (i < aLen && j < bLen) {
|
||||
int x = a[i], y = b[j];
|
||||
if (x == y) {
|
||||
out[k++] = x;
|
||||
i++;
|
||||
j++;
|
||||
} else if (x < y) i++;
|
||||
else j++;
|
||||
}
|
||||
return Arrays.copyOf(out, k);
|
||||
}
|
||||
|
||||
static final record CandidateInfo(int[] indices, int count) {
|
||||
|
||||
}
|
||||
static CandidateInfo candidateInfoForPattern(DictEntry entry, char[] pattern /* 0 means null */) {
|
||||
var lists = new ArrayList<IntList>();
|
||||
for (var i = 0; i < pattern.length; i++) {
|
||||
var ch = pattern[i];
|
||||
if (ch != 0 && isLetter(ch)) {
|
||||
lists.add(entry.pos[i][ch - 'A']);
|
||||
}
|
||||
}
|
||||
|
||||
if (lists.isEmpty()) {
|
||||
return new CandidateInfo(null, entry.words.size());
|
||||
}
|
||||
|
||||
var first = lists.get(0);
|
||||
var cur = Arrays.copyOf(first.data(), first.size());
|
||||
var curLen = cur.length;
|
||||
|
||||
for (var k = 1; k < lists.size(); k++) {
|
||||
var nxt = lists.get(k);
|
||||
var nextArr = nxt.data();
|
||||
var nextLen = nxt.size();
|
||||
cur = intersectSorted(cur, curLen, nextArr, nextLen);
|
||||
curLen = cur.length;
|
||||
if (curLen == 0) break;
|
||||
}
|
||||
|
||||
return new CandidateInfo(cur, curLen);
|
||||
}
|
||||
// ---------------- Slots ----------------
|
||||
|
||||
static record Slot(int clueR, int clueC, char dir, int[] rs, int[] cs, int len) {
|
||||
|
||||
public Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) {
|
||||
this(clueR, clueC, dir, rs, cs, rs.length);
|
||||
}
|
||||
String key() { return clueR + "," + clueC + ":" + dir; }
|
||||
}
|
||||
|
||||
static ArrayList<Slot> extractSlots(char[][] grid) {
|
||||
var slots = new ArrayList<Slot>();
|
||||
for (var r = 0; r < H; r++) {
|
||||
for (var c = 0; c < W; c++) {
|
||||
var d = grid[r][c];
|
||||
if (!isDigit(d)) continue;
|
||||
var dir = d - '0';
|
||||
// Check all possible directions for clue placement
|
||||
// for (int dir = 1; dir <= 4; dir++) {
|
||||
int or = OFFSETS[dir][0], oc = OFFSETS[dir][1];
|
||||
int dr = STEPS[dir][0], dc = STEPS[dir][1];
|
||||
|
||||
int rr = r + or, cc = c + oc;
|
||||
if (rr < 0 || rr >= H || cc < 0 || cc >= W) continue;
|
||||
if (!isLetterCell(grid[rr][cc])) continue;
|
||||
|
||||
var rs = new int[MAX_LEN + 1];
|
||||
var cs = new int[MAX_LEN + 1];
|
||||
var n = 0;
|
||||
|
||||
while (rr >= 0 && rr < H && cc >= 0 && cc < W) {
|
||||
var ch = grid[rr][cc];
|
||||
if (!isLetterCell(ch)) break;
|
||||
rs[n] = rr;
|
||||
cs[n] = cc;
|
||||
n++;
|
||||
rr += dr;
|
||||
cc += dc;
|
||||
if (n > MAX_LEN) break;
|
||||
}
|
||||
|
||||
slots.add(new Slot(r, c, d, Arrays.copyOf(rs, n), Arrays.copyOf(cs, n)));
|
||||
// }
|
||||
}
|
||||
}
|
||||
return slots;
|
||||
}
|
||||
static boolean hasRoomForClue(char[][] grid, int r, int c, char d) {
|
||||
var di = d - '0';
|
||||
int or = OFFSETS[di][0], oc = OFFSETS[di][1];
|
||||
int dr = STEPS[di][0], dc = STEPS[di][1];
|
||||
int rr = r + or, cc = c + oc;
|
||||
var run = 0;
|
||||
while (rr >= 0 && rr < H && cc >= 0 && cc < W && isLetterCell(grid[rr][cc]) && run < MAX_LEN) {
|
||||
run++;
|
||||
rr += dr;
|
||||
cc += dc;
|
||||
}
|
||||
return run >= MIN_LEN;
|
||||
}
|
||||
|
||||
// ---------------- FAST mask fitness ----------------
|
||||
|
||||
static long maskFitness(char[][] grid, HashMap<Integer, Integer> lenCounts) {
|
||||
long penalty = 0;
|
||||
|
||||
var clueCount = 0;
|
||||
for (var r = 0; r < H; r++) for (var c = 0; c < W; c++) if (isDigit(grid[r][c])) clueCount++;
|
||||
|
||||
var targetClues = (int) Math.round(W * H * 0.25); // ~18
|
||||
penalty += 8L * Math.abs(clueCount - targetClues);
|
||||
|
||||
var slots = extractSlots(grid);
|
||||
if (slots.isEmpty()) return 1_000_000_000L;
|
||||
|
||||
var covH = new int[H][W];
|
||||
var covV = new int[H][W];
|
||||
|
||||
for (var s : slots) {
|
||||
var horiz = (s.dir == HOR_ARROW_1 || s.dir == HOR_ARROW_2);
|
||||
|
||||
if (s.len < MIN_LEN) penalty += 8000;
|
||||
if (s.len > MAX_LEN) penalty += 8000 + (long) (s.len - MAX_LEN) * 500L;
|
||||
|
||||
if (s.len >= MIN_LEN && s.len <= MAX_LEN) {
|
||||
if (!lenCounts.containsKey(s.len)) penalty += 12000;
|
||||
}
|
||||
|
||||
for (var i = 0; i < s.len; i++) {
|
||||
int r = s.rs[i], c = s.cs[i];
|
||||
if (horiz) covH[r][c] += 1;
|
||||
else covV[r][c] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (var r = 0; r < H; r++)
|
||||
for (var c = 0; c < W; c++) {
|
||||
if (!isLetterCell(grid[r][c])) continue;
|
||||
int h = covH[r][c], v = covV[r][c];
|
||||
if (h == 0 && v == 0) penalty += 1500;
|
||||
else if (h > 0 && v > 0) { /* ok */ } else if (h + v == 1) penalty += 200;
|
||||
else penalty += 600;
|
||||
}
|
||||
|
||||
// clue clustering (8-connected)
|
||||
var seen = new boolean[H][W];
|
||||
var stack = new int[W * H];
|
||||
int sp;
|
||||
var nbrs8 = new int[][]{
|
||||
{ -1, -1 }, { -1, 0 }, { -1, 1 },
|
||||
{ 0, -1 }, { 0, 1 },
|
||||
{ 1, -1 }, { 1, 0 }, { 1, 1 }
|
||||
};
|
||||
|
||||
for (var r = 0; r < H; r++)
|
||||
for (var c = 0; c < W; c++) {
|
||||
if (!isDigit(grid[r][c]) || seen[r][c]) continue;
|
||||
sp = 0;
|
||||
stack[sp++] = r * W + c;
|
||||
seen[r][c] = true;
|
||||
var size = 0;
|
||||
|
||||
while (sp > 0) {
|
||||
var p = stack[--sp];
|
||||
int x = p / W, y = p % W;
|
||||
size++;
|
||||
|
||||
for (var d : nbrs8) {
|
||||
int nx = x + d[0], ny = y + d[1];
|
||||
if (nx < 0 || nx >= H || ny < 0 || ny >= W) continue;
|
||||
if (seen[nx][ny]) continue;
|
||||
if (!isDigit(grid[nx][ny])) continue;
|
||||
seen[nx][ny] = true;
|
||||
stack[sp++] = nx * W + ny;
|
||||
}
|
||||
}
|
||||
|
||||
if (size >= 2) penalty += (long) (size - 1) * 120L;
|
||||
}
|
||||
|
||||
// dead-end-ish letter cell (3+ walls)
|
||||
var nbrs4 = new int[][]{ { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
|
||||
for (var r = 0; r < H; r++)
|
||||
for (var c = 0; c < W; c++) {
|
||||
if (!isLetterCell(grid[r][c])) continue;
|
||||
var walls = 0;
|
||||
for (var d : nbrs4) {
|
||||
int rr = r + d[0], cc = c + d[1];
|
||||
if (rr < 0 || rr >= H || cc < 0 || cc >= W) {
|
||||
walls++;
|
||||
continue;
|
||||
}
|
||||
if (!isLetterCell(grid[rr][cc])) walls++;
|
||||
}
|
||||
if (walls >= 3) penalty += 400;
|
||||
}
|
||||
|
||||
return penalty;
|
||||
}
|
||||
|
||||
// ---------------- Mask generation ----------------
|
||||
|
||||
static char[][] randomMask(Rng rng) {
|
||||
var g = makeEmptyGrid();
|
||||
var targetClues = (int) Math.round(W * H * 0.25);
|
||||
int placed = 0, guard = 0;
|
||||
|
||||
while (placed < targetClues && guard++ < 4000) {
|
||||
var r = rng.randint(0, H - 1);
|
||||
var c = rng.randint(0, W - 1);
|
||||
if (isDigit(g[r][c])) continue;
|
||||
|
||||
var d = (char) ('0' + rng.randint(1, c == 0 ? CLUE_SIZE : 4));
|
||||
g[r][c] = d;
|
||||
if (!hasRoomForClue(g, r, c, d)) {
|
||||
g[r][c] = '#';
|
||||
continue;
|
||||
}
|
||||
placed++;
|
||||
}
|
||||
return g;
|
||||
}
|
||||
|
||||
static char[][] mutate(Rng rng, char[][] grid) {
|
||||
var g = deepCopyGrid(grid);
|
||||
var cx = rng.randint(0, H - 1);
|
||||
var cy = rng.randint(0, W - 1);
|
||||
|
||||
var steps = 4;
|
||||
for (var k = 0; k < steps; k++) {
|
||||
var rr = clamp(cx + (rng.randint(-2, 2) + rng.randint(-2, 2)), 0, H - 1);
|
||||
var cc = clamp(cy + (rng.randint(-2, 2) + rng.randint(-2, 2)), 0, W - 1);
|
||||
|
||||
var cur = g[rr][cc];
|
||||
if (isDigit(cur)) {
|
||||
g[rr][cc] = '#';
|
||||
} else {
|
||||
var d = (char) ('0' + rng.randint(1, cc == 0 ? CLUE_SIZE : 4));
|
||||
g[rr][cc] = d;
|
||||
if (!hasRoomForClue(g, rr, cc, d)) g[rr][cc] = '#';
|
||||
}
|
||||
}
|
||||
return g;
|
||||
}
|
||||
|
||||
static char[][] crossover(Rng rng, char[][] a, char[][] b) {
|
||||
var out = makeEmptyGrid();
|
||||
var cx = (H - 1) / 2.0;
|
||||
var cy = (W - 1) / 2.0;
|
||||
var theta = rng.nextFloat() * Math.PI;
|
||||
var nx = Math.cos(theta);
|
||||
var ny = Math.sin(theta);
|
||||
|
||||
for (var r = 0; r < H; r++)
|
||||
for (var c = 0; c < W; c++) {
|
||||
double x = r - cx, y = c - cy;
|
||||
var side = x * nx + y * ny;
|
||||
out[r][c] = (side >= 0) ? a[r][c] : b[r][c];
|
||||
}
|
||||
|
||||
for (var r = 0; r < H; r++)
|
||||
for (var c = 0; c < W; c++) {
|
||||
var ch = out[r][c];
|
||||
if (isDigit(ch) && !hasRoomForClue(out, r, c, ch)) out[r][c] = '#';
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
static char[][] hillclimb(Rng rng, char[][] start, HashMap<Integer, Integer> lenCounts, int limit) {
|
||||
var best = deepCopyGrid(start);
|
||||
var bestF = maskFitness(best, lenCounts);
|
||||
var fails = 0;
|
||||
|
||||
while (fails < limit) {
|
||||
var cand = mutate(rng, best);
|
||||
var f = maskFitness(cand, lenCounts);
|
||||
if (f < bestF) {
|
||||
best = cand;
|
||||
bestF = f;
|
||||
fails = 0;
|
||||
} else {
|
||||
fails++;
|
||||
}
|
||||
}
|
||||
return best;
|
||||
}
|
||||
|
||||
static double similarity(char[][] a, char[][] b) {
|
||||
var same = 0;
|
||||
for (var r = 0; r < H; r++) for (var c = 0; c < W; c++) if (a[r][c] == b[r][c]) same++;
|
||||
return same / (double) (W * H);
|
||||
}
|
||||
|
||||
static char[][] generateMask(Rng rng, HashMap<Integer, Integer> lenCounts, int popSize, int gens, boolean verbose) {
|
||||
if (verbose) System.out.println("generateMask init pop: " + popSize);
|
||||
var pop = new ArrayList<char[][]>();
|
||||
|
||||
for (var i = 0; i < popSize; i++) {
|
||||
var g = randomMask(rng);
|
||||
pop.add(hillclimb(rng, g, lenCounts, 180));
|
||||
}
|
||||
|
||||
for (var gen = 0; gen < gens; gen++) {
|
||||
if (Thread.currentThread().isInterrupted()) break;
|
||||
var children = new ArrayList<char[][]>();
|
||||
var pairs = Math.max(popSize, (int) Math.floor(popSize * 1.5));
|
||||
|
||||
for (var k = 0; k < pairs; k++) {
|
||||
var p1 = pop.get(rng.randint(0, pop.size() - 1));
|
||||
var p2 = pop.get(rng.randint(0, pop.size() - 1));
|
||||
var child = crossover(rng, p1, p2);
|
||||
children.add(hillclimb(rng, child, lenCounts, 70));
|
||||
}
|
||||
|
||||
pop.addAll(children);
|
||||
pop.sort(Comparator.comparingLong(g -> maskFitness(g, lenCounts)));
|
||||
|
||||
var next = new ArrayList<char[][]>();
|
||||
for (var cand : pop) {
|
||||
if (next.size() >= popSize) break;
|
||||
var ok = true;
|
||||
for (var kept : next) {
|
||||
if (similarity(cand, kept) > 0.92) {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ok) next.add(cand);
|
||||
}
|
||||
pop = next;
|
||||
|
||||
if (verbose && gen % 10 == 0) {
|
||||
var bestF = maskFitness(pop.get(0), lenCounts);
|
||||
System.out.println(" gen " + gen + "/" + gens + " bestFitness=" + bestF);
|
||||
}
|
||||
}
|
||||
|
||||
pop.sort(Comparator.comparingLong(g -> maskFitness(g, lenCounts)));
|
||||
return pop.get(0);
|
||||
}
|
||||
|
||||
// ---------------- Fill (CSP) ----------------
|
||||
|
||||
/** Mutable counters describing one run of the fill search. */
public static final class FillStats {

    /** Number of search nodes visited (incremented on every backtrack() call). */
    public long nodes;
    /** Number of times the search had to give up on a branch. */
    public long backtracks;
    /** Wall-clock duration of the fill, in seconds. */
    public double seconds;
    /** Candidate count of the slot chosen most recently by the MRV heuristic. */
    public int lastMRV;
}
|
||||
|
||||
/** Outcome of filling a mask with words. */
public static final class FillResult {

    /** Whether every slot was filled. */
    public boolean ok;
    /** The working grid after the search (complete only when {@link #ok} is true). */
    public char[][] grid;
    /** Slot key to the word placed in that slot. */
    public HashMap<String, String> clueMap;
    /** Search statistics for this run. */
    public FillStats stats;
    /** Mean per-word score of the placed words; only computed when {@link #ok} is true. */
    public double simplicity;
}
|
||||
|
||||
record Undo(int[] rs, int[] cs, char[] prev, int n) {
|
||||
}
|
||||
|
||||
static char[] patternForSlot(char[][] grid, Slot s) {
|
||||
var pat = new char[s.len];
|
||||
for (var i = 0; i < s.len; i++) {
|
||||
var ch = grid[s.rs[i]][s.cs[i]];
|
||||
pat[i] = isLetter(ch) ? ch : 0;
|
||||
}
|
||||
return pat;
|
||||
}
|
||||
|
||||
static int slotScore(int[][] cellCount, Slot s) {
|
||||
var cross = 0;
|
||||
for (var i = 0; i < s.len; i++) cross += (cellCount[s.rs[i]][s.cs[i]] - 1);
|
||||
return cross * 10 + s.len;
|
||||
}
|
||||
|
||||
static Undo placeWord(char[][] grid, Slot s, String w) {
|
||||
var urs = new int[s.len];
|
||||
var ucs = new int[s.len];
|
||||
var up = new char[s.len];
|
||||
var n = 0;
|
||||
|
||||
for (var i = 0; i < s.len; i++) {
|
||||
int r = s.rs[i], c = s.cs[i];
|
||||
var prev = grid[r][c];
|
||||
var ch = w.charAt(i);
|
||||
if (prev == '#') {
|
||||
urs[n] = r;
|
||||
ucs[n] = c;
|
||||
up[n] = prev;
|
||||
n++;
|
||||
grid[r][c] = ch;
|
||||
} else if (prev != ch) {
|
||||
// rollback immediate changes
|
||||
for (var j = 0; j < n; j++) grid[urs[j]][ucs[j]] = up[j];
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return new Undo(urs, ucs, up, n);
|
||||
}
|
||||
|
||||
static void undoPlace(char[][] grid, Undo u) {
|
||||
for (var i = 0; i < u.n; i++) grid[u.rs[i]][u.cs[i]] = u.prev[i];
|
||||
}
|
||||
|
||||
static FillResult fillMask(Rng rng, char[][] mask, HashMap<Integer, DictEntry> dictIndex,
|
||||
Map<String, WordDifficulty> llmScores,
|
||||
int logEveryMs, int timeLimitMs, boolean verbose) {
|
||||
|
||||
var grid = deepCopyGrid(mask);
|
||||
var allSlots = extractSlots(grid);
|
||||
var slots = new ArrayList<Slot>();
|
||||
for (var s : allSlots) if (s.len >= MIN_LEN && s.len <= MAX_LEN) slots.add(s);
|
||||
|
||||
var used = new HashSet<String>();
|
||||
var assigned = new HashMap<String, String>();
|
||||
|
||||
var cellCount = new int[H][W];
|
||||
for (var s : slots) for (var i = 0; i < s.len; i++) cellCount[s.rs[i]][s.cs[i]]++;
|
||||
|
||||
var t0 = System.currentTimeMillis();
|
||||
final var lastLog = new java.util.concurrent.atomic.AtomicLong(t0);
|
||||
|
||||
var stats = new FillStats();
|
||||
final var TOTAL = slots.size();
|
||||
final var BAR_LEN = 22;
|
||||
|
||||
Runnable renderProgress = () -> {
|
||||
if (!verbose) return;
|
||||
var now = System.currentTimeMillis();
|
||||
if ((now - lastLog.get()) < logEveryMs) return;
|
||||
lastLog.set(now);
|
||||
|
||||
var done = assigned.size();
|
||||
var pct = (TOTAL == 0) ? 100 : (int) Math.floor((done / (double) TOTAL) * 100);
|
||||
var filled = Math.min(BAR_LEN, (int) Math.floor((pct / 100.0) * BAR_LEN));
|
||||
var bar = "[" + "#".repeat(filled) + "-".repeat(BAR_LEN - filled) + "]";
|
||||
var elapsed = String.format(Locale.ROOT, "%.1fs", (now - t0) / 1000.0);
|
||||
|
||||
var msg = String.format(
|
||||
Locale.ROOT,
|
||||
"%s %d/%d slots | nodes=%d | backtracks=%d | mrv=%d | %s",
|
||||
bar, done, TOTAL, stats.nodes, stats.backtracks, stats.lastMRV, elapsed
|
||||
);
|
||||
System.out.print("\r" + padRight(msg, 120));
|
||||
System.out.flush();
|
||||
};
|
||||
|
||||
class Pick {
|
||||
|
||||
Slot slot;
|
||||
CandidateInfo info;
|
||||
boolean done;
|
||||
}
|
||||
|
||||
java.util.function.Supplier<Pick> chooseMRV = () -> {
|
||||
Slot best = null;
|
||||
CandidateInfo bestInfo = null;
|
||||
|
||||
for (var s : slots) {
|
||||
var k = s.key();
|
||||
if (assigned.containsKey(k)) continue;
|
||||
|
||||
var entry = dictIndex.get(s.len);
|
||||
if (entry == null) {
|
||||
var p = new Pick();
|
||||
p.slot = null;
|
||||
p.info = null;
|
||||
p.done = false;
|
||||
return p;
|
||||
}
|
||||
|
||||
var pat = patternForSlot(grid, s);
|
||||
var info = candidateInfoForPattern(entry, pat);
|
||||
|
||||
if (info.count == 0) {
|
||||
var p = new Pick();
|
||||
p.slot = null;
|
||||
p.info = null;
|
||||
p.done = false;
|
||||
return p;
|
||||
}
|
||||
|
||||
if (best == null
|
||||
|| info.count < bestInfo.count
|
||||
|| (info.count == bestInfo.count && slotScore(cellCount, s) > slotScore(cellCount, best))) {
|
||||
best = s;
|
||||
bestInfo = info;
|
||||
if (info.count <= 1) break;
|
||||
}
|
||||
}
|
||||
|
||||
var p = new Pick();
|
||||
if (best == null) {
|
||||
p.slot = null;
|
||||
p.info = null;
|
||||
p.done = true;
|
||||
} else {
|
||||
p.slot = best;
|
||||
p.info = bestInfo;
|
||||
p.done = false;
|
||||
}
|
||||
return p;
|
||||
};
|
||||
|
||||
final var MAX_TRIES_PER_SLOT = 2000;
|
||||
|
||||
class Solver {
|
||||
|
||||
boolean backtrack() {
|
||||
if (Thread.currentThread().isInterrupted()) return false;
|
||||
stats.nodes++;
|
||||
|
||||
if (timeLimitMs > 0 && (System.currentTimeMillis() - t0) > timeLimitMs) return false;
|
||||
|
||||
var pick = chooseMRV.get();
|
||||
if (pick.done) return true;
|
||||
if (pick.slot == null) {
|
||||
stats.backtracks++;
|
||||
return false;
|
||||
}
|
||||
|
||||
stats.lastMRV = pick.info.count;
|
||||
renderProgress.run();
|
||||
|
||||
var s = pick.slot;
|
||||
var k = s.key();
|
||||
var entry = dictIndex.get(s.len);
|
||||
var pat = patternForSlot(grid, s);
|
||||
|
||||
java.util.function.Function<String, Boolean> tryWord = (String w) -> {
|
||||
if (w == null) return false;
|
||||
if (used.contains(w)) return false;
|
||||
|
||||
for (var i = 0; i < pat.length; i++) {
|
||||
if (pat[i] != 0 && pat[i] != w.charAt(i)) return false;
|
||||
}
|
||||
|
||||
var undo = placeWord(grid, s, w);
|
||||
if (undo == null) return false;
|
||||
|
||||
used.add(w);
|
||||
assigned.put(k, w);
|
||||
|
||||
if (backtrack()) return true;
|
||||
|
||||
assigned.remove(k);
|
||||
used.remove(w);
|
||||
undoPlace(grid, undo);
|
||||
return false;
|
||||
};
|
||||
|
||||
if (pick.info.indices != null && pick.info.indices.length > 0) {
|
||||
var idxs = pick.info.indices;
|
||||
var L = idxs.length;
|
||||
var tries = Math.min(MAX_TRIES_PER_SLOT, L);
|
||||
|
||||
// When picking words from sorted indices, we want to favor the beginning
|
||||
// (lower difficulty) but still have some randomness.
|
||||
for (var t = 0; t < tries; t++) {
|
||||
// Bias strongly towards lower indices (simpler words) using r^3
|
||||
double r = rng.nextFloat();
|
||||
int idxInArray = (int) (r * r * r * L);
|
||||
var idx = idxs[idxInArray];
|
||||
var w = entry.words.get(idx);
|
||||
if (tryWord.apply(w)) return true;
|
||||
}
|
||||
stats.backtracks++;
|
||||
return false;
|
||||
}
|
||||
|
||||
var N = entry.words.size();
|
||||
if (N == 0) {
|
||||
stats.backtracks++;
|
||||
return false;
|
||||
}
|
||||
|
||||
var tries = Math.min(MAX_TRIES_PER_SLOT, N);
|
||||
for (var t = 0; t < tries; t++) {
|
||||
double r = rng.nextFloat();
|
||||
int idxInArray = (int) (r * r * r * N);
|
||||
var w = entry.words.get(idxInArray);
|
||||
if (tryWord.apply(w)) return true;
|
||||
}
|
||||
|
||||
stats.backtracks++;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// initial render (same feel)
|
||||
renderProgress.run();
|
||||
var ok = new Solver().backtrack();
|
||||
// final progress line
|
||||
System.out.print("\r" + padRight("", 120) + "\r");
|
||||
System.out.flush();
|
||||
|
||||
var res = new FillResult();
|
||||
res.ok = ok;
|
||||
res.grid = grid;
|
||||
res.clueMap = assigned;
|
||||
stats.seconds = (System.currentTimeMillis() - t0) / 1000.0;
|
||||
res.stats = stats;
|
||||
|
||||
if (ok) {
|
||||
double totalSimplicity = 0;
|
||||
for (var w : assigned.values()) {
|
||||
totalSimplicity += llmScores.get(w).difficulty;
|
||||
}
|
||||
res.simplicity = assigned.isEmpty() ? 0 : totalSimplicity / assigned.size();
|
||||
}
|
||||
|
||||
// print a final progress line
|
||||
if (verbose) {
|
||||
System.out.println(
|
||||
String.format(Locale.ROOT,
|
||||
"[######################] %d/%d slots | nodes=%d | backtracks=%d | mrv=%d | %.1fs",
|
||||
assigned.size(), TOTAL, stats.nodes, stats.backtracks, stats.lastMRV, stats.seconds
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static String padRight(String s, int n) {
|
||||
if (s.length() >= n) return s;
|
||||
return s + " ".repeat(n - s.length());
|
||||
}
|
||||
|
||||
// ---------------- Top-level generatePuzzle ----------------
|
||||
/**
 * A finished puzzle.
 *
 * @param dict   the dictionary the puzzle was built from
 * @param mask   the mask that was filled
 * @param filled the fill result for that mask
 */
public record PuzzleResult(Dict dict, char[][] mask, FillResult filled) { }
|
||||
|
||||
public static PuzzleResult generatePuzzle(Main.Opts opts) {
|
||||
var tLoad0 = System.nanoTime();
|
||||
var dict = loadWords(opts.wordsPath);
|
||||
var tLoad1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words%n", (tLoad1 - tLoad0) / 1e9, dict.words.size());
|
||||
|
||||
if (opts.threads > 1) {
|
||||
System.out.println("Running in multi-threaded mode with " + opts.threads + " threads...");
|
||||
var executor = Executors.newFixedThreadPool(opts.threads);
|
||||
try {
|
||||
var tasks = new ArrayList<Callable<PuzzleResult>>();
|
||||
for (int i = 1; i <= opts.tries; i++) {
|
||||
final int attempt = i;
|
||||
tasks.add(() -> {
|
||||
var threadRng = new Rng(opts.seed + attempt);
|
||||
var mask = generateMask(threadRng, dict.lenCounts, opts.pop, opts.gens, false);
|
||||
var filled = fillMask(threadRng, mask, dict.index, dict.words, 200, 60000, false);
|
||||
|
||||
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
||||
System.out.println("\nSolution found on attempt " + attempt);
|
||||
return new PuzzleResult(dict, mask, filled);
|
||||
}
|
||||
throw new RuntimeException("No solution found in attempt " + attempt);
|
||||
});
|
||||
}
|
||||
return executor.invokeAny(tasks);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
} catch (ExecutionException e) {
|
||||
// all failed
|
||||
} finally {
|
||||
executor.shutdownNow();
|
||||
}
|
||||
return null;
|
||||
} else {
|
||||
var rng = new Rng(opts.seed);
|
||||
for (var attempt = 1; attempt <= opts.tries; attempt++) {
|
||||
System.out.println("\nAttempt " + attempt + "/" + opts.tries);
|
||||
|
||||
var tMask0 = System.nanoTime();
|
||||
var mask = generateMask(rng, dict.lenCounts, opts.pop, opts.gens, true);
|
||||
var tMask1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "MASK: %.3fs%n", (tMask1 - tMask0) / 1e9);
|
||||
|
||||
var tFill0 = System.nanoTime();
|
||||
var filled = fillMask(rng, mask, dict.index, dict.words, 200, 60000, true);
|
||||
var tFill1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "FILL: %.3fms | Simplicity: %.2f%n", (tFill1 - tFill0) / 1e6, filled.simplicity);
|
||||
|
||||
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
||||
return new PuzzleResult(dict, mask, filled);
|
||||
}
|
||||
if (filled.ok) {
|
||||
System.out.printf(Locale.ROOT, "Puzzle simplicity %.2f is below min %.2f, retrying...%n",
|
||||
filled.simplicity, opts.minSimplicity);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
26
src/main/java/puzzle/TestSort.java
Normal file
26
src/main/java/puzzle/TestSort.java
Normal file
@@ -0,0 +1,26 @@
|
||||
package puzzle;
|
||||
import puzzle.ThemePoolBuilderLength.Lexicon;
|
||||
import java.nio.file.*;
|
||||
import java.util.*;
|
||||
public class TestSort {
|
||||
public static void main(String[] args) throws Exception {
|
||||
Lexicon lex = new Lexicon(
|
||||
Arrays.asList("A", "B", "C"),
|
||||
new HashMap<>(),
|
||||
new int[]{10, 30, 20},
|
||||
new BitSet[9]
|
||||
);
|
||||
BitSet bs = new BitSet();
|
||||
bs.set(0); bs.set(1); bs.set(2);
|
||||
Path p = Paths.get("test_pool.txt");
|
||||
ThemePoolBuilderLength.writeWordList(p, lex, bs);
|
||||
List<String> lines = Files.readAllLines(p);
|
||||
System.out.println("Sorted words: " + lines);
|
||||
if (lines.get(0).equals("B") && lines.get(1).equals("C") && lines.get(2).equals("A")) {
|
||||
System.out.println("SUCCESS");
|
||||
} else {
|
||||
System.out.println("FAILURE");
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
903
src/main/java/puzzle/ThemePoolBuilderLength.java
Normal file
903
src/main/java/puzzle/ThemePoolBuilderLength.java
Normal file
@@ -0,0 +1,903 @@
|
||||
package puzzle;
|
||||
|
||||
import org.w3c.dom.*;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.*;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.SQLException;
|
||||
import java.text.Normalizer;
|
||||
import java.time.LocalDate;
|
||||
import java.util.*;
|
||||
|
||||
public class ThemePoolBuilderLength {
|
||||
|
||||
/** RSS feeds fetched when no --feeds option is given. */
private static final List<String> DEFAULT_FEEDS = List.of(
        "https://feeds.nos.nl/nosnieuwsalgemeen",
        "https://feeds.nos.nl/nosnieuwstech");

// JDBC connection settings for the lexicon database.
// NOTE(review): host and credentials are hard-coded in source; consider
// reading them from the environment or a config file.
static final String url = "jdbc:postgresql://192.168.1.159:5432/postgres";
static final String user = "puzzle";
static final String pass = "heel-goed-wachtwoord";

// Short words and abbreviations that are always OR-ed into the pool (when present in the lexicon).
// NOTE: normalizeDutchToken strips non A-Z. Keep entries 2-8 after normalization.
private static final List<String> DEFAULT_SHORTS = List.of(
        "EU", "VS", "UK", "NAVO", "NOS", "NS", "ANP", "VN", "NPO", "RTL",
        "UUR", "MIN", "TV", "GPS", "AI", "IT", "CPU", "GPU",
        "ING", "KPN", "KVK", "RIVM", "GGD", "AIVD", "MIVD", "CEO", "CFO", "HR",
        "NL", "BE", "BRU", "EUR", "EURO", "WET", "ART", "BTW", "DI", "MA",
        "PVV", "VVD", "CDA", "FNV",
        "EN", "IN", "OP", "OM", "TE", "ER", "DE", "HET", "EEN", "VAN", "MET", "NOG", "OOK", "MAAR", "WEL", "NIET",
        "HOE", "ALS",

        "ZO", "DO", "WO", "VR", "MO", "WA", "WE", "TAAL",
        "LAND", "GEMEENTE", "STAAT", "BUREAU", "HUIS", "SCHOOL", "STR", "BAAN",
        "WERK", "KLUS",
        "FONDS", "RAAD", "CONGRESS", "GROEP", "STRAAT", "BRUG", "PARK",
        "BUURT",
        "BOUW", "HOTEL", "CAFE", "BAR",
        "BIJBAAN", "STUDENT", "DOCENT",
        "WINKEL", "MARKT", "KIOSK", "AUTO", "MOBILE", "FIETS", "SCOOTER",

        // abbreviations
        "DHR", "MEVR", "DR", "ST", "CA", "IVM", "MBT", "TAV", "TOV", "DWZ", "MAW", "OA", "TM",
        "ANWB", "BRP", "CBS",
        "AL", "NU", "TO", "NA", "BIJ", "TOT", "DAN", "WAT", "DAT",
        "IK", "JE", "WE", "WIJ", "JIJ", "ZIJ", "HIJ", "HEN", "ONS", "JOU",
        // roman numerals (2-8)
        "II", "III", "IV", "VI", "VII", "VIII", "IX",
        "XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX", "XX"
);

/** Browser-like User-Agent string; presumably sent with the curl-based fetches (confirm at the call sites). */
private static final String BROWSER_UA =
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36";
// Mutable tuning knobs. MAX_WORD_LENGTH is printed as the upper word length
// of the master list and decides the default of Opts.minLen8.
static int MIN_SIMPLICITY = 520,
        MAX_WORD_LENGTH = 7;
|
||||
|
||||
/** Command-line options with their defaults; populated by parseArgs. */
static final class Opts {

    // Base URL of the LLM API (--endpoint).
    String endpoint = "https://jarvis-lan.appmodel.nl/api/ollama/";
    // RSS feed URLs (--feeds, comma separated).
    List<String> feeds = new ArrayList<>(DEFAULT_FEEDS);
    // Output directory (--out); defaults to $OUT_DIR when that is set.
    String outDir = System.getenv("OUT_DIR") != null ? System.getenv("OUT_DIR") : "/data/puzzle";
    // Size arguments for the bridge / theme / related word sets (--bridge, --theme, --related).
    int bridgeN = 30000;
    int themeN = 800;
    int relatedN = 2200;
    // Maximum number of items read per feed (--items).
    int rssItemsPerFeed = 10;
    // Model id (--model); when null, main asks the endpoint for its model list.
    String model = "/models/Hadiseh-Mhd/Mixtral-8x7B-Instruct-v0.1-Q4_K_M-GGUF/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf";
    // Timeout in seconds (--timeout) and retry count (--retries).
    int timeoutSeconds = 180;
    int retries = 2;
    // Minimum number of pool words per word length (--min2 .. --min8).
    int minLen2 = 1000;
    int minLen3 = 1000;
    int minLen4 = 1000;
    int minLen5 = 1000; // set if you also want to force 5-letter words, etc.
    int minLen6 = 1000;
    int minLen7 = 1000;
    // No minimum for 8-letter words unless MAX_WORD_LENGTH allows them.
    int minLen8 = MAX_WORD_LENGTH >= 8 ? 1000 : 0;
}
|
||||
|
||||
/**
 * Builds the word pool and writes it, plus supporting files, to the output
 * directory.
 *
 * <p>Steps: parse options, load the lexicon from PostgreSQL, fetch the RSS
 * feeds (written to {@code rss.txt}), determine the model id, collect theme
 * words ({@code theme.txt}), OR together the theme, bridge and short-word
 * bitmaps, enforce the per-length minima, then write {@code report.txt} and
 * {@code pool.txt}.
 *
 * @param args command-line arguments, see parseArgs
 * @throws Exception on any I/O, database or driver-loading failure
 */
public static void main(String[] args) throws Exception {
    var o = parseArgs(args);

    var outDir = Path.of(o.outDir);
    Files.createDirectories(outDir);

    System.out.println("Loading lexicon...");
    Lexicon lex;
    // Make sure the PostgreSQL JDBC driver class is loaded before connecting.
    Class.forName("org.postgresql.Driver");
    try (var c = DriverManager.getConnection(url, user, pass);) {
        lex = loadLexicon(c);
    }

    System.out.println("Master words (2-" + MAX_WORD_LENGTH + ", A-Z): " + lex.words.size());

    // RSS via curl (browser-like)
    var all = new ArrayList<RssItem>();
    for (var feed : o.feeds) {
        var f = feed.trim();
        if (f.isEmpty()) continue;
        System.out.println("Fetching RSS: " + f);
        all.addAll(fetchRssViaCurlBrowser(f, o.rssItemsPerFeed, o.timeoutSeconds));
    }

    // Numbered "title / description" digest of all fetched items.
    var rssText = new StringBuilder();
    var k = 0;
    for (var it : all) {
        k++;
        rssText.append(k).append(". ").append(it.title).append("\n");
        if (!it.desc.isBlank()) rssText.append(" ").append(it.desc).append("\n");
    }
    Files.writeString(outDir.resolve("rss.txt"), rssText.toString(), StandardCharsets.UTF_8);

    // LM Studio via curl
    // NOTE(review): comments and messages mention both LM Studio and Ollama; confirm which backend is meant.
    var modelId = o.model;
    if (modelId == null) {
        // No model configured: ask the endpoint for its model list and pick one.
        var modelsUrl = apiUrl(o.endpoint, "/models");
        System.out.println("Ollama GET: " + modelsUrl);
        var modelsJson = curlGetJson(o, modelsUrl);
        modelId = pickModelId(modelsJson);
        if (modelId == null) {
            throw new IOException("Could not auto-pick model id from /v1/models. Use --model <id>.\n--- /models ---\n" + modelsJson);
        }
    }
    System.out.println("Using model: " + modelId);
    System.out.println("Generating theme words via LM Studio...");
    // NOTE(review): the LLM call is currently disabled, so the theme word list is always empty.
    var llmWords = List.<String>of();//llmThemeWords(o, modelId, rssText.toString());

    // Keep only theme words that normalize cleanly and exist in the lexicon.
    var themeKept = new LinkedHashSet<String>();
    for (var wRaw : llmWords) {
        var w = normalizeDutchToken(wRaw);
        if (w == null) continue;
        if (lex.idOf.containsKey(w)) themeKept.add(w);
    }
    Files.write(outDir.resolve("theme.txt"), themeKept, StandardCharsets.UTF_8);

    // BitSets
    var themeBs = bitmapFromWords(lex, themeKept);
    var bridgeBs = buildBridgeBitmap(lex, o.bridgeN);
    var shortBs = bitmapFromWords(lex, DEFAULT_SHORTS);

    // The pool is the union of theme, bridge and short words.
    var pool = new BitSet(lex.words.size());
    pool.or(themeBs);
    pool.or(bridgeBs);
    pool.or(shortBs);

    // ---- NEW: enforce minimum counts per length ----
    enforceMinima(o, lex, pool);

    // Report
    var themeCounts = countsPerLen(lex, themeBs);
    var poolCounts = countsPerLen(lex, pool);

    var report = """
            Date: %s
            Feeds: %s
            Model: %s

            Master size: %d
            Theme kept (in master): %d
            Bridge size: %d
            Shorts kept: %d
            Pool total: %d

            Enforced minima:
            2: %d
            3: %d
            4: %d
            5: %d
            6: %d
            7: %d
            8: %d

            Counts per length (theme):
            %s

            Counts per length (pool):
            %s
            """.formatted(
            LocalDate.now(),
            String.join(", ", o.feeds),
            modelId,
            lex.words.size(),
            themeBs.cardinality(),
            bridgeBs.cardinality(),
            shortBs.cardinality(),
            pool.cardinality(),
            o.minLen2, o.minLen3, o.minLen4, o.minLen5, o.minLen6, o.minLen7, o.minLen8,
            mapToLines(themeCounts),
            mapToLines(poolCounts)
    );

    Files.writeString(outDir.resolve("report.txt"), report, StandardCharsets.UTF_8);
    System.out.println(report);

    // Output pool list
    var poolFile = outDir.resolve("pool.txt");
    writeWordList(poolFile, lex, pool);
    System.out.println("Wrote: " + poolFile.toAbsolutePath());
}
|
||||
static Opts parseArgs(String[] args) {
|
||||
var o = new Opts();
|
||||
for (var i = 0; i < args.length; i++) {
|
||||
var a = args[i];
|
||||
var v = (i + 1 < args.length) ? args[i + 1] : null;
|
||||
switch (a) {
|
||||
case "--endpoint" -> {
|
||||
o.endpoint = v;
|
||||
i++;
|
||||
}
|
||||
case "--feeds" -> {
|
||||
o.feeds = Arrays.asList(v.split(","));
|
||||
i++;
|
||||
}
|
||||
case "--out" -> {
|
||||
o.outDir = v;
|
||||
i++;
|
||||
}
|
||||
case "--bridge" -> {
|
||||
o.bridgeN = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--theme" -> {
|
||||
o.themeN = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--related" -> {
|
||||
o.relatedN = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--items" -> {
|
||||
o.rssItemsPerFeed = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--model" -> {
|
||||
o.model = v;
|
||||
i++;
|
||||
}
|
||||
case "--timeout" -> {
|
||||
o.timeoutSeconds = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--retries" -> {
|
||||
o.retries = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
|
||||
// ---- NEW: minima per length ----
|
||||
case "--min2" -> {
|
||||
o.minLen2 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--min3" -> {
|
||||
o.minLen3 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--min4" -> {
|
||||
o.minLen4 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--min5" -> {
|
||||
o.minLen5 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--min6" -> {
|
||||
o.minLen6 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--min7" -> {
|
||||
o.minLen7 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
case "--min8" -> {
|
||||
o.minLen8 = Integer.parseInt(v);
|
||||
i++;
|
||||
}
|
||||
|
||||
case "-h", "--help" -> {
|
||||
System.out.println("""
|
||||
Usage:
|
||||
java puzzle.ThemePoolBuilder --words WORDS.txt [options]
|
||||
|
||||
Options:
|
||||
--endpoint http://HOST:1234/v1 (LM Studio)
|
||||
--feeds url1,url2
|
||||
--out ./out
|
||||
--bridge 5000
|
||||
--theme 300
|
||||
--related 1200
|
||||
--items 20 (per feed)
|
||||
--model <id> (recommended; skips /v1/models)
|
||||
--timeout 60 (seconds)
|
||||
--retries 4
|
||||
|
||||
# enforce minima per length in final pool
|
||||
--min2 4000
|
||||
--min3 7000
|
||||
--min4 9000
|
||||
--min5 0
|
||||
--min6 0
|
||||
--min7 0
|
||||
--min8 0
|
||||
""");
|
||||
System.exit(0);
|
||||
}
|
||||
default -> throw new IllegalArgumentException("Unknown arg: " + a);
|
||||
}
|
||||
}
|
||||
return o;
|
||||
}
|
||||
|
||||
static boolean isAZ(String s) {
|
||||
for (var i = 0; i < s.length(); i++) {
|
||||
var ch = s.charAt(i);
|
||||
if (ch < 'A' || ch > 'Z') return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static String normalizeDutchToken(String raw) {
|
||||
if (raw == null) return null;
|
||||
var s = raw.trim();
|
||||
if (s.isEmpty()) return null;
|
||||
|
||||
s = Normalizer.normalize(s, Normalizer.Form.NFD).replaceAll("\\p{M}+", "");
|
||||
s = s.toUpperCase(Locale.ROOT);
|
||||
|
||||
s = s.replaceAll("[^A-Z]", "");
|
||||
if (s.length() < 2 || s.length() > 8) return null;
|
||||
if (!isAZ(s)) return null;
|
||||
return s;
|
||||
}
|
||||
|
||||
static String stripHtml(String s) {
|
||||
if (s == null) return "";
|
||||
var x = s.replaceAll("<[^>]+>", " ");
|
||||
x = x.replace("&", "&").replace("<", "<").replace(">", ">");
|
||||
x = x.replaceAll("\\s+", " ").trim();
|
||||
return x;
|
||||
}
|
||||
|
||||
static final Map<Character, Integer> LETTER_WEIGHT = Map.ofEntries(
|
||||
Map.entry('E', 10), Map.entry('N', 9), Map.entry('A', 9), Map.entry('R', 8),
|
||||
Map.entry('I', 8), Map.entry('O', 7), Map.entry('S', 7), Map.entry('T', 7),
|
||||
Map.entry('D', 6), Map.entry('L', 6), Map.entry('K', 5), Map.entry('M', 5),
|
||||
Map.entry('U', 5), Map.entry('P', 4), Map.entry('G', 4), Map.entry('H', 4),
|
||||
Map.entry('V', 4), Map.entry('B', 3), Map.entry('W', 3),
|
||||
Map.entry('C', 2), Map.entry('F', 2), Map.entry('Z', 2),
|
||||
Map.entry('J', 1), Map.entry('Y', 1), Map.entry('Q', 0), Map.entry('X', 0)
|
||||
);
|
||||
|
||||
static boolean isVowel(char ch) {
|
||||
return ch == 'A' || ch == 'E' || ch == 'I' || ch == 'O' || ch == 'U';
|
||||
}
|
||||
|
||||
static int crossabilityScore(String w) {
|
||||
var score = 0;
|
||||
var vowels = 0;
|
||||
for (var i = 0; i < w.length(); i++) {
|
||||
var ch = w.charAt(i);
|
||||
score += LETTER_WEIGHT.getOrDefault(ch, 2);
|
||||
if (isVowel(ch)) vowels++;
|
||||
}
|
||||
var ratio = vowels / (double) w.length();
|
||||
if (ratio >= 0.35 && ratio <= 0.65) score += 8;
|
||||
if (w.indexOf('Q') >= 0 || w.indexOf('X') >= 0) score -= 6;
|
||||
if (w.indexOf('Y') >= 0 || w.indexOf('J') >= 0) score -= 2;
|
||||
return score;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param words id -> word
|
||||
* @param idOf word -> id
|
||||
* @param score id -> crossability
|
||||
* @param byLen byLen[L] for L 0..8
|
||||
*/
|
||||
record Lexicon(List<String> words, Map<String, Integer> idOf, int[] score, BitSet[] byLen) { }
|
||||
|
||||
/**
|
||||
* Loads lexicon from PostgreSQL view/table: export_words_with_hints_2_8
|
||||
* Columns: WOORD, level_1_to_10, hint
|
||||
*
|
||||
* Notes:
|
||||
* - Normalizes words via normalizeDutchToken(...)
|
||||
* - Dedupes on normalized word
|
||||
* - Uses level_1_to_10 as the "LLM score" (fallback 5)
|
||||
* - Ignores hint for scoring (but you can store it elsewhere if needed)
|
||||
*/
|
||||
static Lexicon loadLexicon(Connection conn) throws SQLException {
|
||||
var out = new ArrayList<String>(200_000);
|
||||
var idOf = new HashMap<String, Integer>(400_000);
|
||||
|
||||
// Store level per normalized word while loading so we can compute scores later
|
||||
var levelOf = new HashMap<String, Integer>(400_000);
|
||||
|
||||
final var sql = """
|
||||
SELECT woord, 10-level_1_to_10, hint
|
||||
FROM export_real_words_with_hints
|
||||
where length(woord)<=7
|
||||
order by level_1_to_10 asc
|
||||
""" ;
|
||||
|
||||
try (var ps = conn.prepareStatement(sql);
|
||||
var rs = ps.executeQuery()) {
|
||||
|
||||
while (rs.next()) {
|
||||
var rawWord = rs.getString(1);
|
||||
var lvlObj = (Integer) rs.getObject(2); // nullable
|
||||
// String hint = rs.getString(3); // available if you want it later
|
||||
|
||||
var w = normalizeDutchToken(rawWord);
|
||||
if (w == null) continue;
|
||||
|
||||
if (idOf.containsKey(w)) continue;
|
||||
|
||||
idOf.put(w, out.size());
|
||||
out.add(w);
|
||||
|
||||
var lvl = (lvlObj == null ? 5 : lvlObj.intValue());
|
||||
levelOf.put(w, lvl);
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
var n = out.size();
|
||||
var score = new int[n];
|
||||
var byLen = new BitSet[9];
|
||||
for (var L = 0; L <= 8; L++) byLen[L] = new BitSet(n);
|
||||
|
||||
for (var i = 0; i < n; i++) {
|
||||
var w = out.get(i);
|
||||
var crossScore = crossabilityScore(w);
|
||||
var lScore = levelOf.getOrDefault(w, 5);
|
||||
|
||||
// Prioritize simple words (high lScore) and long words.
|
||||
// lScore (1-10) adds up to 1000 points (weight 100).
|
||||
// Length (2-8) adds up to 160 points (weight 20).
|
||||
score[i] = crossScore + (lScore * 100) + (w.length() * 40);
|
||||
byLen[w.length()].set(i);
|
||||
}
|
||||
|
||||
return new Lexicon(out, idOf, score, byLen);
|
||||
}
|
||||
|
||||
// ---------------- RSS via curl (browser-like) ----------------
|
||||
|
||||
record RssItem(String title, String desc) { }
|
||||
|
||||
static String textOfFirst(Element parent, String tag) {
|
||||
var nl = parent.getElementsByTagName(tag);
|
||||
if (nl.getLength() == 0) return null;
|
||||
var n = nl.item(0);
|
||||
return n.getTextContent();
|
||||
}
|
||||
|
||||
static List<RssItem> fetchRssViaCurlBrowser(String url, int limit, int timeoutSeconds) throws Exception {
|
||||
var cmd = new ArrayList<String>();
|
||||
cmd.add("curl");
|
||||
cmd.add("-fsSL");
|
||||
cmd.add("-L");
|
||||
cmd.add("--compressed");
|
||||
|
||||
cmd.add("--connect-timeout");
|
||||
cmd.add("10");
|
||||
cmd.add("--max-time");
|
||||
cmd.add(String.valueOf(timeoutSeconds));
|
||||
|
||||
cmd.add("--retry");
|
||||
cmd.add("5");
|
||||
cmd.add("--retry-all-errors");
|
||||
cmd.add("--retry-delay");
|
||||
cmd.add("1");
|
||||
|
||||
cmd.add("-H");
|
||||
cmd.add("User-Agent: " + BROWSER_UA);
|
||||
cmd.add("-H");
|
||||
cmd.add("Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
|
||||
cmd.add("-H");
|
||||
cmd.add("Accept-Language: nl-NL,nl;q=0.9,en;q=0.7");
|
||||
cmd.add("-H");
|
||||
cmd.add("Cache-Control: no-cache");
|
||||
cmd.add("-H");
|
||||
cmd.add("Pragma: no-cache");
|
||||
cmd.add("-H");
|
||||
cmd.add("Sec-Fetch-Dest: document");
|
||||
cmd.add("-H");
|
||||
cmd.add("Sec-Fetch-Mode: navigate");
|
||||
cmd.add("-H");
|
||||
cmd.add("Sec-Fetch-Site: none");
|
||||
cmd.add("-H");
|
||||
cmd.add("Sec-Fetch-User: ?1");
|
||||
|
||||
cmd.add(url);
|
||||
|
||||
var p = new ProcessBuilder(cmd)
|
||||
.redirectErrorStream(true)
|
||||
.start();
|
||||
|
||||
var bytes = p.getInputStream().readAllBytes();
|
||||
var code = p.waitFor();
|
||||
if (code != 0) {
|
||||
throw new IOException("curl RSS failed (" + code + ") url=" + url + " output=" +
|
||||
new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
try (InputStream is = new ByteArrayInputStream(bytes)) {
|
||||
var dbf = DocumentBuilderFactory.newInstance();
|
||||
var doc = dbf.newDocumentBuilder().parse(is);
|
||||
var items = doc.getElementsByTagName("item");
|
||||
|
||||
var out = new ArrayList<RssItem>();
|
||||
for (var i = 0; i < items.getLength() && out.size() < limit; i++) {
|
||||
var item = (Element) items.item(i);
|
||||
var title = textOfFirst(item, "title");
|
||||
var desc = textOfFirst(item, "description");
|
||||
if (title == null) title = "";
|
||||
if (desc == null) desc = "";
|
||||
out.add(new RssItem(stripHtml(title), stripHtml(desc)));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------- LM Studio (OpenAI-compatible) ----------------
|
||||
|
||||
static String apiUrl(String endpointArg, String path) {
|
||||
var base = endpointArg.trim();
|
||||
if (base.endsWith("/")) base = base.substring(0, base.length() - 1);
|
||||
if (base.endsWith("/v1")) base = base.substring(0, base.length() - 3);
|
||||
|
||||
if (!path.startsWith("/")) path = "/" + path;
|
||||
if (!path.startsWith("/v1/")) path = "/" + path;
|
||||
|
||||
return base + path;
|
||||
}
|
||||
|
||||
static void sleepBackoff(int attempt) {
|
||||
try {
|
||||
var ms = (long) (300L * Math.pow(2, attempt - 1)); // 300, 600, 1200, ...
|
||||
Thread.sleep(Math.min(ms, 3000));
|
||||
} catch (InterruptedException ignored) { }
|
||||
}
|
||||
|
||||
static String curlGetJson(Opts o, String url) throws Exception {
|
||||
Exception last = null;
|
||||
for (var attempt = 1; attempt <= o.retries; attempt++) {
|
||||
try {
|
||||
var cmd = new ArrayList<String>();
|
||||
cmd.add("curl");
|
||||
cmd.add("-fsSL");
|
||||
cmd.add("--connect-timeout");
|
||||
cmd.add("10");
|
||||
cmd.add("--max-time");
|
||||
cmd.add(String.valueOf(o.timeoutSeconds));
|
||||
cmd.add("--retry");
|
||||
cmd.add("3");
|
||||
cmd.add("--retry-all-errors");
|
||||
cmd.add("--retry-delay");
|
||||
cmd.add("1");
|
||||
cmd.add("-H");
|
||||
cmd.add("Accept: application/json");
|
||||
cmd.add("-H");
|
||||
cmd.add("User-Agent: " + BROWSER_UA);
|
||||
cmd.add(url);
|
||||
|
||||
var p = new ProcessBuilder(cmd)
|
||||
.redirectErrorStream(true)
|
||||
.start();
|
||||
|
||||
var bytes = p.getInputStream().readAllBytes();
|
||||
var code = p.waitFor();
|
||||
|
||||
if (code != 0) {
|
||||
throw new IOException("curl GET failed (" + code + ") url=" + url + "\nOutput:\n" +
|
||||
new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
return new String(bytes, StandardCharsets.UTF_8);
|
||||
} catch (Exception e) {
|
||||
last = e;
|
||||
if (attempt < o.retries) sleepBackoff(attempt);
|
||||
}
|
||||
}
|
||||
throw last;
|
||||
}
|
||||
|
||||
static String curlPostJson(Opts o, String url, String jsonBody) throws Exception {
|
||||
Exception last = null;
|
||||
for (var attempt = 1; attempt <= o.retries; attempt++) {
|
||||
try {
|
||||
System.out.println(" Attempt " + attempt + "/" + o.retries + " via curl...");
|
||||
|
||||
var tempFile = Files.createTempFile("lm-request-", ".json");
|
||||
try {
|
||||
Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8);
|
||||
List<String> cmd = new ArrayList<>();
|
||||
cmd.add("curl");
|
||||
cmd.add("-fsSL");
|
||||
cmd.add("--connect-timeout");
|
||||
cmd.add("10");
|
||||
cmd.add("--max-time");
|
||||
cmd.add(String.valueOf(o.timeoutSeconds));
|
||||
cmd.add("--retry");
|
||||
cmd.add("3");
|
||||
cmd.add("--retry-all-errors");
|
||||
cmd.add("--retry-delay");
|
||||
cmd.add("1");
|
||||
cmd.add("-H");
|
||||
cmd.add("Content-Type: application/json");
|
||||
cmd.add("-H");
|
||||
cmd.add("Accept: application/json");
|
||||
cmd.add("-H");
|
||||
cmd.add("User-Agent: " + BROWSER_UA);
|
||||
cmd.add("-d");
|
||||
cmd.add("@" + tempFile.toString());
|
||||
cmd.add(url);
|
||||
|
||||
var p = new ProcessBuilder(cmd)
|
||||
.redirectErrorStream(true)
|
||||
.start();
|
||||
|
||||
var bytes = p.getInputStream().readAllBytes();
|
||||
var code = p.waitFor();
|
||||
|
||||
if (code != 0) {
|
||||
throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" +
|
||||
new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
return new String(bytes, StandardCharsets.UTF_8);
|
||||
} finally {
|
||||
Files.deleteIfExists(tempFile);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println(" Error: " + e.getClass().getName() + ": " + e.getMessage());
|
||||
last = e;
|
||||
if (attempt < o.retries) sleepBackoff(attempt);
|
||||
}
|
||||
}
|
||||
throw last;
|
||||
}
|
||||
|
||||
static String pickModelId(String modelsJson) {
|
||||
if (modelsJson == null) return null;
|
||||
var data = modelsJson.indexOf("\"data\"");
|
||||
if (data < 0) return null;
|
||||
var id = modelsJson.indexOf("\"id\"", data);
|
||||
if (id < 0) return null;
|
||||
var q1 = modelsJson.indexOf('"', modelsJson.indexOf(':', id) + 1);
|
||||
if (q1 < 0) return null;
|
||||
var q2 = modelsJson.indexOf('"', q1 + 1);
|
||||
if (q2 < 0) return null;
|
||||
return modelsJson.substring(q1 + 1, q2);
|
||||
}
|
||||
|
||||
static String extractChatContent(String json) {
|
||||
if (json == null) return null;
|
||||
|
||||
var choices = json.indexOf("\"choices\"");
|
||||
var p = (choices >= 0) ? choices : 0;
|
||||
|
||||
var i = json.indexOf("\"content\"", p);
|
||||
if (i < 0) return null;
|
||||
var colon = json.indexOf(':', i);
|
||||
if (colon < 0) return null;
|
||||
|
||||
var q = json.indexOf('"', colon + 1);
|
||||
if (q < 0) return null;
|
||||
|
||||
var sb = new StringBuilder();
|
||||
var esc = false;
|
||||
for (var k = q + 1; k < json.length(); k++) {
|
||||
var ch = json.charAt(k);
|
||||
if (esc) {
|
||||
if (ch == 'n') sb.append('\n');
|
||||
else if (ch == 't') sb.append('\t');
|
||||
else if (ch == 'r') sb.append('\r');
|
||||
else sb.append(ch);
|
||||
esc = false;
|
||||
} else {
|
||||
if (ch == '\\') esc = true;
|
||||
else if (ch == '"') break;
|
||||
else sb.append(ch);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
static List<String> parseStringArray(String s) {
|
||||
if (s == null) return List.of();
|
||||
var a = s.indexOf('[');
|
||||
var b = s.lastIndexOf(']');
|
||||
if (a < 0 || b < 0 || b <= a) return List.of();
|
||||
|
||||
var body = s.substring(a + 1, b);
|
||||
var out = new ArrayList<String>();
|
||||
|
||||
// If it's a simple comma-separated list without quotes (or with mixed quotes),
|
||||
// let's try a more robust approach.
|
||||
if (!body.contains("\"")) {
|
||||
for (var part : body.split(",")) {
|
||||
var trimmed = part.trim();
|
||||
if (!trimmed.isEmpty()) out.add(trimmed);
|
||||
}
|
||||
if (!out.isEmpty()) return out;
|
||||
}
|
||||
|
||||
var cur = new StringBuilder();
|
||||
boolean in = false, esc = false;
|
||||
|
||||
for (var i = 0; i < body.length(); i++) {
|
||||
var ch = body.charAt(i);
|
||||
if (!in) {
|
||||
if (ch == '"') {
|
||||
in = true;
|
||||
cur.setLength(0);
|
||||
esc = false;
|
||||
}
|
||||
} else {
|
||||
if (esc) {
|
||||
cur.append(ch);
|
||||
esc = false;
|
||||
} else if (ch == '\\') {
|
||||
esc = true;
|
||||
} else if (ch == '"') {
|
||||
out.add(cur.toString());
|
||||
in = false;
|
||||
} else {
|
||||
cur.append(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
static String jsonQuote(String s) {
|
||||
if (s == null) return "null";
|
||||
var sb = new StringBuilder();
|
||||
sb.append('"');
|
||||
for (var i = 0; i < s.length(); i++) {
|
||||
var ch = s.charAt(i);
|
||||
if (ch == '\\' || ch == '"') sb.append('\\').append(ch);
|
||||
else if (ch == '\n') sb.append("\\n");
|
||||
else if (ch == '\r') sb.append("\\r");
|
||||
else if (ch == '\t') sb.append("\\t");
|
||||
else sb.append(ch);
|
||||
}
|
||||
sb.append('"');
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
static List<String> llmThemeWords(Opts o, String modelId, String rssText) throws Exception {
|
||||
var prompt = """
|
||||
Je genereert woorden voor een Nederlandse kruiswoordpuzzel.
|
||||
|
||||
Regels:
|
||||
- Output MOET exact één JSON array zijn: ["WOORD", ...]
|
||||
- Alleen A-Z, 2-8 letters woorden
|
||||
- Geen spaties, streepjes, cijfers, accenten, apostrofs, punten
|
||||
- Geen duplicaten
|
||||
- Focus op zelfstandige naamwoorden/termen uit het nieuws en relevante Zweedse kruiswoordpuzzel koppelwoorden in het thema.
|
||||
- Lever %d THEMA-woorden en daarna %d GERELATEERDE woorden (totaal %d).
|
||||
- Voeg ook wat korte woorden/afkortingen toe (2-4 letters), maar houd het totaal gelijk.
|
||||
|
||||
Nieuws (koppen/samenvattingen):
|
||||
%s
|
||||
""".formatted(o.themeN, o.relatedN, (o.themeN + o.relatedN), rssText.substring(0, Math.min(rssText.length(), 8000)));
|
||||
|
||||
var body = """
|
||||
{
|
||||
"model": %s,
|
||||
"messages": [
|
||||
{"role":"system","content":"Je bent een strikte JSON generator. Antwoord ALLEEN met een JSON array van strings."},
|
||||
{"role":"user","content": %s}
|
||||
],
|
||||
"temperature": 0.35,
|
||||
"max_tokens": 20000
|
||||
}
|
||||
""".formatted(jsonQuote(modelId), jsonQuote(prompt));
|
||||
|
||||
var url = apiUrl(o.endpoint, "/chat/completions");
|
||||
System.out.println("LM Studio POST: " + url);
|
||||
System.out.println("Request body length: " + body.length() + " bytes");
|
||||
|
||||
var resp = curlPostJson(o, url, body);
|
||||
var content = extractChatContent(resp);
|
||||
if (content == null) {
|
||||
throw new IOException("Could not extract chat content from LM Studio response.\n--- response ---\n" + resp);
|
||||
}
|
||||
return parseStringArray(content);
|
||||
}
|
||||
|
||||
// ---------------- Pool building ----------------
|
||||
|
||||
static BitSet buildBridgeBitmap(Lexicon lex, int bridgeN) {
|
||||
var n = lex.words.size();
|
||||
var ids = new ArrayList<Integer>(n);
|
||||
for (var i = 0; i < n; i++) {
|
||||
// Optionally filter out VERY complex words from the bridge (e.g. lScore < 3)
|
||||
// But since we sort by score (which is now dominated by lScore),
|
||||
// they will be at the very bottom anyway.
|
||||
// if (lex.score[i] < 800) continue;
|
||||
ids.add(i);
|
||||
}
|
||||
|
||||
ids.sort((a, b) -> Integer.compare(lex.score[b], lex.score[a]));
|
||||
|
||||
var bs = new BitSet(n);
|
||||
var take = Math.min(bridgeN, ids.size());
|
||||
for (var i = 0; i < take; i++) bs.set(ids.get(i));
|
||||
return bs;
|
||||
}
|
||||
|
||||
static BitSet bitmapFromWords(Lexicon lex, Collection<String> words) {
|
||||
var bs = new BitSet(lex.words.size());
|
||||
for (var raw : words) {
|
||||
var w = normalizeDutchToken(raw);
|
||||
if (w == null) continue;
|
||||
var id = lex.idOf.get(w);
|
||||
if (id != null) bs.set(id);
|
||||
}
|
||||
return bs;
|
||||
}
|
||||
|
||||
static Map<Integer, Integer> countsPerLen(Lexicon lex, BitSet bs) {
|
||||
var out = new HashMap<Integer, Integer>();
|
||||
for (var L = 2; L <= 8; L++) {
|
||||
var tmp = (BitSet) bs.clone();
|
||||
tmp.and(lex.byLen[L]);
|
||||
out.put(L, tmp.cardinality());
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
static void writeWordList(Path path, Lexicon lex, BitSet bs) throws IOException {
|
||||
var ids = new ArrayList<Integer>(bs.cardinality());
|
||||
for (var i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) {
|
||||
ids.add(i);
|
||||
}
|
||||
// Sort by score descending (higher score is easier/better)
|
||||
ids.sort((a, b) -> Integer.compare(lex.score[b], lex.score[a]));
|
||||
|
||||
var out = new ArrayList<String>(ids.size());
|
||||
for (var id : ids) {
|
||||
if (lex.score[id] < MIN_SIMPLICITY)
|
||||
continue;
|
||||
out.add(lex.words.get(id));
|
||||
}
|
||||
Files.write(path, out, StandardCharsets.UTF_8, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
|
||||
}
|
||||
|
||||
static String mapToLines(Map<Integer, Integer> m) {
|
||||
var sb = new StringBuilder();
|
||||
for (var L = 2; L <= 8; L++) {
|
||||
sb.append(" ").append(L).append(": ").append(m.getOrDefault(L, 0)).append("\n");
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
// ---------------- NEW: enforce minima per length ----------------
|
||||
|
||||
static int countLen(Lexicon lex, BitSet bs, int L) {
|
||||
var tmp = (BitSet) bs.clone();
|
||||
tmp.and(lex.byLen[L]);
|
||||
return tmp.cardinality();
|
||||
}
|
||||
|
||||
static void ensureMinLen(Lexicon lex, BitSet pool, int L, int minWanted) {
|
||||
if (minWanted <= 0) return;
|
||||
|
||||
var current = countLen(lex, pool, L);
|
||||
if (current >= minWanted) return;
|
||||
|
||||
var need = minWanted - current;
|
||||
|
||||
// Collect candidate ids of exactly length L that are not already in pool.
|
||||
var candidates = new ArrayList<Integer>(Math.max(need * 2, 1024));
|
||||
for (var id = lex.byLen[L].nextSetBit(0); id >= 0; id = lex.byLen[L].nextSetBit(id + 1)) {
|
||||
if (!pool.get(id)) candidates.add(id);
|
||||
}
|
||||
if (candidates.isEmpty()) return;
|
||||
|
||||
// Sort by crossability score (desc)
|
||||
candidates.sort((a, b) -> Integer.compare(lex.score[b], lex.score[a]));
|
||||
|
||||
var added = 0;
|
||||
for (var id : candidates) {
|
||||
pool.set(id);
|
||||
added++;
|
||||
if (added >= need) break;
|
||||
}
|
||||
}
|
||||
|
||||
static void enforceMinima(Opts o, Lexicon lex, BitSet pool) {
|
||||
ensureMinLen(lex, pool, 2, o.minLen2);
|
||||
ensureMinLen(lex, pool, 3, o.minLen3);
|
||||
ensureMinLen(lex, pool, 4, o.minLen4);
|
||||
ensureMinLen(lex, pool, 5, o.minLen5);
|
||||
ensureMinLen(lex, pool, 6, o.minLen6);
|
||||
ensureMinLen(lex, pool, 7, o.minLen7);
|
||||
ensureMinLen(lex, pool, 8, o.minLen8);
|
||||
}
|
||||
|
||||
}
|
||||
24
src/main/java/puzzle/WordScore.java
Normal file
24
src/main/java/puzzle/WordScore.java
Normal file
@@ -0,0 +1,24 @@
|
||||
package puzzle;
|
||||
|
||||
// ===== DATA CLASS =====
|
||||
/**
 * Mutable holder for a word, its score and status, plus the endpoint and batch id it is
 * associated with.
 */
class WordScore {

    String word;
    int score;
    String status;
    String endpoint;
    int batchId;

    /** Creates a fully populated entry. */
    WordScore(String word, int score, String status, String endpoint, int batchId) {
        this.word = word;
        this.score = score;
        this.status = status;
        this.endpoint = endpoint;
        this.batchId = batchId;
    }

    /** Creates an entry without endpoint ({@code null}) and with batch id 0. */
    WordScore(String word, int score, String status) {
        this(word, score, status, null, 0);
    }
}
|
||||
BIN
src/main/java/puzzle/postgresql-42.7.8.jar
Normal file
BIN
src/main/java/puzzle/postgresql-42.7.8.jar
Normal file
Binary file not shown.
Reference in New Issue
Block a user