Gather data
This commit is contained in:
220
src/puzzle/ClueGenerator.java
Normal file
220
src/puzzle/ClueGenerator.java
Normal file
@@ -0,0 +1,220 @@
|
||||
package puzzle;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
|
||||
public class ClueGenerator {
|
||||
|
||||
private static final String OLLAMA_URL = "http://localhost:11434/api/chat";
|
||||
private static final String MODEL = "qwen2.5:14b";
|
||||
private static final String HINTS_FILE = "export_with_hints.csv";
|
||||
private static Map<String, String> prebuiltClues = null;
|
||||
|
||||
private static synchronized void ensurePrebuiltCluesLoaded() {
|
||||
if (prebuiltClues != null) return;
|
||||
prebuiltClues = new HashMap<>();
|
||||
try {
|
||||
List<String> lines = Files.readAllLines(Path.of(HINTS_FILE), StandardCharsets.UTF_8);
|
||||
for (String line : lines) {
|
||||
String[] parts = line.split(",", 3);
|
||||
if (parts.length >= 3) {
|
||||
String word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
String rawClue = parts[2].trim();
|
||||
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
||||
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
||||
}
|
||||
if (!word.isEmpty() && !rawClue.isEmpty()) {
|
||||
prebuiltClues.put(word, rawClue);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.err.println("Warning: " + HINTS_FILE + " not found or could not be read.");
|
||||
}
|
||||
}
|
||||
|
||||
public static ExportFormat.ExportedPuzzle applyClues(ExportFormat.ExportedPuzzle puzzle) {
|
||||
if (puzzle == null || puzzle.words().isEmpty()) {
|
||||
return puzzle;
|
||||
}
|
||||
|
||||
ensurePrebuiltCluesLoaded();
|
||||
|
||||
Map<String, String> finalClueMap = new HashMap<>();
|
||||
List<String> wordsMissingClues = new ArrayList<>();
|
||||
|
||||
for (var w : puzzle.words()) {
|
||||
String wordUpper = w.word().toUpperCase(Locale.ROOT);
|
||||
if (prebuiltClues.containsKey(wordUpper)) {
|
||||
finalClueMap.put(w.word(), prebuiltClues.get(wordUpper));
|
||||
} else {
|
||||
wordsMissingClues.add(w.word());
|
||||
}
|
||||
}
|
||||
|
||||
if (!wordsMissingClues.isEmpty()) {
|
||||
Map<String, String> generatedClues = generateClues(wordsMissingClues);
|
||||
finalClueMap.putAll(generatedClues);
|
||||
}
|
||||
|
||||
List<ExportFormat.WordOut> wordsWithClues = new ArrayList<>();
|
||||
for (var w : puzzle.words()) {
|
||||
String clue = finalClueMap.getOrDefault(w.word(), w.word());
|
||||
wordsWithClues.add(new ExportFormat.WordOut(
|
||||
w.word(),
|
||||
clue,
|
||||
w.startRow(),
|
||||
w.startCol(),
|
||||
w.direction(),
|
||||
w.answer(),
|
||||
w.arrowRow(),
|
||||
w.arrowCol(),
|
||||
w.isReversed()
|
||||
));
|
||||
}
|
||||
|
||||
return new ExportFormat.ExportedPuzzle(puzzle.gridv2(), wordsWithClues, puzzle.difficulty(), puzzle.rewards());
|
||||
}
|
||||
|
||||
public static Map<String, String> generateClues(List<String> words) {
|
||||
if (words == null || words.isEmpty()) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
String prompt = createCluePrompt(words);
|
||||
try {
|
||||
String jsonRequest = String.format(
|
||||
"{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"%s\"}],\"stream\":false,\"temperature\":0.7}",
|
||||
MODEL, escapeJson(prompt)
|
||||
);
|
||||
|
||||
String responseBody = curlPostJson(OLLAMA_URL, jsonRequest, 120);
|
||||
String content = extractChatContent(responseBody);
|
||||
|
||||
if (content == null || content.isEmpty()) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
return parseCluesFromReply(words, content);
|
||||
} catch (Exception e) {
|
||||
System.err.println("Failed to generate clues: " + e.getMessage());
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
}
|
||||
|
||||
private static String createCluePrompt(List<String> words) {
|
||||
return "Je bent een expert in het maken van kruiswoordpuzzels. Geef voor elk van de onderstaande woorden een korte, uitdagende maar duidelijke cryptische of beschrijvende aanwijzing in het Nederlands.\n\n" +
|
||||
"Output ALLEEN in dit formaat:\n" +
|
||||
"woord1:aanwijzing\n" +
|
||||
"woord2:aanwijzing\n\n" +
|
||||
"GEEN andere tekst of uitleg. Sla GEEN woorden over.\n\n" +
|
||||
"Lijst:\n" +
|
||||
String.join("\n", words);
|
||||
}
|
||||
|
||||
private static Map<String, String> parseCluesFromReply(List<String> expectedWords, String reply) {
|
||||
Map<String, String> wordClueMap = new HashMap<>();
|
||||
String[] lines = reply.split("\n");
|
||||
|
||||
for (String line : lines) {
|
||||
line = line.trim();
|
||||
if (line.contains(":")) {
|
||||
String[] parts = line.split(":", 2);
|
||||
if (parts.length == 2) {
|
||||
String wordPart = parts[0].trim().replaceAll("^[\\d+.)*\\-\\s]+", "").toLowerCase();
|
||||
String clue = parts[1].trim();
|
||||
if (!clue.isEmpty()) {
|
||||
wordClueMap.put(wordPart, clue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, String> results = new HashMap<>();
|
||||
for (String word : expectedWords) {
|
||||
String clue = wordClueMap.get(word.toLowerCase());
|
||||
if (clue != null) {
|
||||
results.put(word, clue);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
private static String curlPostJson(String url, String jsonBody, int timeoutSeconds) throws Exception {
|
||||
var tempFile = Files.createTempFile("clue-request-", ".json");
|
||||
try {
|
||||
Files.writeString(tempFile, jsonBody, StandardCharsets.UTF_8);
|
||||
List<String> cmd = new ArrayList<>();
|
||||
cmd.add("curl");
|
||||
cmd.add("-fsSL");
|
||||
cmd.add("--connect-timeout");
|
||||
cmd.add("10");
|
||||
cmd.add("--max-time");
|
||||
cmd.add(String.valueOf(timeoutSeconds));
|
||||
cmd.add("-H");
|
||||
cmd.add("Content-Type: application/json");
|
||||
cmd.add("-d");
|
||||
cmd.add("@" + tempFile);
|
||||
cmd.add(url);
|
||||
|
||||
var p = new ProcessBuilder(cmd)
|
||||
.redirectErrorStream(true)
|
||||
.start();
|
||||
|
||||
var bytes = p.getInputStream().readAllBytes();
|
||||
var code = p.waitFor();
|
||||
|
||||
if (code != 0) {
|
||||
throw new IOException("curl POST failed (" + code + ") url=" + url + "\nOutput:\n" +
|
||||
new String(bytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
return new String(bytes, StandardCharsets.UTF_8);
|
||||
} finally {
|
||||
Files.deleteIfExists(tempFile);
|
||||
}
|
||||
}
|
||||
|
||||
private static String extractChatContent(String json) {
|
||||
if (json == null) return null;
|
||||
var choices = json.indexOf("\"choices\"");
|
||||
var p = (choices >= 0) ? choices : 0;
|
||||
var i = json.indexOf("\"content\"", p);
|
||||
if (i < 0) {
|
||||
// Fallback for Ollama non-chat format if needed, but we used /api/chat
|
||||
// Ollama /api/chat returns {"model":"...","message":{"role":"assistant","content":"..."}}
|
||||
i = json.indexOf("\"content\"");
|
||||
if (i < 0) return null;
|
||||
}
|
||||
var colon = json.indexOf(':', i);
|
||||
if (colon < 0) return null;
|
||||
var q = json.indexOf('"', colon + 1);
|
||||
if (q < 0) return null;
|
||||
var sb = new StringBuilder();
|
||||
var esc = false;
|
||||
for (var k = q + 1; k < json.length(); k++) {
|
||||
var ch = json.charAt(k);
|
||||
if (esc) {
|
||||
if (ch == 'n') sb.append('\n');
|
||||
else if (ch == 't') sb.append('\t');
|
||||
else if (ch == 'r') sb.append('\r');
|
||||
else sb.append(ch);
|
||||
esc = false;
|
||||
} else {
|
||||
if (ch == '\\') esc = true;
|
||||
else if (ch == '"') break;
|
||||
else sb.append(ch);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private static String escapeJson(String str) {
|
||||
return str.replace("\\", "\\\\")
|
||||
.replace("\"", "\\\"")
|
||||
.replace("\n", "\\n");
|
||||
}
|
||||
}
|
||||
@@ -31,7 +31,7 @@ public class DailyGenerator {
|
||||
|
||||
public static void main(String[] args) {
|
||||
var outDir = env("OUT_DIR", "/home/mike/dev/puzzle-generator/data/");
|
||||
var wordsPath = env("WORDS_PATH", "./word-list.txt");
|
||||
var wordsPath = env("WORDS_PATH", "./export_words_only.txt");
|
||||
var puzzlesPerDay = envInt("PUZZLES_PER_DAY", 3);
|
||||
var seed = envInt("SEED", (int) System.currentTimeMillis());
|
||||
var themeFilter = envBool("THEME_FILTER", true);
|
||||
@@ -119,6 +119,10 @@ public class DailyGenerator {
|
||||
result, 1, new ExportFormat.Rewards(50, 2, 1)
|
||||
);
|
||||
|
||||
// Generate clues via LLM
|
||||
System.out.println("Generating clues for " + exported.words().size() + " words...");
|
||||
exported = ClueGenerator.applyClues(exported);
|
||||
|
||||
// Write to JSON file
|
||||
var filename = String.format("crossword_%s_%02d_%s.json", dateStr, i, safeSlug(theme));
|
||||
var outputPath = Paths.get(outDir, filename);
|
||||
|
||||
@@ -7,7 +7,10 @@ import java.nio.file.Paths;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
public class Main {
|
||||
// ---------------- CLI ----------------
|
||||
@@ -15,7 +18,7 @@ public class Main {
|
||||
public static class Opts {
|
||||
public int seed = 1;
|
||||
public int pop = 18;
|
||||
public int gens = 200;
|
||||
public int gens = 1000;
|
||||
public int tries = 5;
|
||||
public String wordsPath = "./out/pool.txt";
|
||||
public double minSimplicity = 0; // 0 means no limit
|
||||
@@ -74,6 +77,11 @@ public class Main {
|
||||
System.out.println(SwedishGenerator.renderHuman(res.filled().grid));
|
||||
System.out.printf(Locale.ROOT, "Puzzle Simplicity: %.2f%n", res.filled().simplicity);
|
||||
var out = ExportFormat.exportFormatFromFilled(res, 1, new ExportFormat.Rewards(50, 2, 1));
|
||||
|
||||
// Generate clues via LLM
|
||||
System.out.println("Generating clues for " + out.words().size() + " words...");
|
||||
out = ClueGenerator.applyClues(out);
|
||||
|
||||
System.out.println("gridv2:");
|
||||
for (String row : out.gridv2()) System.out.println(row);
|
||||
System.out.println("words: " + out.words().size());
|
||||
|
||||
@@ -145,7 +145,7 @@ public class SwedishGenerator {
|
||||
// Base difficulty starts high and decreases with length and score.
|
||||
// Length impact: (8 - length) * 30, i.e. up to 8 * 30 = 240
|
||||
// Score impact: up to 10 * 15 = 150
|
||||
var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10-score) * 15);
|
||||
var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
|
||||
this.difficulty = difficulty1;
|
||||
}
|
||||
}
|
||||
@@ -153,24 +153,19 @@ public class SwedishGenerator {
|
||||
static Map<String, Integer> loadScores() {
|
||||
var scores = new HashMap<String, Integer>();
|
||||
try {
|
||||
var lines = Files.readAllLines(Path.of("word_scores.csv"), StandardCharsets.UTF_8);
|
||||
var lines = Files.readAllLines(Path.of("export_words.csv"), StandardCharsets.UTF_8);
|
||||
var first = true;
|
||||
for (var line : lines) {
|
||||
if (first) {
|
||||
first = false;
|
||||
continue;
|
||||
}
|
||||
var parts = line.split("," );
|
||||
if (parts.length >= 3) {
|
||||
var parts = line.split(",");
|
||||
if (parts.length >= 2) {
|
||||
try {
|
||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
var score = Integer.parseInt(parts[1].trim());
|
||||
var status = parts[2].trim();
|
||||
if ("OK".equalsIgnoreCase(status)) {
|
||||
scores.put(word, score);
|
||||
} else {
|
||||
System.err.println("Skipping:" +Arrays.toString( parts));
|
||||
}
|
||||
scores.put(word, score);
|
||||
} catch (NumberFormatException ignored) {
|
||||
System.err.println("Illegal number format: " + line);
|
||||
}
|
||||
@@ -906,7 +901,7 @@ public class SwedishGenerator {
|
||||
var tLoad0 = System.nanoTime();
|
||||
var dict = loadWords(opts.wordsPath, llmScores);
|
||||
var tLoad1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words", (tLoad1 - tLoad0) / 1e9,dict.words.size());
|
||||
System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words", (tLoad1 - tLoad0) / 1e9, dict.words.size());
|
||||
|
||||
for (var attempt = 1; attempt <= opts.tries; attempt++) {
|
||||
System.out.println("\nAttempt " + attempt + "/" + opts.tries);
|
||||
|
||||
@@ -56,7 +56,7 @@ public class ThemePoolBuilderLength {
|
||||
|
||||
static final class Opts {
|
||||
|
||||
String wordsPath = "/home/mike/dev/puzzle-generator/word-list.txt";
|
||||
String wordsPath = "/home/mike/dev/puzzle-generator/export_words_only.txt";
|
||||
String endpoint = "https://jarvis-lan.appmodel.nl/api/stoic/";
|
||||
List<String> feeds = new ArrayList<>(DEFAULT_FEEDS);
|
||||
String outDir = "./out";
|
||||
@@ -301,12 +301,12 @@ public class ThemePoolBuilderLength {
|
||||
var parts = line.split(",", 3);
|
||||
if (parts.length >= 3) {
|
||||
try {
|
||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
var score = Integer.parseInt(parts[1].trim());
|
||||
var status = parts[2].trim();
|
||||
if ("OK".equalsIgnoreCase(status)) {
|
||||
llmScores.put(word, score);
|
||||
}
|
||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
var score = Integer.parseInt(parts[1].trim());
|
||||
// var status = parts[2].trim();
|
||||
// if ("OK".equalsIgnoreCase(status)) {
|
||||
llmScores.put(word, score);
|
||||
//}
|
||||
} catch (NumberFormatException ignored) { }
|
||||
}
|
||||
}
|
||||
@@ -731,7 +731,7 @@ public class ThemePoolBuilderLength {
|
||||
// Optionally filter out VERY complex words from the bridge (e.g. lScore < 3)
|
||||
// But since we sort by score (which is now dominated by lScore),
|
||||
// they will be at the very bottom anyway.
|
||||
if (lex.score[i] < 800) continue;
|
||||
// if (lex.score[i] < 800) continue;
|
||||
ids.add(i);
|
||||
}
|
||||
|
||||
@@ -774,8 +774,8 @@ public class ThemePoolBuilderLength {
|
||||
|
||||
var out = new ArrayList<String>(ids.size());
|
||||
for (var id : ids) {
|
||||
/* if (lex.score[id] < 680)
|
||||
continue;*/
|
||||
if (lex.score[id] < 680)
|
||||
continue;
|
||||
out.add(lex.words.get(id));
|
||||
}
|
||||
Files.write(path, out, StandardCharsets.UTF_8, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
|
||||
|
||||
Reference in New Issue
Block a user