Gather data

This commit is contained in:
mike
2026-01-04 01:04:56 +01:00
parent 29ed7fe254
commit 795067472f
11 changed files with 124 additions and 76 deletions

View File

@@ -20,10 +20,10 @@ public class ClueGenerator {
try {
var lines = Files.readAllLines(Path.of(HINTS_FILE), StandardCharsets.UTF_8);
for (var line : lines) {
var parts = line.split(",", 3);
if (parts.length >= 3) {
var parts = line.split(",", 4);
if (parts.length >= 4) {
var word = parts[0].trim().toUpperCase(Locale.ROOT);
var rawClue = parts[2].trim();
var rawClue = parts[3].trim();
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
}

View File

@@ -111,7 +111,7 @@ public final class ExportFormat {
p.arrowRow - minR,
p.arrowCol - minC,
p.isReversed,
puz.dict().words().get(p.word).difficulty()
puz.dict().words().get(p.word).cross()
));
}

View File

@@ -0,0 +1,92 @@
package puzzle;
import java.sql.*;
import java.util.function.ToIntFunction;
/**
 * Command-line utility that recomputes {@code hints.cross_score} for every row of the
 * {@code hints} table whose {@code puzzle_norm} column is not null.
 */
public final class HintScores {

    /** JDBC URL used when no URL is passed as the first command-line argument. */
    private static final String DEFAULT_JDBC_URL =
            "jdbc:sqlite:/home/mike/dev/puzzle-generator/tools/hint/hint.sqlite";

    /**
     * Opens the hint database and rescores all rows in batches of 1000.
     *
     * @param args optional; {@code args[0]} overrides the default JDBC URL
     * @throws Exception if the SQLite driver class cannot be loaded or a database call fails
     */
    public static void main(String[] args) throws Exception {
        Class.forName("org.sqlite.JDBC");
        final String url = (args != null && args.length > 0 && !args[0].isBlank())
                ? args[0]
                : DEFAULT_JDBC_URL;
        try (Connection conn = DriverManager.getConnection(url)) {
            updateCrossScores(conn, HintScores::exampleScore, 1000);
        }
    }

    /**
     * Updates {@code hints.cross_score} with a score computed from {@code hints.puzzle_norm}.
     *
     * <p>Auto-commit is switched off for the duration of the call and restored afterwards.
     * Updates are committed after every full batch (and once more for the final partial
     * batch), so a failure part-way through rolls back only the batch that was still
     * uncommitted; batches committed earlier stay in the database.
     *
     * <p>Rows for which {@code scoreFn} throws a {@link RuntimeException} are skipped and left
     * unchanged; the number of skipped rows is reported on {@code System.err}.
     *
     * @param conn      open JDBC connection to the database holding the {@code hints} table
     * @param scoreFn   callback invoked as {@code scoreFn.applyAsInt(puzzle_norm)} for each row
     * @param batchSize number of updates per batch/commit, also passed as the fetch-size hint
     *                  for the select; expected to be positive, e.g. 1000
     * @throws NullPointerException if {@code conn} or {@code scoreFn} is null
     * @throws SQLException         if any database operation fails
     */
    public static void updateCrossScores(
            Connection conn,
            ToIntFunction<String> scoreFn,
            int batchSize
    ) throws SQLException {
        // Fail fast: a null scoreFn would otherwise be swallowed by the per-row
        // RuntimeException handler below and silently skip every row.
        java.util.Objects.requireNonNull(conn, "conn");
        java.util.Objects.requireNonNull(scoreFn, "scoreFn");

        // Batch the updates inside explicit transactions instead of committing per row.
        final boolean prevAutoCommit = conn.getAutoCommit();
        conn.setAutoCommit(false);

        final String selectSql =
                "SELECT id, puzzle_norm " +
                "FROM hints " +
                "WHERE puzzle_norm IS NOT NULL"; // optionally add: " AND cross_score IS NULL"
        final String updateSql =
                "UPDATE hints SET cross_score = ? WHERE id = ?";

        int skipped = 0;
        RuntimeException firstScoringFailure = null;

        try (PreparedStatement psSel = conn.prepareStatement(selectSql);
             PreparedStatement psUpd = conn.prepareStatement(updateSql)) {
            // Fetch size is only a hint to the driver; how it is honoured is driver-specific.
            psSel.setFetchSize(batchSize);
            int pending = 0;
            try (ResultSet rs = psSel.executeQuery()) {
                while (rs.next()) {
                    long id = rs.getLong("id");
                    String word = rs.getString("puzzle_norm");
                    int score;
                    try {
                        score = scoreFn.applyAsInt(word);
                    } catch (RuntimeException ex) {
                        // Policy: a row that cannot be scored is skipped, not zeroed.
                        if (firstScoringFailure == null) {
                            firstScoringFailure = ex;
                        }
                        skipped++;
                        continue;
                    }
                    psUpd.setInt(1, score);
                    psUpd.setLong(2, id);
                    psUpd.addBatch();
                    pending++;
                    if (pending >= batchSize) {
                        psUpd.executeBatch();
                        conn.commit();
                        pending = 0;
                    }
                }
            }
            if (pending > 0) {
                psUpd.executeBatch();
                conn.commit();
            }
        } catch (SQLException | RuntimeException e) {
            // Roll back the uncommitted batch; without this, restoring auto-commit in the
            // finally block could commit a half-written batch. Keep the original failure
            // as the primary exception even if the rollback itself fails.
            try {
                conn.rollback();
            } catch (SQLException rollbackFailure) {
                e.addSuppressed(rollbackFailure);
            }
            throw e;
        } finally {
            conn.setAutoCommit(prevAutoCommit);
        }

        if (skipped > 0) {
            System.err.println("Warning: skipped " + skipped
                    + " hint row(s) because scoring failed; first failure: " + firstScoringFailure);
        }
    }

    /**
     * Example scoring callback; delegates to {@code ThemePoolBuilderLength.crossabilityScore}.
     *
     * @param word the word to score
     * @return the score returned by {@code ThemePoolBuilderLength.crossabilityScore(word)}
     */
    public static int exampleScore(String word) {
        return ThemePoolBuilderLength.crossabilityScore(word);
    }
}

View File

@@ -14,7 +14,6 @@ import java.util.concurrent.*;
import static puzzle.SwedishGenerator.fillMask;
import static puzzle.SwedishGenerator.generateMask;
import static puzzle.SwedishGenerator.loadScores;
import static puzzle.SwedishGenerator.loadWords;
public class Main {
@@ -238,10 +237,9 @@ public class Main {
// Package-private method for testing
PuzzleResult generatePuzzle(Opts opts) {
var llmScores = loadScores();
var tLoad0 = System.nanoTime();
var dict = loadWords(opts.wordsPath, llmScores);
var dict = loadWords(opts.wordsPath);
var tLoad1 = System.nanoTime();
section("Load");
@@ -260,7 +258,7 @@ public class Main {
tasks.add(() -> {
var threadRng = new Rng(opts.seed + attempt);
var mask = generateMask(threadRng, dict.lenCounts(), opts.pop, opts.gens, false);
var filled = fillMask(threadRng, mask, dict.index(), llmScores, 200, 60000, false);
var filled = fillMask(threadRng, mask, dict.index(), dict.words(), 200, 30000, false);
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
info("status : SOLVED");
@@ -290,7 +288,7 @@ public class Main {
info("try : " + attempt + "/" + opts.tries);
var mask = generateMask(rng, dict.lenCounts(), opts.pop, opts.gens, true);
var filled = fillMask(rng, mask, dict.index(), llmScores, 200, 60000, true);
var filled = fillMask(rng, mask, dict.index(), dict.words(), 200, 30000, true);
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
info("status : SOLVED");

View File

@@ -20,7 +20,7 @@ public class SwedishGenerator {
static final int W = 9, H = 8,
CLUE_SIZE = 4,
SIMPLICITY_DEFAULT_SCORE = 5;
SIMPLICITY_DEFAULT_SCORE = 2;
static final int MIN_LEN = 2, MAX_LEN = 8;
// Directions for '1'..'6'
static final int[][] OFFSETS = new int[7][2];
@@ -144,11 +144,16 @@ public class SwedishGenerator {
}
}
static record WordDifficulty(String word, int difficulty, int score) {
static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross) {
public WordDifficulty(String word, int score) {
public WordDifficulty(String word, int simpel, int score) {
var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
this(word, difficulty1, score);
var crossScore = ThemePoolBuilderLength.crossabilityScore(word);
this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15));
// Prioritize simple words (high lScore) and long words.
// lScore (1-10) adds up to 1000 points (weight 100).
// Length (2-8) adds up to 160 points (weight 20).
// We want LONGER and SIMPLER words to be tried earlier (lower difficulty value).
// word.length() is 2 to 8.
// score is 1 to 10.
@@ -158,38 +163,11 @@ public class SwedishGenerator {
}
}
/**
 * Loads per-word scores from a CSV file.
 *
 * <p>The file path is taken from the {@code SCORES_PATH} environment variable; when that is
 * unset or blank, {@code export_real_words_with_hints.csv} is used. Each line is split on
 * commas into at most three parts: the first is the word (trimmed, upper-cased with
 * {@link Locale#ROOT}) and the second is an integer level stored as {@code 10 - level}.
 * A first line starting with {@code WOORD} is treated as a header and skipped. Lines with
 * fewer than two parts or a non-numeric level are ignored.
 *
 * @return map from upper-cased word to score; empty if the file cannot be read
 */
static Map<String, Integer> loadScores() {
    var scores = new HashMap<String, Integer>();
    var scoresPath = System.getenv("SCORES_PATH");
    if (scoresPath == null || scoresPath.isBlank()) {
        scoresPath = "export_real_words_with_hints.csv";
    }
    try {
        var lines = Files.readAllLines(Path.of(scoresPath), StandardCharsets.UTF_8);
        var first = true;
        for (var line : lines) {
            if (first) {
                first = false;
                // Only the very first line may be the header row.
                if (line.startsWith("WOORD")) continue;
            }
            var parts = line.split(",", 3);
            if (parts.length >= 2) {
                try {
                    var word = parts[0].trim().toUpperCase(Locale.ROOT);
                    var score = 10 - Integer.parseInt(parts[1].trim());
                    scores.put(word, score);
                } catch (NumberFormatException ignored) {
                    // Best effort: a row without a numeric level contributes no score.
                }
            }
        }
    } catch (IOException e) {
        // Report the file that was actually attempted and why it failed; the previous
        // message named a different file and dropped the cause.
        System.err.println("Warning: could not read scores file '" + scoresPath
                + "' (" + e + "), using default scores.");
    }
    return scores;
}
/**
 * Bundle of dictionary data returned by {@code loadWords}.
 *
 * @param words     per-word {@code WordDifficulty} metadata, keyed by the word string
 * @param index     {@code DictEntry} lookup keyed by an integer
 *                  (presumably the word length — TODO confirm against loadWords)
 * @param lenCounts integer-to-integer counts
 *                  (presumably number of words per length — TODO confirm against loadWords)
 */
public static record Dict(Map<String, WordDifficulty> words,
HashMap<Integer, DictEntry> index,
HashMap<Integer, Integer> lenCounts) { }
static Dict loadWords(String wordsPath, Map<String, Integer> llmScores) {
static Dict loadWords(String wordsPath) {
String raw;
try {
raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8);
@@ -201,7 +179,7 @@ public class SwedishGenerator {
boolean first = true;
for (var line : raw.split("\\R")) {
if (line.isBlank()) continue;
var parts = line.split(",", 3);
var parts = line.split(",", 4);
var word = parts[0].trim();
if (first && word.equalsIgnoreCase("WOORD")) {
first = false;
@@ -210,25 +188,18 @@ public class SwedishGenerator {
first = false;
var s = word.toUpperCase(Locale.ROOT);
if (s.matches("^[A-Z]{2,8}$")) {
int score = SIMPLICITY_DEFAULT_SCORE;
if (parts.length >= 2) {
try {
// CSV has level 1-10. llmScores use 10-level.
score = 10 - Integer.parseInt(parts[1].trim());
} catch (NumberFormatException e) {
score = llmScores.getOrDefault(s, SIMPLICITY_DEFAULT_SCORE);
System.err.println("Warning: " + word + " csv not found, using default scores.");
}
} else {
score = llmScores.getOrDefault(s, SIMPLICITY_DEFAULT_SCORE);
System.err.println("Warning: " + word + " csv not found, using default scores.");
}
map.put(s, new WordDifficulty(s, score));
int score = SIMPLICITY_DEFAULT_SCORE;
int simpel = 0;
// CSV has level 1-10. llmScores use 10-level.
score = 10 - Integer.parseInt(parts[1].trim());
simpel = Integer.parseInt(parts[2].trim());
if (score >= 1)
map.put(s, new WordDifficulty(s, simpel, score));
}
}
var words = map.values().stream().collect(Collectors.toCollection(ArrayList::new));
// Sort words by difficulty in ascending order
words.sort(Comparator.comparingInt(wd -> wd.difficulty));
words.sort(Comparator.comparingInt(wd -> wd.simpel));
var index = new HashMap<Integer, DictEntry>();
var lenCounts = new HashMap<Integer, Integer>();
@@ -301,12 +272,6 @@ public class SwedishGenerator {
return new CandidateInfo(cur, curLen);
}
/**
 * Computes the difficulty of the word stored at {@code index} in {@code entry.words}.
 *
 * @param entry     dictionary entry whose word list is consulted
 * @param index     position of the word inside {@code entry.words}
 * @param llmScores per-word scores; words without an entry fall back to
 *                  {@code SIMPLICITY_DEFAULT_SCORE}
 * @return the {@code difficulty} of a {@code WordDifficulty} built from the word and its score
 */
static int indexToDifficulty(DictEntry entry, int index, Map<String, Integer> llmScores) {
    var candidate = entry.words.get(index);
    return new WordDifficulty(
            candidate,
            llmScores.getOrDefault(candidate, SIMPLICITY_DEFAULT_SCORE)
    ).difficulty;
}
// ---------------- Slots ----------------
static record Slot(int clueR, int clueC, char dir, int[] rs, int[] cs, int len) {
@@ -675,7 +640,7 @@ public class SwedishGenerator {
}
static FillResult fillMask(Rng rng, char[][] mask, HashMap<Integer, DictEntry> dictIndex,
Map<String, Integer> llmScores,
Map<String, WordDifficulty> llmScores,
int logEveryMs, int timeLimitMs, boolean verbose) {
var grid = deepCopyGrid(mask);
@@ -876,7 +841,7 @@ public class SwedishGenerator {
if (ok) {
double totalSimplicity = 0;
for (var w : assigned.values()) {
totalSimplicity += llmScores.getOrDefault(w, 5);
totalSimplicity += llmScores.get(w).difficulty;
}
res.simplicity = assigned.isEmpty() ? 0 : totalSimplicity / assigned.size();
}
@@ -903,9 +868,8 @@ public class SwedishGenerator {
public record PuzzleResult(Dict dict, char[][] mask, FillResult filled) { }
public static PuzzleResult generatePuzzle(Main.Opts opts) {
var llmScores = loadScores();
var tLoad0 = System.nanoTime();
var dict = loadWords(opts.wordsPath, llmScores);
var dict = loadWords(opts.wordsPath);
var tLoad1 = System.nanoTime();
System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words%n", (tLoad1 - tLoad0) / 1e9, dict.words.size());
@@ -919,7 +883,7 @@ public class SwedishGenerator {
tasks.add(() -> {
var threadRng = new Rng(opts.seed + attempt);
var mask = generateMask(threadRng, dict.lenCounts, opts.pop, opts.gens, false);
var filled = fillMask(threadRng, mask, dict.index, llmScores, 200, 60000, false);
var filled = fillMask(threadRng, mask, dict.index, dict.words, 200, 60000, false);
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
System.out.println("\nSolution found on attempt " + attempt);
@@ -948,7 +912,7 @@ public class SwedishGenerator {
System.out.printf(Locale.ROOT, "MASK: %.3fs%n", (tMask1 - tMask0) / 1e9);
var tFill0 = System.nanoTime();
var filled = fillMask(rng, mask, dict.index, llmScores, 200, 60000, true);
var filled = fillMask(rng, mask, dict.index, dict.words, 200, 60000, true);
var tFill1 = System.nanoTime();
System.out.printf(Locale.ROOT, "FILL: %.3fms | Simplicity: %.2f%n", (tFill1 - tFill0) / 1e6, filled.simplicity);

View File

@@ -1,19 +1,13 @@
package puzzle;
import org.w3c.dom.*;
import javax.net.ssl.*;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.*;
import java.net.http.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.security.SecureRandom;
import java.security.cert.X509Certificate;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.Normalizer;
import java.time.LocalDate;