Gather data
This commit is contained in:
@@ -20,10 +20,10 @@ public class ClueGenerator {
|
||||
try {
|
||||
var lines = Files.readAllLines(Path.of(HINTS_FILE), StandardCharsets.UTF_8);
|
||||
for (var line : lines) {
|
||||
var parts = line.split(",", 3);
|
||||
if (parts.length >= 3) {
|
||||
var parts = line.split(",", 4);
|
||||
if (parts.length >= 4) {
|
||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
var rawClue = parts[2].trim();
|
||||
var rawClue = parts[3].trim();
|
||||
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
||||
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
||||
}
|
||||
@@ -111,7 +111,7 @@ public final class ExportFormat {
|
||||
p.arrowRow - minR,
|
||||
p.arrowCol - minC,
|
||||
p.isReversed,
|
||||
puz.dict().words().get(p.word).difficulty()
|
||||
puz.dict().words().get(p.word).cross()
|
||||
));
|
||||
}
|
||||
|
||||
92
src/main/java/puzzle/HintScores.java
Normal file
92
src/main/java/puzzle/HintScores.java
Normal file
@@ -0,0 +1,92 @@
|
||||
package puzzle;
|
||||
|
||||
import java.sql.*;
|
||||
import java.util.function.ToIntFunction;
|
||||
|
||||
public final class HintScores {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Class.forName("org.sqlite.JDBC");
|
||||
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:/home/mike/dev/puzzle-generator/tools/hint/hint.sqlite")) {
|
||||
updateCrossScores(conn, HintScores::exampleScore, 1000);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Updates hints.cross_score by computing a score from hints.word.
|
||||
*
|
||||
* @param conn open JDBC connection (PostgreSQL)
|
||||
* @param scoreFn callback: scoreFn.applyAsInt(word)
|
||||
* @param batchSize e.g. 1000
|
||||
*/
|
||||
public static void updateCrossScores(
|
||||
Connection conn,
|
||||
ToIntFunction<String> scoreFn,
|
||||
int batchSize
|
||||
) throws SQLException {
|
||||
|
||||
// Use a transaction for speed + consistency
|
||||
final boolean prevAutoCommit = conn.getAutoCommit();
|
||||
conn.setAutoCommit(false);
|
||||
|
||||
// Server-side cursor behavior in pgjdbc requires autoCommit=false + fetchSize>0
|
||||
final String selectSql =
|
||||
"SELECT id, puzzle_norm " +
|
||||
"FROM hints " +
|
||||
"WHERE puzzle_norm IS NOT NULL"; // optionally add: " AND cross_score IS NULL"
|
||||
|
||||
final String updateSql =
|
||||
"UPDATE hints SET cross_score = ? WHERE id = ?";
|
||||
|
||||
try (PreparedStatement psSel = conn.prepareStatement(selectSql);
|
||||
PreparedStatement psUpd = conn.prepareStatement(updateSql)) {
|
||||
|
||||
psSel.setFetchSize(batchSize);
|
||||
|
||||
int pending = 0;
|
||||
|
||||
try (ResultSet rs = psSel.executeQuery()) {
|
||||
while (rs.next()) {
|
||||
long id = rs.getLong("id");
|
||||
String word = rs.getString("puzzle_norm");
|
||||
|
||||
int score;
|
||||
try {
|
||||
score = scoreFn.applyAsInt(word);
|
||||
} catch (RuntimeException ex) {
|
||||
// If scoring fails, decide your policy: skip or set 0.
|
||||
// Here: skip row.
|
||||
continue;
|
||||
}
|
||||
|
||||
psUpd.setInt(1, score);
|
||||
psUpd.setLong(2, id);
|
||||
psUpd.addBatch();
|
||||
pending++;
|
||||
|
||||
if (pending >= batchSize) {
|
||||
psUpd.executeBatch();
|
||||
conn.commit();
|
||||
pending = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (pending > 0) {
|
||||
psUpd.executeBatch();
|
||||
conn.commit();
|
||||
}
|
||||
|
||||
} catch (SQLException e) {
|
||||
conn.rollback();
|
||||
throw e;
|
||||
} finally {
|
||||
conn.setAutoCommit(prevAutoCommit);
|
||||
}
|
||||
}
|
||||
|
||||
// Example scoring callback
|
||||
public static int exampleScore(String word) {
|
||||
return ThemePoolBuilderLength.crossabilityScore(word);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -14,7 +14,6 @@ import java.util.concurrent.*;
|
||||
|
||||
import static puzzle.SwedishGenerator.fillMask;
|
||||
import static puzzle.SwedishGenerator.generateMask;
|
||||
import static puzzle.SwedishGenerator.loadScores;
|
||||
import static puzzle.SwedishGenerator.loadWords;
|
||||
|
||||
public class Main {
|
||||
@@ -238,10 +237,9 @@ public class Main {
|
||||
|
||||
// Package-private method for testing
|
||||
PuzzleResult generatePuzzle(Opts opts) {
|
||||
var llmScores = loadScores();
|
||||
|
||||
var tLoad0 = System.nanoTime();
|
||||
var dict = loadWords(opts.wordsPath, llmScores);
|
||||
var dict = loadWords(opts.wordsPath);
|
||||
var tLoad1 = System.nanoTime();
|
||||
|
||||
section("Load");
|
||||
@@ -260,7 +258,7 @@ public class Main {
|
||||
tasks.add(() -> {
|
||||
var threadRng = new Rng(opts.seed + attempt);
|
||||
var mask = generateMask(threadRng, dict.lenCounts(), opts.pop, opts.gens, false);
|
||||
var filled = fillMask(threadRng, mask, dict.index(), llmScores, 200, 60000, false);
|
||||
var filled = fillMask(threadRng, mask, dict.index(), dict.words(), 200, 30000, false);
|
||||
|
||||
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
||||
info("status : SOLVED");
|
||||
@@ -290,7 +288,7 @@ public class Main {
|
||||
info("try : " + attempt + "/" + opts.tries);
|
||||
|
||||
var mask = generateMask(rng, dict.lenCounts(), opts.pop, opts.gens, true);
|
||||
var filled = fillMask(rng, mask, dict.index(), llmScores, 200, 60000, true);
|
||||
var filled = fillMask(rng, mask, dict.index(), dict.words(), 200, 30000, true);
|
||||
|
||||
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
||||
info("status : SOLVED");
|
||||
@@ -20,7 +20,7 @@ public class SwedishGenerator {
|
||||
|
||||
static final int W = 9, H = 8,
|
||||
CLUE_SIZE = 4,
|
||||
SIMPLICITY_DEFAULT_SCORE = 5;
|
||||
SIMPLICITY_DEFAULT_SCORE = 2;
|
||||
static final int MIN_LEN = 2, MAX_LEN = 8;
|
||||
// Directions for '1'..'6'
|
||||
static final int[][] OFFSETS = new int[7][2];
|
||||
@@ -144,11 +144,16 @@ public class SwedishGenerator {
|
||||
}
|
||||
}
|
||||
|
||||
static record WordDifficulty(String word, int difficulty, int score) {
|
||||
static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross) {
|
||||
|
||||
public WordDifficulty(String word, int score) {
|
||||
public WordDifficulty(String word, int simpel, int score) {
|
||||
var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
|
||||
this(word, difficulty1, score);
|
||||
var crossScore = ThemePoolBuilderLength.crossabilityScore(word);
|
||||
this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15));
|
||||
|
||||
// Prioritize simple words (high lScore) and long words.
|
||||
// lScore (1-10) adds up to 1000 points (weight 100).
|
||||
// Length (2-8) adds up to 160 points (weight 20).
|
||||
// We want LONGER and SIMPLER words to be tried earlier (lower difficulty value).
|
||||
// word.length() is 2 to 8.
|
||||
// score is 1 to 10.
|
||||
@@ -158,38 +163,11 @@ public class SwedishGenerator {
|
||||
}
|
||||
}
|
||||
|
||||
static Map<String, Integer> loadScores() {
|
||||
var scores = new HashMap<String, Integer>();
|
||||
try {
|
||||
var scoresPath = System.getenv("SCORES_PATH");
|
||||
if (scoresPath == null || scoresPath.isBlank()) scoresPath = "export_real_words_with_hints.csv";
|
||||
var lines = Files.readAllLines(Path.of(scoresPath), StandardCharsets.UTF_8);
|
||||
var first = true;
|
||||
for (var line : lines) {
|
||||
if (first) {
|
||||
first = false;
|
||||
if (line.startsWith("WOORD")) continue;
|
||||
}
|
||||
var parts = line.split(",", 3);
|
||||
if (parts.length >= 2) {
|
||||
try {
|
||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||
var score = 10 - Integer.parseInt(parts[1].trim());
|
||||
scores.put(word, score);
|
||||
} catch (NumberFormatException ignored) {
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.err.println("Warning: word_scores.csv not found, using default scores.");
|
||||
}
|
||||
return scores;
|
||||
}
|
||||
|
||||
|
||||
public static record Dict(Map<String, WordDifficulty> words,
|
||||
HashMap<Integer, DictEntry> index,
|
||||
HashMap<Integer, Integer> lenCounts) { }
|
||||
static Dict loadWords(String wordsPath, Map<String, Integer> llmScores) {
|
||||
static Dict loadWords(String wordsPath) {
|
||||
String raw;
|
||||
try {
|
||||
raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8);
|
||||
@@ -201,7 +179,7 @@ public class SwedishGenerator {
|
||||
boolean first = true;
|
||||
for (var line : raw.split("\\R")) {
|
||||
if (line.isBlank()) continue;
|
||||
var parts = line.split(",", 3);
|
||||
var parts = line.split(",", 4);
|
||||
var word = parts[0].trim();
|
||||
if (first && word.equalsIgnoreCase("WOORD")) {
|
||||
first = false;
|
||||
@@ -210,25 +188,18 @@ public class SwedishGenerator {
|
||||
first = false;
|
||||
var s = word.toUpperCase(Locale.ROOT);
|
||||
if (s.matches("^[A-Z]{2,8}$")) {
|
||||
int score = SIMPLICITY_DEFAULT_SCORE;
|
||||
if (parts.length >= 2) {
|
||||
try {
|
||||
// CSV has level 1-10. llmScores use 10-level.
|
||||
score = 10 - Integer.parseInt(parts[1].trim());
|
||||
} catch (NumberFormatException e) {
|
||||
score = llmScores.getOrDefault(s, SIMPLICITY_DEFAULT_SCORE);
|
||||
System.err.println("Warning: " + word + " csv not found, using default scores.");
|
||||
}
|
||||
} else {
|
||||
score = llmScores.getOrDefault(s, SIMPLICITY_DEFAULT_SCORE);
|
||||
System.err.println("Warning: " + word + " csv not found, using default scores.");
|
||||
}
|
||||
map.put(s, new WordDifficulty(s, score));
|
||||
int score = SIMPLICITY_DEFAULT_SCORE;
|
||||
int simpel = 0;
|
||||
// CSV has level 1-10. llmScores use 10-level.
|
||||
score = 10 - Integer.parseInt(parts[1].trim());
|
||||
simpel = Integer.parseInt(parts[2].trim());
|
||||
if (score >= 1)
|
||||
map.put(s, new WordDifficulty(s, simpel, score));
|
||||
}
|
||||
}
|
||||
var words = map.values().stream().collect(Collectors.toCollection(ArrayList::new));
|
||||
// Sort words by difficulty in ascending order
|
||||
words.sort(Comparator.comparingInt(wd -> wd.difficulty));
|
||||
words.sort(Comparator.comparingInt(wd -> wd.simpel));
|
||||
|
||||
var index = new HashMap<Integer, DictEntry>();
|
||||
var lenCounts = new HashMap<Integer, Integer>();
|
||||
@@ -301,12 +272,6 @@ public class SwedishGenerator {
|
||||
|
||||
return new CandidateInfo(cur, curLen);
|
||||
}
|
||||
static int indexToDifficulty(DictEntry entry, int index, Map<String, Integer> llmScores) {
|
||||
var word = entry.words.get(index);
|
||||
var score = llmScores.getOrDefault(word, SIMPLICITY_DEFAULT_SCORE);
|
||||
return new WordDifficulty(word, score).difficulty;
|
||||
}
|
||||
|
||||
// ---------------- Slots ----------------
|
||||
|
||||
static record Slot(int clueR, int clueC, char dir, int[] rs, int[] cs, int len) {
|
||||
@@ -675,7 +640,7 @@ public class SwedishGenerator {
|
||||
}
|
||||
|
||||
static FillResult fillMask(Rng rng, char[][] mask, HashMap<Integer, DictEntry> dictIndex,
|
||||
Map<String, Integer> llmScores,
|
||||
Map<String, WordDifficulty> llmScores,
|
||||
int logEveryMs, int timeLimitMs, boolean verbose) {
|
||||
|
||||
var grid = deepCopyGrid(mask);
|
||||
@@ -876,7 +841,7 @@ public class SwedishGenerator {
|
||||
if (ok) {
|
||||
double totalSimplicity = 0;
|
||||
for (var w : assigned.values()) {
|
||||
totalSimplicity += llmScores.getOrDefault(w, 5);
|
||||
totalSimplicity += llmScores.get(w).difficulty;
|
||||
}
|
||||
res.simplicity = assigned.isEmpty() ? 0 : totalSimplicity / assigned.size();
|
||||
}
|
||||
@@ -903,9 +868,8 @@ public class SwedishGenerator {
|
||||
/**
 * Outcome of one {@code generatePuzzle} run.
 *
 * @param dict   the loaded word data
 * @param mask   the character grid mask
 * @param filled the fill result for that mask
 */
public record PuzzleResult(
        Dict dict,
        char[][] mask,
        FillResult filled
) { }
|
||||
|
||||
public static PuzzleResult generatePuzzle(Main.Opts opts) {
|
||||
var llmScores = loadScores();
|
||||
var tLoad0 = System.nanoTime();
|
||||
var dict = loadWords(opts.wordsPath, llmScores);
|
||||
var dict = loadWords(opts.wordsPath);
|
||||
var tLoad1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words%n", (tLoad1 - tLoad0) / 1e9, dict.words.size());
|
||||
|
||||
@@ -919,7 +883,7 @@ public class SwedishGenerator {
|
||||
tasks.add(() -> {
|
||||
var threadRng = new Rng(opts.seed + attempt);
|
||||
var mask = generateMask(threadRng, dict.lenCounts, opts.pop, opts.gens, false);
|
||||
var filled = fillMask(threadRng, mask, dict.index, llmScores, 200, 60000, false);
|
||||
var filled = fillMask(threadRng, mask, dict.index, dict.words, 200, 60000, false);
|
||||
|
||||
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
||||
System.out.println("\nSolution found on attempt " + attempt);
|
||||
@@ -948,7 +912,7 @@ public class SwedishGenerator {
|
||||
System.out.printf(Locale.ROOT, "MASK: %.3fs%n", (tMask1 - tMask0) / 1e9);
|
||||
|
||||
var tFill0 = System.nanoTime();
|
||||
var filled = fillMask(rng, mask, dict.index, llmScores, 200, 60000, true);
|
||||
var filled = fillMask(rng, mask, dict.index, dict.words, 200, 60000, true);
|
||||
var tFill1 = System.nanoTime();
|
||||
System.out.printf(Locale.ROOT, "FILL: %.3fms | Simplicity: %.2f%n", (tFill1 - tFill0) / 1e6, filled.simplicity);
|
||||
|
||||
@@ -1,19 +1,13 @@
|
||||
package puzzle;
|
||||
|
||||
import org.w3c.dom.*;
|
||||
import javax.net.ssl.*;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.http.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.*;
|
||||
import java.security.SecureRandom;
|
||||
import java.security.cert.X509Certificate;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.text.Normalizer;
|
||||
import java.time.LocalDate;
|
||||
Reference in New Issue
Block a user