Gather data
This commit is contained in:
@@ -20,10 +20,10 @@ public class ClueGenerator {
|
|||||||
try {
|
try {
|
||||||
var lines = Files.readAllLines(Path.of(HINTS_FILE), StandardCharsets.UTF_8);
|
var lines = Files.readAllLines(Path.of(HINTS_FILE), StandardCharsets.UTF_8);
|
||||||
for (var line : lines) {
|
for (var line : lines) {
|
||||||
var parts = line.split(",", 3);
|
var parts = line.split(",", 4);
|
||||||
if (parts.length >= 3) {
|
if (parts.length >= 4) {
|
||||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
||||||
var rawClue = parts[2].trim();
|
var rawClue = parts[3].trim();
|
||||||
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) {
|
||||||
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\"");
|
||||||
}
|
}
|
||||||
@@ -111,7 +111,7 @@ public final class ExportFormat {
|
|||||||
p.arrowRow - minR,
|
p.arrowRow - minR,
|
||||||
p.arrowCol - minC,
|
p.arrowCol - minC,
|
||||||
p.isReversed,
|
p.isReversed,
|
||||||
puz.dict().words().get(p.word).difficulty()
|
puz.dict().words().get(p.word).cross()
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
92
src/main/java/puzzle/HintScores.java
Normal file
92
src/main/java/puzzle/HintScores.java
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
package puzzle;
|
||||||
|
|
||||||
|
import java.sql.*;
|
||||||
|
import java.util.function.ToIntFunction;
|
||||||
|
|
||||||
|
public final class HintScores {
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
Class.forName("org.sqlite.JDBC");
|
||||||
|
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:/home/mike/dev/puzzle-generator/tools/hint/hint.sqlite")) {
|
||||||
|
updateCrossScores(conn, HintScores::exampleScore, 1000);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Updates hints.cross_score by computing a score from hints.word.
|
||||||
|
*
|
||||||
|
* @param conn open JDBC connection (PostgreSQL)
|
||||||
|
* @param scoreFn callback: scoreFn.applyAsInt(word)
|
||||||
|
* @param batchSize e.g. 1000
|
||||||
|
*/
|
||||||
|
public static void updateCrossScores(
|
||||||
|
Connection conn,
|
||||||
|
ToIntFunction<String> scoreFn,
|
||||||
|
int batchSize
|
||||||
|
) throws SQLException {
|
||||||
|
|
||||||
|
// Use a transaction for speed + consistency
|
||||||
|
final boolean prevAutoCommit = conn.getAutoCommit();
|
||||||
|
conn.setAutoCommit(false);
|
||||||
|
|
||||||
|
// Server-side cursor behavior in pgjdbc requires autoCommit=false + fetchSize>0
|
||||||
|
final String selectSql =
|
||||||
|
"SELECT id, puzzle_norm " +
|
||||||
|
"FROM hints " +
|
||||||
|
"WHERE puzzle_norm IS NOT NULL"; // optionally add: " AND cross_score IS NULL"
|
||||||
|
|
||||||
|
final String updateSql =
|
||||||
|
"UPDATE hints SET cross_score = ? WHERE id = ?";
|
||||||
|
|
||||||
|
try (PreparedStatement psSel = conn.prepareStatement(selectSql);
|
||||||
|
PreparedStatement psUpd = conn.prepareStatement(updateSql)) {
|
||||||
|
|
||||||
|
psSel.setFetchSize(batchSize);
|
||||||
|
|
||||||
|
int pending = 0;
|
||||||
|
|
||||||
|
try (ResultSet rs = psSel.executeQuery()) {
|
||||||
|
while (rs.next()) {
|
||||||
|
long id = rs.getLong("id");
|
||||||
|
String word = rs.getString("puzzle_norm");
|
||||||
|
|
||||||
|
int score;
|
||||||
|
try {
|
||||||
|
score = scoreFn.applyAsInt(word);
|
||||||
|
} catch (RuntimeException ex) {
|
||||||
|
// If scoring fails, decide your policy: skip or set 0.
|
||||||
|
// Here: skip row.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
psUpd.setInt(1, score);
|
||||||
|
psUpd.setLong(2, id);
|
||||||
|
psUpd.addBatch();
|
||||||
|
pending++;
|
||||||
|
|
||||||
|
if (pending >= batchSize) {
|
||||||
|
psUpd.executeBatch();
|
||||||
|
conn.commit();
|
||||||
|
pending = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pending > 0) {
|
||||||
|
psUpd.executeBatch();
|
||||||
|
conn.commit();
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (SQLException e) {
|
||||||
|
conn.rollback();
|
||||||
|
throw e;
|
||||||
|
} finally {
|
||||||
|
conn.setAutoCommit(prevAutoCommit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Example scoring callback
|
||||||
|
public static int exampleScore(String word) {
|
||||||
|
return ThemePoolBuilderLength.crossabilityScore(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -14,7 +14,6 @@ import java.util.concurrent.*;
|
|||||||
|
|
||||||
import static puzzle.SwedishGenerator.fillMask;
|
import static puzzle.SwedishGenerator.fillMask;
|
||||||
import static puzzle.SwedishGenerator.generateMask;
|
import static puzzle.SwedishGenerator.generateMask;
|
||||||
import static puzzle.SwedishGenerator.loadScores;
|
|
||||||
import static puzzle.SwedishGenerator.loadWords;
|
import static puzzle.SwedishGenerator.loadWords;
|
||||||
|
|
||||||
public class Main {
|
public class Main {
|
||||||
@@ -238,10 +237,9 @@ public class Main {
|
|||||||
|
|
||||||
// Package-private method for testing
|
// Package-private method for testing
|
||||||
PuzzleResult generatePuzzle(Opts opts) {
|
PuzzleResult generatePuzzle(Opts opts) {
|
||||||
var llmScores = loadScores();
|
|
||||||
|
|
||||||
var tLoad0 = System.nanoTime();
|
var tLoad0 = System.nanoTime();
|
||||||
var dict = loadWords(opts.wordsPath, llmScores);
|
var dict = loadWords(opts.wordsPath);
|
||||||
var tLoad1 = System.nanoTime();
|
var tLoad1 = System.nanoTime();
|
||||||
|
|
||||||
section("Load");
|
section("Load");
|
||||||
@@ -260,7 +258,7 @@ public class Main {
|
|||||||
tasks.add(() -> {
|
tasks.add(() -> {
|
||||||
var threadRng = new Rng(opts.seed + attempt);
|
var threadRng = new Rng(opts.seed + attempt);
|
||||||
var mask = generateMask(threadRng, dict.lenCounts(), opts.pop, opts.gens, false);
|
var mask = generateMask(threadRng, dict.lenCounts(), opts.pop, opts.gens, false);
|
||||||
var filled = fillMask(threadRng, mask, dict.index(), llmScores, 200, 60000, false);
|
var filled = fillMask(threadRng, mask, dict.index(), dict.words(), 200, 30000, false);
|
||||||
|
|
||||||
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
||||||
info("status : SOLVED");
|
info("status : SOLVED");
|
||||||
@@ -290,7 +288,7 @@ public class Main {
|
|||||||
info("try : " + attempt + "/" + opts.tries);
|
info("try : " + attempt + "/" + opts.tries);
|
||||||
|
|
||||||
var mask = generateMask(rng, dict.lenCounts(), opts.pop, opts.gens, true);
|
var mask = generateMask(rng, dict.lenCounts(), opts.pop, opts.gens, true);
|
||||||
var filled = fillMask(rng, mask, dict.index(), llmScores, 200, 60000, true);
|
var filled = fillMask(rng, mask, dict.index(), dict.words(), 200, 30000, true);
|
||||||
|
|
||||||
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
||||||
info("status : SOLVED");
|
info("status : SOLVED");
|
||||||
@@ -20,7 +20,7 @@ public class SwedishGenerator {
|
|||||||
|
|
||||||
static final int W = 9, H = 8,
|
static final int W = 9, H = 8,
|
||||||
CLUE_SIZE = 4,
|
CLUE_SIZE = 4,
|
||||||
SIMPLICITY_DEFAULT_SCORE = 5;
|
SIMPLICITY_DEFAULT_SCORE = 2;
|
||||||
static final int MIN_LEN = 2, MAX_LEN = 8;
|
static final int MIN_LEN = 2, MAX_LEN = 8;
|
||||||
// Directions for '1'..'6'
|
// Directions for '1'..'6'
|
||||||
static final int[][] OFFSETS = new int[7][2];
|
static final int[][] OFFSETS = new int[7][2];
|
||||||
@@ -144,11 +144,16 @@ public class SwedishGenerator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static record WordDifficulty(String word, int difficulty, int score) {
|
static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross) {
|
||||||
|
|
||||||
public WordDifficulty(String word, int score) {
|
public WordDifficulty(String word, int simpel, int score) {
|
||||||
var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
|
var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15);
|
||||||
this(word, difficulty1, score);
|
var crossScore = ThemePoolBuilderLength.crossabilityScore(word);
|
||||||
|
this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15));
|
||||||
|
|
||||||
|
// Prioritize simple words (high lScore) and long words.
|
||||||
|
// lScore (1-10) adds up to 1000 points (weight 100).
|
||||||
|
// Length (2-8) adds up to 160 points (weight 20).
|
||||||
// We want LONGER and SIMPLER words to be tried earlier (lower difficulty value).
|
// We want LONGER and SIMPLER words to be tried earlier (lower difficulty value).
|
||||||
// word.length() is 2 to 8.
|
// word.length() is 2 to 8.
|
||||||
// score is 1 to 10.
|
// score is 1 to 10.
|
||||||
@@ -158,38 +163,11 @@ public class SwedishGenerator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static Map<String, Integer> loadScores() {
|
|
||||||
var scores = new HashMap<String, Integer>();
|
|
||||||
try {
|
|
||||||
var scoresPath = System.getenv("SCORES_PATH");
|
|
||||||
if (scoresPath == null || scoresPath.isBlank()) scoresPath = "export_real_words_with_hints.csv";
|
|
||||||
var lines = Files.readAllLines(Path.of(scoresPath), StandardCharsets.UTF_8);
|
|
||||||
var first = true;
|
|
||||||
for (var line : lines) {
|
|
||||||
if (first) {
|
|
||||||
first = false;
|
|
||||||
if (line.startsWith("WOORD")) continue;
|
|
||||||
}
|
|
||||||
var parts = line.split(",", 3);
|
|
||||||
if (parts.length >= 2) {
|
|
||||||
try {
|
|
||||||
var word = parts[0].trim().toUpperCase(Locale.ROOT);
|
|
||||||
var score = 10 - Integer.parseInt(parts[1].trim());
|
|
||||||
scores.put(word, score);
|
|
||||||
} catch (NumberFormatException ignored) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
System.err.println("Warning: word_scores.csv not found, using default scores.");
|
|
||||||
}
|
|
||||||
return scores;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static record Dict(Map<String, WordDifficulty> words,
|
public static record Dict(Map<String, WordDifficulty> words,
|
||||||
HashMap<Integer, DictEntry> index,
|
HashMap<Integer, DictEntry> index,
|
||||||
HashMap<Integer, Integer> lenCounts) { }
|
HashMap<Integer, Integer> lenCounts) { }
|
||||||
static Dict loadWords(String wordsPath, Map<String, Integer> llmScores) {
|
static Dict loadWords(String wordsPath) {
|
||||||
String raw;
|
String raw;
|
||||||
try {
|
try {
|
||||||
raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8);
|
raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8);
|
||||||
@@ -201,7 +179,7 @@ public class SwedishGenerator {
|
|||||||
boolean first = true;
|
boolean first = true;
|
||||||
for (var line : raw.split("\\R")) {
|
for (var line : raw.split("\\R")) {
|
||||||
if (line.isBlank()) continue;
|
if (line.isBlank()) continue;
|
||||||
var parts = line.split(",", 3);
|
var parts = line.split(",", 4);
|
||||||
var word = parts[0].trim();
|
var word = parts[0].trim();
|
||||||
if (first && word.equalsIgnoreCase("WOORD")) {
|
if (first && word.equalsIgnoreCase("WOORD")) {
|
||||||
first = false;
|
first = false;
|
||||||
@@ -211,24 +189,17 @@ public class SwedishGenerator {
|
|||||||
var s = word.toUpperCase(Locale.ROOT);
|
var s = word.toUpperCase(Locale.ROOT);
|
||||||
if (s.matches("^[A-Z]{2,8}$")) {
|
if (s.matches("^[A-Z]{2,8}$")) {
|
||||||
int score = SIMPLICITY_DEFAULT_SCORE;
|
int score = SIMPLICITY_DEFAULT_SCORE;
|
||||||
if (parts.length >= 2) {
|
int simpel = 0;
|
||||||
try {
|
|
||||||
// CSV has level 1-10. llmScores use 10-level.
|
// CSV has level 1-10. llmScores use 10-level.
|
||||||
score = 10 - Integer.parseInt(parts[1].trim());
|
score = 10 - Integer.parseInt(parts[1].trim());
|
||||||
} catch (NumberFormatException e) {
|
simpel = Integer.parseInt(parts[2].trim());
|
||||||
score = llmScores.getOrDefault(s, SIMPLICITY_DEFAULT_SCORE);
|
if (score >= 1)
|
||||||
System.err.println("Warning: " + word + " csv not found, using default scores.");
|
map.put(s, new WordDifficulty(s, simpel, score));
|
||||||
}
|
|
||||||
} else {
|
|
||||||
score = llmScores.getOrDefault(s, SIMPLICITY_DEFAULT_SCORE);
|
|
||||||
System.err.println("Warning: " + word + " csv not found, using default scores.");
|
|
||||||
}
|
|
||||||
map.put(s, new WordDifficulty(s, score));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
var words = map.values().stream().collect(Collectors.toCollection(ArrayList::new));
|
var words = map.values().stream().collect(Collectors.toCollection(ArrayList::new));
|
||||||
// Sort words by difficulty in ascending order
|
// Sort words by difficulty in ascending order
|
||||||
words.sort(Comparator.comparingInt(wd -> wd.difficulty));
|
words.sort(Comparator.comparingInt(wd -> wd.simpel));
|
||||||
|
|
||||||
var index = new HashMap<Integer, DictEntry>();
|
var index = new HashMap<Integer, DictEntry>();
|
||||||
var lenCounts = new HashMap<Integer, Integer>();
|
var lenCounts = new HashMap<Integer, Integer>();
|
||||||
@@ -301,12 +272,6 @@ public class SwedishGenerator {
|
|||||||
|
|
||||||
return new CandidateInfo(cur, curLen);
|
return new CandidateInfo(cur, curLen);
|
||||||
}
|
}
|
||||||
static int indexToDifficulty(DictEntry entry, int index, Map<String, Integer> llmScores) {
|
|
||||||
var word = entry.words.get(index);
|
|
||||||
var score = llmScores.getOrDefault(word, SIMPLICITY_DEFAULT_SCORE);
|
|
||||||
return new WordDifficulty(word, score).difficulty;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------- Slots ----------------
|
// ---------------- Slots ----------------
|
||||||
|
|
||||||
static record Slot(int clueR, int clueC, char dir, int[] rs, int[] cs, int len) {
|
static record Slot(int clueR, int clueC, char dir, int[] rs, int[] cs, int len) {
|
||||||
@@ -675,7 +640,7 @@ public class SwedishGenerator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static FillResult fillMask(Rng rng, char[][] mask, HashMap<Integer, DictEntry> dictIndex,
|
static FillResult fillMask(Rng rng, char[][] mask, HashMap<Integer, DictEntry> dictIndex,
|
||||||
Map<String, Integer> llmScores,
|
Map<String, WordDifficulty> llmScores,
|
||||||
int logEveryMs, int timeLimitMs, boolean verbose) {
|
int logEveryMs, int timeLimitMs, boolean verbose) {
|
||||||
|
|
||||||
var grid = deepCopyGrid(mask);
|
var grid = deepCopyGrid(mask);
|
||||||
@@ -876,7 +841,7 @@ public class SwedishGenerator {
|
|||||||
if (ok) {
|
if (ok) {
|
||||||
double totalSimplicity = 0;
|
double totalSimplicity = 0;
|
||||||
for (var w : assigned.values()) {
|
for (var w : assigned.values()) {
|
||||||
totalSimplicity += llmScores.getOrDefault(w, 5);
|
totalSimplicity += llmScores.get(w).difficulty;
|
||||||
}
|
}
|
||||||
res.simplicity = assigned.isEmpty() ? 0 : totalSimplicity / assigned.size();
|
res.simplicity = assigned.isEmpty() ? 0 : totalSimplicity / assigned.size();
|
||||||
}
|
}
|
||||||
@@ -903,9 +868,8 @@ public class SwedishGenerator {
|
|||||||
public record PuzzleResult(Dict dict, char[][] mask, FillResult filled) { }
|
public record PuzzleResult(Dict dict, char[][] mask, FillResult filled) { }
|
||||||
|
|
||||||
public static PuzzleResult generatePuzzle(Main.Opts opts) {
|
public static PuzzleResult generatePuzzle(Main.Opts opts) {
|
||||||
var llmScores = loadScores();
|
|
||||||
var tLoad0 = System.nanoTime();
|
var tLoad0 = System.nanoTime();
|
||||||
var dict = loadWords(opts.wordsPath, llmScores);
|
var dict = loadWords(opts.wordsPath);
|
||||||
var tLoad1 = System.nanoTime();
|
var tLoad1 = System.nanoTime();
|
||||||
System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words%n", (tLoad1 - tLoad0) / 1e9, dict.words.size());
|
System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words%n", (tLoad1 - tLoad0) / 1e9, dict.words.size());
|
||||||
|
|
||||||
@@ -919,7 +883,7 @@ public class SwedishGenerator {
|
|||||||
tasks.add(() -> {
|
tasks.add(() -> {
|
||||||
var threadRng = new Rng(opts.seed + attempt);
|
var threadRng = new Rng(opts.seed + attempt);
|
||||||
var mask = generateMask(threadRng, dict.lenCounts, opts.pop, opts.gens, false);
|
var mask = generateMask(threadRng, dict.lenCounts, opts.pop, opts.gens, false);
|
||||||
var filled = fillMask(threadRng, mask, dict.index, llmScores, 200, 60000, false);
|
var filled = fillMask(threadRng, mask, dict.index, dict.words, 200, 60000, false);
|
||||||
|
|
||||||
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) {
|
||||||
System.out.println("\nSolution found on attempt " + attempt);
|
System.out.println("\nSolution found on attempt " + attempt);
|
||||||
@@ -948,7 +912,7 @@ public class SwedishGenerator {
|
|||||||
System.out.printf(Locale.ROOT, "MASK: %.3fs%n", (tMask1 - tMask0) / 1e9);
|
System.out.printf(Locale.ROOT, "MASK: %.3fs%n", (tMask1 - tMask0) / 1e9);
|
||||||
|
|
||||||
var tFill0 = System.nanoTime();
|
var tFill0 = System.nanoTime();
|
||||||
var filled = fillMask(rng, mask, dict.index, llmScores, 200, 60000, true);
|
var filled = fillMask(rng, mask, dict.index, dict.words, 200, 60000, true);
|
||||||
var tFill1 = System.nanoTime();
|
var tFill1 = System.nanoTime();
|
||||||
System.out.printf(Locale.ROOT, "FILL: %.3fms | Simplicity: %.2f%n", (tFill1 - tFill0) / 1e6, filled.simplicity);
|
System.out.printf(Locale.ROOT, "FILL: %.3fms | Simplicity: %.2f%n", (tFill1 - tFill0) / 1e6, filled.simplicity);
|
||||||
|
|
||||||
@@ -1,19 +1,13 @@
|
|||||||
package puzzle;
|
package puzzle;
|
||||||
|
|
||||||
import org.w3c.dom.*;
|
import org.w3c.dom.*;
|
||||||
import javax.net.ssl.*;
|
|
||||||
import javax.xml.parsers.DocumentBuilderFactory;
|
import javax.xml.parsers.DocumentBuilderFactory;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.net.http.*;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.*;
|
import java.nio.file.*;
|
||||||
import java.security.SecureRandom;
|
|
||||||
import java.security.cert.X509Certificate;
|
|
||||||
import java.sql.Connection;
|
import java.sql.Connection;
|
||||||
import java.sql.DriverManager;
|
import java.sql.DriverManager;
|
||||||
import java.sql.PreparedStatement;
|
|
||||||
import java.sql.ResultSet;
|
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.text.Normalizer;
|
import java.text.Normalizer;
|
||||||
import java.time.LocalDate;
|
import java.time.LocalDate;
|
||||||
Reference in New Issue
Block a user