diff --git a/src/puzzle/ClueGenerator.java b/src/main/java/puzzle/ClueGenerator.java similarity index 98% rename from src/puzzle/ClueGenerator.java rename to src/main/java/puzzle/ClueGenerator.java index d04a4a2..effa935 100644 --- a/src/puzzle/ClueGenerator.java +++ b/src/main/java/puzzle/ClueGenerator.java @@ -20,10 +20,10 @@ public class ClueGenerator { try { var lines = Files.readAllLines(Path.of(HINTS_FILE), StandardCharsets.UTF_8); for (var line : lines) { - var parts = line.split(",", 3); - if (parts.length >= 3) { + var parts = line.split(",", 4); + if (parts.length >= 4) { var word = parts[0].trim().toUpperCase(Locale.ROOT); - var rawClue = parts[2].trim(); + var rawClue = parts[3].trim(); if (rawClue.startsWith("\"") && rawClue.endsWith("\"")) { rawClue = rawClue.substring(1, rawClue.length() - 1).replace("\"\"", "\""); } diff --git a/src/puzzle/ConcurrentWordScorer.java b/src/main/java/puzzle/ConcurrentWordScorer.java similarity index 100% rename from src/puzzle/ConcurrentWordScorer.java rename to src/main/java/puzzle/ConcurrentWordScorer.java diff --git a/src/puzzle/ExportFormat.java b/src/main/java/puzzle/ExportFormat.java similarity index 99% rename from src/puzzle/ExportFormat.java rename to src/main/java/puzzle/ExportFormat.java index 220c22b..8c71f93 100644 --- a/src/puzzle/ExportFormat.java +++ b/src/main/java/puzzle/ExportFormat.java @@ -111,7 +111,7 @@ public final class ExportFormat { p.arrowRow - minR, p.arrowCol - minC, p.isReversed, - puz.dict().words().get(p.word).difficulty() + puz.dict().words().get(p.word).cross() )); } diff --git a/src/main/java/puzzle/HintScores.java b/src/main/java/puzzle/HintScores.java new file mode 100644 index 0000000..09d2710 --- /dev/null +++ b/src/main/java/puzzle/HintScores.java @@ -0,0 +1,92 @@ +package puzzle; + +import java.sql.*; +import java.util.function.ToIntFunction; + +public final class HintScores { + + public static void main(String[] args) throws Exception { + Class.forName("org.sqlite.JDBC"); + try (Connection conn = DriverManager.getConnection("jdbc:sqlite:/home/mike/dev/puzzle-generator/tools/hint/hint.sqlite")) { + updateCrossScores(conn, HintScores::exampleScore, 1000); + } + } + /** + * Updates hints.cross_score by computing a score from hints.word. + * + * @param conn open JDBC connection (PostgreSQL) + * @param scoreFn callback: scoreFn.applyAsInt(word) + * @param batchSize e.g. 1000 + */ + public static void updateCrossScores( + Connection conn, + ToIntFunction scoreFn, + int batchSize + ) throws SQLException { + + // Use a transaction for speed + consistency + final boolean prevAutoCommit = conn.getAutoCommit(); + conn.setAutoCommit(false); + + // Server-side cursor behavior in pgjdbc requires autoCommit=false + fetchSize>0 + final String selectSql = + "SELECT id, puzzle_norm " + + "FROM hints " + + "WHERE puzzle_norm IS NOT NULL"; // optionally add: " AND cross_score IS NULL" + + final String updateSql = + "UPDATE hints SET cross_score = ? WHERE id = ?"; + + try (PreparedStatement psSel = conn.prepareStatement(selectSql); + PreparedStatement psUpd = conn.prepareStatement(updateSql)) { + + psSel.setFetchSize(batchSize); + + int pending = 0; + + try (ResultSet rs = psSel.executeQuery()) { + while (rs.next()) { + long id = rs.getLong("id"); + String word = rs.getString("puzzle_norm"); + + int score; + try { + score = scoreFn.applyAsInt(word); + } catch (RuntimeException ex) { + // If scoring fails, decide your policy: skip or set 0. + // Here: skip row. + continue; + } + + psUpd.setInt(1, score); + psUpd.setLong(2, id); + psUpd.addBatch(); + pending++; + + if (pending >= batchSize) { + psUpd.executeBatch(); + conn.commit(); + pending = 0; + } + } + } + + if (pending > 0) { + psUpd.executeBatch(); + conn.commit(); + } + + } catch (SQLException e) { + conn.rollback(); + throw e; + } finally { + conn.setAutoCommit(prevAutoCommit); + } + } + + // Example scoring callback + public static int exampleScore(String word) { + return ThemePoolBuilderLength.crossabilityScore(word); + } + +} diff --git a/src/puzzle/Main.java b/src/main/java/puzzle/Main.java similarity index 98% rename from src/puzzle/Main.java rename to src/main/java/puzzle/Main.java index ce9bad5..f0993b9 100644 --- a/src/puzzle/Main.java +++ b/src/main/java/puzzle/Main.java @@ -14,7 +14,6 @@ import java.util.concurrent.*; import static puzzle.SwedishGenerator.fillMask; import static puzzle.SwedishGenerator.generateMask; -import static puzzle.SwedishGenerator.loadScores; import static puzzle.SwedishGenerator.loadWords; public class Main { @@ -238,10 +237,9 @@ public class Main { // Package-private method for testing PuzzleResult generatePuzzle(Opts opts) { - var llmScores = loadScores(); var tLoad0 = System.nanoTime(); - var dict = loadWords(opts.wordsPath, llmScores); + var dict = loadWords(opts.wordsPath); var tLoad1 = System.nanoTime(); section("Load"); @@ -260,7 +258,7 @@ public class Main { tasks.add(() -> { var threadRng = new Rng(opts.seed + attempt); var mask = generateMask(threadRng, dict.lenCounts(), opts.pop, opts.gens, false); - var filled = fillMask(threadRng, mask, dict.index(), llmScores, 200, 60000, false); + var filled = fillMask(threadRng, mask, dict.index(), dict.words(), 200, 30000, false); if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) { info("status : SOLVED"); @@ -290,7 +288,7 @@ public class Main { info("try : " + attempt + "/" + opts.tries); var mask = generateMask(rng, dict.lenCounts(), opts.pop, opts.gens, true); - var filled = fillMask(rng, mask, dict.index(), llmScores, 200, 60000, true); + var filled = fillMask(rng, mask, dict.index(), dict.words(), 200, 30000, true); if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) { info("status : SOLVED"); diff --git a/src/puzzle/MainTest.java b/src/main/java/puzzle/MainTest.java similarity index 100% rename from src/puzzle/MainTest.java rename to src/main/java/puzzle/MainTest.java diff --git a/src/puzzle/SwedishGenerator.java b/src/main/java/puzzle/SwedishGenerator.java similarity index 91% rename from src/puzzle/SwedishGenerator.java rename to src/main/java/puzzle/SwedishGenerator.java index 2e1600b..e35b9b4 100644 --- a/src/puzzle/SwedishGenerator.java +++ b/src/main/java/puzzle/SwedishGenerator.java @@ -20,7 +20,7 @@ public class SwedishGenerator { static final int W = 9, H = 8, CLUE_SIZE = 4, - SIMPLICITY_DEFAULT_SCORE = 5; + SIMPLICITY_DEFAULT_SCORE = 2; static final int MIN_LEN = 2, MAX_LEN = 8; // Directions for '1'..'6' static final int[][] OFFSETS = new int[7][2]; @@ -144,11 +144,16 @@ public class SwedishGenerator { } } - static record WordDifficulty(String word, int difficulty, int score) { + static record WordDifficulty(String word, int difficulty, int simpel, int score, int cross) { - public WordDifficulty(String word, int score) { + public WordDifficulty(String word, int simpel, int score) { var difficulty1 = 0 + ((8 - word.length()) * 30) + ((10 - score) * 15); - this(word, difficulty1, score); + var crossScore = ThemePoolBuilderLength.crossabilityScore(word); + this(word, difficulty1, simpel, score, (crossScore * 7) + ((score) * 30) + ((word.length()) * 15)); + + // Prioritize simple words (high lScore) and long words. + // lScore (1-10) adds up to 1000 points (weight 100). + // Length (2-8) adds up to 160 points (weight 20). // We want LONGER and SIMPLER words to be tried earlier (lower difficulty value). // word.length() is 2 to 8. // score is 1 to 10. @@ -158,38 +163,11 @@ public class SwedishGenerator { } } - static Map loadScores() { - var scores = new HashMap(); - try { - var scoresPath = System.getenv("SCORES_PATH"); - if (scoresPath == null || scoresPath.isBlank()) scoresPath = "export_real_words_with_hints.csv"; - var lines = Files.readAllLines(Path.of(scoresPath), StandardCharsets.UTF_8); - var first = true; - for (var line : lines) { - if (first) { - first = false; - if (line.startsWith("WOORD")) continue; - } - var parts = line.split(",", 3); - if (parts.length >= 2) { - try { - var word = parts[0].trim().toUpperCase(Locale.ROOT); - var score = 10 - Integer.parseInt(parts[1].trim()); - scores.put(word, score); - } catch (NumberFormatException ignored) { - } - } - } - } catch (IOException e) { - System.err.println("Warning: word_scores.csv not found, using default scores."); - } - return scores; - } - + public static record Dict(Map words, HashMap index, HashMap lenCounts) { } - static Dict loadWords(String wordsPath, Map llmScores) { + static Dict loadWords(String wordsPath) { String raw; try { raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8); @@ -201,7 +179,7 @@ public class SwedishGenerator { boolean first = true; for (var line : raw.split("\\R")) { if (line.isBlank()) continue; - var parts = line.split(",", 3); + var parts = line.split(",", 4); var word = parts[0].trim(); if (first && word.equalsIgnoreCase("WOORD")) { first = false; @@ -210,25 +188,18 @@ public class SwedishGenerator { first = false; var s = word.toUpperCase(Locale.ROOT); if (s.matches("^[A-Z]{2,8}$")) { - int score = SIMPLICITY_DEFAULT_SCORE; - if (parts.length >= 2) { - try { - // CSV has level 1-10. llmScores use 10-level. - score = 10 - Integer.parseInt(parts[1].trim()); - } catch (NumberFormatException e) { - score = llmScores.getOrDefault(s, SIMPLICITY_DEFAULT_SCORE); - System.err.println("Warning: " + word + " csv not found, using default scores."); - } - } else { - score = llmScores.getOrDefault(s, SIMPLICITY_DEFAULT_SCORE); - System.err.println("Warning: " + word + " csv not found, using default scores."); - } - map.put(s, new WordDifficulty(s, score)); + int score = SIMPLICITY_DEFAULT_SCORE; + int simpel = 0; + // CSV has level 1-10. llmScores use 10-level. + score = 10 - Integer.parseInt(parts[1].trim()); + simpel = Integer.parseInt(parts[2].trim()); + if (score >= 1) + map.put(s, new WordDifficulty(s, simpel, score)); } } var words = map.values().stream().collect(Collectors.toCollection(ArrayList::new)); // Sort words by difficulty in ascending order - words.sort(Comparator.comparingInt(wd -> wd.difficulty)); + words.sort(Comparator.comparingInt(wd -> wd.simpel)); var index = new HashMap(); var lenCounts = new HashMap(); @@ -301,12 +272,6 @@ public class SwedishGenerator { return new CandidateInfo(cur, curLen); } - static int indexToDifficulty(DictEntry entry, int index, Map llmScores) { - var word = entry.words.get(index); - var score = llmScores.getOrDefault(word, SIMPLICITY_DEFAULT_SCORE); - return new WordDifficulty(word, score).difficulty; - } - // ---------------- Slots ---------------- static record Slot(int clueR, int clueC, char dir, int[] rs, int[] cs, int len) { @@ -675,7 +640,7 @@ public class SwedishGenerator { } static FillResult fillMask(Rng rng, char[][] mask, HashMap dictIndex, - Map llmScores, + Map llmScores, int logEveryMs, int timeLimitMs, boolean verbose) { var grid = deepCopyGrid(mask); @@ -876,7 +841,7 @@ public class SwedishGenerator { if (ok) { double totalSimplicity = 0; for (var w : assigned.values()) { - totalSimplicity += llmScores.getOrDefault(w, 5); + totalSimplicity += llmScores.get(w).difficulty; } res.simplicity = assigned.isEmpty() ? 0 : totalSimplicity / assigned.size(); } @@ -903,9 +868,8 @@ public class SwedishGenerator { public record PuzzleResult(Dict dict, char[][] mask, FillResult filled) { } public static PuzzleResult generatePuzzle(Main.Opts opts) { - var llmScores = loadScores(); var tLoad0 = System.nanoTime(); - var dict = loadWords(opts.wordsPath, llmScores); + var dict = loadWords(opts.wordsPath); var tLoad1 = System.nanoTime(); System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n %s words%n", (tLoad1 - tLoad0) / 1e9, dict.words.size()); @@ -919,7 +883,7 @@ public class SwedishGenerator { tasks.add(() -> { var threadRng = new Rng(opts.seed + attempt); var mask = generateMask(threadRng, dict.lenCounts, opts.pop, opts.gens, false); - var filled = fillMask(threadRng, mask, dict.index, llmScores, 200, 60000, false); + var filled = fillMask(threadRng, mask, dict.index, dict.words, 200, 60000, false); if (filled.ok && (opts.minSimplicity <= 0 || filled.simplicity >= opts.minSimplicity)) { System.out.println("\nSolution found on attempt " + attempt); @@ -948,7 +912,7 @@ public class SwedishGenerator { System.out.printf(Locale.ROOT, "MASK: %.3fs%n", (tMask1 - tMask0) / 1e9); var tFill0 = System.nanoTime(); - var filled = fillMask(rng, mask, dict.index, llmScores, 200, 60000, true); + var filled = fillMask(rng, mask, dict.index, dict.words, 200, 60000, true); var tFill1 = System.nanoTime(); System.out.printf(Locale.ROOT, "FILL: %.3fms | Simplicity: %.2f%n", (tFill1 - tFill0) / 1e6, filled.simplicity); diff --git a/src/puzzle/TestSort.java b/src/main/java/puzzle/TestSort.java similarity index 100% rename from src/puzzle/TestSort.java rename to src/main/java/puzzle/TestSort.java diff --git a/src/puzzle/ThemePoolBuilderLength.java b/src/main/java/puzzle/ThemePoolBuilderLength.java similarity index 99% rename from src/puzzle/ThemePoolBuilderLength.java rename to src/main/java/puzzle/ThemePoolBuilderLength.java index e995e9d..295773e 100644 --- a/src/puzzle/ThemePoolBuilderLength.java +++ b/src/main/java/puzzle/ThemePoolBuilderLength.java @@ -1,19 +1,13 @@ package puzzle; import org.w3c.dom.*; -import javax.net.ssl.*; import javax.xml.parsers.DocumentBuilderFactory; import java.io.*; -import java.net.http.*; import java.nio.charset.StandardCharsets; import java.nio.file.*; -import java.security.SecureRandom; -import java.security.cert.X509Certificate; import java.sql.Connection; import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; import java.sql.SQLException; import java.text.Normalizer; import java.time.LocalDate; diff --git a/src/puzzle/WordScore.java b/src/main/java/puzzle/WordScore.java similarity index 100% rename from src/puzzle/WordScore.java rename to src/main/java/puzzle/WordScore.java diff --git a/src/puzzle/postgresql-42.7.8.jar b/src/main/java/puzzle/postgresql-42.7.8.jar similarity index 100% rename from src/puzzle/postgresql-42.7.8.jar rename to src/main/java/puzzle/postgresql-42.7.8.jar