/*
 * Change context (from the accompanying diff): in the same change,
 * ConcurrentWordScorer stopped calling updateHintsInDatabase() right after
 * writerThread.join(). Presumably that work is superseded by this standalone
 * job -- confirm against ConcurrentWordScorer.
 */
package puzzle;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.sql.*;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * Command-line job that generates crossword hints for words stored in the
 * {@code export_real_words_with_hints} table and scores each hint.
 *
 * <p>For every selected row the job asks a chat-completion endpoint for a hint,
 * then asks the same endpoint to guess the word from that hint. The score is
 * derived from the edit distance between the guess and the real word. Rows
 * without a hint get the generated hint; rows that already have one keep it
 * and only receive the generated text as {@code suggested_hint}.
 *
 * <p>HTTP calls are made by spawning {@code curl}, so the binary must be on
 * the PATH of the process running this job.
 */
public class HintJob {

    // NOTE(review): endpoint addresses and the database password are hard-coded
    // in source. Consider loading them from the environment or a config file.
    static final String JDBC_URL = "jdbc:postgresql://192.168.1.159:5432/postgres";
    static final String JDBC_USER = "puzzle";
    static final String JDBC_PASS = "heel-goed-wachtwoord";
    static final String OLLAMA_URL = "http://192.168.1.159:8081/v1/chat/completions";
    static final String MODEL = "/models/Hadiseh-Mhd/Mixtral-8x7B-Instruct-v0.1-Q4_K_M-GGUF/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf";

    /** Matches every character that is not an ASCII capital letter. */
    private static final Pattern NON_LETTERS = Pattern.compile("[^A-Z]");

    /**
     * Runs the job.
     *
     * @param args optional first argument: maximum number of rows to process
     *             (defaults to 3000)
     * @throws Exception on database, process or parsing failures; any work not
     *                   yet committed is discarded when the connection closes
     */
    public static void main(String[] args) throws Exception {
        Class.forName("org.postgresql.Driver");
        var limit = args.length > 0 ? Integer.parseInt(args[0]) : 3000;

        try (var c = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASS)) {
            c.setAutoCommit(false);

            // All three statements are prepared once and closed together. The
            // suggestion-only update used to be prepared per row and never closed.
            try (var sel = c.prepareStatement(
                         "select ctid::text, woord, hint, hint_score "
                                 + "from export_real_words_with_hints "
                                 + "order by (hint is null or hint = '') desc, updated_at nulls first "
                                 + "limit ? for update skip locked");
                 var upd = c.prepareStatement(
                         "update export_real_words_with_hints set hint = ?, hint_score = ?, guessed_word = ?, suggested_hint = ?, updated_at = now() where ctid::text = ?");
                 var updSug = c.prepareStatement(
                         "update export_real_words_with_hints set suggested_hint = ?, hint_score = ?, guessed_word = ?, updated_at = now() where ctid::text = ?")) {

                sel.setInt(1, limit);

                // NOTE(review): the commit after each row ends the transaction that
                // took the FOR UPDATE row locks, so rows not yet processed are
                // presumably unlocked after the first commit. Confirm whether
                // concurrent runs of this job must be prevented from overlapping.
                var done = 0;
                try (var rs = sel.executeQuery()) {
                    while (rs.next()) {
                        var ctid = rs.getString(1);
                        var woord = rs.getString(2);
                        var oldHint = rs.getString(3);

                        // A missing word cannot be hinted or scored; skip the row.
                        if (woord == null || woord.isBlank()) {
                            continue;
                        }

                        var newHint = generateHint(woord);
                        if (newHint == null || newHint.isBlank()) {
                            continue;
                        }
                        newHint = sanitizeHint(newHint);

                        var scoreRes = scoreHint(newHint, woord);

                        // The goal is mainly to add hints to records that have none,
                        // and to keep an existing hint because the LLM output is
                        // sometimes disappointing.
                        if (oldHint != null && !oldHint.isBlank()) {
                            // A hint already exists: store the new text as a suggestion
                            // only and leave the original 'hint' column untouched.
                            updSug.setString(1, newHint);
                            updSug.setInt(2, scoreRes.score());
                            updSug.setString(3, scoreRes.guessedWord());
                            updSug.setString(4, ctid);
                            updSug.executeUpdate();
                        } else {
                            // No existing hint, so fill it in now.
                            upd.setString(1, newHint);
                            upd.setInt(2, scoreRes.score());
                            upd.setString(3, scoreRes.guessedWord());
                            upd.setString(4, newHint);
                            upd.setString(5, ctid);
                            upd.executeUpdate();
                        }
                        c.commit();
                        done++;

                        // Report progress only after a real commit, once per ten rows.
                        if (done % 10 == 0) {
                            System.out.println("Committed " + done);
                        }
                    }
                }

                System.out.println("Done. Updated " + done + " rows.");
            }
        }
    }

    /**
     * Trims a generated hint and, when the model answered with several lines,
     * keeps only the last line.
     *
     * @param hint raw model output, may be {@code null}
     * @return the cleaned hint, or {@code null} when {@code hint} is {@code null}
     */
    static String sanitizeHint(String hint) {
        if (hint == null) {
            return null;
        }
        hint = hint.trim();
        if (hint.contains("\n")) {
            var lines = hint.split("\n");
            hint = lines[lines.length - 1].trim();
        }
        return hint;
    }

    /**
     * Outcome of scoring one hint.
     *
     * @param guessedWord the model's guess reduced to the letters A-Z, possibly empty
     * @param score       0..100, higher means the guess was closer to the real word
     */
    record ScoreResult(String guessedWord, int score) {
    }

    /** Exit code and combined stdout/stderr of one curl invocation. */
    private record CurlResult(int exitCode, String output) {
    }

    /**
     * Asks the model to guess the word from the hint and scores the hint by how
     * close that guess is to the real word.
     *
     * @param hint  the hint to evaluate
     * @param woord the word the hint is supposed to describe
     * @return the normalized guess and its score; an empty guess with score 0
     *         when the request itself failed
     * @throws Exception if the curl process cannot be started or is interrupted
     */
    static ScoreResult scoreHint(String hint, String woord) throws Exception {
        var prompt =
                "Ik geef je een kruiswoordpuzzel hint en het aantal letters van het woord. "
                        + "Welk Nederlands woord van " + woord.length() + " letters wordt hier gezocht?\n"
                        + "Hint: " + hint + "\n"
                        + "Antwoord met alleen het woord, geen uitleg.";

        // NOTE(review): max_tokens is fixed at 3; long words may need more tokens
        // and would then be truncated and scored too low -- confirm with the model.
        var response = postChat(chatPayload(prompt, ",\"max_tokens\":3,\"temperature\":0.0"));
        if (response.exitCode() != 0) {
            // Same handling as generateHint: never parse curl's error text as JSON.
            // A transport failure says nothing about the hint, so it scores 0.
            System.err.println("curl failed (" + response.exitCode() + "): " + response.output());
            return new ScoreResult("", 0);
        }

        var content = jsonGetString(response.output(), "content");
        var guessed = content == null ? "" : lettersOnly(content.trim());
        var original = lettersOnly(woord);
        var dist = levenshtein(guessed, original);

        // A good hint should score high: distance 0 (exact guess) gives the
        // maximum, and each edit costs 10 points down to a floor of 0.
        var score = Math.max(0, 100 - (dist * 10));

        return new ScoreResult(guessed, score);
    }

    /** Upper-cases independently of the default locale and drops everything but A-Z. */
    private static String lettersOnly(String s) {
        return NON_LETTERS.matcher(s.toUpperCase(Locale.ROOT)).replaceAll("");
    }

    /**
     * Computes the Levenshtein edit distance between two strings.
     *
     * @param s1 first string, not {@code null}
     * @param s2 second string, not {@code null}
     * @return minimum number of single-character insertions, deletions and
     *         substitutions that turn {@code s1} into {@code s2}
     */
    static int levenshtein(String s1, String s2) {
        var dp = new int[s1.length() + 1][s2.length() + 1];
        for (var i = 0; i <= s1.length(); i++) {
            dp[i][0] = i;
        }
        for (var j = 0; j <= s2.length(); j++) {
            dp[0][j] = j;
        }
        for (var i = 1; i <= s1.length(); i++) {
            for (var j = 1; j <= s2.length(); j++) {
                var cost = (s1.charAt(i - 1) == s2.charAt(j - 1)) ? 0 : 1;
                dp[i][j] = Math.min(
                        Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1),
                        dp[i - 1][j - 1] + cost);
            }
        }
        return dp[s1.length()][s2.length()];
    }

    /**
     * Asks the model for a short Dutch crossword hint for the given word.
     *
     * @param woord the word to describe
     * @return the raw hint text, or {@code null} when curl failed or the
     *         response contained no readable {@code content} string
     * @throws Exception if the curl process cannot be started or is interrupted
     */
    static String generateHint(String woord) throws Exception {
        var prompt =
                "Geef een korte, duidelijke kruiswoordpuzzel hint in het Nederlands voor het woord: " + woord + ". "
                        + "Antwoord alleen met de hint. Geen inleiding, geen uitleg, geen aantal letters, geen aanhalingstekens.";

        var response = postChat(chatPayload(prompt, ""));
        if (response.exitCode() != 0) {
            System.err.println("curl failed (" + response.exitCode() + "): " + response.output());
            return null;
        }
        // Extract content from {"choices":[{"message":{"content":"..."}}...]}
        return jsonGetString(response.output(), "content");
    }

    /**
     * Builds the chat-completion request body for a single user message.
     *
     * @param prompt      the user message
     * @param extraFields additional top-level JSON members, each starting with a
     *                    comma, or an empty string
     */
    private static String chatPayload(String prompt, String extraFields) {
        return "{"
                + "\"model\":\"" + jsonEscape(MODEL) + "\","
                + "\"messages\":[{\"role\":\"user\",\"content\":\"" + jsonEscape(prompt) + "\"}],"
                + "\"stream\":false"
                + extraFields
                + "}";
    }

    /** POSTs the payload to {@link #OLLAMA_URL} through curl and captures its merged output. */
    private static CurlResult postChat(String payload) throws IOException, InterruptedException {
        var process = new ProcessBuilder(
                "curl", "-sS",
                "-H", "Content-Type: application/json",
                "-X", "POST", OLLAMA_URL,
                "-d", payload
        ).redirectErrorStream(true).start();

        String out;
        try (var in = process.getInputStream()) {
            out = new String(in.readAllBytes(), StandardCharsets.UTF_8);
        }
        return new CurlResult(process.waitFor(), out);
    }

    /**
     * Escapes a string for embedding between double quotes in a JSON document.
     * Backslash, double quote, newline, carriage return and tab get their short
     * escapes; every other control character below 0x20 is written as a
     * six-character unicode escape, because JSON forbids raw control characters.
     *
     * @param s text to escape, not {@code null}
     * @return the escaped text, without surrounding quotes
     */
    static String jsonEscape(String s) {
        var b = new StringBuilder(s.length() + 16);
        for (var i = 0; i < s.length(); i++) {
            var ch = s.charAt(i);
            switch (ch) {
                case '\\' -> b.append("\\\\");
                case '"' -> b.append("\\\"");
                case '\n' -> b.append("\\n");
                case '\r' -> b.append("\\r");
                case '\t' -> b.append("\\t");
                default -> {
                    if (ch < 0x20) {
                        b.append(String.format("\\u%04x", (int) ch));
                    } else {
                        b.append(ch);
                    }
                }
            }
        }
        return b.toString();
    }

    /**
     * Minimal extractor for the first string value stored under {@code key}.
     * It searches the text for the quoted key, so it is not a general JSON
     * parser: an identical quoted token appearing earlier inside another value
     * would be matched first.
     *
     * @param json the JSON text to search, not {@code null}
     * @param key  the member name to look for
     * @return the decoded string value, or {@code null} when the key is absent,
     *         its value is not a string, or the string is unterminated
     */
    static String jsonGetString(String json, String key) {
        var needle = "\"" + key + "\"";
        var i = json.indexOf(needle);
        if (i < 0) {
            return null;
        }
        i = json.indexOf(':', i + needle.length());
        if (i < 0) {
            return null;
        }
        i++;
        while (i < json.length() && Character.isWhitespace(json.charAt(i))) {
            i++;
        }
        if (i >= json.length() || json.charAt(i) != '"') {
            return null;
        }
        i++; // after opening quote

        var b = new StringBuilder();
        var esc = false;
        for (; i < json.length(); i++) {
            var ch = json.charAt(i);
            if (esc) {
                switch (ch) {
                    case 'n' -> b.append('\n');
                    case 'r' -> b.append('\r');
                    case 't' -> b.append('\t');
                    case 'b' -> b.append('\b');
                    case 'f' -> b.append('\f');
                    case '"' -> b.append('"');
                    case '\\' -> b.append('\\');
                    case 'u' -> {
                        // Four hex digits follow a unicode escape; servers use this
                        // form for accented letters, which are common in Dutch text.
                        var unit = hex4(json, i + 1);
                        if (unit >= 0) {
                            b.append((char) unit);
                            i += 4;
                        } else {
                            b.append(ch); // malformed escape: keep the character as-is
                        }
                    }
                    default -> b.append(ch);
                }
                esc = false;
            } else if (ch == '\\') {
                esc = true;
            } else if (ch == '"') {
                return b.toString();
            } else {
                b.append(ch);
            }
        }
        return null;
    }

    /** Parses four hex digits starting at {@code from}; returns -1 if they are missing or invalid. */
    private static int hex4(String s, int from) {
        if (from + 4 > s.length()) {
            return -1;
        }
        var value = 0;
        for (var k = from; k < from + 4; k++) {
            var digit = Character.digit(s.charAt(k), 16);
            if (digit < 0) {
                return -1;
            }
            value = value * 16 + digit;
        }
        return value;
    }
}