Gather data
This commit is contained in:
@@ -189,7 +189,6 @@ public class ConcurrentWordScorer {
|
|||||||
writerThread.join();
|
writerThread.join();
|
||||||
|
|
||||||
// Update hints in the database
|
// Update hints in the database
|
||||||
updateHintsInDatabase();
|
|
||||||
|
|
||||||
System.out.println("\n✓ All endpoints finished!");
|
System.out.println("\n✓ All endpoints finished!");
|
||||||
}
|
}
|
||||||
|
|||||||
230
src/puzzle/HintJob.java
Normal file
230
src/puzzle/HintJob.java
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
package puzzle;
|
||||||
|
|
||||||
|
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.sql.*;
import java.util.Locale;
|
||||||
|
|
||||||
|
/**
 * Batch job that fills in or suggests crossword hints for words stored in the
 * {@code export_real_words_with_hints} table.
 *
 * <p>For every selected row the job asks the chat-completions endpoint for a hint, then asks
 * it to guess the word from that hint, and scores the hint by the edit distance between the
 * guess and the real word. Rows without a hint receive the generated hint; rows that already
 * have one only receive it as {@code suggested_hint}.
 */
public class HintJob {

    // Connection settings. Each value can be overridden through an environment variable; the
    // defaults are the values that used to be hard-coded, so existing setups keep working.
    // NOTE(review): the default password still lives in source control - consider removing it.
    static final String JDBC_URL = env("HINTJOB_JDBC_URL", "jdbc:postgresql://192.168.1.159:5432/postgres");
    static final String JDBC_USER = env("HINTJOB_JDBC_USER", "puzzle");
    static final String JDBC_PASS = env("HINTJOB_JDBC_PASS", "heel-goed-wachtwoord");
    static final String OLLAMA_URL = env("HINTJOB_OLLAMA_URL", "http://192.168.1.159:8081/v1/chat/completions");
    static final String MODEL = env("HINTJOB_MODEL",
            "/models/Hadiseh-Mhd/Mixtral-8x7B-Instruct-v0.1-Q4_K_M-GGUF/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf");

    /** Returns the environment variable {@code name}, or {@code fallback} when unset or blank. */
    private static String env(String name, String fallback) {
        var value = System.getenv(name);
        return (value == null || value.isBlank()) ? fallback : value;
    }

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} is the maximum number of rows to process
     *             (defaults to 3000)
     * @throws Exception on database, process or parsing failures
     */
    public static void main(String[] args) throws Exception {
        Class.forName("org.postgresql.Driver");
        var limit = args.length > 0 ? Integer.parseInt(args[0]) : 3000;

        try (var c = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASS)) {
            c.setAutoCommit(false);

            // All three statements are prepared once and closed together; previously the
            // "suggestion" statement was re-prepared for every row and never closed.
            try (var sel = c.prepareStatement(
                    "select ctid::text, woord, hint, hint_score " +
                    "from export_real_words_with_hints " +
                    "order by (hint is null or hint = '') desc, updated_at nulls first " +
                    "limit ? for update skip locked");
                 var upd = c.prepareStatement(
                    "update export_real_words_with_hints set hint = ?, hint_score = ?, guessed_word = ?, suggested_hint = ?, updated_at = now() where ctid::text = ?");
                 var updSug = c.prepareStatement(
                    "update export_real_words_with_hints set suggested_hint = ?, hint_score = ?, guessed_word = ?, updated_at = now() where ctid::text = ?")) {

                sel.setInt(1, limit);

                var done = 0;
                try (var rs = sel.executeQuery()) {
                    while (rs.next()) {
                        var ctid = rs.getString(1);
                        var woord = rs.getString(2);
                        var oldHint = rs.getString(3);

                        // Nothing sensible can be generated or scored for an empty word.
                        if (woord == null || woord.isBlank()) {
                            continue;
                        }

                        var newHint = generateHint(woord);
                        if (newHint == null || newHint.isBlank()) {
                            continue;
                        }
                        newHint = sanitizeHint(newHint);

                        var scoreRes = scoreHint(newHint, woord);
                        var newScore = scoreRes.score();

                        // The user mainly wants hints added to records that have none, and
                        // wants existing hints kept because LLM results can be disappointing.
                        if (oldHint != null && !oldHint.isBlank()) {
                            // A hint already exists: store the new one as a suggestion only
                            // and do NOT overwrite the original 'hint' column.
                            updSug.setString(1, newHint);
                            updSug.setInt(2, newScore);
                            updSug.setString(3, scoreRes.guessedWord());
                            updSug.setString(4, ctid);
                            updSug.executeUpdate();
                        } else {
                            // No existing hint, so fill it in now.
                            upd.setString(1, newHint);
                            upd.setInt(2, newScore);
                            upd.setString(3, scoreRes.guessedWord());
                            upd.setString(4, newHint);
                            upd.setString(5, ctid);
                            upd.executeUpdate();
                        }

                        // NOTE(review): committing per row ends the transaction that holds the
                        // "for update skip locked" row locks, so the remaining selected rows
                        // are unlocked after the first commit - confirm that concurrent runs
                        // are not expected, or switch to a stable key plus per-row locking.
                        c.commit();
                        done++;

                        // Report progress after the commit so the number is accurate and is
                        // not repeated while rows are being skipped.
                        if (done % 10 == 0) {
                            System.out.println("Committed " + done);
                        }
                    }
                }

                System.out.println("Done. Updated " + done + " rows.");
            }
        }
    }

    /**
     * Cleans up a raw model answer: trims it and, when it spans several lines, keeps only the
     * last line (trimmed).
     *
     * @param hint raw hint text, may be {@code null}
     * @return the cleaned hint, or {@code null} when {@code hint} is {@code null}
     */
    static String sanitizeHint(String hint) {
        if (hint == null) {
            return null;
        }
        hint = hint.trim();
        if (hint.contains("\n")) {
            var lines = hint.split("\n");
            hint = lines[lines.length - 1].trim();
        }
        return hint;
    }

    /** The word the model guessed for a hint, and the resulting score (0-100). */
    record ScoreResult(String guessedWord, int score) {
    }

    /**
     * Scores a hint by asking the model which word it describes and comparing the guess with
     * the real word.
     *
     * @param hint  the hint to evaluate
     * @param woord the word the hint is supposed to describe
     * @return the normalised guess (upper case, A-Z only; empty when no answer could be
     *         obtained) and a score of {@code max(0, 100 - 10 * editDistance)}
     * @throws Exception when the curl process cannot be started or awaited
     */
    static ScoreResult scoreHint(String hint, String woord) throws Exception {
        var prompt =
                "Ik geef je een kruiswoordpuzzel hint en het aantal letters van het woord. " +
                "Welk Nederlands woord van " + woord.length() + " letters wordt hier gezocht?\n" +
                "Hint: " + hint + "\n" +
                "Antwoord met alleen het woord, geen uitleg.";

        // NOTE(review): max_tokens is 3; a longer word may be cut off before the full guess
        // is produced, which lowers the score - confirm this limit is intended.
        var payload = "{"
                + "\"model\":\"" + jsonEscape(MODEL) + "\","
                + "\"messages\":[{\"role\":\"user\",\"content\":\"" + jsonEscape(prompt) + "\"}],"
                + "\"stream\":false,"
                + "\"max_tokens\":3,"
                + "\"temperature\":0.0"
                + "}";

        var out = postChat(payload);
        var answer = out == null ? null : jsonGetString(out, "content");
        var guessed = answer == null ? "" : lettersOnly(answer);
        var original = lettersOnly(woord);
        var dist = levenshtein(guessed, original);

        // Higher is better: an exact guess (distance 0) yields 100, and every edit costs
        // 10 points, never dropping below 0.
        var score = Math.max(0, 100 - (dist * 10));

        return new ScoreResult(guessed, score);
    }

    /**
     * Upper-cases {@code s} independently of the default locale and removes everything that
     * is not A-Z.
     */
    private static String lettersOnly(String s) {
        return s.toUpperCase(Locale.ROOT).replaceAll("[^A-Z]", "");
    }

    /**
     * Computes the Levenshtein edit distance (insertions, deletions, substitutions) between
     * two strings.
     *
     * @param s1 first string, not {@code null}
     * @param s2 second string, not {@code null}
     * @return the minimum number of single-character edits turning {@code s1} into {@code s2}
     */
    static int levenshtein(String s1, String s2) {
        var dp = new int[s1.length() + 1][s2.length() + 1];
        for (var i = 0; i <= s1.length(); i++) {
            dp[i][0] = i;
        }
        for (var j = 0; j <= s2.length(); j++) {
            dp[0][j] = j;
        }
        for (var i = 1; i <= s1.length(); i++) {
            for (var j = 1; j <= s2.length(); j++) {
                var cost = (s1.charAt(i - 1) == s2.charAt(j - 1)) ? 0 : 1;
                dp[i][j] = Math.min(Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1), dp[i - 1][j - 1] + cost);
            }
        }
        return dp[s1.length()][s2.length()];
    }

    /**
     * Asks the model for a short Dutch crossword hint for {@code woord}.
     *
     * @param woord the word to describe
     * @return the raw hint text, or {@code null} when curl failed or the response contained
     *         no string-valued {@code "content"} field
     * @throws Exception when the curl process cannot be started or awaited
     */
    static String generateHint(String woord) throws Exception {
        var prompt =
                "Geef een korte, duidelijke kruiswoordpuzzel hint in het Nederlands voor het woord: " + woord + ". " +
                "Antwoord alleen met de hint. Geen inleiding, geen uitleg, geen aantal letters, geen aanhalingstekens.";

        var payload = "{"
                + "\"model\":\"" + jsonEscape(MODEL) + "\","
                + "\"messages\":[{\"role\":\"user\",\"content\":\"" + jsonEscape(prompt) + "\"}],"
                + "\"stream\":false"
                + "}";

        var out = postChat(payload);
        // Extract content from {"choices":[{"message":{"content":"..."}}...]}
        return out == null ? null : jsonGetString(out, "content");
    }

    /**
     * POSTs a JSON payload to {@link #OLLAMA_URL} using the external {@code curl} binary.
     *
     * @return the response body (stderr is merged into it), or {@code null} when curl exits
     *         with a non-zero status; the failure is reported on {@code System.err}
     */
    private static String postChat(String payload) throws Exception {
        var p = new ProcessBuilder(
                "curl", "-sS",
                "-H", "Content-Type: application/json",
                "-X", "POST", OLLAMA_URL,
                "-d", payload
        ).redirectErrorStream(true).start();

        String out;
        try (var in = p.getInputStream()) {
            out = new String(in.readAllBytes(), StandardCharsets.UTF_8);
        }
        var code = p.waitFor();
        if (code != 0) {
            System.err.println("curl failed (" + code + "): " + out);
            return null;
        }
        return out;
    }

    /**
     * Escapes a string so it can be placed between double quotes in a JSON document.
     * Quotes, backslashes and all control characters below U+0020 are escaped.
     *
     * @param s the text to escape, not {@code null}
     * @return the escaped text, without surrounding quotes
     */
    static String jsonEscape(String s) {
        var b = new StringBuilder(s.length() + 16);
        for (var i = 0; i < s.length(); i++) {
            var ch = s.charAt(i);
            switch (ch) {
                case '\\' -> b.append("\\\\");
                case '"' -> b.append("\\\"");
                case '\n' -> b.append("\\n");
                case '\r' -> b.append("\\r");
                case '\t' -> b.append("\\t");
                case '\b' -> b.append("\\b");
                case '\f' -> b.append("\\f");
                default -> {
                    if (ch < 0x20) {
                        // Remaining control characters are not allowed raw inside a JSON
                        // string; emit them as a 4-digit hex escape.
                        b.append(String.format("\\u%04x", (int) ch));
                    } else {
                        b.append(ch);
                    }
                }
            }
        }
        return b.toString();
    }

    /**
     * Minimal JSON string extractor: finds the first occurrence of {@code "key"} in
     * {@code json} and returns the string value that follows it, with escapes decoded.
     * This is not a full JSON parser; it does not check nesting.
     *
     * @param json the JSON text, may be {@code null}
     * @param key  the member name to look for
     * @return the decoded string value, or {@code null} when {@code json} is {@code null},
     *         the key is absent, its value is not a string, or the string is unterminated
     */
    static String jsonGetString(String json, String key) {
        if (json == null) {
            return null;
        }
        var needle = "\"" + key + "\"";
        var i = json.indexOf(needle);
        if (i < 0) {
            return null;
        }
        i = json.indexOf(':', i + needle.length());
        if (i < 0) {
            return null;
        }
        i++;
        while (i < json.length() && Character.isWhitespace(json.charAt(i))) {
            i++;
        }
        if (i >= json.length() || json.charAt(i) != '"') {
            return null;
        }
        i++; // after opening quote

        var b = new StringBuilder();
        var esc = false;
        for (; i < json.length(); i++) {
            var ch = json.charAt(i);
            if (esc) {
                switch (ch) {
                    case 'n' -> b.append('\n');
                    case 'r' -> b.append('\r');
                    case 't' -> b.append('\t');
                    case 'b' -> b.append('\b');
                    case 'f' -> b.append('\f');
                    case 'u' -> {
                        // Unicode escape: 'u' followed by exactly four hex digits.
                        var unit = hex4(json, i + 1);
                        if (unit >= 0) {
                            b.append((char) unit);
                            i += 4; // skip the consumed hex digits
                        } else {
                            b.append(ch); // malformed escape: keep the character as-is
                        }
                    }
                    default -> b.append(ch); // covers '"', '\\' and '/'
                }
                esc = false;
            } else if (ch == '\\') {
                esc = true;
            } else if (ch == '"') {
                return b.toString();
            } else {
                b.append(ch);
            }
        }
        return null;
    }

    /**
     * Parses four hex digits of {@code s} starting at {@code from}.
     *
     * @return the value (0-65535), or -1 when fewer than four characters remain or one of
     *         them is not a hex digit
     */
    private static int hex4(String s, int from) {
        if (from + 4 > s.length()) {
            return -1;
        }
        var value = 0;
        for (var k = from; k < from + 4; k++) {
            var digit = Character.digit(s.charAt(k), 16);
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }
}
|
||||||
Reference in New Issue
Block a user