This commit is contained in:
mike
2025-12-19 16:20:03 +01:00
parent b0244ba51d
commit edcee45f1c
7 changed files with 31 additions and 902 deletions

View File

@@ -59,7 +59,7 @@ public class ThemeGraph {
* Score a word against a theme (0.0 = no match, 1.0 = perfect match)
*/
public static double scoreWordTheme(String word, String theme) {
Set<String> keywords = THEME_KEYWORDS.get(theme.toLowerCase());
var keywords = THEME_KEYWORDS.get(theme.toLowerCase());
if (keywords == null) {
return 0.5; // unknown theme = neutral score
}
@@ -72,15 +72,15 @@ public class ThemeGraph {
}
// Substring match (partial relevance)
for (String kw : keywords) {
for (var kw : keywords) {
if (word.contains(kw) || kw.contains(word)) {
return 0.7;
}
}
// Edit distance similarity (for typos/variations)
for (String kw : keywords) {
double similarity = editDistanceSimilarity(word, kw);
for (var kw : keywords) {
var similarity = editDistanceSimilarity(word, kw);
if (similarity > 0.8) {
return similarity * 0.9;
}
@@ -94,8 +94,8 @@ public class ThemeGraph {
*/
public static List<String> filterByTheme(List<String> words, String theme, double minScore) {
List<String> filtered = new ArrayList<>();
for (String word : words) {
double score = scoreWordTheme(word, theme);
for (var word : words) {
var score = scoreWordTheme(word, theme);
if (score >= minScore) {
filtered.add(word);
}
@@ -108,8 +108,8 @@ public class ThemeGraph {
*/
public static List<ThemeScore> getThemesForWord(String word) {
List<ThemeScore> scores = new ArrayList<>();
for (String theme : THEME_KEYWORDS.keySet()) {
double score = scoreWordTheme(word, theme);
for (var theme : THEME_KEYWORDS.keySet()) {
var score = scoreWordTheme(word, theme);
if (score > 0.0) {
scores.add(new ThemeScore(theme, score));
}
@@ -124,9 +124,9 @@ public class ThemeGraph {
public static String detectTheme(List<String> words) {
Map<String, Double> themeScores = new HashMap<>();
for (String theme : THEME_KEYWORDS.keySet()) {
for (var theme : THEME_KEYWORDS.keySet()) {
double totalScore = 0;
for (String word : words) {
for (var word : words) {
totalScore += scoreWordTheme(word, theme);
}
themeScores.put(theme, totalScore / words.size());
@@ -142,21 +142,21 @@ public class ThemeGraph {
* Simple edit distance similarity (normalized Levenshtein)
*/
private static double editDistanceSimilarity(String a, String b) {
int dist = levenshtein(a, b);
int maxLen = Math.max(a.length(), b.length());
var dist = levenshtein(a, b);
var maxLen = Math.max(a.length(), b.length());
if (maxLen == 0) return 1.0;
return 1.0 - ((double) dist / maxLen);
}
private static int levenshtein(String a, String b) {
int[][] dp = new int[a.length() + 1][b.length() + 1];
var dp = new int[a.length() + 1][b.length() + 1];
for (int i = 0; i <= a.length(); i++) dp[i][0] = i;
for (int j = 0; j <= b.length(); j++) dp[0][j] = j;
for (var i = 0; i <= a.length(); i++) dp[i][0] = i;
for (var j = 0; j <= b.length(); j++) dp[0][j] = j;
for (int i = 1; i <= a.length(); i++) {
for (int j = 1; j <= b.length(); j++) {
int cost = (a.charAt(i - 1) == b.charAt(j - 1)) ? 0 : 1;
for (var i = 1; i <= a.length(); i++) {
for (var j = 1; j <= b.length(); j++) {
var cost = (a.charAt(i - 1) == b.charAt(j - 1)) ? 0 : 1;
dp[i][j] = Math.min(
Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1),
dp[i - 1][j - 1] + cost
@@ -180,26 +180,26 @@ public class ThemeGraph {
System.out.println("=== Theme Graph Test ===\n");
// Test word scoring
String[] testWords = {"POLITIEK", "VOETBAL", "COMPUTER", "REGEN", "AUTO"};
for (String word : testWords) {
var testWords = new String[]{ "POLITIEK", "VOETBAL", "COMPUTER", "REGEN", "AUTO" };
for (var word : testWords) {
System.out.println("Word: " + word);
List<ThemeScore> themes = getThemesForWord(word);
for (ThemeScore ts : themes) {
var themes = getThemesForWord(word);
for (var ts : themes) {
System.out.println(" " + ts);
}
System.out.println();
}
// Test theme detection
List<String> techWords = Arrays.asList("COMPUTER", "INTERNET", "SOFTWARE", "DATA");
String detected = detectTheme(techWords);
var techWords = Arrays.asList("COMPUTER", "INTERNET", "SOFTWARE", "DATA");
var detected = detectTheme(techWords);
System.out.println("Detected theme for tech words: " + detected);
// Test filtering
List<String> allWords = Arrays.asList(
var allWords = Arrays.asList(
"POLITIEK", "COMPUTER", "AUTO", "VOETBAL", "INTERNET", "BOOM"
);
List<String> filtered = filterByTheme(allWords, "technologie", 0.5);
);
var filtered = filterByTheme(allWords, "technologie", 0.5);
System.out.println("\nFiltered for 'technologie' (min 0.5): " + filtered);
}
}