package puzzle; import java.util.*; /** * ThemeGraph - Creates a graph between words and themes for filtering. * Uses word embeddings approach: co-occurrence and semantic similarity. */ public class ThemeGraph { // Predefined theme keywords for Dutch word filtering private static final Map> THEME_KEYWORDS = new HashMap<>(); static { // News/Politics THEME_KEYWORDS.put("nieuws", Set.of( "POLITIEK", "VERKIEZING", "MINISTER", "PARLEMENT", "WET", "BELEID", "REGERING", "PARTIJ", "STEM", "KAMER", "RAAD", "STAAT" )); // Technology THEME_KEYWORDS.put("technologie", Set.of( "COMPUTER", "INTERNET", "SOFTWARE", "APP", "DATA", "CODE", "NETWERK", "SYSTEEM", "DIGITAAL", "TECH", "ROBOT", "AI" )); // Sports THEME_KEYWORDS.put("sport", Set.of( "VOETBAL", "TENNIS", "WIELREN", "SPELER", "WEDSTRIJD", "TEAM", "GOAL", "BAL", "SPEL", "WINNEN", "COACH", "ATLEET" )); // Weather/Nature THEME_KEYWORDS.put("weer", Set.of( "REGEN", "ZON", "WIND", "WOLKEN", "STORM", "SNEEUW", "WEER", "KLIMAAT", "NATUUR", "LUCHT", "WARMTE", "KOU" )); // Economy THEME_KEYWORDS.put("economie", Set.of( "GELD", "EURO", "MARKT", "PRIJS", "KOPEN", "VERKOOP", "BEDRIJF", "BANK", "HANDEL", "WINST", "SCHULD", "BUDGET" )); // Health THEME_KEYWORDS.put("gezondheid", Set.of( "ZORG", "DOKTER", "MEDICIJN", "PATIENT", "ZIEKENHUIS", "GEZOND", "VIRUS", "VACCIN", "THERAPIE", "BEHANDEL", "ARTS", "KLINIEK" )); // General/Common THEME_KEYWORDS.put("algemeen", Set.of( "HUIS", "AUTO", "BOOM", "WATER", "MENS", "TIJD", "LEVEN", "WERK", "SCHOOL", "FAMILIE", "STAD", "LAND" )); } /** * Score a word against a theme (0.0 = no match, 1.0 = perfect match) */ public static double scoreWordTheme(String word, String theme) { var keywords = THEME_KEYWORDS.get(theme.toLowerCase()); if (keywords == null) { return 0.5; // unknown theme = neutral score } word = word.toUpperCase(); // Direct match if (keywords.contains(word)) { return 1.0; } // Substring match (partial relevance) for (var kw : keywords) { if (word.contains(kw) || kw.contains(word)) { return 0.7; } } // Edit distance similarity (for typos/variations) for (var kw : keywords) { var similarity = editDistanceSimilarity(word, kw); if (similarity > 0.8) { return similarity * 0.9; } } return 0.0; } /** * Filter word list by theme with minimum score threshold */ public static List filterByTheme(List words, String theme, double minScore) { List filtered = new ArrayList<>(); for (var word : words) { var score = scoreWordTheme(word, theme); if (score >= minScore) { filtered.add(word); } } return filtered; } /** * Get theme suggestions for a word (sorted by score) */ public static List getThemesForWord(String word) { List scores = new ArrayList<>(); for (var theme : THEME_KEYWORDS.keySet()) { var score = scoreWordTheme(word, theme); if (score > 0.0) { scores.add(new ThemeScore(theme, score)); } } scores.sort(Comparator.comparingDouble(ts -> -ts.score)); return scores; } /** * Auto-detect best theme from a word list */ public static String detectTheme(List words) { Map themeScores = new HashMap<>(); for (var theme : THEME_KEYWORDS.keySet()) { double totalScore = 0; for (var word : words) { totalScore += scoreWordTheme(word, theme); } themeScores.put(theme, totalScore / words.size()); } return themeScores.entrySet().stream() .max(Comparator.comparingDouble(Map.Entry::getValue)) .map(Map.Entry::getKey) .orElse("algemeen"); } /** * Simple edit distance similarity (normalized Levenshtein) */ private static double editDistanceSimilarity(String a, String b) { var dist = levenshtein(a, b); var maxLen = Math.max(a.length(), b.length()); if (maxLen == 0) return 1.0; return 1.0 - ((double) dist / maxLen); } private static int levenshtein(String a, String b) { var dp = new int[a.length() + 1][b.length() + 1]; for (var i = 0; i <= a.length(); i++) dp[i][0] = i; for (var j = 0; j <= b.length(); j++) dp[0][j] = j; for (var i = 1; i <= a.length(); i++) { for (var j = 1; j <= b.length(); j++) { var cost = (a.charAt(i - 1) == b.charAt(j - 1)) ? 0 : 1; dp[i][j] = Math.min( Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1), dp[i - 1][j - 1] + cost ); } } return dp[a.length()][b.length()]; } public record ThemeScore(String theme, double score) { @Override public String toString() { return String.format("%s: %.2f", theme, score); } } // ---- Main for testing ---- public static void main(String[] args) { System.out.println("=== Theme Graph Test ===\n"); // Test word scoring var testWords = new String[]{ "POLITIEK", "VOETBAL", "COMPUTER", "REGEN", "AUTO" }; for (var word : testWords) { System.out.println("Word: " + word); var themes = getThemesForWord(word); for (var ts : themes) { System.out.println(" " + ts); } System.out.println(); } // Test theme detection var techWords = Arrays.asList("COMPUTER", "INTERNET", "SOFTWARE", "DATA"); var detected = detectTheme(techWords); System.out.println("Detected theme for tech words: " + detected); // Test filtering var allWords = Arrays.asList( "POLITIEK", "COMPUTER", "AUTO", "VOETBAL", "INTERNET", "BOOM" ); var filtered = filterByTheme(allWords, "technologie", 0.5); System.out.println("\nFiltered for 'technologie' (min 0.5): " + filtered); } }