inital-commit

This commit is contained in:
mike
2025-12-19 14:02:07 +01:00
commit 6f0a04f0a1
91 changed files with 1398111 additions and 0 deletions

5
.aiignore Normal file
View File

@@ -0,0 +1,5 @@
paper/
.git/
data/
target/
.idea/

2
.env Normal file
View File

@@ -0,0 +1,2 @@
PUZZLE_ROOT_DIR=/home/mike/dev/puzzle-generator
OUT_DIR=/home/mike/dev/puzzle-generator/data

4
.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
.idea/
/vocab/.custom/
**/.custom/
target/

3
compile.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
# Compile src/SwedishGenerator.java, writing the class files into ./target.
# Paths are relative to the current working directory.
out_dir=target
mkdir -p "$out_dir"
javac -d "$out_dir" src/SwedishGenerator.java

View File

@@ -0,0 +1,83 @@
{
"gridv2": [
"#############",
"#############",
"####B#PALLET#",
"####E#O######",
"##D#S#L#T####",
"##E#T#I#A####",
"##LAAGTANK###",
"##F#N#I#K####",
"##T#D#E#E####",
"########N####",
"#############"
],
"words": [
{
"word": "LAAGTANK",
"clue": "Hoofdonderdeel in beslag genomen",
"startRow": 6,
"startCol": 2,
"direction": "horizontal",
"answer": "LAAGTANK",
"arrowRow": 6,
"arrowCol": 1
},
{
"word": "POLITIE",
"clue": "Verantwoordelijk bij de inval",
"startRow": 2,
"startCol": 6,
"direction": "vertical",
"answer": "POLITIE",
"arrowRow": 1,
"arrowCol": 6
},
{
"word": "BESTAND",
"clue": "Samengestelde hoeveelheid",
"startRow": 2,
"startCol": 4,
"direction": "vertical",
"answer": "BESTAND",
"arrowRow": 1,
"arrowCol": 4
},
{
"word": "PALLET",
"clue": "Transportmiddel voor de lachgastank",
"startRow": 2,
"startCol": 6,
"direction": "horizontal",
"answer": "PALLET",
"arrowRow": 2,
"arrowCol": 5
},
{
"word": "TANKEN",
"clue": "Vervoort voor de lachgastank",
"startRow": 4,
"startCol": 8,
"direction": "vertical",
"answer": "TANKEN",
"arrowRow": 3,
"arrowCol": 8
},
{
"word": "DELFT",
"clue": "Stad waar het gebeurde",
"startRow": 4,
"startCol": 2,
"direction": "vertical",
"answer": "DELFT",
"arrowRow": 3,
"arrowCol": 2
}
],
"difficulty": 1,
"rewards": {
"coins": 50,
"stars": 2,
"hints": 1
}
}

View File

@@ -0,0 +1,92 @@
{
"gridv2": [
"###########",
"###########",
"##EUROPA#N#",
"######A##A#",
"####VERZET#",
"######K##U#",
"######I##U#",
"###KLANK#R#",
"######F##B#",
"###########"
],
"words": [
{
"word": "KLANK",
"clue": "Stemmen voor een betere toekomst",
"startRow": 7,
"startCol": 3,
"direction": "horizontal",
"answer": "KLANK",
"arrowRow": 7,
"arrowCol": 2
},
{
"word": "PARKIN",
"clue": "Dergelijke aandoening beïnvloedt beweging",
"startRow": 2,
"startCol": 6,
"direction": "vertical",
"answer": "PARKIN",
"arrowRow": 1,
"arrowCol": 6
},
{
"word": "VERZET",
"clue": "Collectief optreden tegen beleid",
"startRow": 4,
"startCol": 4,
"direction": "horizontal",
"answer": "VERZET",
"arrowRow": 4,
"arrowCol": 3
},
{
"word": "NATUUR",
"clue": "Leefgebied voor planten en dieren",
"startRow": 2,
"startCol": 9,
"direction": "vertical",
"answer": "NATUUR",
"arrowRow": 1,
"arrowCol": 9
},
{
"word": "EUROPA",
"clue": "Het politieke blok waarin Nederland deel uitmaakt",
"startRow": 2,
"startCol": 2,
"direction": "horizontal",
"answer": "EUROPA",
"arrowRow": 2,
"arrowCol": 1
},
{
"word": "PARKINF",
"clue": "Afkorting Parkinson",
"startRow": 2,
"startCol": 6,
"direction": "vertical",
"answer": "PARKINF",
"arrowRow": 1,
"arrowCol": 6
},
{
"word": "NATUURB",
"clue": "Bescherming van natuurgebieden",
"startRow": 2,
"startCol": 9,
"direction": "vertical",
"answer": "NATUURB",
"arrowRow": 1,
"arrowCol": 9
}
],
"difficulty": 1,
"rewards": {
"coins": 50,
"stars": 2,
"hints": 1
}
}

View File

@@ -0,0 +1,121 @@
{
"gridv2": [
[
"##############"
],
[
"##############"
],
[
"#######L######"
],
[
"#######E##B###"
],
[
"###A#SNUIVEN##"
],
[
"###K###S##S###"
],
[
"###K######T###"
],
[
"##VERSLAAVEND#"
],
[
"###R######D#R#"
],
[
"###H######I#O#"
],
[
"###O#SLOGAN#O#"
],
[
"###F######G#G#"
],
[
"##############"
]
],
"words": [
{
"word": "VERSLAAVEND",
"clue": "Overdrijft aantrekkingskracht",
"startRow": 7,
"startCol": 2,
"direction": "horizontal",
"answer": "VERSLAAVEND",
"arrowRow": 7,
"arrowCol": 1
},
{
"word": "BESTEDING",
"clue": "Korting of prijs",
"startRow": 3,
"startCol": 10,
"direction": "vertical",
"answer": "BESTEDING",
"arrowRow": 2,
"arrowCol": 10
},
{
"word": "AKKERHOF",
"clue": "Naam van de winkel",
"startRow": 4,
"startCol": 3,
"direction": "vertical",
"answer": "AKKERHOF",
"arrowRow": 3,
"arrowCol": 3
},
{
"word": "SNUIVEN",
"clue": "Korte handeling bij verslaving",
"startRow": 4,
"startCol": 5,
"direction": "horizontal",
"answer": "SNUIVEN",
"arrowRow": 4,
"arrowCol": 4
},
{
"word": "SLOGAN",
"clue": "Marketing tekst",
"startRow": 10,
"startCol": 5,
"direction": "horizontal",
"answer": "SLOGAN",
"arrowRow": 10,
"arrowCol": 4
},
{
"word": "DROOG",
"clue": "Tegenstrijdig fruittype",
"startRow": 7,
"startCol": 12,
"direction": "vertical",
"answer": "DROOG",
"arrowRow": 6,
"arrowCol": 12
},
{
"word": "LEUS",
"clue": "Kortere slogan tekst",
"startRow": 2,
"startCol": 7,
"direction": "vertical",
"answer": "LEUS",
"arrowRow": 1,
"arrowCol": 7
}
],
"difficulty": 1,
"rewards": {
"coins": 50,
"stars": 2,
"hints": 1
}
}

View File

@@ -0,0 +1,127 @@
{
"gridv2": [
[
"##############"
],
[
"##############"
],
[
"########C#####"
],
[
"########E#####"
],
[
"##DIVUSEN#####"
],
[
"########S#####"
],
[
"######EQUALIA#"
],
[
"####G###R##J##"
],
[
"###BLOKIER#Z##"
],
[
"####O#O####E##"
],
[
"####O#R####N##"
],
[
"####M#T####K##"
],
[
"####I#E####O##"
],
[
"####T#X#######"
],
[
"##############"
]
],
"words": [
{
"word": "BLOKIER",
"clue": "Persoon die accounts blokt.",
"startRow": 8,
"startCol": 3,
"direction": "horizontal",
"answer": "BLOKIER",
"arrowRow": 8,
"arrowCol": 2
},
{
"word": "CENSURE",
"clue": "Controle over queeraccounts.",
"startRow": 2,
"startCol": 8,
"direction": "vertical",
"answer": "CENSURE",
"arrowRow": 1,
"arrowCol": 8
},
{
"word": "DIVUSEN",
"clue": "Verdeel en heers accountblok.",
"startRow": 4,
"startCol": 2,
"direction": "horizontal",
"answer": "DIVUSEN",
"arrowRow": 4,
"arrowCol": 1
},
{
"word": "EQUALIA",
"clue": "Gelijk op abortus.",
"startRow": 6,
"startCol": 6,
"direction": "horizontal",
"answer": "EQUALIA",
"arrowRow": 6,
"arrowCol": 5
},
{
"word": "GLOOMIT",
"clue": "Verstopt sociale media.",
"startRow": 7,
"startCol": 4,
"direction": "vertical",
"answer": "GLOOMIT",
"arrowRow": 6,
"arrowCol": 4
},
{
"word": "IJZENKO",
"clue": "Krachtige blokking.",
"startRow": 6,
"startCol": 11,
"direction": "vertical",
"answer": "IJZENKO",
"arrowRow": 5,
"arrowCol": 11
},
{
"word": "KORTEX",
"clue": "Kort maar krachtig.",
"startRow": 8,
"startCol": 6,
"direction": "vertical",
"answer": "KORTEX",
"arrowRow": 7,
"arrowCol": 6
}
],
"difficulty": 1,
"rewards": {
"coins": 50,
"stars": 2,
"hints": 1
}
}

View File

@@ -0,0 +1,95 @@
{
"gridv2": [
"#############",
"#############",
"##I#O####D###",
"##N#N####O#S#",
"##S#D####C#C#",
"##T#E#M##U#H#",
"##A#R#E##M#A#",
"##GEZONDHEID#",
"##R#O#T##N#E#",
"##A#E#A##T###",
"##M#K#A#META#",
"######L##N###",
"#############"
],
"words": [
{
"word": "GEZONDHEID",
"clue": "Welzijn",
"startRow": 7,
"startCol": 2,
"direction": "horizontal",
"answer": "GEZONDHEID",
"arrowRow": 7,
"arrowCol": 1
},
{
"word": "DOCUMENTEN",
"clue": "Papieren",
"startRow": 2,
"startCol": 9,
"direction": "vertical",
"answer": "DOCUMENTEN",
"arrowRow": 1,
"arrowCol": 9
},
{
"word": "INSTAGRAM",
"clue": "Platform",
"startRow": 2,
"startCol": 2,
"direction": "vertical",
"answer": "INSTAGRAM",
"arrowRow": 1,
"arrowCol": 2
},
{
"word": "ONDERZOEK",
"clue": "Studie",
"startRow": 2,
"startCol": 4,
"direction": "vertical",
"answer": "ONDERZOEK",
"arrowRow": 1,
"arrowCol": 4
},
{
"word": "MENTAAL",
"clue": "Geestelijk",
"startRow": 5,
"startCol": 6,
"direction": "vertical",
"answer": "MENTAAL",
"arrowRow": 4,
"arrowCol": 6
},
{
"word": "SCHADE",
"clue": "Negatief effect",
"startRow": 3,
"startCol": 11,
"direction": "vertical",
"answer": "SCHADE",
"arrowRow": 2,
"arrowCol": 11
},
{
"word": "META",
"clue": "Stopte onderzoek",
"startRow": 10,
"startCol": 8,
"direction": "horizontal",
"answer": "META",
"arrowRow": 10,
"arrowCol": 7
}
],
"difficulty": 1,
"rewards": {
"coins": 50,
"stars": 2,
"hints": 1
}
}

View File

@@ -0,0 +1,114 @@
{
"gridv2": [
[
"##############"
],
[
"##############"
],
[
"########N#####"
],
[
"######M#I#R###"
],
[
"###N##E#E#U###"
],
[
"###E##N#U#G#S#"
],
[
"###D##S#W#G#M#"
],
[
"##KEIZERSNEDE#"
],
[
"###R##N#U#N#R#"
],
[
"###L##W#U#P#I#"
],
[
"###A##E#R#R#G#"
],
[
"###N##R###I#E#"
],
[
"###D##K###K###"
],
[
"##############"
]
],
"words": [
{
"word": "KEIZERSNEDE",
"clue": "operatie",
"startRow": 7,
"startCol": 2,
"direction": "horizontal",
"answer": "KEIZERSNEDE",
"arrowRow": 7,
"arrowCol": 1
},
{
"word": "MENSENWERK",
"clue": "arbeid",
"startRow": 3,
"startCol": 6,
"direction": "vertical",
"answer": "MENSENWERK",
"arrowRow": 2,
"arrowCol": 6
},
{
"word": "RUGGENPRIK",
"clue": "anesthesie",
"startRow": 3,
"startCol": 10,
"direction": "vertical",
"answer": "RUGGENPRIK",
"arrowRow": 2,
"arrowCol": 10
},
{
"word": "NIEUWSUUR",
"clue": "media",
"startRow": 2,
"startCol": 8,
"direction": "vertical",
"answer": "NIEUWSUUR",
"arrowRow": 1,
"arrowCol": 8
},
{
"word": "NEDERLAND",
"clue": "land",
"startRow": 4,
"startCol": 3,
"direction": "vertical",
"answer": "NEDERLAND",
"arrowRow": 3,
"arrowCol": 3
},
{
"word": "SMERIGE",
"clue": "complex",
"startRow": 5,
"startCol": 12,
"direction": "vertical",
"answer": "SMERIGE",
"arrowRow": 4,
"arrowCol": 12
}
],
"difficulty": 1,
"rewards": {
"coins": 50,
"stars": 2,
"hints": 1
}
}

View File

@@ -0,0 +1,118 @@
{
"gridv2": [
[
"#############"
],
[
"#############"
],
[
"#####F#D##G##"
],
[
"#####B#E##O##"
],
[
"#####I#LEAVE#"
],
[
"#####W#E##T##"
],
[
"####BONGI#S##"
],
[
"######A######"
],
[
"##DOWNT######"
],
[
"######U######"
],
[
"######R######"
],
[
"#############"
]
],
"words": [
{
"word": "BONGI",
"clue": "Ex-podcaster Dan Bongino",
"startRow": 6,
"startCol": 4,
"direction": "horizontal",
"answer": "BONGI",
"arrowRow": 6,
"arrowCol": 3
},
{
"word": "DELEG",
"clue": "Stepped down als FBIdeputiendirector",
"startRow": 2,
"startCol": 7,
"direction": "vertical",
"answer": "DELEG",
"arrowRow": 1,
"arrowCol": 7
},
{
"word": "FBIWO",
"clue": "Gaat uit de FBI na een grote stap",
"startRow": 2,
"startCol": 5,
"direction": "vertical",
"answer": "FBIWO",
"arrowRow": 1,
"arrowCol": 5
},
{
"word": "LEAVE",
"clue": "Bongino kondigt vertrek aan",
"startRow": 4,
"startCol": 7,
"direction": "horizontal",
"answer": "LEAVE",
"arrowRow": 4,
"arrowCol": 6
},
{
"word": "NATUR",
"clue": "Natuurlijk een carrièreswitch",
"startRow": 6,
"startCol": 6,
"direction": "vertical",
"answer": "NATUR",
"arrowRow": 5,
"arrowCol": 6
},
{
"word": "DOWNT",
"clue": "Terug op televisie en in het bestuur",
"startRow": 8,
"startCol": 2,
"direction": "horizontal",
"answer": "DOWNT",
"arrowRow": 8,
"arrowCol": 1
},
{
"word": "GOVTS",
"clue": "Nieuwe rol in de Amerikaanse overheid",
"startRow": 2,
"startCol": 10,
"direction": "vertical",
"answer": "GOVTS",
"arrowRow": 1,
"arrowCol": 10
}
],
"difficulty": 1,
"rewards": {
"coins": 50,
"stars": 2,
"hints": 1
}
}

View File

@@ -0,0 +1,85 @@
{
"gridv2": [
"############",
"############",
"#########Q##",
"#########U##",
"#########E##",
"##P##VRAGEN#",
"##L##E###R##",
"##ABORTUS###",
"##A##S######",
"##T##L######",
"##J##AFMELD#",
"##E##G######",
"############"
],
"words": [
{
"word": "ABORTUS",
"clue": "Medische term voor zwangerschap beëindigen",
"startRow": 7,
"startCol": 2,
"direction": "horizontal",
"answer": "ABORTUS",
"arrowRow": 7,
"arrowCol": 1
},
{
"word": "VERSLAG",
"clue": "Schriftelijke of mondelinge informatie over iets",
"startRow": 5,
"startCol": 5,
"direction": "vertical",
"answer": "VERSLAG",
"arrowRow": 4,
"arrowCol": 5
},
{
"word": "PLAATJE",
"clue": "Korte afbeelding die gedeeld wordt",
"startRow": 5,
"startCol": 2,
"direction": "vertical",
"answer": "PLAATJE",
"arrowRow": 4,
"arrowCol": 2
},
{
"word": "AFMELD",
"clue": "Account weggeven uit het platform",
"startRow": 10,
"startCol": 5,
"direction": "horizontal",
"answer": "AFMELD",
"arrowRow": 10,
"arrowCol": 4
},
{
"word": "VRAGEN",
"clue": "Inzichten of informatie zoeken",
"startRow": 5,
"startCol": 5,
"direction": "horizontal",
"answer": "VRAGEN",
"arrowRow": 5,
"arrowCol": 4
},
{
"word": "QUEER",
"clue": "Overkoepelende term voor LGBTQ+ gemeenschap",
"startRow": 2,
"startCol": 9,
"direction": "vertical",
"answer": "QUEER",
"arrowRow": 1,
"arrowCol": 9
}
],
"difficulty": 1,
"rewards": {
"coins": 50,
"stars": 2,
"hints": 1
}
}

16
data/index.json Normal file
View File

@@ -0,0 +1,16 @@
{
"date": "2025-12-18",
"files": [
"crossword_2025-12-18_01_duizenden-lachgascilinders-in-beslag-genomen-in-de.json",
"crossword_2025-12-18_01_levenslang-voor-40-jarige-duitser-die-in-mannheim-.json",
"crossword_2025-12-18_01_parkinsonpati-nten-nederland-moet-zich-in-eu-verze.json",
"crossword_2025-12-18_01_slechtste-slogan-van-het-jaar-over-verslavende-dru.json",
"crossword_2025-12-18_02_levenslang-voor-40-jarige-duitser-die-in-mannheim-.json",
"crossword_2025-12-18_02_meta-blokkeert-tientallen-queer-en-abortus-account.json",
"crossword_2025-12-18_02_meta-stopte-onderzoek-dat-schade-van-apps-op-menta.json",
"crossword_2025-12-18_03_30-11-in-nieuwsuur-pijn-bij-keizersnede-netanyahu-.json",
"crossword_2025-12-18_03_clair-obscur-expedition-33-recordwinnaar-game-awar.json",
"crossword_2025-12-18_03_dan-bongino-stepping-down-as-fbi-deputy-director.json",
"crossword_2025-12-18_03_meta-blokkeert-tientallen-queer-en-abortus-account.json"
]
}

41
docker-compose.yml Normal file
View File

@@ -0,0 +1,41 @@
# Two services sharing the named volume `puzzles_data`:
#   - puzzle      mounts it read-only and is routed through Traefik
#   - puzzle_gen  mounts it read-write
services:
  puzzle:
    build:
      # Falls back to /opt/apps/puzzle when PUZZLE_ROOT_DIR is unset or empty.
      context: ${PUZZLE_ROOT_DIR:-/opt/apps/puzzle}
      dockerfile: Dockerfile
    container_name: puzzle
    restart: unless-stopped
    networks: [ traefik_net ]
    volumes:
      - puzzles_data:/usr/share/nginx/html/puzzles:ro
    labels:
      - "traefik.enable=true"
      # HTTPS router
      - "traefik.http.routers.puzzle-main.rule=Host(`puzzle.appmodel.nl`)"
      - "traefik.http.routers.puzzle-main.entrypoints=websecure"
      - "traefik.http.routers.puzzle-main.tls=true"
      - "traefik.http.routers.puzzle-main.tls.certresolver=letsencrypt"
      # Plain-HTTP router; uses the redirect-to-https middleware from Traefik's file provider
      - "traefik.http.routers.puzzle-main-http.rule=Host(`puzzle.appmodel.nl`)"
      - "traefik.http.routers.puzzle-main-http.entrypoints=web"
      - "traefik.http.routers.puzzle-main-http.middlewares=redirect-to-https@file"

  puzzle_gen:
    build:
      context: ${PUZZLE_ROOT_DIR:-/opt/apps/puzzle}
      dockerfile: tools/puzzle-gen/Dockerfile
    container_name: puzzle_gen
    restart: unless-stopped
    networks: [ traefik_net ]
    environment:
      TZ: Europe/Amsterdam
      # NOTE(review): hard-coded private LAN address; confirm it is reachable from the deployment host.
      LM_STUDIO_BASE_URL: "http://192.168.1.159:1234/v1"
      PUZZLES_PER_DAY: "3"
    volumes:
      - puzzles_data:/data/puzzles:rw

volumes:
  puzzles_data:

networks:
  # Pre-existing network; Compose will not create it.
  traefik_net:
    external: true
    name: traefik_net

191
export_format.js Normal file
View File

@@ -0,0 +1,191 @@
// export_format.js
"use strict";
// Unit steps as [rowDelta, colDelta], keyed by the direction digit found in a clue cell.
const DIRS = {
  1: [-1, 0], // up
  2: [0, 1],  // right
  3: [1, 0],  // down
  4: [0, -1], // left
};

// True when `ch` is one of the direction digits "1".."4".
const isDigit = (ch) => "1" <= ch && ch <= "4";

// True when `ch` is an uppercase ASCII letter "A".."Z".
const isLetter = (ch) => "A" <= ch && ch <= "Z";
/**
 * Normalise a grid to a 2D array of single characters.
 * An array whose first element is a string is split row by row into
 * characters; anything else is returned unchanged (assumed to be 2D already).
 */
function toGrid2D(grid) {
  const rowsAreStrings = Array.isArray(grid) && typeof grid[0] === "string";
  if (!rowsAreStrings) {
    return grid;
  }
  return grid.map((row) => row.split(""));
}
/** True when (r, c) lies inside a grid with H rows and W columns. */
function inBounds(H, W, r, c) {
  const rowOk = 0 <= r && r < H;
  const colOk = 0 <= c && c < W;
  return rowOk && colOk;
}
/**
 * Extract the word that the clue cell at (r, c) points to.
 *
 * Starting one step away from the clue cell in direction `d`, consecutive
 * letter cells are collected (at most `maxLen` of them). The result is
 * canonical:
 *   - direction is only "horizontal" (read rightwards) or "vertical" (read downwards)
 *   - startRow/startCol is the first letter cell in that reading order
 *   - arrowRow/arrowCol is the cell immediately before the start (left of / above it)
 *   - word is the letters in reading order (start -> end)
 *
 * @param {string[][]} g - grid, indexed as g[row][col], one character per cell
 * @param {number} r - row of the clue cell
 * @param {number} c - column of the clue cell
 * @param {string} d - direction digit: "1" up, "2" right, "3" down, "4" left
 * @param {number} [maxLen=8] - maximum number of letter cells collected
 * @param {number} [minLen=2] - minimum number of letters for a usable word
 * @returns {Object|null} the placed word, plus `_cells` / `_arrow` coordinates
 *   used for cropping; null when `d` is not a known direction digit, the grid
 *   is empty, or fewer than `minLen` letters were found
 */
function extractPlacedFromClue(g, r, c, d, maxLen = 8, minLen = 2) {
  // Validate before the DIRS lookup: destructuring DIRS[d] for an unknown d
  // would throw instead of producing the intended null.
  const horizontal = d === "2" || d === "4";
  const vertical = d === "1" || d === "3";
  if (!horizontal && !vertical) return null;
  if (g.length === 0) return null; // no row 0 to take the width from

  const H = g.length, W = g[0].length;
  const [dr, dc] = DIRS[d];

  // Walk away from the clue cell, collecting the run of letter cells.
  const cells = [];
  let rr = r + dr, cc = c + dc;
  while (cells.length < maxLen && inBounds(H, W, rr, cc) && isLetter(g[rr][cc])) {
    cells.push([rr, cc]);
    rr += dr;
    cc += dc;
  }
  if (cells.length === 0 || cells.length < minLen) return null;

  // Up ("1") and left ("4") runs were walked against reading order; flip them
  // so that cells[0] is always the first letter of the word.
  if (d === "1" || d === "4") cells.reverse();

  const direction = horizontal ? "horizontal" : "vertical";
  const [startRow, startCol] = cells[0];

  // The arrow sits one cell before the start in reading direction. For
  // right/down clues that is the clue cell itself; for left/up clues it is
  // the cell beyond the far end of the run (which may lie outside the grid).
  const arrowRow = vertical ? startRow - 1 : startRow;
  const arrowCol = horizontal ? startCol - 1 : startCol;

  // Every collected cell is an in-bounds letter, so the word is simply the
  // cell contents in reading order.
  const word = cells.map(([row, col]) => g[row][col]).join("");

  return {
    word,
    clue: word, // placeholder; you'll replace later
    startRow,
    startCol,
    direction,
    answer: word,
    arrowRow,
    arrowCol,
    // For cropping:
    _cells: cells,
    _arrow: [arrowRow, arrowCol],
  };
}
/**
 * Transform a filled generator grid into the exported puzzle object
 * `{gridv2, words, difficulty, rewards}`.
 *
 * Every direction digit ("1".."4") in the grid is treated as a clue cell and
 * its word is extracted. The grid is then cropped to the bounding box of all
 * word cells and arrow cells plus a one-cell margin, and word coordinates are
 * shifted into that cropped frame. Cells that are not part of an extracted
 * word are rendered as "#". When no word is found, the uncropped grid is
 * returned with every non-letter cell replaced by "#".
 *
 * @param {Object} puz - { grid: string[]|char[][], clueMap?: object }; only
 *   `grid` is read here
 * @param {number} [difficulty=1] - copied into the result unchanged
 * @param {Object} [rewards] - copied into the result unchanged
 * @returns {{gridv2: string[], words: Object[], difficulty: number, rewards: Object}}
 * @throws {TypeError} when `puz.grid` is not an array
 */
function exportFormatFromFilled(puz, difficulty = 1, rewards = {coins: 50, stars: 2, hints: 1}) {
  const MAX_WORD_LEN = 8;
  const MIN_WORD_LEN = 2;

  const g = toGrid2D(puz.grid);
  if (!Array.isArray(g)) {
    throw new TypeError("exportFormatFromFilled: puz.grid must be an array of rows");
  }
  // An empty grid has no row 0 to measure; treat it as zero columns wide so it
  // falls through to the "no words" result instead of throwing.
  const H = g.length;
  const W = H > 0 ? g[0].length : 0;

  // 1) extract "placed" list from all clue digits in the filled grid
  const placed = [];
  const seen = new Set(); // avoid duplicates by start+dir+word
  for (let r = 0; r < H; r++) {
    for (let c = 0; c < W; c++) {
      const ch = g[r][c];
      if (!isDigit(ch)) continue;
      const p = extractPlacedFromClue(g, r, c, ch, MAX_WORD_LEN, MIN_WORD_LEN);
      if (!p) continue;
      const key = `${p.startRow},${p.startCol}:${p.direction}:${p.word}`;
      if (seen.has(key)) continue;
      seen.add(key);
      placed.push(p);
    }
  }
  if (placed.length === 0) {
    const gridv2 = g.map(row => row.map(ch => (isLetter(ch) ? ch : "#")).join(""));
    return {gridv2, words: [], difficulty, rewards};
  }

  // 2) + 3) in one pass: grow the bounding box over word cells and arrow
  // cells, and remember the letter at every used word cell (everything else
  // becomes '#').
  let minR = Infinity, minC = Infinity, maxR = -Infinity, maxC = -Infinity;
  const grow = (r, c) => {
    if (r < minR) minR = r;
    if (r > maxR) maxR = r;
    if (c < minC) minC = c;
    if (c > maxC) maxC = c;
  };
  const letterAt = new Map();
  for (const p of placed) {
    for (const [rr, cc] of p._cells) {
      grow(rr, cc);
      if (inBounds(H, W, rr, cc) && isLetter(g[rr][cc])) {
        letterAt.set(`${rr},${cc}`, g[rr][cc]);
      }
    }
    grow(p._arrow[0], p._arrow[1]);
  }
  // one-cell margin on every side
  minR -= 1;
  minC -= 1;
  maxR += 1;
  maxC += 1;

  // 4) render gridv2
  const gridv2 = [];
  for (let r = minR; r <= maxR; r++) {
    let row = "";
    for (let c = minC; c <= maxC; c++) {
      row += letterAt.get(`${r},${c}`) || "#";
    }
    gridv2.push(row);
  }

  // 5) words output with cropped coordinates
  const words_out = placed.map(p => ({
    word: p.word,
    clue: p.clue, // currently word itself
    startRow: p.startRow - minR,
    startCol: p.startCol - minC,
    direction: p.direction,
    answer: p.word,
    arrowRow: p.arrowRow - minR,
    arrowCol: p.arrowCol - minC,
  }));
  return {gridv2, words: words_out, difficulty, rewards};
}
module.exports = {exportFormatFromFilled};

26
main.js Normal file
View File

@@ -0,0 +1,26 @@
const {parseArgs, generatePuzzle, gridToString} = require("./swedish_generator");
const {exportFormatFromFilled} = require("./export_format");
// ---- main ----
// Entry point: parse CLI options, generate one puzzle, then print both the raw
// filled grid and the exported JSON. Exits with status 1 when generation fails.
(function main() {
  const options = parseArgs(process.argv);
  console.log(options);

  const result = generatePuzzle(options);
  if (!result) {
    console.error("Failed to generate a fillable puzzle.");
    process.exit(1);
  }

  const filled = result.filled;

  // Raw grid as produced by the generator
  console.log("\n=== FILLED PUZZLE (RAW) ===");
  console.log(gridToString(filled.grid));

  // Same puzzle converted to the export JSON format (difficulty 1)
  const exported = exportFormatFromFilled({grid: filled.grid, clueMap: filled.clueMap}, 1);
  console.log("\n=== EXPORTED JSON ===");
  console.log(JSON.stringify(exported, null, 2));
})();

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 70 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 71 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 114 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 69 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 83 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 85 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 101 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 93 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 92 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 111 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 116 KiB

View File

@@ -0,0 +1,956 @@
![](_page_0_Picture_0.jpeg)
# FAKULTÄT FÜR INFORMATIK
DER TECHNISCHEN UNIVERSITÄT MÜNCHEN
Bachelorarbeit in Informatik
# **Generating Swedish-style Crossword Puzzle Masks using Evolutionary Algorithms**
Jakob Julian Engel
![](_page_0_Picture_6.jpeg)
![](_page_2_Picture_0.jpeg)
# FAKULTÄT FÜR INFORMATIK
## DER TECHNISCHEN UNIVERSITÄT MÜNCHEN
## Bachelorarbeit in Informatik
## Generating Swedish-style Crossword Puzzle Masks using Evolutionary Algorithms
## Erzeugen von Schwedenrätselmasken mit Evolutionären Algorithmen
Author: Jakob Julian Engel
Supervisor: Prof. Dr. rer. nat. habil. Jürgen Schmidhuber
Advisors: Dipl.-Inf. Oliver Ruepp
Dipl.-Inf. Frank Sehnke
Date: September 15, 2009
![](_page_2_Picture_11.jpeg)
| Ich versichere, dass ich diese Bachelorarbeit selbständig verfasst und nur die angegebenen<br>Quellen und Hilfsmittel verwendet habe. | |
|-----------------------------------------------------------------------------------------------------------------------------------------|--------------------|
| | |
| München, den 15. September 2009 | Jakob Julian Engel |
| | |
| | |
## **Acknowledgments**
First of all I would like to thank those who supported me in the process of writing this Bachelor Thesis. Major thanks go to my two supervisors Frank Sehnke and Oliver Ruepp. This thesis is largely based on Oliver's study project on crosswords [**?**], and without his advice about the design of crossword masks the results would probably not have been nearly as good. Frank I thank for introducing me to evolutionary algorithms and for his very helpful suggestions while writing this thesis.
Furthermore I would like to thank Markus Holzer for coming up with the idea to use evolutionary algorithms to generate crossword masks in the very first place.
# **Contents**
| | Abstract | | xi |
|----|----------|---------------------------------------------------------|------|
| | | Zusammenfassung | xiii |
| 1. | | About Genetic Algorithms | 1 |
| | 1.1. | General Functionality | 1 |
| | 1.2. | Selection<br> | 2 |
| | 1.3. | Mutation<br> | 4 |
| | 1.4. | Crossover<br> | 5 |
| | 1.5. | Diversity vs. Convergence | 6 |
| | 1.6. | Summary | 7 |
| 2. | | About Crossword Masks | 9 |
| | 2.1. | Basic Definitions | 10 |
| | 2.2. | Validity Constraints | 12 |
| | 2.3. | Quality Criteria<br> | 12 |
| 3. | | Applying a Genetic Algorithm | 15 |
| | 3.1. | General Setup<br> | 15 |
| | 3.2. | The Fitness Function<br> | 15 |
| | | 3.2.1.<br>Coverage | 16 |
| | | 3.2.2.<br>Word Lengths | 16 |
| | | 3.2.3.<br>Clustering<br> | 17 |
| | | 3.2.4.<br>Invalid Definition Fields of Type 3,4,5,6<br> | 17 |
| | | 3.2.5.<br>Dead Ends | 18 |
| | | 3.2.6.<br>Result<br> | 18 |
| | | 3.2.7.<br>Localized Fitness<br> | 18 |
| | 3.3. | Initialization<br> | 19 |
| | 3.4. | Mutation<br> | 20 |
| | 3.5. | Crossover<br> | 24 |
| | 3.6. | Results<br> | 27 |
| | 3.7. | Discussion<br> | 31 |
| 4. | | Memetic Algorithm Approach | 35 |
| | 4.1. | Basic Idea | 35 |
| | 4.2. | Implementation<br> | 36 |
| | 4.3. | Results<br> | 38 |
| 5. | | Practical Results | 41 |
| 6. | Further Work | 43 |
|----|-----------------------------|----|
| | Appendix | 47 |
| | A. Code | 47 |
| | Mutation<br> | 47 |
| | Crossover | 48 |
| | Basic Hillclimber<br> | 49 |
| | Basic Genetic Algorithm<br> | 50 |
| | Memetic Approach<br> | 52 |
| B. | Sample Masks | 53 |
## **Abstract**
<span id="page-10-0"></span>This Bachelor Thesis addresses the problem of generating a so-called *mask* for Swedish-style crossword puzzles using evolutionary algorithms. Such a mask defines only the general layout of a crossword, i.e. the placement of words and word-descriptions within the rectangular grid: basically a mask assigns a specific field type to each grid cell. This however is subject to several complex constraints and quality criteria, which first have to be (and in this thesis are) formulated. In current practice when creating a professional crossword puzzle, such a mask is usually still created *by hand* - as no algorithm capable of creating adequate masks exists.
The problem can be seen as a constrained optimization problem over a large (exponential in the number of grid cells, $7^{400}$ for a mere 20 × 20 mask), discrete search space, where neither constraints nor the characteristic to be optimized can be formulated in a compact form. As a globally optimal solution is neither required, nor can be found efficiently, *evolutionary algorithms* as a heuristic optimization technique without need for a mathematical description of the problem are a sensible choice.
First, a *genetic algorithm* for solving the problem is developed and implemented. It will however be shown - by both, practical tests and theoretical considerations - that due to extremely *strong local dependencies* between nearby field assignments, the crossover operator is more destructive than beneficial and ultimately the complete genetic algorithm is outperformed slightly by a simple *hillclimber*.
In order to still be able to exploit the fact that globally, different areas of a mask hardly affect each other at all, a *memetic algorithm* is developed using crossover as single operator and applying a hillclimber afterwards to each resulting individual in order to repair any damage resulting from the crossover, hence fully exploiting the solution. This approach performs significantly better than both, a simple hillclimber and a complete genetic algorithm - and generates masks that can compete with manually created ones.
## **Zusammenfassung**
<span id="page-12-0"></span>Die Bachelorarbeit befasst sich mit dem automatischen Generieren von Kreuzworträtselmasken für Schwedenrätsel mittels Evolutionärer Algorithmen. Eine solche Maske legt lediglich das generelle Layout eines Rätsels fest, d.h. die Position und Art von Angabefeldern - nicht jedoch die im fertigen Rätsel einzutragenden Wörter. Diese Zuweisung unterliegt einer Reihe von Gültigkeits- und Qualitätskriterien, welche zuerst extrahiert und formuliert werden müssen. In heutiger Praxis wird eine solche Maske üblicherweise per Hand erstellt, da automatisch generierte Masken qualitativ minderwertig sind.
Diese Aufgabe kann als beschränktes Optimierungsproblem über einem großen (exponentiell in der Anzahl der Gridzellen) diskretem Suchraum gesehen werden, in dem weder die Nebenbedingungen noch die zu optimierende Größe explizit formuliert werden kann. Da eine global optimale Lösung weder benötigt wird noch effizient gefunden werden kann, stellen Evolutionäre Algorithmen einen idealen Lösungsansatz dar.
Anfangs wird ein Genetischer Algorithmus entwickelt und implementiert. Es stellt sich jedoch heraus, dass, aufgrund starker Abhängigkeiten zwischen nahe liegenden Feldzuweisungen, der Crossover Operator keine Vorteile bietet, und letzten Endes liefert ein simpler Hillclimber bessere Ergebnisse als ein kompletter Genetischer Algorithmus.
Um dennoch die Vorteile von Crossover ausnutzen zu können wird ein Memetischer Algorithmus entworfen, welcher ausschließlich auf Crossover und nachfolgender "Reparatur" der entstehenden Masken mittels eines Hillclimbers basiert. Dieser Ansatz stellt sich als deutlich besser als ein Hillclimber oder Genetischer Algorithmus alleine heraus. Die durch diesen Ansatz generierten Masken können qualitativ mit Hand gemachten konkurrieren.
# <span id="page-14-0"></span>**1. About Genetic Algorithms**
A genetic algorithm is a heuristic optimization technique used in computer science. As part of the more general class of evolutionary computation, it is motivated by the process of natural evolution, exploiting the principle of "survival of the fittest".
The most important characteristic of evolutionary computation in general is, that very little problem-specific domain knowledge is required: One only needs to somehow measure how good a given solution is (either absolute, or relative to other given solutions by, for example, letting them compete against each other). Additionally, a suitable representation for solutions in the solution domain has to be found.
One however does not require any further information concerning - for example - some gradient, as most other techniques do. On the other hand, using techniques which exploit such information often gives better practical results for problems where such information is available - simply due to the fact that in these cases evolutionary algorithms do not exploit all the problem knowledge available.
Genetic algorithms in particular apply these principles to discrete problems, where solutions traditionally are represented as a binary string of 0s and 1s of fixed length. Arguably, this does not impose any real limitations - as in computer science basically everything is represented as such a binary string.
In order to achieve good results though, the binary string should represent a fixed number of (discrete) features. Furthermore, a high independence between these features, or at least between clusters of features, is desirable.
In this chapter the basic methods of genetic algorithms: selection, mutation and crossover will be explained, and an overview over the general functionality will be given.
## <span id="page-14-1"></span>**1.1. General Functionality**
Rather than attempting to simulate the whole process of natural evolution, genetic algorithms extract only the most basic principles of this process and use them to solve a given optimization task.
The basic algorithm operates on a fixed-size **population** of **individuals**, each representing one solution. The representation of an individual, or rather of one feature of an individual is called **genotype**, while its behavior interpreted with respect to the given problem is called **phenotype**.
In a first step, each individual in the current population (current **generation**) is evaluated by the means of a **fitness function**, and a subset, favoring the better individuals, is selected. In a second step, the selected individuals are used as basis for creating the next generation by the means of **mutation** and **crossover**. Starting with a randomly generated first generation, this process continues until some break condition is satisfied.
![](_page_15_Figure_2.jpeg)
**Figure 1.1.:** Schematic representation of the Algorithm
Of course this is only a very general description of the algorithm. It can be, and in practice often is, heavily modified. For example rather than selection and creation of the next generation being separate steps, in practice for each individual to be created, one (in case of mutation) or two (in case of crossover) "parents" are selected by the means of some (probabilistic) **selection function**. This function can be modeled as a distribution over a discrete random variable X denoting the selected individual, with the parent generation as domain.
## <span id="page-15-0"></span>**1.2. Selection**
As the selection is a vital part of evolutionary algorithms, the choice of the selection strategy and the respective parameters can highly influence the results.
The most important aspect of the selection strategy is the **selection pressure**, determining how much the fitness value of an individual influences its chance to be selected. While a high selection pressure heavily favors better individuals and hence leads to increased convergence speed, a low selection pressure basically gives solutions with a lower fitness value a better chance to reproduce as well, increasing the breadth of the search and hence giving a better chance to avoid local optima.
Furthermore, one can choose to strictly separate the generations, i.e. only to allow newly created individuals in the next generation (called $(\mu, \lambda)$ - strategy, creating $\lambda$ new individuals by applying mutation and crossover to the best $\mu$ individuals of the parent generation). Alternatively the new generation can be composed of both, individuals from the parent generation which are just copied to the new generation, and newly created individuals (called $(\mu + \lambda)$ - strategy, creating $\lambda$ new individuals and copying $\mu$ ). A popular method is to copy only the best individual to the next generation without changing it, while the rest of the new generation consists only of newly created individuals.
In practice, one very popular selection function is the so-called **tournament selection**: One first randomly determines $\alpha$ individuals (using a uniform distribution), compares them, and selects the one with the highest fitness value.
The two simple advantages of this method are that the selection pressure is easily controllable by choice of $\alpha$, and that it is very simple to implement<sup>1</sup>. Note that this method basically translates to a probability distribution over the parent generation which only depends on $\alpha$ and the *position* of an individual if the population is sorted by fitness - and not on the actual *fitness values*.
![](_page_16_Figure_5.jpeg)
**Figure 1.2.:** Selection probabilities using tournament selection for a population size of 500. The probability for an individual to be selected is given by $Pr[r] = \frac{(n-r+1)^{\alpha}-(n-r)^{\alpha}}{n^{\alpha}}$ , where $n \in \mathbb{N}$ is the population size and $r \in [1;n]$ the position of the individual. Note that an individual can participate multiple times in the same competition and hence even the worst individual has a non-zero probability of being selected (although, obviously, a very small one).
Another possible selection function is the so called **roulette-wheel selection**: Here, for each individual the probability to get selected is directly proportional to its fitness value. This approach has the often desirable effect that big improvements are taken over fairly quickly, while in generations where only small improvements are discovered, the search is broadened automatically. In practice though, the overall selection pressure is
<span id="page-16-0"></span><sup>1</sup> Pseudocode for this particular selection function can be found in appendix A
difficult to control and highly dependent on the fitness function itself, which might pose problems<sup>2</sup>.
![](_page_17_Figure_2.jpeg)
**Figure 1.3.:** Visualization of roulette-wheel selection. Each individual is assigned a share of the wheel proportional to its fitness value. The wheel is then rotated and the position it stops in determines the individual to be selected.
#### <span id="page-17-0"></span>1.3. Mutation
Mutation poses one of the two main operators used in order to create the new generation.
Traditionally on a bit string-level, mutating an individual simply means flipping one or more bits, the result being a slightly modified version of the parent individual. This can be done by randomly choosing k bits to be changed, or by specifying a mutation-probability $\lambda$ deciding for each bit whether it is flipped or not, resulting in an average of $\lambda \cdot n$ , n denoting the length of the bit string, bits changed. The number of bits changed is called **mutation step size**.
On a higher abstraction level there are obvious other possibilities - for example adding a (rounded) $\mathcal{N}(0,\sigma^2)$ -distributed random number to some integer value, or making some other problem- and representation-specific adjustments.
In general, mutating an individual should produce a (with respect to the given problem, i.e. the phenotype of the individual) similar, but slightly different - and hopefully better - child-individual. Equally, the more alike two individuals (again, with respect to their phenotypes) are, the greater the probability for one to be the result of mutating the other should be. This is a very important point as often some adjustments are needed to achieve it: even in a very basic setup using individuals consisting of only one binary encoded
<span id="page-17-1"></span><sup>2</sup> These problems can partly be diminished by appropriate normalization; whereas the selection pressure can be controlled by raising the normalized fitness values to some power $p \in \mathbb{R}^+$ ( $p < 1 \rightarrow$ less pressure; $p > 1 \rightarrow$ more pressure) - still this often does not suffice
![](_page_18_Figure_1.jpeg)
**Figure 1.4.:** Schematic representation of mutation.
integer, the genotypes ("representation") of two individuals might be very similar while their phenotypes ("behavior") are fairly different (000000 = 0 and 100000 = 32). On the other hand, two individuals having very different genotypes might be quite similar with respect to their phenotype (100000 = 32 and 011111 = 31)[3](#page-18-1). This issue and the consequence that, in this case, a simple bit flip-mutation is very unlikely to find the, in terms of phenotypes, very small step from 31 to 32 is widely known as **Hamming Wall**. It should be mentioned that the latter situation, i.e. very different genotypes for similar phenotypes, can have far worse consequences than the other way round.
## <span id="page-18-0"></span>**1.4. Crossover**
The crossover operator basically takes two parent individuals, combines them and produces a child individual, simulating natural sexual reproduction. On a bit string-level, this can be done by determining one or more splitting points and using approximately half of each parent individual, as depicted in figure [1.5.](#page-18-2)
On a higher abstraction level again, there are other possibilities. Different numerical values can, for example, be interpolated (i.e. averaged), or even extrapolated (i.e. if 37 gives a good result, and 40 an even better result, one might try 42 for obvious reasons) - although admittedly the latter has little to do with natural sexual reproduction.
<span id="page-18-2"></span>![](_page_18_Figure_7.jpeg)
**Figure 1.5.:** Schematic representation of crossover on a bit string-level. (a) one-point-crossover (b) two-point-crossover (c) n-point-crossover
The value of this operator mainly lies in the assumption that different parent individuals might have different strengths and weaknesses, and that with a little luck the better
<span id="page-18-1"></span><sup>3</sup> for this simple example, the problem could be greatly reduced by using a Gray-code representation, or by performing mutation on a higher abstraction level in the first place.
part of each parent combined results - at least in some cases - in an even better child. It also enables individuals to adopt improvements found by some other individual, while retaining at least a part of their original form. Arguably this base assumption can - for some problems - be problematic: If strong dependencies between different parts of an individual exist, ripping it apart at a random point is likely to impair both halves, hence making the produced child useless. Similarly the two halves combined might just not be "compatible", leading to the same result.
As most dependencies cannot be avoided[4](#page-19-1), one simple way to reduce this problem is to design the representation of an individual in such a way that highly dependent features are close together, while more independent features are at a bigger distance. Using one-point- or two-point-crossover then minimizes the probability for these complications to occur.
Still, even concerning problems where these problems arise, the crossover operator has proven to be very valuable, and using it can, for most problems, improve the result significantly. There also are several methods designed to deal with these issues, for example by trying to "remember" good crossover points, i.e. points at which a crossover is least likely to have negative effects [**?**].
There in fact exists some theoretical work on this subject[5](#page-19-2), but, primarily due to the high dependence on the actual problem, it is hardly of practical relevance - in fact even its theoretical justification is arguable [**?**].
## <span id="page-19-0"></span>**1.5. Diversity vs. Convergence**
One of the major strengths of evolutionary algorithms lies in the ability to explore the search space very exhaustively. While for example most gradient-based approaches tend to immediately run towards the closest extreme point - which for complex problems is likely to be a mere local optimum - evolutionary methods in general have far better chances to overcome such local extreme points. This is due to the simple fact that many areas of the search space are explored simultaneously by the different individuals, while the crossover operator allows improvements found by one individual to be - if compatible - adopted by others; hence making the whole process more efficient than a number of independent, local searches.
In order to fully exploit this strength however, a certain diversity within each generation has to be assured, i.e. the individuals should not be too similar, such that they actually explore different areas of the search space (*exploration*). On the other hand, when keeping the individuals well distributed, existing good solutions might not be exploited fully, leading to extremely slow (or even stagnant) convergence (*exploitation*).
This so-called **Exploration versus Exploitation Dilemma**, is one of the most significant problems to be dealt with; especially premature convergence towards a local optimum is often difficult to avoid. The most obvious and direct way to influence this is the selection
<span id="page-19-1"></span><sup>4</sup> otherwise the whole problem could be split into two separate optimization problems
<span id="page-19-2"></span><sup>5</sup> see *Building block hypothesis* [**?**]
pressure, i.e. giving less good individuals a better chance to be selected - this however is only practicable to a certain degree, as it drastically decreases the convergence speed.
There exist a number of modifications to deal with this issue: One possibility for example is to include the average dissimilarity to other individuals as additional criterion for determining the fitness value (i.e. the more different compared to the other individuals an individual is, the better its fitness value). Another possibility is to only allow individuals that have a certain dissimilarity to all other individuals within the population (usually the minimal dissimilarity is decreased over time). Obviously some kind of metric needs to be defined in order to implement these approaches - for bit strings the Hamming distance is an obvious choice.
## <span id="page-20-0"></span>**1.6. Summary**
In general it can be said that for some problems evolutionary algorithms give much better results than any other method applicable. Exactly which problems "some" are, and exactly how good "much better" is, often depends on design choices and the implementation itself. Especially for complex problems that are difficult - or impossible - to trace mathematically as the problem presented in this thesis, evolutionary algorithms perform very well.
Due to the fact that the whole process is based on heuristics and not on (practically relevant) mathematical theory[6](#page-20-1), there is no absolute truth concerning design choices[7](#page-20-2). One rather has to find a suitable setup for a specific given problem, keeping in mind that seemingly small or unimportant aspects, such as the way a solution is encoded, might greatly affect the outcome.
Furthermore there are numerous modifications not discussed at this point, which can help in solving or reducing specific problems. For further information one can refer to corresponding literature, for example [**?**].
<span id="page-20-1"></span><sup>6</sup> as already mentioned above, such theory does exist - in addition to the above one can refer for example to Holland's schema theorem [**?**] - but the *practical* value of these is highly questionable.
<span id="page-20-2"></span><sup>7</sup> there are however several heuristics not discussed here, for example the so-called (1/5-th) success rule - still whether such heuristics apply is, again, highly dependent on the actual problem.
# <span id="page-22-0"></span>**2. About Crossword Masks**
Crossword puzzles are said to be the most popular and widespread word game in the world, yet they have a short history. While an early predecessor of crossword puzzles appeared in England as early as the 19th century, the puzzle in the form common today has its origin in the USA, where the first of its kind was published in the *New York World* in 1913 [**?**].
<span id="page-22-1"></span>![](_page_22_Figure_2.jpeg)
**Figure 2.1.:** (a) American-style grid. (b) English-style grid. (c) Swedish-style grid (German).
Today, a variety of different crossword puzzle styles exist, some of which are shown
in figure [2.1.](#page-22-1) In this work however we will focus only on the so called **Swedish-style crossword puzzles**. Such a puzzle is presented as a rectangular grid consisting of three different types of fields: **definition fields, letter fields and cut-out fields**. The task of the puzzle solver is to guess the words that are described by the definition fields and to fill out the corresponding letter fields that are denoted by an arrow from the definition field.
In current practice, crossword puzzles are created in two steps: At first, a so-called *mask* is created, denoting only the arrangement of letter fields and definition fields. The actual words filling the puzzle are then found in a second step, resulting in the complete crossword puzzle. The reason for this separation is that, while efficient computer programs exist for solving the second step, the first step is still often done manually. Although there in fact exist computer programs which can create valid masks, those are usually of inferior quality. [This introduction text, the crossword puzzle displayed in [2.1](#page-22-1) (c) and the following definitions of letter field, cut-out field and definition field are partly taken from [**?**].]
## <span id="page-23-0"></span>**2.1. Basic Definitions**
**Mask:** A mask is a two-dimensional rectangular grid, where each field may
be either a **letter field**, a **definition field** or a **cut-out field**. By introduction of a set of control characters S := {0, 1, 2, 3, 4, 5, 6, #}, where 0 represents a letter field, # represents a cut-out field and 1 to 6 represent the different types of definition fields, a mask can naturally be
represented as matrix $M \in S^{n \times m}$.
**Letter field:** A letter field simply denotes a blank space, which the solver of the
crossword puzzle is to fill in.
**Definition field:** Definition fields denote the fields where the word-descriptions are
printed in. In current crossword puzzles several types of definition fields can be found, including so-called double definition fields and definition fields covering two adjacent grid cells. In this thesis however, only the most common six types of definition fields are allowed,
as depicted in figure [2.2.](#page-24-0)
**Cut-out field:** Cut-out fields denote fields that are not part of the actual crossword
puzzle; usually they reserve space for pictures or solutions of former
puzzles.
**Word:** Each definition field defines exactly one word. The word starts at
a field dependent on the type of its definition field, and continues along the corresponding row or column, until either the end of the grid is reached, or the next field is not a letter field. Words defined by a definition field of type 1,5 or 6 are called horizontal words, words defined by a definition field of type 2,3 or 4 are called vertical words.
<span id="page-24-0"></span>![](_page_24_Figure_1.jpeg)
**Figure 2.2.:** (a) different field types allowed and the corresponding matrix M. The gray lines illustrate the words defined by the corresponding definition fields. (b) other possible definition field types not considered in this work.
## <span id="page-25-0"></span>**2.2. Validity Constraints**
It is obvious that not every possible matrix $M \in S^{n \times m}$ corresponds to a valid crossword mask. We therefore define four simple, absolute constraints which a valid mask has to meet:
- 1. each letter field is to be part of at least one word.
- 2. each word has to span over at least two letters. A definition field which does not have any corresponding letter fields simply defines a word of length zero.
- 3. each word is to be enclosed in between two non-letter fields.
- 4. no two horizontal or two vertical words may overlap.
Examples for the violation of these four constraints are shown in figure [3.2.](#page-29-2) Note that at this point a letter field is not required to be covered horizontally *and* vertically, in fact there are valid masks where no two words intersect.
![](_page_25_Figure_8.jpeg)
**Figure 2.3.:** Violations of the four Validity Constraints.
## <span id="page-25-1"></span>**2.3. Quality Criteria**
In order to be of practical value, the validity of a mask is by far not enough. As the name strongly suggests, in a crossword puzzle it is desirable that words actually cross, such that finding the solution for one word gives hints towards other words. Furthermore, the layout of the mask should be appealing to the human solver - as this is mainly a subjective criterion, it cannot be defined explicitly; however, several heuristics can be formulated.
In addition to the above, it needs to be possible to find a number of words to fill the crossword puzzle, such that every letter field is uniquely defined. Some basic criteria for a "good" mask are the following:
- 1. **Coverage**: Naturally, a large number of horizontally *and* vertically covered fields is desired.
- 2. **Word lengths**: the optimal word length is four to six letters. Longer words are usually more interesting for the solver, while on the other hand it becomes difficult to find fitting words with more than eight letters; hence words with more than eight letters should not occur frequently.
- 3. **Clustering of definition fields**: in Swedish-style crossword puzzles, large "clusters" of definition fields are to be avoided, such that definition fields and letter fields are distributed as evenly as possible.
Especially long chains of adjacent definition fields are undesirable, as are so-called "dead ends" where the first or last letter of a word is enclosed by three non-letter fields (excluding the ones at the left and top border, as there such situations are unavoidable).
<span id="page-26-0"></span>![](_page_26_Figure_6.jpeg)
**Figure 2.4.:** All three masks shown are, according to the definition above, valid masks. Mask (a) performs quite well concerning the quality criteria: No clusters with more than 3 definition fields exist, all word lengths are between three and six and a maximum coverage is achieved. Mask (b) however performs significantly worse: There are three words with only length two, a cluster of size ten splitting the whole grid in three parts, several fields which are only covered once and two "dead ends". Mask (c) performs even worse: As no two words intersect, it can hardly be called a crossword puzzle.
Arguably the distinction between validity constraints and quality criteria is somewhat arbitrary. For example one could demand that, for a mask to be valid, at least 75% of the letter fields are to be covered both, horizontally and vertically, or that all letter fields must be 4-connected, i.e. the mask is not split apart by definition fields.
The motivation for the above definition of "valid" is to find a minimal set of easily expressible and simple to compute absolute properties, ensuring that the mask could (at least theoretically) be used for a crossword. It should be clear that generating valid masks according to this definition is trivial (see Figure [2.4](#page-26-0) (c)), and hence the challenge lies in creating valid masks which perform as well as possible on the quality criteria.
# <span id="page-28-0"></span>**3. Applying a Genetic Algorithm**
## <span id="page-28-1"></span>**3.1. General Setup**
The goal of this work is to generate a valid mask performing as well on the quality criteria as possible, such that it can be used for a crossword puzzle. The desired dimensions of the mask, as well as the positions of cut-out fields are given at the beginning and must not be changed - of course the resulting algorithm should be applicable for generating masks with any reasonable dimensions and cut-out field positions.
![](_page_28_Figure_3.jpeg)
**Figure 3.1.:** Task of the algorithm.
Each mask corresponds to exactly one individual. A mask is represented simply as a matrix (i.e. a two-dimensional array) $M \in S^{n \times m}$ with $S := \{0, 1, 2, 3, 4, 5, 6, \#\}$, as introduced in the previous part.
## <span id="page-28-2"></span>**3.2. The Fitness Function**
It has been decided not to treat validity and quality of a mask as separate goals, but rather to optimize both aspects simultaneously. This is justified by the fact that they are highly interdependent: optimizing only one criterion very quickly destroys achievements concerning the other one. On the other hand, a big progress concerning one criterion might be worth small sacrifices with respect to the other. Furthermore, it is very difficult to design operators which operate solely on valid masks - every kind of repairing algorithm either heavily relies on trial & error, or has to cover hundreds of situations and hence be very complex.
This is done by accumulating *penalty points* for several criteria, some of which correspond directly to the validity constraints, others are simple and easy to compute heuristics to achieve the described quality criteria. Below, the features used are described in detail. Note that the actual values used are based on preliminary testing and estimates how undesired the respective situations are, and have shown to give very good results in general. Naturally they can be modified easily to enforce some aspects more than others, making this approach very flexible.
### <span id="page-29-0"></span>**3.2.1. Coverage**
Five different coverage types are distinguished:
- 1. completely uncovered: 1500 penalty points
- 2. covered only once, but enclosed between two non-letter-fields (either horizontally or vertically): 75 penalty points
- 3. covered only once: 200 penalty points
- 4. covered more than once in the same direction: 600 penalty points
- <span id="page-29-2"></span>5. covered once horizontally and vertically: 0 penalty points
![](_page_29_Figure_10.jpeg)
**Figure 3.2.:** Different types of field coverage. For each type, one example is highlighted.
Type 2 often is forced to occur, for example at the left or top border of the mask - hence it is penalized less than the similar type 3. Also note that type 4 is penalized much less than type 1, although it is an equivalent violation of the validity constraints. The reason for this is fairly straight forward: while leaving one field uncovered might greatly benefit the rating of the surrounding fields, double covered fields either occur in groups of at least two, or a word of length one is involved - in either case the surroundings account for further penalty points.
### <span id="page-29-1"></span>**3.2.2. Word Lengths**
Word lengths are rated in two ways. Mainly penalty points are given for the length of each word, according to a predefined function shown in table [3.1.](#page-30-2)
<span id="page-30-2"></span>
| Word Length | Penalty Points |
|-------------|----------------|
| 0 | 1800 |
| 1 | 1500 |
| 2 | 650 |
| 3 | 100 |
| 4 | 10 |
| 5 | 0 |
| 6 | 0 |
| 7 | 30 |
| 8 | 50 |
| 9 | 150 |
| 10 | 250 |
| 11 | 400 |
| 12 | 550 |
| 13 | 750 |
| 14 | 1000 |
| 15 | 1300 |
**Table 3.1.:** Penalty points for different word lengths
In addition to that, intersections of two words both longer than six letters receive additional penalty points given by the product of the lengths of the two words (i.e. a field where, for example, two words with length 9 intersect receives an additional 81 Penalty Points). This accounts for the fact that finding fitting words in such situations is especially hard.
### <span id="page-30-0"></span>**3.2.3. Clustering**
The size of each 8-connected (i.e. diagonal adjacency is considered as well as horizontal or vertical adjacency) cluster of definition fields is determined and penalized. As long chains of adjacent definition fields are especially undesirable, the maximum of the cluster's horizontal and vertical extension is used as an additional criterion for rating the cluster. An extract from the rating table is shown in table [3.2.](#page-30-3) It should be mentioned that definition fields at the left or top border of the mask are only counted as half, as here clusters of size three or four are hardly avoidable and even desired.
<span id="page-30-3"></span>
| Cluster Size | Maximal Extension | Penalty Points |
|--------------|-------------------|----------------|
| 1 | 1 | 0 |
| 2 | 2 | 150 |
| 3 | 2 | 288 |
| 3 | 3 | 320 |
| 4 | 2 | 542 |
| 4 | 3 | 603 |
| 4 | 4 | 670 |
| 5 | 3 | 794 |
| 5 | 4 | 882 |
| 5 | 5 | 980 |
| 6 | 4 | 1053 |
| 6 | 6 | 1300 |
| 7 | 5 | 1620 |
| 7 | 7 | 2000 |
**Table 3.2.:** Penalty points for different cluster sizes and extensions
### <span id="page-30-1"></span>**3.2.4. Invalid Definition Fields of Type 3,4,5,6**
The validity constraint for every word to be enclosed in between two non-letter fields is, in contrast to the other three constraints, not yet represented. Hence a penalty of 2000 is introduced for each word violating this constraint.
### <span id="page-31-0"></span>**3.2.5. Dead Ends**
Dead ends are, as defined in chapter [2.3,](#page-25-1) letter fields enclosed by three adjacent non-letter fields, excluding the fields at the top- or left border. These are penalized with 400 points.
### <span id="page-31-1"></span>**3.2.6. Result**
It is to note that the more fields a mask contains, the higher the best achievable score will be - simply due to the fact that not all of the above points can be avoided completely. In fact several of the above criteria work against each other: A perfect clustering score for example can simply be achieved by not using any definition fields at all - but then the coverage score would be extremely bad.
Figure [3.3](#page-31-3) shows a plot of the rating of the best individual over a typical run of one thousand generations, split up into quality and validity parts. It can be seen that, while the validity component dominates at first, it is reduced very quickly, such that approximately from generation 100 onwards, the best result is always a valid mask. Also note that within the first one hundred generations, the validity and quality components frequently increase temporarily, while the total score is always decreasing.
<span id="page-31-3"></span>![](_page_31_Figure_6.jpeg)
**Figure 3.3.:** Rating of the best individual over a run of one thousand generations. Note the logarithmic scale on the x-axis
### <span id="page-31-2"></span>**3.2.7. Localized Fitness**
One advantage of the presented fitness function is, that a rating for each distinct field of a mask can be approximated. This is done by, for example, distributing the penalty points a large cluster receives among all fields contained in that cluster. This allows to estimate the rating of only one half of a mask, or to localize areas that are especially "bad", and hence need to be improved. This concept will be used in the second part of this work. A short discussion about the *practical* value of the masks generated with this fitness function can be found in [5.](#page-54-0) Some (larger) exemplary, automatically generated masks can be found in appendix [B.](#page-66-0)
![](_page_32_Figure_2.jpeg)
**Figure 3.4.:** The higher the local penalty rating of each field, the more red it is depicted.
## <span id="page-32-0"></span>**3.3. Initialization**
In a traditional genetic algorithm as explained in chapter [1,](#page-14-0) no problem domain knowledge is used apart from the fitness function and the way a solution is encoded. Still using such knowledge - if available - can significantly improve the results, as one can "guide" the search towards the assumed position of an optimal solution. This however has to be done with great care, as by imposing additional constraints one can accidentally modify the search space in such a way, that finding a (globally) optimal solution becomes far more difficult.
Despite these concerns, two additional modifications were made. As a first and obvious step, field assignments which - no matter how the surrounding mask looks like - are certain to cause a validity violation are disallowed.
In order to justify the second - far more restrictive - modification, we first need to examine an often occurring phenomenon, shown below. Avoiding this kind of arrangement by including a penalty in the fitness function proved to be fairly ineffective: this can be explained by the fact that several fields have to be changed in order to resolve such a situation without creating new violations - hence making it a good example for a local minimum.
The solution was to completely remove these arrangements from the search space by
- disallowing most of the definition field assignments creating such situations
- instead of using completely random initial individuals, preassigning the borderfields with (random) fitting field types, using a (fairly simple) static algorithm.
It also showed that initializing the rest of the mask with letter fields only improves the performance even further. One example of a resulting initial individual can be found in figure [3.7.](#page-34-0) Note that *all* fields are still subject to mutation and crossover, hence the border fields are allowed to be changed during the algorithm as well.
![](_page_33_Picture_1.jpeg)
**Figure 3.5.:** Words directly adjacent to a parallel border are to be avoided, such a situation is highlighted in red. However in order to resolve this particular situation without introducing at least one new validity violation - resulting in an overall worse penalty (note that the mask as it is does satisfy all validity constraints) - four fields would have to be changed simultaneously - which is (especially for bigger masks) *very* unlikely to be achieved by a mutation.
![](_page_33_Figure_4.jpeg)
**Figure 3.6.:** Examples for some disallowed field assignments. The field type shown below each mask is not allowed at the highlighted fields.
## <span id="page-33-0"></span>**3.4. Mutation**
A fairly obvious way to implement a mutation operator is to simply replace k randomly chosen fields with new random field types (subject to the constraints discussed in the previous part). In order to improve the effectiveness of this operator, several modifications were made:
### • **Field Type Probabilities**:
When choosing a new random field type, no uniform probability distribution is used.
<span id="page-34-0"></span>![](_page_34_Figure_1.jpeg)
**Figure 3.7.:** An example for an initial individual. Not only the fields adjacent to the left / top border are assigned, but also fields adjacent to such borders created by cut-out fields (which are given from the beginning).
As a typical mask contains approximately $\frac{2}{3}$ letter fields, a letter field is chosen with probability $\frac{2}{3}$ . Furthermore, definition fields of type 1 and 2 occur far more often than type 3, 4, 5 and 6 - hence of the remaining $\frac{1}{3}$ , type 1 and 2 each get a probability of $\frac{1}{12}$ and type 3, 4, 5 and 6 each $\frac{1}{24}$ .
#### • Centralized Mutation:
Instead of determining k fields to be modified separately, the first field, the *central* point is chosen using a uniform distribution. The remaining fields to be modified are then chosen close to this central point<sup>1</sup>, resulting in a more local mutation. The reason for this is the following: Changing two or more adjacent fields often helps overcoming a local optimum. At a bigger distance however, two changes are unlikely to be correlated at all - together with the fact that a random change is, at least in the later phase of the evolution, far more likely to have a (significant) negative effect than to achieve an improvement, the probability for two uncorrelated changes together to still improve the total score is extremely low.
#### • Guided Mutation:
The probability for the central point to be chosen in areas with a high number of penalty points was increased slightly. This was done using the principle of tournament selection: $\alpha$ fields are chosen at random, and the one with the highest local penalty score is selected as central point. In practice, $\alpha=2$ gave the best results; higher values again leading to premature convergence.
<span id="page-34-1"></span><sup>1</sup> To be exact, a two-dimensional normal distribution with $\mu := (x,y)^T$ and $\Sigma := \sigma^2 I_{2\times 2}$ , with $(x,y)^T$ denoting the central point was used. Preliminary testing showed that $\sigma = 3$ is a good choice for the standard deviation, smaller values leading to premature convergence.
### • **Predefined Mutation**:
Two fixed mutation types, occurring with a certain probability, were introduced:
- "shifting" a definition field of type 1 or 2 one field horizontally or vertically.
- "splitting" a long word in two halves, by inserting a field of type 1 or 2.
![](_page_35_Picture_5.jpeg)
**Figure 3.8.:** "shifting" and "splitting".
In order to compare the effectiveness of this operator for different values of k, a simple hillclimber was used. For each setting, thirty 20 × 20 masks were created. The continuous line depicts the average of those thirty runs, the boxes depict the average plus / minus the standard deviation. The whiskers depict the best / worst result respectively. Pseudocode of the basic mutation operator can be found in appendix [A.](#page-60-1)
![](_page_35_Figure_8.jpeg)
**Figure 3.9.:** Results for different values of k. The best result was achieved if k is chosen at random from {2, 3}.
![](_page_36_Figure_1.jpeg)
**Figure 3.10.:** Results when using none of the modifications explained above, only centralized mutation or all modifications; all runs were executed with k randomly chosen from {2, 3}. It can be observed that using a centralized mutation has a huge impact on the effectiveness of the mutation operator, while the other three modifications mainly increase convergence speed and only lead to a slightly better overall result.
## <span id="page-37-0"></span>**3.5. Crossover**
In order to better understand the results presented in this chapter, we first need to highlight one very distinctive property of the considered problem.
As discussed in the first chapter, for all relevant optimization problems there exist (more or less strong) dependencies between the parameters to be found. For the problem of generating crossword masks, fields close to each other are extremely dependent while fields more than four cells apart hardly have any (direct) influence on each other at all, as can be observed in [3.11.](#page-37-1) In fact, whether a specific field assignment is beneficial or not in most cases solely depends on the assignments to the surrounding fields.
<span id="page-37-1"></span>![](_page_37_Figure_4.jpeg)
**Figure 3.11.:** For this graphic, 50,000 valid masks were created. (1) visualizes for each grid cell the percentage of masks having a letter field or definition field of type 1 respectively at that position. (2) shows the result when only counting masks that have a definition field of type 1 (i.e. a straight arrow to the right) at the position marked with a cross. (3) shows the difference between (1) and (2). It can be observed that in (1) most fields (apart from those close to a border) are equally likely to be assigned a certain field type. When fixing one field however, this changes significantly for the cells close by, whereas cells more than four fields apart are hardly influenced at all - as can be seen in (2) and (3)
Following this consideration it becomes obvious that when fitting two halves of two different masks together, some of the field assignments close to the splitting line will lose a lot of their value, as they were only a good choice in combination with the - now replaced - other half of the original mask. It is therefore not surprising, that the shorter the splitting line is, the better the resulting masks are. As a very short splitting line can obviously be achieved by, for example, just cutting off one of the corner fields, an additional criterion - for example that each half has to contain at least 30% of the overall non-cut-out fields - is required. For simplicity reasons only lines through the central point are allowed[2](#page-38-0) . Pseudocode of the resulting crossover operator can be found in appendix [A.](#page-60-2)
<span id="page-38-1"></span>
| Mask: | | | | | | |
|-----------------|--------|--------|--------|--------|---------|---------|
| Average Result: | 30,914 | 37,399 | 46,623 | 74,411 | 129,024 | 227,873 |
| Success: | 24 | 2 | 0 | 0 | 0 | 0 |
**Figure 3.12.:** Different crossover masks, tried on every possible combination of 500 masks (i.e. 249,500 tries). Average score of original masks: 17,207. success denotes how often the resulting mask was better than *both* respective parents.
The result of such a crossover operator however is fairly discouraging: especially for big masks, the result of crossing two completely different masks is *very* unlikely to actually be better than the parents, as can be seen in [3.12.](#page-38-1) This is due to the strong dependencies between adjacent fields and the consequence that new validity violations produced along the crossover line outweigh any positive effects a crossover might have. Two additional things have to be added:
- The results presented in [3.12](#page-38-1) were obtained using masks that already are "fairly good", meaning they contain at most two validity violations. When using less evolved masks, the results are similar but less extreme: some newly produced validity violations have far more impact on the fitness value if the original masks only contain very few violations. However as a mask only containing very few to no validity violations at all is achieved quite quickly, the main challenge lies in optimizing such a mask with respect to the quality criteria - justifying these considerations.
- The more alike the two parents are, the better the results of the crossover operator will be - simply due to the fact that less dependencies are destroyed. This leads to the following consequence: If the population has a high diversity - which usually is desired - the crossover operator is too destructive to be used efficiently. If the population however has a very low diversity, the crossover operator is far less destructive, but simultaneously the main advantage of a genetic algorithm - the ability to explore the search space efficiently and exhaustively - is lost (see chapter [1.5,](#page-19-0) "Diversity vs. Convergence").
<span id="page-38-0"></span><sup>2</sup> Sampling splitting lines with respect to the proposed 30%-criterion is not trivial, as arbitrary cut-out field positions have to be taken into account. It however could be done - apart from a simple *trial & error* approach - in adequate, constant time (using appropriate pre-computation).
<span id="page-39-0"></span>![](_page_39_Figure_1.jpeg)
**Figure 3.13.:** An example for crossover. While both parent masks meet all validity constraints and achieve a total penalty score of 5,920 and 5,490 respectively, the mask resulting from a crossover along the dashed line contains seven violations and hence has a far worse total penalty of 15,925.
## <span id="page-40-0"></span>**3.6. Results**
Having defined the fitness function as well as the mutation and crossover operator, there are still some parameters left that can be adjusted:
- **Selection Function**: Throughout this work the so-called tournament-selection presented in the first chapter was used. This leaves the parameter α ∈ N (determining the tournament size) to adjust the selection pressure. Default value: α = 4.
- **Population Size** =: n ∈ N denoting the number of masks in each generation. Default value: n = 200.
- **Crossover Rate** =: c ∈ [0, 1] denoting the fraction of the new generation to be created by crossover. It was decided to use an elitist selection, meaning that the best mask from the parent generation is always copied to the next generation without being changed. The fraction of the new generation created by mutation therefore is approximately $1 - c$. Default value: c = 0.5.
The following plots document the results achieved with different settings for these three parameters. For each set of parameters, thirty evolutions were run, generating a 20 × 20 mask without any cut-out fields. The continuous line depicts the average over all thirty runs, the boxes denote the average plus / minus the estimated standard deviation. The best and worst results are depicted by the whiskers of the box plots. Pseudocode for the genetic algorithm, as well as for a simple hillclimber, can be found in appendix [A.](#page-61-0)
Note again that due to the design of the fitness function, the actual rating achievable is highly dependent on the mask dimensions and the positions of cut-out fields. In particular a mask with rating 0 is not achievable: in fact the overall best 20 × 20 mask found still had a rating of 8,047.
![](_page_41_Figure_1.jpeg)
**Figure 3.14.:** Results for different values of n (population size). $\alpha=4$ , c=0.5.
![](_page_41_Figure_3.jpeg)
**Figure 3.15.:** Results for different values of $\alpha$ (tournament size). n=200, c=0.5.
<span id="page-42-0"></span>![](_page_42_Figure_1.jpeg)
**Figure 3.16.:** Results for different values of c (fraction of new generation created by crossover). n = 200, α = 4.
![](_page_42_Figure_3.jpeg)
**Figure 3.17.:** Result of a simple hillclimber compared to the evolution with default settings (n = 200, α = 4, c = 0.5).
<span id="page-43-0"></span>![](_page_43_Figure_1.jpeg)
**Figure 3.18.:** Results for four exemplary single runs with default settings (n = 200, α = 4, c = 0.5). It is to note that rather than decreasing continuously, relatively few but significant improvements are found which, when averaged, result in the smooth graphs found in the plots before. This is due to the discrete nature and the high dimensionality of the problem, leading to a huge number of possible mutations with only a fraction being beneficial.
## <span id="page-44-0"></span>**3.7. Discussion**
From these plots it can be concluded that - for this specific problem - the advantages an evolutionary algorithm usually has compared to a simple hillclimber can not be exploited: **A simple hillclimber performs slightly better than a complete evolutionary algorithm.** Further examination shows that this is mainly due to two points:
**Extremely quick Convergence**: It turns out that even for very low selection pressure (α = 2), the diversity among the population decreases very quickly. This can be observed in the below plot, showing the average hamming distance (i.e. number of differently assigned fields) between all masks within each generation. Even omitting the elitist selection, i.e. not directly copying the best individual, leads to no significant difference. In fact it is questionable whether a significantly higher diversity was beneficial at all - as the crossover operator would, as discussed in the previous chapter, hardly be able to exploit this. Hence no further actions aimed at increasing the diversity were taken.
![](_page_44_Figure_4.jpeg)
**Figure 3.19.:** Average Hamming distance within each generation for a 20 × 20 mask and n = 200, c = 0.5 and α = 2 or 4 respectively; for the first 500,000 evaluations (corresponding to 2,500 generations). Note that the average actually drops below one, meaning that the same mask (probably the currently best one) occurs multiple times within the population, as crossover "rediscovers" the original mask by patching together the unchanged halves of two mutation offsprings.
This development is responsible for the observable result that increasing α above 4 has no visible effect: As especially the better individuals are extremely similar or even equal, it does not make a significant difference whether the best or the tenth best individual is selected. It also accounts for the fact that higher values for n do not lead to (significantly) better results in the long run: the increased capability to explore the search space simply is not exploited. It does however take longer to converge, as in the beginning a lot of evaluations are wasted on masks that get thrown out anyway.
**Low Mutation Success Rate**: Due to the discrete nature of the problem and very high dimension, there is a huge number of possible mutations. For a 20 × 20 mask for example, there are approximately two billion possible mutations of size three. When only considering mutations where the changed fields are at most six fields apart (which is reasonable for the centralized mutation used), there are still in the order of 25 million possibilities left. In the later part of the evolution, the vast majority of these mutations result in some new validity violation or a significantly worse quality rating, making the resulting mask worse than the original mask - although there still are some improvements possible. This can indirectly be observed when looking at the fitness plot for a single evolution (see figure [3.18](#page-43-0)): The fitness value of the best mask stays constant for - in the later part - thousands of generations, until at some point an improvement is discovered, leading to a significantly better value. The below graph shows the empirical mutation success rate for a hillclimber run on a 20 × 20 mask. Each cross represents a successful mutation.
In this scenario, "wasting" mutations on suboptimal masks might not be that beneficial. Still it turns out that crossover is required for extracting improvements found by mutation - this is necessary, as the mask on which an improvement was found might have had some flaw compared to the previously best mask itself (for example resulting from some unsuccessful mutation). Crossover then is able to extract the found improvement and "repair" this flaw. Hence, the evolution performs significantly worse if no crossover is used at all (c = 0, see figure [3.16](#page-42-0)).
With the current implementation, about eight thousand 20 × 20 masks can be evaluated per second on an average home PC. A run over three million evaluations hence takes approximately six to seven minutes - which is practically feasible.
![](_page_46_Figure_1.jpeg)
**Figure 3.20.:** Mutation success rate and the corresponding rating of an exemplary hillclimber run. Note the logarithmic scale in the below plot.
# <span id="page-48-0"></span>**4. Memetic Algorithm Approach**
## <span id="page-48-1"></span>**4.1. Basic Idea**
The results discussed in the previous chapter - a simple hillclimber exceeding a complete evolutionary algorithm - are primarily due to the extremely strong *local* dependencies, and the resulting poor performance of the crossover operator. Apart from the problems arising along the splitting line however, a crossover can in fact be very beneficial, as globally different areas of the mask hardly influence each other's value at all.
It turns out that in most cases, the validity violations (and of course assignments contributing to a bad quality rating) created by a crossover along the splitting line can easily be corrected by applying a hillclimber to the resulting child in order to "repair" it. Consider the mask resulting from the exemplary crossover in chapter [3.5:](#page-37-0) only four independent mutations, each of size one (and hence relatively likely to be discovered), are required to repair the mask - making the result significantly better than both parents.
![](_page_48_Figure_4.jpeg)
**Figure 4.1.:** The original crossover result depicted on the left contains several validity violations, and has a rating of 15,925. With only four independent changes however, the mask can be "repaired", such that it achieves a rating of 4,890 - better than both parents (which had a rating of 5,920 and 5,490, see figure [3.13](#page-39-0)).
This gives rise to the concept of introducing a "second" evolutionary algorithm on a higher level: this evolutionary algorithm only uses crossover, but a hillclimber is applied after each crossover to repair the resulting child mask. This concept is also known as **Memetic Algorithm** [**?**]: Basically exploration and exploitation are separated: While exploration is mainly done by the "outer" evolutionary algorithm using only crossover, the hillclimber process is responsible for exploitation - which is what a hillclimber is best at.
## <span id="page-49-0"></span>**4.2. Implementation**
This approach however can still be refined: The potential of a mask resulting from a crossover can be estimated *before* applying the hillclimber. This is done by using the local rating of both parents: Let the **potential rating** of a crossover result be defined as the *sum of the local ratings of both halves.* This value only accounts for the quality of both halves on their own, without taking the problematics arising along the splitting line into account - assuming that these are "repaired" by the hillclimber, this is a reasonable choice[1](#page-49-1) .
Using this value, a preselection of crossover results based on their potential rating can be determined, and only this preselection is repaired with a hillclimber, as the hillclimber is the - by several orders of magnitude - computationally most expensive step.
Still there are some masks, which turn out to be difficult to repair (due to newly created local minima), or which - even after being repaired - just are not that good and hence are not worth to be exploited fully. These are filtered out by first applying a hillclimber with a relatively weak break condition to all masks, then selecting only a subset having the best rating, and then continuing the hillclimbing process only on this subset. At this point it also is sensible to try to keep the diversity within the population as high as possible, as the genetic algorithm basically is only responsible for exploration anyway. In figure [4.2,](#page-50-0) the basic schematics of the resulting algorithm are depicted; additionally it can be found in appendix [A.](#page-63-0)
<span id="page-49-1"></span><sup>1</sup>As one could expect, this value still is significantly lower than the actual rating of a mask after "repairing" it.
<span id="page-50-0"></span>![](_page_50_Figure_1.jpeg)
**Figure 4.2.:** General layout of the algorithm. n stands for the population size. Note that *two* selection steps are present, both of which are completely deterministic - i.e. always the best masks are selected. The - by several orders of magnitude - most expensive steps are the two hillclimbing processes applied to all masks, i.e. step three and six.
## <span id="page-51-0"></span>**4.3. Results**
In this section, the results obtained when using the algorithm described above are presented. Note that for the following plots only eight runs were done - each run however was allowed twenty million evaluations (instead of only three million, as in the previous chapter). The first plot directly compares the performance of a simple hillclimber with the performance of the memetic approach. Afterwards different values for the following four parameters are tested:
- δ := maximal fraction of identically assigned fields, for two masks to be considered as "too similar". Default value: δ = 0.1
- n := population size. Default value: n = 15
- b<sub>w</sub> := weak break condition: Break after b<sub>w</sub> unsuccessful mutations in a row. Default value: b<sub>w</sub> = 2,000
- b<sub>s</sub> := strong break condition: Break after b<sub>s</sub> unsuccessful mutations in a row. Default value: b<sub>s</sub> = 10,000
![](_page_51_Figure_7.jpeg)
**Figure 4.3.:** Memetic approach vs. basic hillclimber.
![](_page_52_Figure_1.jpeg)
**Figure 4.4.:** Different values for $\delta$ : Some optimization might be possible here.
![](_page_52_Figure_3.jpeg)
**Figure 4.5.:** Different values for n: These results are pretty much what one would expect: the bigger the population, the more exploration is involved - leading to slower convergence but (up to a certain point) overall better results
![](_page_53_Figure_1.jpeg)
**Figure 4.6.:** Different values for $b_w$
![](_page_53_Figure_3.jpeg)
**Figure 4.7.:** Different values for $b_s$ : Again, by adjusting $b_w$ and $b_s$ , better overall results can be achieved, but in turn more evaluations are required.
# <span id="page-54-0"></span>**5. Practical Results**
In the preceding chapters, different algorithms and parameters were judged based solely on the fitness values of the masks created. This makes sense, as the fitness value is the only information about a mask available for the algorithm - hence the better the fitness of the masks created, the better the algorithm. Whether the fitness function used really represents the desired characteristics for a good crossword mask is irrelevant for this process, as the performance of an algorithm can be assumed to be independent of the exact fitness function used[1](#page-54-1) .
From a practical point of view however, the resulting mask itself is relevant - and not some fitness value. The link between these two points of view is the fitness function. Judging a fitness function however is only possible by analyzing the resulting masks and - in order to improve it - one needs to adjust the different penalty values and identify new features to be penalized (or maybe rewarded). For this thesis, a professional opinion from Axel Ruepp Rätselservice[2](#page-54-2) was solicited:
"[The masks] are surprisingly good, almost as good as handmade. Only very few adjustments are necessary to make them fit for being used in practice."
Some exemplary resulting masks can be found in appendix [B:](#page-66-0) Based on manually created masks (again, by Axel Ruepp Rätselservice), masks with the same layout were generated by the memetic algorithm approach. Note that a slightly modified fitness function, taking some more complex (and technical) features into account, was used.
An interesting point to add is that, regardless of which fitness-function settings are used (i.e. how the different features are penalized), the automatically generated masks are - by a large margin - always better than the manually created "originals" (with respect to the fitness function used). This strongly suggests that, apart from optimizing the running time of the algorithm, significant improvement is only possible by finding some more accurate fitness function.
<sup>1</sup>At least for roughly similar fitness functions, this is a reasonable assumption.
<span id="page-54-2"></span><span id="page-54-1"></span><sup>2</sup>http://www.raetselservice.de
# <span id="page-56-0"></span>**6. Further Work**
As discussed in the previous chapter, probably no significant improvement concerning the quality of masks generated *with respect to a given fitness function* is possible. No matter how the fitness function is parameterized, the memetic approach already generates masks which are - with respect to that specific fitness function - better than manually designed masks. Still some optimization with respect to running time and convergence speed is possible: For one thing, the memetic approach itself has not been examined very extensively; for several design choices, different solutions might prove to perform better. For another thing, the mask evaluation process can be sped up significantly: Currently the whole mask is re-evaluated after each mutation. However, especially in the context of a simple hillclimber where the same mask is mutated thousands of times until finally an improvement is found, it should be possible to only calculate the impact of a single mutation without re-evaluating the whole mask[1](#page-56-1) . In particular, this would lead to an evaluation running in O(1) instead of - as it is the case with the current implementation - O(number of fields).
The fitness function itself however is a different matter: the main problem is that ultimately the only way to directly compare two fitness functions is to have an expert judge the resulting masks. At least the features of a mask to be rated - for example to judge a cluster based on the number of definition fields it contains - have to be identified manually, and there obviously are many more possibilities than only the basic ones presented here. How exactly a specific feature then impacts the rating of a mask - for example *how many* penalty points to give for which cluster size, and how the different features are weighted - could then be determined by some more interesting methods: For example using a set of manually created "prototype masks" one could try to automatically adjust these fitness function parameters to give *better* fitness values for the prototypes, while simultaneously giving *worse* fitness values for generated masks - hence iteratively leading to masks more and more similar to the given prototypes (again, only with respect to the features rated at all).
<span id="page-56-1"></span><sup>1</sup>This however requires quite a complex logic, as even a single change can have quite a lot of consequences leading to very technical code of hardly any scientific interest. In particular, it might be necessary to build some more complex representation of the parent mask, allowing to quickly have access to the affected words, clusters and so on - but as mentioned, in the context of thousands of mutations being performed on the same parent mask, this probably pays off.
# **Appendix**
## <span id="page-60-1"></span><span id="page-60-0"></span>A. Code
#### <span id="page-60-2"></span>**Algorithm 1**: Mutation
```
Input: Parent mask, Parameter mutationSize
1 (x,y) \leftarrow getRandomField()
2 result \leftarrow mask.copy()
3 result[x,y] \leftarrow getRandomFieldType()
4 \mathbf{for}\ i \leftarrow 2\ \mathbf{to}\ mutationSize\ \mathbf{do}
5     \mathbf{repeat}
6         (x',y') \leftarrow \text{sample from } \mathcal{N}((x,y), 3 \cdot I_{2\times 2})
7     \mathbf{until}\ (x',y')\ is\ a\ valid\ coordinate\ and\ no\ cut-out\ field
8     result[x',y'] \leftarrow getRandomFieldType(x',y')
9 \mathbf{end}
10 \mathbf{return}\ result
```
Note that the method getRandomField() includes the guided mutation described in chapter 3.4, while getRandomFieldType(x',y') respects the limitations discussed in chapter 3.3, as well as the adjusted field type probabilities presented in chapter 3.4. Additionally - as long as at least two different field types are allowed at that position - a field type different from the current type is returned.
#### <span id="page-61-0"></span>**Algorithm 2**: Crossover
```
Input: Parents parent1, parent2
1 (gx, gy) ← central point with respect to only non-cut-out fields
2 β ← random angle from [0, 2π)
3 foreach field (i, j) do
4 if (sin β, cos β) · (i − gx, j − gy)^T ≤ 0 then
5 result[i, j] ← parent1[i, j]
6 else
7 result[i, j] ← parent2[i, j]
8 end
9 end
10 return result
```
#### <span id="page-62-0"></span>**Algorithm 3**: Basic Hillclimber
**Input**: Initial Mask mask, Break Condition limit
```
1 evaluate(mask)
2 noChange ← 0
3 repeat
4 copy ← mutate(mask)
5 evaluate(copy)
6 if copy.rating < mask.rating then
7 mask ← copy
8 noChange ← 0
9 else
10 noChange ← noChange + 1
11 end
12 until noChange ≥ limit
13 return mask
```
### <span id="page-63-0"></span>**Algorithm 4**: Basic Genetic Algorithm
```
Input: Parameter n, α, c
1 for i ← 1 to n do
2 population[i] ← getRandomMask()
3 evaluate(population[i])
4 end
5 sort population by descending rating
6 repeat
7 newPop[1] ← population[1]
8 for i ← 2 to n do
9 if i ≤ cn then
10 newPop[i] ← cross(select(population, α), select(population, α))
11 else
12 newPop[i] ← mutate(select(population, α))
13 end
14 evaluate(newPop[i])
15 end
16 sort population by descending rating
17 until break condition is satisfied
18 return population[1]
19 Procedure: select(population, α)
20 result ← population.selectRandom()
21 for i ← 2 to α do
22 m ← population.selectRandom()
23 if m.rating > result.rating then
24 result ← m
25 end
26 end
27 return result
28 end
```
#### **Algorithm 5**: Memetic Approach
```
Input: Parameter n, b_w, b_s, \delta
1 /* Step 0: Initialization */
2 for i \leftarrow 1 to n do
3     population[i] \leftarrow hillclimber(getRandomMask(), b_s)
4 end
5 repeat
      /* Step 1: Crossover(s) */
6     forall \{i, j\} in \binom{[n]}{2} do
7         m \leftarrow cross(population[i], population[j])
8         for k \leftarrow 2 to 50 do
9             try \leftarrow cross(population[i], population[j])
10            if try.potentialRating > m.potentialRating then m \leftarrow try
11        end
12        newPop.add(m)
13    end
14    /* Step 2: First Selection by potential Rating */
15    remove all but best 2n (with respect to potential Rating) masks from newPop
16    /* Step 3: First Hillclimber */
17    for i \leftarrow 1 to 2n do newPop[i] \leftarrow hillclimber(newPop[i], b_w)
18    /* Step 4: Delete masks that are too similar */
19    sort newPop (by descending actual rating of masks)
20    newPop[*].markedForDeletion \leftarrow false
21    for i \leftarrow 1 to 2n do
22        if newPop[i].markedForDeletion then continue
23        for j \leftarrow i + 1 to 2n do
24            if newPop[i] and newPop[j] are too similar then
25                newPop[j].markedForDeletion \leftarrow true
26            end
27        end
28    end
29    /* Step 5: Second Selection by actual Rating */
30    population \leftarrow all masks from newPop not marked for deletion
31    if population.Count \geq n then keep only best n masks in population
32    else n \leftarrow population.Count
33    /* Yes, the population size is actually reduced, if
34       there are not enough different masks left */
35    /* Step 6: Second Hillclimber */
36    for i \leftarrow 1 to n do population[i] \leftarrow hillclimber(population[i], b_s)
38 until break Condition is satisfied
39 return best mask in population
```
# <span id="page-66-0"></span>**B. Sample Masks**
![](_page_67_Figure_1.jpeg)
**Figure B.1.:** Manually created original mask (total penalty 10,458)
![](_page_67_Figure_3.jpeg)
**Figure B.2.:** Automatically generated mask (total penalty 7,996)
| Coverage: | Perfectly (5) | | | | Once, enclosed (2) | | | | Once, not enclosed (3) | | | | | |
|-------------------|---------------|----|----|----|--------------------|---|---|---|------------------------|----|----|----|----|--|
| Manually created: | 137 | | | | 42 | | | | 0 | | | | | |
| Generated mask: | 143 | | | | 39 | | | | 0 | | | | | |
| | | | | | | | | | | | | | | |
| Word lengths: | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | |
| Manually created: | 0 | 17 | 14 | 13 | 10 | 6 | 2 | 0 | 0 | 0 | 1 | 0 | 1 | |
| Generated mask: | 0 | 6 | 12 | 17 | 15 | 6 | 4 | 0 | 1 | 0 | 0 | 0 | 0 | |
**Table B.1.:** Some basic statistics for the above masks
![](_page_68_Figure_0.jpeg)
**Figure B.3.:** Manually created original mask (total penalty 37,322)
![](_page_68_Figure_2.jpeg)
**Figure B.4.:** Automatically generated mask (total penalty 26,675)
| Coverage: | Perfectly (5) | | | | Once, enclosed (2) | | | | | Once, not enclosed (3) | | | | | |
|-------------------|---------------|----|----|----|--------------------|----|----|---|----|------------------------|----|----|----|--|--|
| Manually created: | 354 | | | | 102 | | | | | 0 | | | | | |
| Generated mask: | 348 | | | | 114 | | | | | 0 | | | | | |
| | | | | | | | | | | | | | | | |
| Word lengths: | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | | |
| Manually created: | 0 | 51 | 42 | 26 | 21 | 15 | 10 | 3 | 1 | 1 | 0 | 0 | 0 | | |
| Generated mask: | 0 | 34 | 36 | 43 | 27 | 10 | 9 | 5 | 0 | 0 | 0 | 0 | 0 | | |
**Table B.2.:** Some basic statistics for the above masks
![](_page_69_Figure_1.jpeg)
**Figure B.5.:** Manually created original mask (total penalty 29,824)
![](_page_69_Figure_3.jpeg)
**Figure B.6.:** Automatically generated mask (total penalty 21,833)
| Coverage: | Perfectly (5) | | | | Once, enclosed (2) | | | | Once, not enclosed (3) | | | | | | |
|-------------------|---------------|----|----|----|--------------------|----|---|----|------------------------|----|----|----|----|--|--|
| Manually created: | 330 | | | | 93 | | | | 0 | | | | | | |
| Generated mask: | 326 | | | | 101 | | | | 0 | | | | | | |
| | | | | | | | | | | | | | | | |
| Word lengths: | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | | |
| Manually created: | 3 | 20 | 64 | 16 | 22 | 8 | 5 | 11 | 0 | 0 | 2 | 0 | 0 | | |
| Generated mask: | 3 | 19 | 26 | 47 | 27 | 14 | 9 | 1 | 1 | 0 | 0 | 0 | 0 | | |
**Table B.3.:** Some basic statistics for the above masks
![](_page_70_Figure_0.jpeg)
**Figure B.7.:** Manually created original mask (total penalty 47,819)
![](_page_70_Figure_2.jpeg)
**Figure B.8.:** Automatically generated mask (total penalty 44,657)
| Coverage: | Perfectly (5) | Once, enclosed (2) | Once, not enclosed (3) |
|-------------------|---------------|--------------------|------------------------|
| Manually created: | 672 | 203 | 0 |
| Generated mask: | 699 | 172 | 0 |
| Word lengths: | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
|-------------------|---|----|----|----|----|----|----|----|----|----|----|----|----|
| Manually created: | 0 | 26 | 74 | 70 | 45 | 31 | 27 | 10 | 3 | 0 | 0 | 0 | 0 |
| Generated mask: | 0 | 52 | 36 | 68 | 63 | 43 | 12 | 8 | 5 | 3 | 0 | 0 | 0 |
**Table B.4.:** Some basic statistics for the above masks
![](_page_71_Figure_1.jpeg)
**Figure B.9.:** And this is what happens when large clusters are rewarded instead of penalized (i.e. the sign gets switched accidentally).
# **Bibliography**
- [1] Michael Vose Alden, Michael D. Vose, Alden H. Wright, and Jonathan E. Rowe. Implicit parallelism. In *GECCO 2003*, pages 1505–1517, 2003.
- [2] David E. Goldberg. *Genetic Algorithms in Search, Optimization, and Machine Learning*. Addison Wesley, 1989.
- [3] John H. Holland. *Adaptation in Natural and Artificial Systems*. The MIT Press, 1992. originally published in 1975.
- [4] I. Gerdes, F. Klawonn, R. Kruse. *Evolutionäre Algorithmen*. Vieweg Verlag, 2004.
- [5] P. Moscato. On evolution, search, optimization, genetic algorithms and martial arts: Towards memetic algorithms. Technical report, Caltech Concurrent Computation Program 158-79, 1989.
- [6] Oliver Ruepp. On the computational complexity of crossword puzzles – theoretical and practical considerations, 2005.
- [7] J. David Schaffer and Amy Morishima. An adaptive crossover distribution mechanism for genetic algorithms. In *Proceedings of the Second International Conference on Genetic Algorithms on Genetic algorithms and their application*, pages 36–40, Hillsdale, NJ, USA, 1987. L. Erlbaum Associates Inc.
- [8] Wikipedia, the free encyclopedia, 2009.

Binary file not shown.

File diff suppressed because it is too large Load Diff

9
requirements-marker.txt Normal file
View File

@@ -0,0 +1,9 @@
# PyTorch with CUDA 12.4 support.
# The PyTorch wheel index is added as an *extra* index: an --index-url line in
# a requirements file is a global option that replaces PyPI for every entry,
# which would make the non-PyTorch packages below unresolvable.
--extra-index-url https://download.pytorch.org/whl/cu124
torch
torchvision
torchaudio
# Transformers and marker
transformers
marker-pdf

2
run.sh Executable file
View File

@@ -0,0 +1,2 @@
#!/bin/bash
# Run the generator from the classes compiled into ./target,
# forwarding all command-line arguments unchanged.
java -classpath target SwedishGenerator "$@"

884
src/SwedishGenerator.java Normal file
View File

@@ -0,0 +1,884 @@
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
/**
* SwedishGenerator.java
*
* Usage:
* javac SwedishGenerator.java
* java SwedishGenerator [--seed N] [--pop N] [--gens N] [--tries N] [--words word-list.txt]
*/
public class SwedishGenerator {
static final int W = 9, H = 8;
static final int MIN_LEN = 2, MAX_LEN = 8;
// Directions for '1'..'4'
static final int[][] DIRS = new int[5][2];
static {
DIRS[1] = new int[]{-1, 0}; // up
DIRS[2] = new int[]{0, 1}; // right
DIRS[3] = new int[]{1, 0}; // down
DIRS[4] = new int[]{0, -1}; // left
}
/** True when {@code ch} is one of the clue digits '1'..'4'. */
static boolean isDigit(char ch) {
    return '1' <= ch && ch <= '4';
}
/** True when {@code ch} is an upper-case ASCII letter 'A'..'Z'. */
static boolean isLetter(char ch) {
    return 'A' <= ch && ch <= 'Z';
}
/** True when the cell can hold a letter: either still empty ('#') or already filled with 'A'..'Z'. */
static boolean isLetterCell(char ch) {
    if (ch == '#') return true;
    return ch >= 'A' && ch <= 'Z';
}
// ---------------- CLI ----------------
/** Command-line options, pre-populated with the defaults printed by {@code usage()}. */
static class Opts {
    /** Value of {@code --seed}. */
    int seed = 1;
    /** Value of {@code --pop}. */
    int pop = 18;
    /** Value of {@code --gens}. */
    int gens = 100;
    /** Value of {@code --tries}. */
    int tries = 50;
    /** Value of {@code --words}: path of the word-list file. */
    String wordsPath = "./word-list.txt";
}
/**
 * Prints the command-line synopsis and the default value of every option to
 * standard output. The text block below is emitted verbatim.
 */
static void usage() {
System.out.println("""
Usage:
java SwedishGenerator [--seed N] [--pop N] [--gens N] [--tries N] [--words word-list.txt]
Defaults:
--seed 1
--pop 18
--gens 100
--tries 50
--words ./word-list.txt
""");
}
/**
 * Parses the command line into an {@link Opts} instance.
 *
 * <p>{@code --help} / {@code -h} prints the usage text and exits the JVM with
 * status 0. Every other recognised option consumes the following argument as
 * its value.
 *
 * @param argv raw command-line arguments
 * @return the options, with defaults for anything not given
 * @throws IllegalArgumentException if an option is unknown or is the last
 *         argument and therefore has no value
 * @throws NumberFormatException if a numeric option's value is not an integer
 */
static SwedishGenerator.Opts parseArgs(String[] argv) {
    var out = new SwedishGenerator.Opts();
    for (int i = 0; i < argv.length; i++) {
        String a = argv[i];
        if (a.equals("--help") || a.equals("-h")) {
            usage();
            System.exit(0);
        }
        boolean known = a.equals("--seed") || a.equals("--pop") || a.equals("--gens")
                || a.equals("--tries") || a.equals("--words");
        if (!known) throw new IllegalArgumentException("Unknown arg: " + a);
        // Every remaining option takes a value; fail clearly instead of parsing null
        // (numeric options) or silently storing a null path (--words).
        if (i + 1 >= argv.length) throw new IllegalArgumentException("Missing value for " + a);
        String v = argv[++i];
        if (a.equals("--seed")) out.seed = Integer.parseInt(v);
        else if (a.equals("--pop")) out.pop = Integer.parseInt(v);
        else if (a.equals("--gens")) out.gens = Integer.parseInt(v);
        else if (a.equals("--tries")) out.tries = Integer.parseInt(v);
        else out.wordsPath = v; // --words
    }
    return out;
}
// ---------------- RNG (xorshift32) ----------------
/** Small deterministic pseudo-random generator using the xorshift32 shift triple 13/17/5. */
static final class Rng {
    private int x;

    /** Seeds the generator; a seed of 0 is replaced by 1 because zero would stay zero under the xor-shifts. */
    Rng(int seed) {
        this.x = (seed == 0) ? 1 : seed;
    }

    /** Advances the state and returns it; the 32 bits are meant to be read as unsigned. */
    int nextU32() {
        int s = x;
        s ^= s << 13;
        s ^= s >>> 17;
        s ^= s << 5;
        x = s;
        return s;
    }

    /** Returns an integer in {@code [min, max]}, both ends inclusive, by reducing one draw modulo the range. */
    int randint(int min, int max) {
        long unsignedDraw = Integer.toUnsignedLong(nextU32());
        long span = (long) max - (long) min + 1L;
        long offset = unsignedDraw % span;
        return (int) (min + offset);
    }

    /** Returns one draw scaled by 1/0xFFFFFFFF, so the result lies in {@code [0, 1]}. */
    double nextFloat() {
        long unsignedDraw = Integer.toUnsignedLong(nextU32());
        return unsignedDraw / 4294967295.0; // 0xFFFFFFFF
    }
}
/** Limits {@code x} to the range {@code [a, b]}: first capped at {@code b}, then raised to at least {@code a}. */
static int clamp(int x, int a, int b) {
    int capped = Math.min(b, x);
    return Math.max(a, capped);
}
// ---------------- Grid helpers ----------------
/** Creates an H-by-W grid in which every cell is the empty marker '#'. */
static char[][] makeEmptyGrid() {
    char[][] grid = new char[H][];
    for (int row = 0; row < H; row++) {
        char[] line = new char[W];
        Arrays.fill(line, '#');
        grid[row] = line;
    }
    return grid;
}
/** Returns an independent copy of the first H rows of {@code g}, each copied to width W. */
static char[][] deepCopyGrid(char[][] g) {
    char[][] copy = new char[H][];
    int row = 0;
    while (row < H) {
        copy[row] = Arrays.copyOf(g[row], W);
        row++;
    }
    return copy;
}
/** Serialises the grid row by row, joining rows with a single '\n' and no trailing newline. */
static String gridToString(char[][] g) {
    String[] rows = new String[H];
    for (int r = 0; r < H; r++) {
        rows[r] = new String(g[r]);
    }
    return String.join("\n", rows);
}
/** Like {@code gridToString}, but clue digits are shown as blanks. */
static String renderHuman(char[][] g) {
    StringBuilder out = new StringBuilder();
    for (int r = 0; r < H; r++) {
        for (int c = 0; c < W; c++) {
            char cell = g[r][c];
            if (isDigit(cell)) {
                out.append(' ');
            } else {
                out.append(cell);
            }
        }
        // newline between rows only, never after the last one
        if (r < H - 1) out.append('\n');
    }
    return out.toString();
}
// ---------------- Words / index ----------------
/** Minimal growable list of primitive ints. */
static final class IntList {
    int[] a = new int[8];
    int n = 0;

    /** Appends {@code v}, doubling the backing array when it is full. */
    void add(int v) {
        if (n >= a.length) {
            a = Arrays.copyOf(a, 2 * a.length);
        }
        a[n] = v;
        n++;
    }

    /** Number of values stored. */
    int size() {
        return n;
    }

    /** Backing array; only the first {@code size()} entries are meaningful. */
    int[] data() {
        return a;
    }
}
/** All dictionary words of one length, plus a per-position, per-letter index into them. */
static final class DictEntry {
    final ArrayList<String> words = new ArrayList<>();
    /** {@code pos[i][letter]} lists the indices (in insertion order) of words having {@code letter} at position {@code i}. */
    final IntList[][] pos;

    /** Creates an entry for words of length {@code L} with all index lists empty. */
    DictEntry(int L) {
        pos = new IntList[L][26];
        for (IntList[] perLetter : pos) {
            for (int j = 0; j < perLetter.length; j++) {
                perLetter[j] = new IntList();
            }
        }
    }
}
/** Holder for the loaded word list and the lookup structures derived from it. */
static final class Dict {
    /** Every accepted word. */
    final ArrayList<String> words;
    /** Word length -> per-length index. */
    final HashMap<Integer, DictEntry> index;
    /** Word length -> number of words of that length. */
    final HashMap<Integer, Integer> lenCounts;

    Dict(ArrayList<String> words,
         HashMap<Integer, DictEntry> index,
         HashMap<Integer, Integer> lenCounts) {
        this.lenCounts = lenCounts;
        this.index = index;
        this.words = words;
    }
}
/**
 * Loads the word list and builds the per-length lookup index.
 *
 * <p>Each line is trimmed and upper-cased; only words made of 'A'..'Z' whose
 * length lies in {@code [MIN_LEN, MAX_LEN]} are kept. If the file cannot be
 * read, a small built-in list is used instead and a warning is written to
 * standard error, so a mistyped path does not go unnoticed.
 *
 * @param wordsPath path of a UTF-8 text file with one word per line
 * @return the accepted words together with their length counts and index
 */
static Dict loadWords(String wordsPath) {
    String raw;
    try {
        raw = Files.readString(Path.of(wordsPath), StandardCharsets.UTF_8);
    } catch (IOException e) {
        // Deliberate best-effort fallback, but report it rather than swallowing the error.
        System.err.println("warning: cannot read word list '" + wordsPath + "' (" + e
                + "); using built-in fallback list");
        raw = "EU\nUUR\nAUTO\nBOOM\nHUIS\nKAT\nZEE\nRODE\nDRAAD\nKENNIS\nNETWERK\nPAKTE\n";
    }
    ArrayList<String> words = new ArrayList<>();
    for (String line : raw.split("\\R")) {
        String s = line.trim().toUpperCase(Locale.ROOT);
        if (isIndexableWord(s)) words.add(s);
    }
    HashMap<Integer, DictEntry> index = new HashMap<>();
    HashMap<Integer, Integer> lenCounts = new HashMap<>();
    for (String w : words) {
        int L = w.length();
        lenCounts.merge(L, 1, Integer::sum);
        DictEntry entry = index.computeIfAbsent(L, DictEntry::new);
        int idx = entry.words.size();
        entry.words.add(w);
        // isIndexableWord guarantees every character is 'A'..'Z', so the letter index is in range.
        for (int i = 0; i < L; i++) {
            entry.pos[i][w.charAt(i) - 'A'].add(idx);
        }
    }
    return new Dict(words, index, lenCounts);
}

/** True when {@code s} consists only of 'A'..'Z' and its length is within [MIN_LEN, MAX_LEN]. */
private static boolean isIndexableWord(String s) {
    int len = s.length();
    if (len < MIN_LEN || len > MAX_LEN) return false;
    for (int i = 0; i < len; i++) {
        if (!isLetter(s.charAt(i))) return false;
    }
    return true;
}
/**
 * Intersects two ascending int sequences with a two-pointer merge.
 *
 * @param a    first array; only the first {@code aLen} entries are read
 * @param aLen number of valid entries in {@code a}
 * @param b    second array; only the first {@code bLen} entries are read
 * @param bLen number of valid entries in {@code b}
 * @return a new, exactly sized array with the values present in both prefixes
 */
static int[] intersectSorted(int[] a, int aLen, int[] b, int bLen) {
    int[] common = new int[Math.min(aLen, bLen)];
    int ai = 0, bi = 0, filled = 0;
    while (ai < aLen && bi < bLen) {
        int left = a[ai], right = b[bi];
        if (left < right) {
            ai++;
        } else if (left > right) {
            bi++;
        } else {
            common[filled++] = left;
            ai++;
            bi++;
        }
    }
    return Arrays.copyOf(common, filled);
}
/** Result of matching a slot pattern against one {@code DictEntry}. */
static final class CandidateInfo {
    /** Indices of the matching words, or {@code null} when the pattern imposes no constraint. */
    int[] indices;
    /** Number of matching words. */
    int count;
}
/**
 * Finds the words in {@code entry} compatible with a partially filled slot.
 *
 * @param entry   index of all words having the slot's length
 * @param pattern one char per slot cell; 0 (or any non-letter) means "not fixed yet"
 * @return the matching word indices and their count; {@code indices} is
 *         {@code null} and {@code count} is the full word count when no cell is fixed
 */
static CandidateInfo candidateInfoForPattern(DictEntry entry, char[] pattern /* 0 means null */) {
    ArrayList<IntList> constraints = new ArrayList<>();
    for (int i = 0; i < pattern.length; i++) {
        char fixed = pattern[i];
        if (fixed == 0 || !isLetter(fixed)) continue;
        constraints.add(entry.pos[i][fixed - 'A']);
    }

    CandidateInfo result = new CandidateInfo();
    if (constraints.isEmpty()) {
        result.indices = null;
        result.count = entry.words.size();
        return result;
    }

    // Start from the shortest list so the running intersection stays as small as possible.
    constraints.sort(Comparator.comparingInt(IntList::size));
    IntList smallest = constraints.get(0);
    int[] surviving = Arrays.copyOf(smallest.data(), smallest.size());
    for (int k = 1; k < constraints.size() && surviving.length > 0; k++) {
        IntList other = constraints.get(k);
        surviving = intersectSorted(surviving, surviving.length, other.data(), other.size());
    }

    result.indices = surviving;
    result.count = surviving.length;
    return result;
}
// ---------------- Slots ----------------
/** A word slot: the clue cell it starts from, its direction digit and the cells it covers. */
static final class Slot {
    final int clueR, clueC;
    final char dir; // '1'..'4'
    final int[] rs, cs; // row / column of each covered cell, in reading order
    final int len;

    Slot(int clueR, int clueC, char dir, int[] rs, int[] cs) {
        this.clueR = clueR;
        this.clueC = clueC;
        this.dir = dir;
        this.rs = rs;
        this.cs = cs;
        this.len = rs.length;
    }

    /** Identifier of the form {@code "row,col:dir"} built from the clue cell and direction. */
    String key() {
        return new StringBuilder()
                .append(clueR).append(',')
                .append(clueC).append(':')
                .append(dir)
                .toString();
    }
}
/**
 * Collects one slot per clue digit whose neighbouring cell in the arrow
 * direction is a letter cell. The run is followed until the board edge, a
 * non-letter cell, or MAX_LEN + 1 cells, so over-long runs are still reported
 * (with length MAX_LEN + 1) and can be detected by the caller.
 */
static ArrayList<Slot> extractSlots(char[][] grid) {
    ArrayList<Slot> found = new ArrayList<>();
    final int cap = MAX_LEN + 1; // one cell more than allowed, to expose over-long runs
    for (int r = 0; r < H; r++) {
        for (int c = 0; c < W; c++) {
            char clue = grid[r][c];
            if (!isDigit(clue)) continue;
            int[] step = DIRS[clue - '0'];
            int dr = step[0], dc = step[1];
            int[] rows = new int[cap];
            int[] cols = new int[cap];
            int n = 0;
            for (int rr = r + dr, cc = c + dc;
                 n < cap && rr >= 0 && rr < H && cc >= 0 && cc < W && isLetterCell(grid[rr][cc]);
                 rr += dr, cc += dc) {
                rows[n] = rr;
                cols[n] = cc;
                n++;
            }
            if (n == 0) continue; // arrow points off the board or at a non-letter cell
            found.add(new Slot(r, c, clue, Arrays.copyOf(rows, n), Arrays.copyOf(cols, n)));
        }
    }
    return found;
}
/**
 * Checks whether a clue at (r, c) pointing in direction {@code d} has at
 * least MIN_LEN consecutive letter cells ahead of it (counting stops at MAX_LEN).
 */
static boolean hasRoomForClue(char[][] grid, int r, int c, char d) {
    int[] step = DIRS[d - '0'];
    int free = 0;
    for (int rr = r + step[0], cc = c + step[1]; free < MAX_LEN; rr += step[0], cc += step[1]) {
        boolean inside = rr >= 0 && rr < H && cc >= 0 && cc < W;
        if (!inside || !isLetterCell(grid[rr][cc])) break;
        free++;
    }
    return free >= MIN_LEN;
}
// ---------------- FAST mask fitness ----------------
/**
 * Rates a mask without filling it; lower is better.
 *
 * <p>The penalty is the sum of: deviation of the clue count from a quarter
 * of the board, slots that are too short / too long / of a length with no
 * dictionary words, letter cells that are not crossed by both a horizontal
 * and a vertical slot, clusters of 8-connected clue cells, and letter cells
 * walled in on three or more sides. A mask without any slot gets a fixed
 * penalty of 1,000,000,000.
 *
 * @param grid      the mask to rate
 * @param lenCounts word length -> number of dictionary words of that length
 * @return the total penalty
 */
static long maskFitness(char[][] grid, HashMap<Integer, Integer> lenCounts) {
    // --- clue count versus target density ---
    int clues = 0;
    for (int r = 0; r < H; r++) {
        for (int c = 0; c < W; c++) {
            if (isDigit(grid[r][c])) clues++;
        }
    }
    int wantedClues = (int) Math.round(W * H * 0.25); // ~18
    long total = 8L * Math.abs(clues - wantedClues);

    ArrayList<Slot> slots = extractSlots(grid);
    if (slots.isEmpty()) return 1_000_000_000L;

    // --- slot lengths, and per-cell coverage split by orientation ---
    int[][] across = new int[H][W];
    int[][] down = new int[H][W];
    for (Slot slot : slots) {
        int len = slot.len;
        boolean tooShort = len < MIN_LEN;
        boolean tooLong = len > MAX_LEN;
        if (tooShort) total += 8000;
        if (tooLong) total += 8000 + (long) (len - MAX_LEN) * 500L;
        if (!tooShort && !tooLong && !lenCounts.containsKey(len)) total += 12000;

        int[][] cover = (slot.dir == '2' || slot.dir == '4') ? across : down;
        for (int i = 0; i < len; i++) {
            cover[slot.rs[i]][slot.cs[i]]++;
        }
    }

    // --- per letter cell: crossing quality and dead-end-ish cells (3+ walls) ---
    final int[][] orthogonal = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
    for (int r = 0; r < H; r++) {
        for (int c = 0; c < W; c++) {
            if (!isLetterCell(grid[r][c])) continue;

            int h = across[r][c], v = down[r][c];
            if (h == 0 || v == 0) { // covered in both orientations costs nothing
                if (h + v == 0) total += 1500;
                else if (h + v == 1) total += 200;
                else total += 600;
            }

            int walls = 0;
            for (int[] d : orthogonal) {
                int rr = r + d[0], cc = c + d[1];
                boolean outside = rr < 0 || rr >= H || cc < 0 || cc >= W;
                if (outside || !isLetterCell(grid[rr][cc])) walls++;
            }
            if (walls >= 3) total += 400;
        }
    }

    // --- clue clustering (8-connected flood fill) ---
    boolean[][] visited = new boolean[H][W];
    int[] pending = new int[W * H];
    for (int r = 0; r < H; r++) {
        for (int c = 0; c < W; c++) {
            if (visited[r][c] || !isDigit(grid[r][c])) continue;
            int top = 0, members = 0;
            pending[top++] = r * W + c;
            visited[r][c] = true;
            while (top > 0) {
                int cell = pending[--top];
                int cr = cell / W, cc = cell % W;
                members++;
                for (int dr = -1; dr <= 1; dr++) {
                    for (int dc = -1; dc <= 1; dc++) {
                        int nr = cr + dr, nc = cc + dc;
                        if (nr < 0 || nr >= H || nc < 0 || nc >= W) continue;
                        // the centre cell itself is already visited, so it is skipped here too
                        if (visited[nr][nc] || !isDigit(grid[nr][nc])) continue;
                        visited[nr][nc] = true;
                        pending[top++] = nr * W + nc;
                    }
                }
            }
            // a lone clue contributes (1 - 1) * 120 = 0
            total += (long) (members - 1) * 120L;
        }
    }
    return total;
}
// ---------------- Mask generation ----------------
/**
 * Builds a random mask by dropping clue digits onto an empty grid until a
 * quarter of the cells are clues or 4000 attempts have been made. A clue is
 * kept only if it has room for a word in its direction.
 */
static char[][] randomMask(Rng rng) {
    char[][] g = makeEmptyGrid();
    int wantedClues = (int) Math.round(W * H * 0.25);
    int placed = 0;
    for (int attempt = 0; placed < wantedClues && attempt < 4000; attempt++) {
        int r = rng.randint(0, H - 1);
        int c = rng.randint(0, W - 1);
        if (isDigit(g[r][c])) continue;
        char arrow = (char) ('0' + rng.randint(1, 4));
        g[r][c] = arrow;
        if (hasRoomForClue(g, r, c, arrow)) {
            placed++;
        } else {
            g[r][c] = '#';
        }
    }
    return g;
}
/**
 * Returns a mutated copy of {@code grid}: four cells near one random centre
 * are toggled. A clue cell becomes empty; any other cell receives a random
 * clue digit if that clue has room, and is left empty ('#') otherwise.
 */
static char[][] mutate(Rng rng, char[][] grid) {
    char[][] g = deepCopyGrid(grid);
    final int centerRow = rng.randint(0, H - 1);
    final int centerCol = rng.randint(0, W - 1);
    for (int step = 0; step < 4; step++) {
        // sum of two draws from -2..2 concentrates the offsets around the centre
        int rowJitter = rng.randint(-2, 2) + rng.randint(-2, 2);
        int colJitter = rng.randint(-2, 2) + rng.randint(-2, 2);
        int r = clamp(centerRow + rowJitter, 0, H - 1);
        int c = clamp(centerCol + colJitter, 0, W - 1);
        if (isDigit(g[r][c])) {
            g[r][c] = '#';
            continue;
        }
        char arrow = (char) ('0' + rng.randint(1, 4));
        g[r][c] = arrow;
        if (!hasRoomForClue(g, r, c, arrow)) {
            g[r][c] = '#';
        }
    }
    return g;
}
/**
 * Combines two masks along a random straight line through the board centre:
 * cells on the non-negative side of the line come from {@code a}, the rest
 * from {@code b}. Clues that end up without room for a word are cleared.
 */
static char[][] crossover(Rng rng, char[][] a, char[][] b) {
    char[][] child = makeEmptyGrid();
    final double midRow = (H - 1) / 2.0;
    final double midCol = (W - 1) / 2.0;
    final double angle = rng.nextFloat() * Math.PI;
    final double normalRow = Math.cos(angle);
    final double normalCol = Math.sin(angle);

    for (int r = 0; r < H; r++) {
        for (int c = 0; c < W; c++) {
            double side = (r - midRow) * normalRow + (c - midCol) * normalCol;
            if (side >= 0) {
                child[r][c] = a[r][c];
            } else {
                child[r][c] = b[r][c];
            }
        }
    }

    // Repair pass in row-major order: clearing a clue frees a cell for later checks.
    for (int r = 0; r < H; r++) {
        for (int c = 0; c < W; c++) {
            char cell = child[r][c];
            if (!isDigit(cell)) continue;
            if (!hasRoomForClue(child, r, c, cell)) child[r][c] = '#';
        }
    }
    return child;
}
/**
 * Simple hill climber: repeatedly mutates the current best mask and keeps
 * the mutant only when its fitness is strictly lower. Stops after
 * {@code limit} consecutive mutations without improvement.
 *
 * @return the best mask found (never the {@code start} array itself)
 */
static char[][] hillclimb(Rng rng, char[][] start, HashMap<Integer, Integer> lenCounts, int limit) {
    char[][] current = deepCopyGrid(start);
    long currentScore = maskFitness(current, lenCounts);
    for (int stale = 0; stale < limit; ) {
        char[][] trial = mutate(rng, current);
        long trialScore = maskFitness(trial, lenCounts);
        if (trialScore >= currentScore) {
            stale++;
            continue;
        }
        current = trial;
        currentScore = trialScore;
        stale = 0;
    }
    return current;
}
/** Fraction of the W*H cells that hold the same character in both grids. */
static double similarity(char[][] a, char[][] b) {
    final int cells = W * H;
    int matches = 0;
    for (int idx = 0; idx < cells; idx++) {
        int r = idx / W, c = idx % W;
        if (a[r][c] == b[r][c]) matches++;
    }
    return (double) matches / cells;
}
/**
 * Evolves a mask with a memetic loop: every individual is hill-climbed, each
 * generation adds hill-climbed crossover children, and the next population is
 * the best {@code popSize} masks that are not more than 92% identical to an
 * already kept one.
 *
 * <p>Each mask's fitness is computed once when it is created and reused for
 * sorting and logging, instead of being recomputed on every comparison.
 *
 * @param rng       random source; call order is deterministic for a given seed
 * @param lenCounts word length -> number of dictionary words of that length
 * @param popSize   population size, must be at least 1
 * @param gens      number of generations; 0 returns the best initial mask
 * @return the mask with the lowest fitness in the final population
 * @throws IllegalArgumentException if {@code popSize} is less than 1
 */
static char[][] generateMask(Rng rng, HashMap<Integer, Integer> lenCounts, int popSize, int gens) {
    // An empty population would otherwise fail later with "/ by zero" inside randint.
    if (popSize < 1) throw new IllegalArgumentException("popSize must be >= 1, got " + popSize);
    System.out.println("generateMask init pop: " + popSize);

    // Pairs a mask with its fitness so the comparator never re-evaluates a grid.
    final class Scored {
        final char[][] grid;
        final long fitness;
        Scored(char[][] grid) {
            this.grid = grid;
            this.fitness = maskFitness(grid, lenCounts);
        }
    }

    ArrayList<Scored> pop = new ArrayList<>();
    for (int i = 0; i < popSize; i++) {
        char[][] g = randomMask(rng);
        pop.add(new Scored(hillclimb(rng, g, lenCounts, 180)));
    }
    for (int gen = 0; gen < gens; gen++) {
        // Children are collected separately so parents are drawn from the unchanged population.
        ArrayList<Scored> children = new ArrayList<>();
        int pairs = Math.max(popSize, (int) Math.floor(popSize * 1.5));
        for (int k = 0; k < pairs; k++) {
            char[][] p1 = pop.get(rng.randint(0, pop.size() - 1)).grid;
            char[][] p2 = pop.get(rng.randint(0, pop.size() - 1)).grid;
            char[][] child = crossover(rng, p1, p2);
            children.add(new Scored(hillclimb(rng, child, lenCounts, 70)));
        }
        pop.addAll(children);
        pop.sort(Comparator.comparingLong(s -> s.fitness));

        // Best-first selection that skips near-duplicates to keep the population diverse.
        ArrayList<Scored> next = new ArrayList<>();
        for (Scored cand : pop) {
            if (next.size() >= popSize) break;
            boolean ok = true;
            for (Scored kept : next) {
                if (similarity(cand.grid, kept.grid) > 0.92) { ok = false; break; }
            }
            if (ok) next.add(cand);
        }
        pop = next;
        if (gen % 10 == 0) {
            long bestF = pop.get(0).fitness;
            System.out.println(" gen " + gen + "/" + gens + " bestFitness=" + bestF);
        }
    }
    pop.sort(Comparator.comparingLong(s -> s.fitness));
    return pop.get(0).grid;
}
// ---------------- Fill (CSP) ----------------
/** Counters collected while filling a mask with words. */
static final class FillStats {
    /** Number of search nodes visited. */
    long nodes;
    /** Number of dead ends that forced a backtrack. */
    long backtracks;
    /** Elapsed time in seconds (presumably set by the caller once filling ends; not visible here). */
    double seconds;
    /** Candidate count of the most recently chosen slot. */
    int lastMRV;
}
/** Outcome of a fill attempt. */
static final class FillResult {
    /** Whether the fill succeeded. */
    boolean ok;
    /** The grid produced by the fill attempt. */
    char[][] grid;
    /** Presumably slot key -> word placed in that slot; confirm against the code that populates it. */
    HashMap<String, String> clueMap;
    /** Search statistics for the attempt. */
    FillStats stats;
}
/** Record of the cells a word placement overwrote, so the placement can be reverted. */
static final class Undo {
    /** Row and column of each changed cell; only the first {@code n} entries are used. */
    final int[] rs, cs;
    /** Previous content of each changed cell; only the first {@code n} entries are used. */
    final char[] prev;
    /** Number of changed cells. */
    final int n;

    Undo(int[] rs, int[] cs, char[] prev, int n) {
        this.rs = rs;
        this.cs = cs;
        this.prev = prev;
        this.n = n;
    }
}
/** Reads the slot's cells from the grid: letters are copied, every other cell becomes 0 ("unknown"). */
static char[] patternForSlot(char[][] grid, Slot s) {
    char[] pat = new char[s.len]; // zero-initialised: every position starts as "unknown"
    for (int i = 0; i < pat.length; i++) {
        char cell = grid[s.rs[i]][s.cs[i]];
        if (isLetter(cell)) pat[i] = cell;
    }
    return pat;
}
/**
 * Tie-break score for a slot: ten points per additional slot sharing one of
 * its cells, plus its length.
 */
static int slotScore(int[][] cellCount, Slot s) {
    int shared = 0;
    for (int i = 0; i < s.len; i++) {
        shared += cellCount[s.rs[i]][s.cs[i]];
    }
    int crossings = shared - s.len; // each cell counts the slot itself once
    return 10 * crossings + s.len;
}
/**
 * Writes {@code w} into the slot's cells.
 *
 * <p>Empty cells ('#') receive the word's letter; a cell that already holds
 * the same letter is left alone. If a cell holds anything else, all cells
 * written so far are restored and the placement is rejected.
 *
 * @return an undo record of the cells that were changed, or {@code null} on conflict
 */
static Undo placeWord(char[][] grid, Slot s, String w) {
    final int len = s.len;
    int[] touchedR = new int[len];
    int[] touchedC = new int[len];
    char[] before = new char[len];
    int touched = 0;
    for (int i = 0; i < len; i++) {
        int r = s.rs[i], c = s.cs[i];
        char existing = grid[r][c];
        char wanted = w.charAt(i);
        if (existing == '#') {
            touchedR[touched] = r;
            touchedC[touched] = c;
            before[touched] = existing;
            touched++;
            grid[r][c] = wanted;
            continue;
        }
        if (existing == wanted) continue;
        // conflict: roll back what this call has written so far
        for (int j = 0; j < touched; j++) {
            grid[touchedR[j]][touchedC[j]] = before[j];
        }
        return null;
    }
    return new Undo(touchedR, touchedC, before, touched);
}
/** Restores the cells recorded in {@code u} to their previous content. */
static void undoPlace(char[][] grid, Undo u) {
    int i = 0;
    while (i < u.n) {
        grid[u.rs[i]][u.cs[i]] = u.prev[i];
        i++;
    }
}
/**
 * Fills the word slots of a mask with dictionary words using backtracking search.
 *
 * Works on a copy of the mask. Only slots with MIN_LEN <= len <= MAX_LEN take part.
 * At every node the unassigned slot with the fewest matching candidates is chosen
 * (ties: higher slotScore), and at most MAX_TRIES_PER_SLOT candidates are tried,
 * visited from a random start with a random stride. The stride is forced to be
 * coprime with the candidate count so that every visited candidate is distinct.
 * A word is never used for two slots. When a try fails, its placement is undone.
 *
 * @param rng         random source for start offset and stride
 * @param mask        grid to fill (copied, not modified)
 * @param dictIndex   dictionary entries keyed by word length
 * @param logEveryMs  minimum interval between progress-line refreshes, in milliseconds
 * @param timeLimitMs search budget in milliseconds; values <= 0 disable the limit
 * @return result with the success flag, the working grid, the slot-key to word map and statistics
 */
static FillResult fillMask(Rng rng, char[][] mask, HashMap<Integer, DictEntry> dictIndex,
                           int logEveryMs, int timeLimitMs) {
    char[][] grid = deepCopyGrid(mask);
    ArrayList<Slot> allSlots = extractSlots(grid);
    ArrayList<Slot> slots = new ArrayList<>();
    for (Slot s : allSlots) if (s.len >= MIN_LEN && s.len <= MAX_LEN) slots.add(s);
    HashSet<String> used = new HashSet<>();
    HashMap<String, String> assigned = new HashMap<>();
    // cellCount[r][c] = number of usable slots covering that cell (input to slotScore)
    int[][] cellCount = new int[H][W];
    for (Slot s : slots) for (int i = 0; i < s.len; i++) cellCount[s.rs[i]][s.cs[i]]++;
    long t0 = System.currentTimeMillis();
    final java.util.concurrent.atomic.AtomicLong lastLog = new java.util.concurrent.atomic.AtomicLong(t0);
    FillStats stats = new FillStats();
    final int TOTAL = slots.size();
    final int BAR_LEN = 22;
    // Redraws the single-line progress bar, at most once per logEveryMs.
    Runnable renderProgress = () -> {
        long now = System.currentTimeMillis();
        if ((now - lastLog.get()) < logEveryMs) return;
        lastLog.set(now);
        int done = assigned.size();
        int pct = (TOTAL == 0) ? 100 : (int) Math.floor((done / (double) TOTAL) * 100);
        int filled = Math.min(BAR_LEN, (int) Math.floor((pct / 100.0) * BAR_LEN));
        String bar = "[" + "#".repeat(filled) + "-".repeat(BAR_LEN - filled) + "]";
        String elapsed = String.format(Locale.ROOT, "%.1fs", (now - t0) / 1000.0);
        String msg = String.format(
            Locale.ROOT,
            "%s %d/%d slots | nodes=%d | backtracks=%d | mrv=%d | %s",
            bar, done, TOTAL, stats.nodes, stats.backtracks, stats.lastMRV, elapsed
        );
        System.out.print("\r" + padRight(msg, 120));
        System.out.flush();
    };
    // Result of slot selection: done = nothing left to assign; slot == null && !done = dead end.
    class Pick {
        Slot slot;
        CandidateInfo info;
        boolean done;
    }
    // Minimum-remaining-values selection over all unassigned slots.
    java.util.function.Supplier<Pick> chooseMRV = () -> {
        Slot best = null;
        CandidateInfo bestInfo = null;
        for (Slot s : slots) {
            String k = s.key();
            if (assigned.containsKey(k)) continue;
            DictEntry entry = dictIndex.get(s.len);
            if (entry == null) {
                Pick p = new Pick();
                p.slot = null; p.info = null; p.done = false;
                return p;
            }
            char[] pat = patternForSlot(grid, s);
            CandidateInfo info = candidateInfoForPattern(entry, pat);
            if (info.count == 0) {
                Pick p = new Pick();
                p.slot = null; p.info = null; p.done = false;
                return p;
            }
            if (best == null
                || info.count < bestInfo.count
                || (info.count == bestInfo.count && slotScore(cellCount, s) > slotScore(cellCount, best))) {
                best = s;
                bestInfo = info;
                if (info.count <= 1) break; // cannot get more constrained than a single candidate
            }
        }
        Pick p = new Pick();
        if (best == null) {
            p.slot = null;
            p.info = null;
            p.done = true;
        } else {
            p.slot = best;
            p.info = bestInfo;
            p.done = false;
        }
        return p;
    };
    final int MAX_TRIES_PER_SLOT = 500;
    class Solver {
        /** True once the time budget (if any) has been used up. */
        boolean timedOut() {
            return timeLimitMs > 0 && (System.currentTimeMillis() - t0) > timeLimitMs;
        }

        /** Greatest common divisor of two non-negative ints. */
        int gcd(int a, int b) {
            while (b != 0) {
                int r = a % b;
                a = b;
                b = r;
            }
            return a;
        }

        boolean backtrack() {
            stats.nodes++;
            if (timedOut()) return false;
            Pick pick = chooseMRV.get();
            if (pick.done) return true;
            if (pick.slot == null) { stats.backtracks++; return false; }
            stats.lastMRV = pick.info.count;
            renderProgress.run();
            Slot s = pick.slot;
            String k = s.key();
            DictEntry entry = dictIndex.get(s.len);
            char[] pat = patternForSlot(grid, s);
            // Places w, recurses, and undoes the placement when the recursion fails.
            java.util.function.Function<String, Boolean> tryWord = (String w) -> {
                if (w == null) return false;
                if (used.contains(w)) return false;
                for (int i = 0; i < pat.length; i++) {
                    if (pat[i] != 0 && pat[i] != w.charAt(i)) return false;
                }
                Undo undo = placeWord(grid, s, w);
                if (undo == null) return false;
                used.add(w);
                assigned.put(k, w);
                if (backtrack()) return true;
                assigned.remove(k);
                used.remove(w);
                undoPlace(grid, undo);
                return false;
            };
            // Constrained slots iterate the matching indices; unconstrained ones index the word list directly.
            int[] idxs = (pick.info.indices != null && pick.info.indices.length > 0) ? pick.info.indices : null;
            int L = (idxs != null) ? idxs.length : entry.words.size();
            if (L == 0) { stats.backtracks++; return false; }
            int tries = Math.min(MAX_TRIES_PER_SLOT, L);
            int pos = (L == 1) ? 0 : rng.randint(0, L - 1);
            int step = (L <= 1) ? 1 : rng.randint(1, L - 1);
            // A stride sharing a factor with L would revisit candidates and skip others.
            // L - 1 is always coprime with L, so this loop stops at L - 1 at the latest.
            while (gcd(step, L) != 1) step++;
            for (int t = 0; t < tries; t++) {
                int idx = (idxs != null) ? idxs[pos] : pos;
                String w = entry.words.get(idx);
                if (tryWord.apply(w)) return true;
                if (timedOut()) return false; // budget exhausted: unwind instead of burning the remaining tries
                pos = (pos + step) % L;
            }
            stats.backtracks++;
            return false;
        }
    }
    // initial render (same feel)
    renderProgress.run();
    boolean ok = new Solver().backtrack();
    // clear the in-place progress line
    System.out.print("\r" + padRight("", 120) + "\r");
    System.out.flush();
    FillResult res = new FillResult();
    res.ok = ok;
    res.grid = grid;
    res.clueMap = assigned;
    stats.seconds = (System.currentTimeMillis() - t0) / 1000.0;
    res.stats = stats;
    // print a final summary line
    System.out.println(
        String.format(Locale.ROOT,
            "[######################] %d/%d slots | nodes=%d | backtracks=%d | mrv=%d | %.1fs",
            assigned.size(), TOTAL, stats.nodes, stats.backtracks, stats.lastMRV, stats.seconds
        )
    );
    return res;
}
/** Returns s extended with trailing spaces to length n; strings already that long are returned as is. */
static String padRight(String s, int n) {
    if (n <= s.length()) {
        return s;
    }
    StringBuilder padded = new StringBuilder(n);
    padded.append(s);
    while (padded.length() < n) {
        padded.append(' ');
    }
    return padded.toString();
}
// ---------------- Top-level generatePuzzle ----------------
/** Pair returned by generatePuzzle for a successful attempt. */
static final class PuzzleResult {
// mask that was passed to fillMask
char[][] mask;
// fill result obtained for that mask
FillResult filled;
}
/**
 * Loads the dictionary once, then repeatedly generates a mask and tries to fill it
 * (200 ms progress interval, 30 s budget per fill) until a fill succeeds or
 * opts.tries attempts are used up. Timing lines are printed for each phase.
 *
 * @return the first successful mask/fill pair, or null when every attempt failed
 */
static SwedishGenerator.PuzzleResult generatePuzzle(SwedishGenerator.Opts opts) {
    final Rng rng = new Rng(opts.seed);
    final long loadStart = System.nanoTime();
    final var dict = loadWords(opts.wordsPath);
    final long loadNanos = System.nanoTime() - loadStart;
    System.out.printf(Locale.ROOT, "LOAD_WORDS: %.3fs%n", loadNanos / 1e9);

    int attempt = 0;
    while (attempt < opts.tries) {
        attempt++;
        System.out.println("\nAttempt " + attempt + "/" + opts.tries);

        final long maskStart = System.nanoTime();
        final char[][] mask = generateMask(rng, dict.lenCounts, opts.pop, opts.gens);
        final long maskNanos = System.nanoTime() - maskStart;
        System.out.printf(Locale.ROOT, "MASK: %.3fs%n", maskNanos / 1e9);

        final long fillStart = System.nanoTime();
        final FillResult filled = fillMask(rng, mask, dict.index, 200, 30000);
        final long fillNanos = System.nanoTime() - fillStart;
        System.out.printf(Locale.ROOT, "FILL: %.3fms%n", fillNanos / 1e6);

        if (!filled.ok) {
            continue;
        }
        PuzzleResult result = new PuzzleResult();
        result.mask = mask;
        result.filled = filled;
        return result;
    }
    return null;
}
// ---------------- main ----------------
/**
 * Command-line entry point: parses the arguments, generates a puzzle and prints the
 * mask, the raw filled grid and the human-readable rendering. Exits with status 1
 * when no puzzle could be generated.
 */
public static void main(String[] args) {
    final var result = generatePuzzle(parseArgs(args));
    if (result == null) {
        System.out.println("No solution found within tries.");
        System.exit(1);
    }
    final char[][] filledGrid = result.filled.grid;

    System.out.println("\n=== GENERATED MASK ===");
    System.out.println(gridToString(result.mask));

    System.out.println("\n=== FILLED PUZZLE (RAW) ===");
    System.out.println(gridToString(filledGrid));

    System.out.println("\n=== FILLED PUZZLE (HUMAN) ===");
    System.out.println(renderHuman(filledGrid));
}
}

654
swedish_generator.js Normal file
View File

@@ -0,0 +1,654 @@
#!/usr/bin/env node
"use strict";
const fs = require("fs");
// Grid width (columns) and height (rows).
const W = 9;
const H = 8;
// Word slots shorter or longer than these bounds are not filled.
const MIN_LEN = 2;
const MAX_LEN = 8;
// Clue digit -> [row delta, column delta] of the direction the clue points in.
const DIRS = {
    "1": [-1, 0], // up
    "2": [0, 1],  // right
    "3": [1, 0],  // down
    "4": [0, -1], // left
};
// A clue cell holds one of the digits "1".."4".
const IS_DIGIT = (ch) => {
    return ch >= "1" && ch <= "4";
};
// An upper-case ASCII letter.
const IS_LETTER = (ch) => {
    return ch >= "A" && ch <= "Z";
};
// A cell that can carry a letter: still open ("#") or already filled.
const IS_LETTER_CELL = (ch) => {
    return ch === "#" || IS_LETTER(ch);
};
/** Prints the command-line synopsis and the default option values to stdout. */
function usage() {
    const lines = [
        "Usage:",
        "node swedish_generator.js [--seed N] [--pop N] [--gens N] [--tries N] [--words word-list.txt]",
        "Defaults:",
        "--seed 1",
        "--pop 18",
        "--gens 100",
        "--tries 50",
        "--words ./word-list.txt",
        "", // the text ends with a newline of its own
    ];
    console.log(lines.join("\n"));
}
/**
 * Parses process-style arguments (argv[0] and argv[1] are skipped).
 *
 * Recognised flags: --seed, --pop, --gens, --tries (base-10 integers) and --words (path).
 * --help / -h prints the usage text and exits with status 0.
 *
 * @param {string[]} argv argument vector, e.g. process.argv
 * @returns {{seed: number, pop: number, gens: number, tries: number, wordsPath: string}}
 * @throws {Error} on an unknown flag, a flag without a value, or a non-integer value
 */
function parseArgs(argv) {
    const out = {seed: 1, pop: 18, gens: 100, tries: 50, wordsPath: "./word-list.txt"};
    // flag -> option key for the integer-valued options
    const intFlags = new Map([
        ["--seed", "seed"],
        ["--pop", "pop"],
        ["--gens", "gens"],
        ["--tries", "tries"],
    ]);
    for (let i = 2; i < argv.length; i++) {
        const a = argv[i];
        if (a === "--help" || a === "-h") {
            usage();
            process.exit(0);
        }
        if (a !== "--words" && !intFlags.has(a)) throw new Error(`Unknown arg: ${a}`);
        const v = argv[++i];
        // previously a missing value silently became NaN / undefined
        if (v === undefined) throw new Error(`Missing value for ${a}`);
        if (a === "--words") {
            out.wordsPath = v;
            continue;
        }
        const n = parseInt(v, 10);
        if (Number.isNaN(n)) throw new Error(`Invalid integer for ${a}: ${v}`);
        out[intFlags.get(a)] = n;
    }
    return out;
}
/** Seeded RNG (xorshift32) */
/**
 * Creates a seeded xorshift32 generator (shifts 13 / 17 / 5).
 * A seed whose unsigned 32-bit value is 0 is replaced by 1.
 *
 * @returns {{nextU32: function(): number, int: function(number, number): number, float: function(): number}}
 *   nextU32: next unsigned 32-bit value; int(min, max): integer in [min, max] via modulo;
 *   float: nextU32() divided by 0xFFFFFFFF
 */
function makeRng(seed) {
    let state = (seed >>> 0) || 1;
    const rng = {
        nextU32() {
            state = (state ^ (state << 13)) >>> 0;
            state = (state ^ (state >>> 17)) >>> 0;
            state = (state ^ (state << 5)) >>> 0;
            return state >>> 0;
        },
        int(min, max) {
            const span = max - min + 1;
            const draw = this.nextU32();
            return min + (draw % span);
        },
        float() {
            const draw = this.nextU32();
            return draw / 0xFFFFFFFF;
        },
    };
    return rng;
}
/** Limits x to the range [a, b]: first capped at b, then raised to at least a. */
function clamp(x, a, b) {
    const capped = Math.min(b, x);
    return Math.max(a, capped);
}
/** Creates an H x W grid in which every cell is "#". */
function makeEmptyGrid() {
    const rows = [];
    for (let r = 0; r < H; r++) {
        const row = [];
        for (let c = 0; c < W; c++) row.push("#");
        rows.push(row);
    }
    return rows;
}
/** Returns a new grid whose rows are shallow copies of the rows of g. */
function deepCopyGrid(g) {
    const copy = [];
    for (let r = 0; r < g.length; r++) {
        copy.push(g[r].slice());
    }
    return copy;
}
/** Serialises a grid: the cells of each row concatenated, rows separated by "\n". */
function gridToString(g) {
    const lines = [];
    for (let r = 0; r < g.length; r++) {
        lines.push(g[r].join(""));
    }
    return lines.join("\n");
}
/** Like gridToString, but every clue digit is shown as a space. */
function renderHuman(g) {
    const lines = [];
    for (const row of g) {
        let line = "";
        for (const ch of row) {
            line += IS_DIGIT(ch) ? " " : ch;
        }
        lines.push(line);
    }
    return lines.join("\n");
}
/** --- Words / index --- */
/**
 * Loads the word list and builds the per-length lookup index.
 *
 * Lines are trimmed and upper-cased; only words of 2..8 letters A-Z are kept.
 * Words that become identical after upper-casing are kept once, so a word cannot
 * be counted twice as a candidate. If the file cannot be read, a small built-in
 * list is used and a warning is written to stderr.
 *
 * @param {string} wordsPath path of a text file with one word per line
 * @returns {{words: string[], index: Map<number, {words: string[], pos: number[][][]}>, lenCounts: Map<number, number>}}
 *   index.get(len).pos[i][k] lists, in ascending order, the indices into
 *   index.get(len).words of the words having letter k (0 = "A") at position i
 */
function loadWords(wordsPath) {
    let raw;
    try {
        raw = fs.readFileSync(wordsPath, "utf8");
    } catch (err) {
        // keep the best-effort fallback, but make it visible
        console.warn(`loadWords: cannot read ${wordsPath} (${err.message}); using built-in fallback list`);
        raw = "EU\nUUR\nAUTO\nBOOM\nHUIS\nKAT\nZEE\nRODE\nDRAAD\nKENNIS\nNETWERK\nPAKTE\n";
    }
    const seen = new Set();
    const words = [];
    for (const line of raw.split(/\r?\n/g)) {
        const w = line.trim().toUpperCase();
        if (!/^[A-Z]{2,8}$/.test(w) || seen.has(w)) continue;
        seen.add(w);
        words.push(w);
    }
    // index[len] = { words: string[], pos: Array(len) of [26 arrays of indices] }
    const index = new Map();
    const lenCounts = new Map();
    for (const w of words) {
        const L = w.length;
        lenCounts.set(L, (lenCounts.get(L) || 0) + 1);
        if (!index.has(L)) {
            const pos = Array.from({length: L}, () =>
                Array.from({length: 26}, () => [])
            );
            index.set(L, {words: [], pos});
        }
        const entry = index.get(L);
        const idx = entry.words.length;
        entry.words.push(w);
        for (let i = 0; i < L; i++) {
            entry.pos[i][w.charCodeAt(i) - 65].push(idx); // 65 = "A"
        }
    }
    return {words, index, lenCounts};
}
/** Merges two ascending arrays and returns the values present in both, in order. */
function intersectSorted(a, b) {
    const common = [];
    for (let ai = 0, bi = 0; ai < a.length && bi < b.length;) {
        const left = a[ai];
        const right = b[bi];
        if (left < right) {
            ai++;
        } else if (left === right) {
            common.push(left);
            ai++;
            bi++;
        } else {
            bi++;
        }
    }
    return common;
}
/** returns {indices?: number[], count: number} WITHOUT allocating huge arrays */
/**
 * Counts the words of a dictionary entry that match a pattern.
 *
 * @param entry   per-length dictionary entry ({words, pos})
 * @param pattern array with a letter for fixed positions and a falsy value elsewhere
 * @returns {{indices: ?number[], count: number}} indices is null when no position is
 *   fixed (count is then the size of the whole word list); otherwise the word indices
 *   matching every fixed letter
 */
function candidateInfoForPattern(entry, pattern /* array char|null */) {
    const constraints = [];
    pattern.forEach((ch, i) => {
        if (ch && IS_LETTER(ch)) {
            constraints.push(entry.pos[i][ch.charCodeAt(0) - 65]);
        }
    });
    if (constraints.length === 0) {
        return {indices: null, count: entry.words.length}; // unconstrained
    }
    // start from the shortest list so the intersections stay small
    constraints.sort((x, y) => x.length - y.length);
    let matches = constraints[0];
    for (const list of constraints.slice(1)) {
        matches = intersectSorted(matches, list);
        if (matches.length === 0) break;
    }
    return {indices: matches, count: matches.length};
}
/** --- Slots --- */
/**
 * Collects one slot per clue digit whose neighbouring cell (in the clue's direction)
 * is a letter cell. A slot runs from that neighbour until the grid edge or the first
 * non-letter cell; collection stops once MAX_LEN + 1 cells have been gathered.
 *
 * @returns {Array<{clue: Array, dir: string, cells: number[][], len: number}>}
 */
function extractSlots(grid) {
    const found = [];
    const inside = (r, c) => r >= 0 && r < H && c >= 0 && c < W;
    for (let row = 0; row < H; row++) {
        for (let col = 0; col < W; col++) {
            const digit = grid[row][col];
            if (!IS_DIGIT(digit)) continue;
            const [stepR, stepC] = DIRS[digit];
            let r = row + stepR;
            let c = col + stepC;
            if (!inside(r, c) || !IS_LETTER_CELL(grid[r][c])) continue;
            const cells = [];
            do {
                cells.push([r, c]);
                r += stepR;
                c += stepC;
            } while (cells.length <= MAX_LEN && inside(r, c) && IS_LETTER_CELL(grid[r][c]));
            found.push({clue: [row, col, digit], dir: digit, cells, len: cells.length});
        }
    }
    return found;
}
/**
 * Tells whether a clue at (r, c) pointing in direction d has at least MIN_LEN
 * consecutive letter cells in front of it (counting stops at MAX_LEN).
 */
function hasRoomForClue(grid, r, c, d) {
    const [stepR, stepC] = DIRS[d];
    let free = 0;
    for (let y = r + stepR, x = c + stepC; free < MAX_LEN; y += stepR, x += stepC) {
        if (y < 0 || y >= H || x < 0 || x >= W) break;
        if (!IS_LETTER_CELL(grid[y][x])) break;
        free++;
    }
    return free >= MIN_LEN;
}
/** --- FAST mask fitness (structural only) --- */
/**
 * Structural penalty of a mask (lower is better); returns 1e9 when the mask has no slot.
 *
 * Penalised: distance of the clue count from 25% of the cells, slots outside
 * [MIN_LEN, MAX_LEN], slot lengths without dictionary words, letter cells not
 * covered in both directions, clusters of 8-connected clues, and letter cells
 * with three or more blocked sides.
 */
function maskFitness(grid, lenCounts) {
    const inGrid = (r, c) => r >= 0 && r < H && c >= 0 && c < W;
    const matrix = (fill) => Array.from({length: H}, () => Array(W).fill(fill));
    const coords = [];
    for (let r = 0; r < H; r++) for (let c = 0; c < W; c++) coords.push([r, c]);

    // clue density (avoid all digits)
    const clueCount = coords.filter(([r, c]) => IS_DIGIT(grid[r][c])).length;
    const targetClues = Math.round(W * H * 0.25); // ~18
    let penalty = 8 * Math.abs(clueCount - targetClues);

    const slots = extractSlots(grid);
    if (slots.length === 0) return 1e9;

    // slot lengths, dictionary availability, and per-direction coverage counts
    const covH = matrix(0);
    const covV = matrix(0);
    for (const slot of slots) {
        const len = slot.len;
        if (len < MIN_LEN) penalty += 8000;
        if (len > MAX_LEN) penalty += 8000 + (len - MAX_LEN) * 500;
        if (len >= MIN_LEN && len <= MAX_LEN && !lenCounts.get(len)) penalty += 12000;
        const coverage = (slot.dir === "2" || slot.dir === "4") ? covH : covV;
        for (const [r, c] of slot.cells) coverage[r][c] += 1;
    }

    // coverage penalties per letter cell
    for (const [r, c] of coords) {
        if (!IS_LETTER_CELL(grid[r][c])) continue;
        const h = covH[r][c];
        const v = covV[r][c];
        if (h > 0 && v > 0) continue; // crossed in both directions: ideal
        const total = h + v;
        if (total === 0) penalty += 1500;
        else if (total === 1) penalty += 200;
        else penalty += 600; // covered several times, but in one direction only
    }

    // clue clustering (8-connected flood fill)
    const visited = matrix(false);
    const around = [[-1, -1], [-1, 0], [-1, 1], [0, -1], [0, 1], [1, -1], [1, 0], [1, 1]];
    for (const [r, c] of coords) {
        if (visited[r][c] || !IS_DIGIT(grid[r][c])) continue;
        visited[r][c] = true;
        const pending = [[r, c]];
        let size = 0;
        while (pending.length > 0) {
            const [cr, cc] = pending.pop();
            size += 1;
            for (const [dr, dc] of around) {
                const nr = cr + dr;
                const nc = cc + dc;
                if (!inGrid(nr, nc) || visited[nr][nc] || !IS_DIGIT(grid[nr][nc])) continue;
                visited[nr][nc] = true;
                pending.push([nr, nc]);
            }
        }
        if (size >= 2) penalty += (size - 1) * 120;
    }

    // dead-end-ish letter cell (3+ walls; the grid edge counts as a wall)
    const sides = [[-1, 0], [1, 0], [0, -1], [0, 1]];
    for (const [r, c] of coords) {
        if (!IS_LETTER_CELL(grid[r][c])) continue;
        const walls = sides.filter(([dr, dc]) => {
            const nr = r + dr;
            const nc = c + dc;
            return !inGrid(nr, nc) || !IS_LETTER_CELL(grid[nr][nc]);
        }).length;
        if (walls >= 3) penalty += 400;
    }
    return penalty;
}
/** --- Mask generation (memetic-ish + hillclimb) --- */
/**
 * Builds a random mask: starting from an all-"#" grid, drops clue digits on random
 * non-clue cells until 25% of the cells are clues or 4000 attempts have been made.
 * A clue that has no room in its direction is removed again.
 */
function randomMask(rng) {
    const mask = makeEmptyGrid();
    const wanted = Math.round(W * H * 0.25); // ~18
    let placed = 0;
    for (let attempts = 0; placed < wanted && attempts < 4000; attempts++) {
        const row = rng.int(0, H - 1);
        const col = rng.int(0, W - 1);
        if (IS_DIGIT(mask[row][col])) continue;
        const digit = String(rng.int(1, 4));
        mask[row][col] = digit;
        if (hasRoomForClue(mask, row, col, digit)) {
            placed += 1;
        } else {
            mask[row][col] = "#";
        }
    }
    return mask;
}
/**
 * Returns a mutated copy of grid: four cells near one random centre are toggled.
 * A clue becomes "#"; any other cell receives a random clue digit, which is
 * reverted to "#" when the clue has no room in its direction.
 */
function mutate(rng, grid) {
    const next = deepCopyGrid(grid);
    const centerR = rng.int(0, H - 1);
    const centerC = rng.int(0, W - 1);
    // sum of two draws in [-2, 2]: offsets cluster around the centre
    const jitter = () => rng.int(-2, 2) + rng.int(-2, 2);
    for (let remaining = 4; remaining > 0; remaining--) {
        const r = clamp(centerR + jitter(), 0, H - 1);
        const c = clamp(centerC + jitter(), 0, W - 1);
        if (IS_DIGIT(next[r][c])) {
            next[r][c] = "#";
            continue;
        }
        const digit = String(rng.int(1, 4));
        next[r][c] = digit;
        if (!hasRoomForClue(next, r, c, digit)) {
            next[r][c] = "#";
        }
    }
    return next;
}
/**
 * Combines two masks along a random line through the grid centre: cells on the
 * non-negative side come from a, the others from b. Clues left without room in
 * their direction are replaced by "#".
 */
function crossover(rng, a, b) {
    const midR = (H - 1) / 2;
    const midC = (W - 1) / 2;
    const angle = rng.float() * Math.PI;
    const normalR = Math.cos(angle);
    const normalC = Math.sin(angle);
    const child = Array.from({length: H}, (_row, r) =>
        Array.from({length: W}, (_cell, c) => {
            const x = r - midR, y = c - midC;
            const side = x * normalR + y * normalC;
            return side >= 0 ? a[r][c] : b[r][c];
        })
    );
    // cleanup invalid clues
    for (let r = 0; r < H; r++) {
        for (let c = 0; c < W; c++) {
            const ch = child[r][c];
            if (!IS_DIGIT(ch)) continue;
            if (!hasRoomForClue(child, r, c, ch)) child[r][c] = "#";
        }
    }
    return child;
}
/**
 * Greedy local search: keeps mutating the current best mask and accepts a mutation
 * only when its fitness is strictly lower. Stops after `limit` consecutive
 * non-improving mutations and returns the best mask found.
 */
function hillclimb(rng, start, lenCounts, limit) {
    let current = deepCopyGrid(start);
    let currentScore = maskFitness(current, lenCounts);
    for (let misses = 0; misses < limit;) {
        const candidate = mutate(rng, current);
        const score = maskFitness(candidate, lenCounts);
        if (score < currentScore) {
            current = candidate;
            currentScore = score;
            misses = 0;
        } else {
            misses += 1;
        }
    }
    return current;
}
/** Fraction (0..1) of the H x W cells that hold the same character in both grids. */
function similarity(a, b) {
    const total = W * H;
    let matches = 0;
    for (let i = 0; i < total; i++) {
        const r = Math.floor(i / W);
        const c = i % W;
        if (a[r][c] === b[r][c]) matches += 1;
    }
    return matches / total;
}
/**
 * Evolves a population of masks and returns the one with the lowest fitness.
 *
 * Each generation creates max(popSize, floor(1.5 * popSize)) children by crossover
 * of two random parents followed by a short hill climb, merges them with the
 * parents, sorts by fitness and keeps up to popSize masks, skipping any mask that
 * is more than 92% identical to one already kept.
 *
 * Fitness is computed once per mask and cached; it used to be recomputed in every
 * sort comparison.
 *
 * @param rng       seeded random source
 * @param lenCounts Map word length -> number of dictionary words
 * @param popSize   population size, must be at least 1
 * @param gens      number of generations
 * @returns {string[][]} the best mask
 * @throws {RangeError} when popSize is below 1 (there would be no mask to return)
 */
function generateMask(rng, lenCounts, popSize, gens) {
    if (!(popSize >= 1)) throw new RangeError(`popSize must be >= 1, got ${popSize}`);
    console.log(`generateMask init pop: ${popSize}`);
    // pair every mask with its fitness so sorting never has to recompute it
    const scored = (grid) => ({grid, fit: maskFitness(grid, lenCounts)});
    const byFitness = (x, y) => x.fit - y.fit;
    let pop = [];
    for (let i = 0; i < popSize; i++) {
        const g = randomMask(rng);
        pop.push(scored(hillclimb(rng, g, lenCounts, 180))); // faster init
    }
    for (let gen = 0; gen < gens; gen++) {
        const children = [];
        const pairs = Math.max(popSize, Math.floor(popSize * 1.5));
        for (let k = 0; k < pairs; k++) {
            const p1 = pop[rng.int(0, pop.length - 1)].grid;
            const p2 = pop[rng.int(0, pop.length - 1)].grid;
            const child = crossover(rng, p1, p2);
            children.push(scored(hillclimb(rng, child, lenCounts, 70))); // light repair
        }
        pop = pop.concat(children);
        pop.sort(byFitness);
        // similarity cull
        const next = [];
        for (const cand of pop) {
            if (next.length >= popSize) break;
            const tooClose = next.some(kept => similarity(cand.grid, kept.grid) > 0.92);
            if (!tooClose) next.push(cand);
        }
        pop = next;
        if ((gen % 10) === 0) {
            const bestF = pop[0].fit;
            console.log(`  gen ${gen}/${gens}  bestFitness=${bestF}`);
        }
    }
    // needed when gens is 0: the initial population has never been sorted
    pop.sort(byFitness);
    return pop[0].grid;
}
/** --- Fill (CSP) with NO huge candidate arrays --- */
/**
 * Fills the word slots of a mask with dictionary words (backtracking search).
 *
 * Works on a copy of the mask; only slots with MIN_LEN <= len <= MAX_LEN take part.
 * At each node the unassigned slot with the fewest matching words is chosen (ties:
 * higher crossing score) and at most MAX_TRIES_PER_SLOT candidates are tried from a
 * random start with a random stride. The stride is made coprime with the candidate
 * count so no candidate is visited twice. A word is used for at most one slot.
 *
 * @param rng       seeded random source (uses rng.int)
 * @param mask      grid to fill (not modified)
 * @param dictIndex Map word length -> {words, pos} as built by loadWords
 * @param opts      {logEveryMs = 250, timeLimitMs = 0}; a time limit of 0 means no limit
 * @returns {{ok: boolean, grid: string[][], clueMap: Object, stats: {nodes: number, backtracks: number, seconds: number}}}
 *   clueMap maps "row,col:dir" of the clue cell to the assigned word
 */
function fillMask(rng, mask, dictIndex, opts = {}) {
    const grid = deepCopyGrid(mask);
    const slots = extractSlots(grid).filter(s => s.len >= MIN_LEN && s.len <= MAX_LEN);
    const used = new Set();
    const assigned = new Map();
    // progress options
    const logEveryMs = opts.logEveryMs ?? 250;
    const timeLimitMs = opts.timeLimitMs ?? 0; // 0 = no limit
    // crossing weight precompute: number of usable slots covering each cell
    const cellCount = Array.from({length: H}, () => Array(W).fill(0));
    for (const s of slots) for (const [r, c] of s.cells) cellCount[r][c]++;

    function slotKey(s) { return `${s.clue[0]},${s.clue[1]}:${s.clue[2]}`; }

    function patternForSlot(s) {
        return s.cells.map(([r, c]) => {
            const ch = grid[r][c];
            return IS_LETTER(ch) ? ch : null;
        });
    }

    function slotScore(s) {
        let cross = 0;
        for (const [r, c] of s.cells) cross += (cellCount[r][c] - 1);
        return cross * 10 + s.len;
    }

    function undoPlace(undo) { for (const [r, c, prev] of undo) grid[r][c] = prev; }

    // Writes w into the slot; returns the undo list, or null on a letter conflict.
    function placeWord(s, w) {
        const undo = [];
        for (let i = 0; i < s.cells.length; i++) {
            const [r, c] = s.cells[i];
            const prev = grid[r][c];
            const ch = w[i];
            if (prev === "#") {
                undo.push([r, c, prev]);
                grid[r][c] = ch;
            } else if (prev !== ch) {
                undoPlace(undo); // do not leave a half-written word in the grid
                return null;
            }
        }
        return undo;
    }

    // ---- progress bar ----
    const t0 = Date.now();
    let lastLog = t0;
    let nodes = 0;
    let backtracks = 0;
    let lastMRV = 0;

    function renderProgress(final = false) {
        const now = Date.now();
        if (!final && (now - lastLog) < logEveryMs) return;
        lastLog = now;
        const done = assigned.size;
        const total = slots.length;
        const pct = total ? Math.floor((done / total) * 100) : 100;
        const barLen = 22;
        const filled = Math.min(barLen, Math.floor((pct / 100) * barLen));
        const bar = `[${"#".repeat(filled)}${"-".repeat(barLen - filled)}]`;
        const elapsed = ((now - t0) / 1000).toFixed(1);
        const msg =
            `${bar} ${done}/${total} slots | nodes=${nodes} | backtracks=${backtracks} | mrv=${lastMRV} | ${elapsed}s`;
        process.stdout.write("\r" + msg.padEnd(120));
        if (final) process.stdout.write("\n");
    }

    const timedOut = () => Boolean(timeLimitMs) && (Date.now() - t0) > timeLimitMs;

    const gcd = (a, b) => {
        while (b !== 0) [a, b] = [b, a % b];
        return a;
    };

    // {slot, info}: slot === null with info.done means finished; slot === null otherwise is a dead end
    function chooseMRV() {
        let best = null;
        let bestInfo = null;
        for (const s of slots) {
            const k = slotKey(s);
            if (assigned.has(k)) continue;
            const entry = dictIndex.get(s.len);
            if (!entry) return {slot: null, info: null};
            const pat = patternForSlot(s);
            const info = candidateInfoForPattern(entry, pat);
            if (info.count === 0) return {slot: null, info: null};
            if (
                !best ||
                info.count < bestInfo.count ||
                (info.count === bestInfo.count && slotScore(s) > slotScore(best))
            ) {
                best = s;
                bestInfo = info;
                if (info.count <= 1) break;
            }
        }
        if (!best) return {slot: null, info: {done: true}};
        return {slot: best, info: bestInfo};
    }

    const MAX_TRIES_PER_SLOT = 500;

    function backtrack() {
        nodes++;
        if (timedOut()) return false;
        const pick = chooseMRV();
        if (!pick.slot && pick.info && pick.info.done) return true;
        if (!pick.slot) {
            backtracks++;
            return false;
        }
        lastMRV = pick.info.count;
        renderProgress(false);
        const s = pick.slot;
        const k = slotKey(s);
        const entry = dictIndex.get(s.len);
        const pat = patternForSlot(s);
        // Places w, recurses, and undoes the placement when the recursion fails.
        const tryWord = (w) => {
            if (!w) return false;
            if (used.has(w)) return false;
            for (let i = 0; i < pat.length; i++) {
                if (pat[i] && pat[i] !== w[i]) return false;
            }
            const undo = placeWord(s, w);
            if (!undo) return false;
            used.add(w);
            assigned.set(k, w);
            if (backtrack()) return true;
            assigned.delete(k);
            used.delete(w);
            undoPlace(undo);
            return false;
        };
        // constrained: walk the matching indices; unconstrained: index the word list directly
        const idxs = (pick.info.indices && pick.info.indices.length) ? pick.info.indices : null;
        const L = idxs ? idxs.length : entry.words.length;
        if (L === 0) {
            backtracks++;
            return false;
        }
        const tries = Math.min(MAX_TRIES_PER_SLOT, L);
        // safe stepping even for L=1
        let pos = (L === 1) ? 0 : rng.int(0, L - 1);
        let step = (L <= 1) ? 1 : rng.int(1, L - 1);
        // a stride sharing a factor with L would revisit candidates and skip others;
        // L - 1 is always coprime with L, so this stops at L - 1 at the latest
        while (gcd(step, L) !== 1) step++;
        for (let t = 0; t < tries; t++) {
            const idx = idxs ? idxs[pos] : pos;
            if (tryWord(entry.words[idx])) return true;
            if (timedOut()) return false; // budget exhausted: unwind without burning the remaining tries
            pos = (pos + step) % L;
        }
        backtracks++;
        return false;
    }

    renderProgress(false);
    const ok = backtrack();
    renderProgress(true);
    const clueMap = {};
    for (const [k, v] of assigned.entries()) clueMap[k] = v;
    return {ok, grid, clueMap, stats: {nodes, backtracks, seconds: (Date.now() - t0) / 1000}};
}
/** --- Top-level: try mask+fill until success --- */
/**
 * Loads the dictionary, then alternates mask generation and filling (200 ms progress
 * interval, 30 s budget per fill) until a fill succeeds or opts.tries attempts are
 * used. Each phase is timed with console.time.
 *
 * @returns {{mask: string[][], filled: Object}|null} null when every attempt failed
 */
function generatePuzzle(opts) {
    const rng = makeRng(opts.seed);
    console.time("LOAD_WORDS");
    const dict = loadWords(opts.wordsPath);
    console.timeEnd("LOAD_WORDS");

    const fillOptions = {logEveryMs: 200, timeLimitMs: 30000};
    let attempt = 0;
    while (attempt < opts.tries) {
        attempt += 1;
        console.log(`\nAttempt ${attempt}/${opts.tries}`);

        console.time("MASK");
        const mask = generateMask(rng, dict.lenCounts, opts.pop, opts.gens);
        console.timeEnd("MASK");

        console.time("FILL");
        const filled = fillMask(rng, mask, dict.index, fillOptions);
        console.timeEnd("FILL");

        if (filled.ok) {
            return {mask, filled};
        }
    }
    return null;
}
// Public API of this module: argument parsing, puzzle generation and grid serialisation.
module.exports = {parseArgs, generatePuzzle, gridToString};

View File

@@ -0,0 +1,16 @@
# Image that runs the daily puzzle generator on a cron schedule via supercronic.
FROM python:3.13-slim
# curl is needed to download supercronic below.
RUN apt-get update \
&& apt-get install -y --no-install-recommends ca-certificates tzdata curl \
&& rm -rf /var/lib/apt/lists/*
# supercronic
# Release to install; override with: docker build --build-arg SUPERCRONIC_VERSION=vX.Y.Z
ARG SUPERCRONIC_VERSION=v0.2.30
# NOTE(review): the binary is downloaded without checksum verification and only for
# linux-amd64; consider pinning the published SHA1 of the chosen release.
RUN curl -fsSL -o /usr/local/bin/supercronic \
"https://github.com/aptible/supercronic/releases/download/${SUPERCRONIC_VERSION}/supercronic-linux-amd64" \
&& chmod +x /usr/local/bin/supercronic
WORKDIR /app
COPY tools/puzzle-gen/generate_daily_puzzles.py /app/generate_daily_puzzles.py
COPY tools/puzzle-gen/crontab /app/crontab
# supercronic stays in the foreground and runs the jobs listed in /app/crontab
CMD ["/usr/local/bin/supercronic", "/app/crontab"]

1
tools/puzzle-gen/crontab Normal file
View File

@@ -0,0 +1 @@
# Run the puzzle generator every day at 03:15 (scheduler time zone; TODO confirm which TZ the container uses).
15 3 * * * python /app/generate_daily_puzzles.py

View File

@@ -0,0 +1,399 @@
#!/usr/bin/env python3
import datetime as dt
import json
import os
import random
import re
import urllib.request
import xml.etree.ElementTree as ET
import json, re
# --- USER-FRIENDLY CONFIG ---
# Accepted answers: 3 to 7 upper-case ASCII letters (short, common words)
WORD_RE = re.compile(r"^[A-Z]{3,7}$")
# Marker for a grid cell that holds no letter
EMPTY = " "
# Side length of the square working grid (kept small for denser puzzles)
SIZE = 10
# Number of words requested from the LLM per puzzle (more words, since they are short)
TARGET_WORDS = 15
# Minimum number of valid words required before a puzzle is attempted
MIN_ACCEPT_WORDS = 10
# RSS feeds whose items provide the puzzle themes
FEEDS = [
"https://feeds.nos.nl/nosnieuwsalgemeen",
"https://feeds.nos.nl/nosnieuwstech",
]
def env(name, default=None):
    """Return environment variable ``name``, or ``default`` when it is unset or empty."""
    value = os.getenv(name)
    if value is None or value == "":
        return default
    return value
def http_get(url, timeout=15):
    """Fetch ``url`` with a GET request and return the raw response body as bytes."""
    headers = {"User-Agent": "puzzle-gen/1.0"}
    request = urllib.request.Request(url, headers=headers)
    response = urllib.request.urlopen(request, timeout=timeout)
    with response:
        return response.read()
def http_post_json(url, payload, timeout=45):
    """POST ``payload`` as UTF-8 JSON to ``url`` and return the decoded JSON response.

    The request carries a fixed bearer token ("lm-studio") in the Authorization header.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer lm-studio",
        "User-Agent": "puzzle-gen/1.0",
    }
    body = json.dumps(payload).encode("utf-8")
    request = urllib.request.Request(url, data=body, headers=headers, method="POST")
    response = urllib.request.urlopen(request, timeout=timeout)
    with response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
def fetch_rss_items(url, limit=12):
    """Download an RSS feed and return up to ``limit`` ``(title, description)`` pairs.

    Items without a title are skipped. For a root element whose tag ends in "rss"
    the items are read from its ``channel`` child; otherwise they are read from the
    root itself. A feed without a channel element yields an empty list instead of
    raising AttributeError.

    Raises whatever ``http_get`` raises, and ``xml.etree.ElementTree.ParseError``
    for malformed XML.
    """
    raw = http_get(url)
    root = ET.fromstring(raw)
    channel = root.find("channel") if root.tag.lower().endswith("rss") else root
    if channel is None:
        # <rss> without <channel>: nothing to read
        return []
    items = []
    for it in channel.findall("item"):
        title = (it.findtext("title") or "").strip()
        desc = (it.findtext("description") or "").strip()
        if title:
            items.append((title, desc))
        if len(items) >= limit:
            break
    return items
def safe_slug(s, maxlen=50):
    """Turn ``s`` into a lower-case, hyphen-separated ASCII slug of at most ``maxlen`` characters.

    Every run of characters other than a-z and 0-9 becomes a single "-". Hyphens at
    either end are removed, also after truncation, so the slug never ends in "-".
    Returns "news" when nothing is left.
    """
    s = s.lower()
    s = re.sub(r"[^a-z0-9]+", "-", s).strip("-")
    # truncation can cut right after a separator; strip again so no "-" trails
    return s[:maxlen].strip("-") or "news"
def extract_first_json(text: str):
    """Return the first JSON object or array that can be parsed from ``text``.

    Every "{" or "[" is tried in order as a possible start, so a stray bracket in
    surrounding prose (e.g. "[note]") no longer hides a valid value that follows it.
    Returns ``None`` when ``text`` is empty or contains no parsable object/array.
    """
    if not text:
        return None
    decoder = json.JSONDecoder()
    for match in re.finditer(r"[{\[]", text):
        try:
            return decoder.raw_decode(text, match.start())[0]
        except json.JSONDecodeError:
            continue
    return None
def normalize_word(raw: str) -> str:
    """Drop every character that is not an ASCII letter and upper-case the rest.

    A falsy ``raw`` (``None`` or "") yields "".
    """
    text = raw or ""
    letters_only = re.sub(r"[^A-Za-z]", "", text)
    return letters_only.upper()
def sanitize_wordcluemap(obj):
    """Normalise an LLM answer into a ``{WORD: clue}`` dict.

    Accepted inputs:
      - dict: {"WORD": "clue", ...}
      - list of dicts with the word under "word" / "WOORD" / "Word" and the clue
        under "clue" / "CLUE" / "hint" / "HINT"
    Any other input gives an empty dict. Pairs are dropped when word or clue is not
    a string, when the normalised word does not match WORD_RE, or when the stripped
    clue is empty. A word that occurs again overwrites its earlier clue.
    """
    if isinstance(obj, dict):
        pairs = list(obj.items())
    elif isinstance(obj, list):
        pairs = [
            (
                entry.get("word") or entry.get("WOORD") or entry.get("Word"),
                entry.get("clue") or entry.get("CLUE") or entry.get("hint") or entry.get("HINT"),
            )
            for entry in obj
            if isinstance(entry, dict)
        ]
    else:
        return {}

    cleaned = {}
    for raw_word, raw_clue in pairs:
        if not (isinstance(raw_word, str) and isinstance(raw_clue, str)):
            continue
        word = normalize_word(raw_word)
        clue = raw_clue.strip()
        if WORD_RE.fullmatch(word) and clue:
            cleaned[word] = clue
    return cleaned
# ---- generator (no-touch) ----
def make_grid():
    """Return a new SIZE x SIZE grid (list of row lists) filled with EMPTY."""
    return [[EMPTY] * SIZE for _ in range(SIZE)]
def in_bounds(g, r, c):
    """Return True when (r, c) is a valid row/column index of grid ``g``."""
    if r < 0 or r >= len(g):
        return False
    return 0 <= c < len(g[0])
def can_place_notouch(g, word, r, c, direction):
    """Check whether ``word`` may start at (r, c) without touching other words.

    Placement is refused when the start is negative, when the word runs past the grid
    in its direction, when the cell just before or just after the word is filled,
    when a filled cell holds a different letter, or when a cell the word newly fills
    has a filled neighbour on either side (perpendicular to the word).
    """
    rows, cols = len(g), len(g[0])
    length = len(word)
    horizontal = direction == "horizontal"
    if r < 0 or c < 0:
        return False
    if horizontal and c + length > cols:
        return False
    if direction == "vertical" and r + length > rows:
        return False

    # unit step along the word; swapping the components gives the perpendicular step
    dr, dc = (0, 1) if horizontal else (1, 0)

    def occupied(rr, cc):
        return in_bounds(g, rr, cc) and g[rr][cc] != EMPTY

    # no "glue" before/after
    if occupied(r - dr, c - dc):
        return False
    if occupied(r + length * dr, c + length * dc):
        return False

    for i, ch in enumerate(word):
        rr, cc = r + i * dr, c + i * dc
        cell = g[rr][cc]
        if cell != EMPTY:
            # crossing an existing word: the letters must agree
            if cell != ch:
                return False
            continue
        if occupied(rr - dc, cc - dr):
            return False
        if occupied(rr + dc, cc + dr):
            return False
    return True
def place_word(g, word, r, c, direction):
    """Write ``word`` into grid ``g`` starting at (r, c).

    "horizontal" writes along the row; any other direction writes down the column.
    """
    dr, dc = (0, 1) if direction == "horizontal" else (1, 0)
    for offset, letter in enumerate(word):
        g[r + offset * dr][c + offset * dc] = letter
def find_spots(g, word, placed):
    """List every ``(row, col, direction)`` at which ``word`` can cross a placed word.

    For each letter shared with a placed word, the new word is laid perpendicular to
    it through that letter; the position is kept when ``can_place_notouch`` accepts
    it. The same spot can appear more than once.
    """
    candidates = []
    for entry in placed:
        across = entry["direction"] == "horizontal"
        new_direction = "vertical" if across else "horizontal"
        for i, placed_letter in enumerate(entry["word"]):
            cross_r = entry["row"] + (0 if across else i)
            cross_c = entry["col"] + (i if across else 0)
            for j, letter in enumerate(word):
                if letter != placed_letter:
                    continue
                if across:
                    r, c = cross_r - j, cross_c
                else:
                    r, c = cross_r, cross_c - j
                if can_place_notouch(g, word, r, c, new_direction):
                    candidates.append((r, c, new_direction))
    return candidates
def generate_puzzle(wordcluemap, rnd):
    """Lay out the words of ``wordcluemap`` as a crossing-word grid.

    The longest word is placed horizontally in the middle row. Every further word
    (longest first) is placed at a randomly chosen valid crossing, or skipped when
    none exists.

    Args:
        wordcluemap: dict mapping word -> clue.
        rnd: object with a ``shuffle(list)`` method, e.g. ``random.Random``.

    Returns:
        ``{"grid": grid, "placed": [...]}`` where each placed entry has the keys
        word, clue, row, col and direction; ``None`` when ``wordcluemap`` is empty
        or the first word cannot be placed.
    """
    words = sorted(wordcluemap.keys(), key=len, reverse=True)
    if not words:
        # nothing to place; previously this raised IndexError on words[0]
        return None
    g = make_grid()
    placed = []
    first = words[0]
    sr = SIZE // 2
    sc = (SIZE - len(first)) // 2
    if not can_place_notouch(g, first, sr, sc, "horizontal"):
        return None
    place_word(g, first, sr, sc, "horizontal")
    placed.append({"word": first, "clue": wordcluemap[first], "row": sr, "col": sc, "direction": "horizontal"})
    for w in words[1:]:
        spots = find_spots(g, w, placed)
        if not spots:
            continue
        # every spot is valid, so shuffling and taking the first picks one at random
        rnd.shuffle(spots)
        r, c, d = spots[0]
        place_word(g, w, r, c, d)
        placed.append({"word": w, "clue": wordcluemap[w], "row": r, "col": c, "direction": d})
    return {"grid": g, "placed": placed}
def export_format(puz, difficulty=1, rewards=None):
    """Convert a generated puzzle into the exported JSON structure.

    The grid is cropped to the bounding box of all letter cells and arrow cells (the
    cell just before each word's start) plus a one-cell border. Empty or off-grid
    cells are written as "#". Word coordinates are relative to the cropped grid.

    Returns a dict with the keys gridv2 (list of row strings), words, difficulty and
    rewards. ``rewards`` defaults to 50 coins, 2 stars and 1 hint.
    """
    if rewards is None:
        rewards = {"coins": 50, "stars": 2, "hints": 1}
    g = puz["grid"]
    placed = puz["placed"]
    H, W = len(g), len(g[0])

    def arrow_cell(p):
        # arrow cell: before the start
        if p["direction"] == "horizontal":
            return p["row"], p["col"] - 1
        return p["row"] - 1, p["col"]

    def letter_cells(p):
        across = p["direction"] == "horizontal"
        return [
            (p["row"] + (0 if across else i), p["col"] + (i if across else 0))
            for i in range(len(p["word"]))
        ]

    cells = []
    for p in placed:
        cells.extend(letter_cells(p))
        cells.append(arrow_cell(p))

    row_values = [r for r, _ in cells]
    col_values = [c for _, c in cells]
    minR, maxR = min(row_values) - 1, max(row_values) + 1
    minC, maxC = min(col_values) - 1, max(col_values) + 1

    def ch_at(r, c):
        if r < 0 or c < 0 or r >= H or c >= W:
            return "#"
        ch = g[r][c]
        return "#" if ch == EMPTY else ch

    gridv2 = [
        "".join(ch_at(r, c) for c in range(minC, maxC + 1))
        for r in range(minR, maxR + 1)
    ]

    words_out = []
    for p in placed:
        arrow_r, arrow_c = arrow_cell(p)
        words_out.append({
            "word": p["word"],
            "clue": p["clue"],
            "startRow": p["row"] - minR,
            "startCol": p["col"] - minC,
            "direction": p["direction"],
            "answer": p["word"],
            "arrowRow": arrow_r - minR,
            "arrowCol": arrow_c - minC,
        })
    return {"gridv2": gridv2, "words": words_out, "difficulty": difficulty, "rewards": rewards}
def list_models(base_url):
    """Return the non-empty model ids reported by ``{base_url}/models``.

    Best effort: any error (network, decoding, unexpected shape) yields ``[]``.
    """
    try:
        body = http_get(f"{base_url}/models")
        listing = json.loads(body.decode("utf-8"))
        ids = []
        for model in listing.get("data", []):
            if model.get("id"):
                ids.append(model.get("id"))
        return ids
    except Exception:
        return []
def llm_make_wordcluemap(base_url, model, title, desc, n_words=12):
    """Ask the chat-completions endpoint for a ``{WORD: clue}`` map about a news item.

    The first answer is parsed and sanitised. When it yields fewer than
    MIN_ACCEPT_WORDS valid words, a second "repair" request asks the model to
    rewrite its own answer; the repaired map is used only if it has more words.

    Returns the sanitised dict (possibly with fewer than MIN_ACCEPT_WORDS entries).
    Errors from the HTTP call or an unexpected response shape propagate.
    """
    def ask(user_text):
        # one chat round-trip; returns the raw text of the first choice
        request_body = {
            "model": model,
            "temperature": 0.7,
            "messages": [
                {"role": "system", "content": "Return STRICT JSON object only."},
                {"role": "user", "content": user_text},
            ],
        }
        reply = http_post_json(f"{base_url}/chat/completions", request_body)
        return reply["choices"][0]["message"]["content"]

    prompt = f"""
Geef ALLEEN een JSON object terug (geen array, geen markdown).
Formaat exact:
{{
"WOORD": "clue",
...
}}
REGELS:
- WOORD: alleen letters A-Z, geen streepjes/cijfers, lengte 3..7.
- Gebruik KORTE, GEBRUIKELIJKE Nederlandse woorden (geen jargon, geen moeilijke termen).
- Clue: korte, duidelijke hint in het Nederlands.
- Maak {n_words} items.
Thema: {title}
Context: {desc[:260]}
""".strip()
    content = ask(prompt)
    wc = sanitize_wordcluemap(extract_first_json(content))
    if len(wc) >= MIN_ACCEPT_WORDS:
        return wc

    # Aggressive repair for short words
    repair = f"""
Zet dit om naar een STRICT JSON OBJECT (geen array) "WOORD":"clue".
KRITIEK:
- WOORD: A-Z only, lengte 3..7. GEEN lange woorden!
- Gebruik ALLEEN korte, bekende Nederlandse woorden bij twijfel.
- Vervang ongeldige/moeilijke woorden door veelvoorkomende synoniemen.
Input:
{content}
""".strip()
    repaired = sanitize_wordcluemap(extract_first_json(ask(repair)))
    return repaired if len(repaired) > len(wc) else wc
def main():
    """Generate today's puzzles from the RSS feeds and write them to OUT_DIR.

    Environment: LM_STUDIO_BASE_URL, OUT_DIR, PUZZLES_PER_DAY and LM_MODEL (each has a
    default). For every puzzle a random feed item is chosen (seeded with today's
    date), words are requested from the LLM, a grid is generated and the result is
    written as ``crossword_<date>_<nn>_<slug>.json``. Finally ``index.json`` lists
    today's files.

    A feed that cannot be read, or an LLM request that fails, is reported on stderr
    and skipped, so one failure no longer aborts the run before the index is written.
    Exits with an error when no feed item could be fetched at all.
    """
    import sys  # local import: only needed for the stderr diagnostics below

    base_url = env("LM_STUDIO_BASE_URL", "http://192.168.1.159:1234/v1")
    out_dir = env("OUT_DIR", "/data/puzzles")
    per_day = int(env("PUZZLES_PER_DAY", "3"))
    today = dt.date.today().isoformat()
    rnd = random.Random(today)
    os.makedirs(out_dir, exist_ok=True)
    items = []
    for f in FEEDS:
        try:
            items.extend(fetch_rss_items(f))
        except Exception as exc:
            # best effort per feed, but say which feed failed and why
            print(f"warning: could not read feed {f}: {exc}", file=sys.stderr)
    if not items:
        raise SystemExit("No RSS items found")
    models = list_models(base_url)
    model = env("LM_MODEL", models[0] if models else "model-identifier")
    made = 0
    for idx in range(1, per_day + 1):
        title, desc = rnd.choice(items)
        slug = safe_slug(title)
        try:
            wc = llm_make_wordcluemap(base_url, model, title, desc, n_words=TARGET_WORDS)
        except Exception as exc:
            print(f"warning: word generation failed for puzzle {idx} ({slug}): {exc}", file=sys.stderr)
            continue
        # Stricter validation: need more words since they're shorter
        if len(wc) < MIN_ACCEPT_WORDS:
            continue
        puz = generate_puzzle(wc, rnd)
        # Require at least 7 placed words for a decent puzzle
        if not puz or len(puz["placed"]) < 7:
            continue
        exported = export_format(puz, difficulty=1, rewards={"coins": 50, "stars": 2, "hints": 1})
        fn = f"crossword_{today}_{idx:02d}_{slug}.json"
        path = os.path.join(out_dir, fn)
        with open(path, "w", encoding="utf-8") as fp:
            json.dump(exported, fp, ensure_ascii=False, indent=2)
        made += 1
    # index.json (handy for the frontend)
    files = sorted([f for f in os.listdir(out_dir) if f.startswith(f"crossword_{today}_") and f.endswith(".json")])
    with open(os.path.join(out_dir, "index.json"), "w", encoding="utf-8") as fp:
        json.dump({"date": today, "files": files}, fp, ensure_ascii=False, indent=2)
    print(f"Generated {made} puzzles for {today}")


if __name__ == "__main__":
    main()

37
vocab/README.md Normal file
View File

@@ -0,0 +1,37 @@
![GitHub last commit](https://img.shields.io/github/last-commit/opentaal/opentaal-wordlist)
![GitHub commit activity](https://img.shields.io/github/commit-activity/y/opentaal/opentaal-wordlist)
![GitHub Repo stars](https://img.shields.io/github/stars/opentaal/opentaal-wordlist)
![GitHub watchers](https://img.shields.io/github/watchers/opentaal/opentaal-wordlist)
![GitHub Sponsors](https://img.shields.io/github/sponsors/opentaal)
![Liberapay patrons](https://img.shields.io/liberapay/patrons/opentaal)
# Dutch Word List
Last updated: 2023-03-10
This repository contains the official OpenTaal Dutch word list, comprising over 400,000 words compiled from contributions and curated sources. The list is provided in UTF-8 encoding and is alphabetically sorted.
## Contents
### Primary File
- **`wordlist.txt`** – Complete UTF-8 word list (one word per line).
### Metadata
- **`datetimeversion.txt`** – Timestamp and version information.
### Component Files
- **`elements/basiswoorden-gekeurd.txt`** – Approved base words (~200k entries).
- **`elements/basiswoorden-ongekeurd.txt`** – Unapproved base words, including proper nouns and compounds (~41k entries).
- **`elements/flexies-ongekeurd.txt`** – Unapproved inflections (~170k entries).
- **`elements/wordparts.tsv`** – Word parts containing spaces (TSV format).
- **`elements/corrections.tsv`** – Common misspellings with corrections (TSV format).
- **`elements/romeinse-cijfers.txt`** – Roman numerals (~4k entries).
- **`elements/wordlist-ascii.txt`** – ASCII-only subset (excludes accented characters).
- **`elements/wordlist-non-ascii.txt`** – Entries containing non-ASCII characters.
## Character Set
Includes standard Latin letters (a–z, A–Z), Dutch diacritics (e.g., `é`, `ë`, `ï`), superscript/subscript digits (e.g., `²`, `³`), and punctuation: `' . - / + & @ ?`.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

17196
vocab/elements/corrections.tsv Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,987 @@
1 1 aprilgek;1 aprilmop;1 aprilgrap;1 aprilgrappen;1 meifeest;1 meifeesten
8 8 uurjournaal
aanschijns in het zweet des aanschijns;in het zweet zijns aanschijns
aanvals- aanvals- of gevechtshelikopter;aanvals- en verdedigingsbonus
ab ab initio
Abebiet
Abebitisch
abele abele spelen
abeundi consilium abeundi;judicium abeundi
Aboe
abstracto in abstracto
absurdum reductio ad absurdum;ad absurdum
Accijnswet Douane- en Accijnswet BES
accompli fait accompli
Acker
acte acte de présence;akte
actes actes de présence
Addis
Adriaenszoon
aequo ex aequo
Agnus
agreement gentleman's agreement
Agt
Ahead Go Ahead Eagles
aigu accent aigu
Airees Buenos Aires
Aires Buenos Aires
ajam ajam pangang;saté ajam
aleikum salam aleikum
algemenen ten algemenen nutte
Alighieri
alii et alii
all all right
alleenstaanden- alleenstaanden- en het gezinspensioen;alleenstaanden- of gezinsdekking
Amalie
ambtswege van ambtswege
americain filet americain
American
Amu Amu Darja
amused not amused
Andamanse
anderendaags 's anderendaags
Anthonis
aprilgek 1 aprilgek
Armoricaans
art art nouveau;art deco
assistent- assistent- en basisberoepsopleiding;assistent- en conditietrainer
Asturisch-Cantabrisch
Atlantisch-Indisch-Antarctisch
Atlantisch-Indische
Atos Atos Origin
australopithecus homo australopithecus
avant avant la lettre
avonds 's avonds
A-woning premie A-woning
Ayers Ayers Rock
Azov
Bab Bab el Mandeb
bain-marie au bain-marie
Baldur Baldur von Shirach
Banater Banater Bulgaars
Bandar
barrebiesjes naar de barrebiesjes
Bartjens
basso basso continuo
B-complex vitamine B-complex
Beatles
bedde te bedde
bedrage ten bedrage van
bedrijfs- bedrijfs- en inkomensschade;bedrijfs- en huisautomatiseringssystemen
Begawan Bandar Seri Begawan
beginne in den beginne
behoeve te zijnen behoeve;ten behoeve van
belastings- belastings- en uitkeringsstelsels;belastings- en douanestelsels
Belgicus
bello pico bello
belope ten belope van
benefit benefit of the doubt
bene nota bene
besluite ten besluite van
bestemder te bestemder plaatse;te bestemder tijd
bèta- bèta- en gammawetenschappen;bèta- en techniekonderwijs
betalings- betalings- en verrekeningssystemen;betalings- of leveringsvoorwaarden
beurre beurre blanc;crème au beurre
bewijze ten bewijze
bezakt bepakt en bezakt
bez. bez. vnw.
bifida spina bifida
bij- bij- of nevenzaken
Bildt
biloba ginkgo biloba
B-injecties vitamine B-injecties
binnenuit van binnenuit
bin Osama bin Laden
black black box;black metal
Blanc
blanche carte blanche;dame blanche
bloc en bloc
bloede in koelen bloede;van koninklijken bloede
bloedens tot bloedens toe
bloody bloody mary;bloody mary's
blue out of the blue;Danish blue;blue card
BNR BNR Nieuwsradio
boekentop boekentop 10;boekentop 3
Boergondisch
Boergondische
Boheems-Moravische
Bonaparte
Bonifacio
bonne bonne bouche
bonnes bonnes bouches
bono pro bono;cui bono
Borromeus
Botnische Botnische Golf
bouche bonne bouche;flux de bouche
bouches bonnes bouches
boules jeu de boules
Boven-Rijnse
Bradbury
brode om den brode
Broec Stede Broec
broge mazzel en broge
bron- bron- en contactonderzoek
Brugman praten als Brugman
Bruyn
btw- btw- en accijnsverhogingen;btw- en bni-betalingen
Buenos Buenos Aires
buikte buikte uit
burele ten burele
Burkina Burkina Faso
but last but not least
Cadier
Caicoseilanden Turks- en Caicoseilanden
Campanische
Campert
Canarische
Cantabrisch
capella a capella
capita per capita;capita selecta
care intensive care
carne chili con carne
carrière- carrière- en doorgroeimogelijkheden;carrière- of medezeggenschapsmogelijkhe
carte à la carte
catering- catering- en hoteldiensten;catering- en winkeltoepassing
cathedra ex cathedra
causa honoris causa
Centraal-Indische
Centraal-Pacifisch
Centraal-Pacifische
cetera et cetera
ceteris ceteris paribus
chaise chaise longue
chaises chaises longues
Champions Champions League
chantant café chantant
chantants café chantants
Charlotte-eilanden
cheek tongue in cheek
choice multiple choice
chow bunny chow;chow mein
-cijferig drieletterig of -cijferig;2-letterig en -cijferig
cire cire perdue
Civilis
clausus numerus clausus
Clercq De Clercq
Cockburn
Cockerill
Cocksdorp De Cocksdorp
co en co
coffee Irish coffee
coffees Irish coffees
coitus coitus interruptus
colada pina colada
cold cold case;cold turkey;cold calling
coli E. coli
collect collect call
collector's collector's item
cologne eau de cologne
colour full colour
coming coming out;coming man
commedia commedia dell'arte
common common sense
communis communis opinio
Compostela Santiago de Compostela
Compostella Santiago de Compostella
conceptual conceptual art
concreto in concreto
conditio conditio sine qua non
condition condition humaine
Coninck
constrictor boa constrictor
contrecoeur à contrecoeur
Cook
coq coq au vin
cordon cordon bleu;cordon sanitaire
correcten politiek correcten
Corstius
Coruña La Coruña;A Coruña
Cottische
couleur couleur locale
court tout court
Cove Lulworth Cove
culpa mea culpa
cum cum suis;cum laude
dage heden ten dage;ten eeuwigen dage
dalai dalai lama;dalai lama's
Dalmatische
danse danse macabre
danses danses macabres
d'anvers filet d'anvers
d'Arc Jeanne d'Arc
Darja
death sudden death;death metal
deco art deco
Dei
dele ten dele;in allen dele;in genen dele
Delhisch
delicious golden delicious
delicti corpus delicti;locus delicti
dell'arte commedia dell'arte
demand video on demand;printing on demand
dementia dementia praecox;dementia paralytica
dente al dente
d'équipe chef d'équipe
derde- derde- en vierdejaars;derde- tot vijfdegroeper;derde- of zesdeklasser
derdegeneratie- derdegeneratie- of 3G-netwerken
Desiderius
Deum
deus deus ex machina;deus ex machina's
deux pas de deux
Dhabi Abu Dhabi
d'hôtel maître d'hôtel
diene dienen
dienste ten dienste
dii dii menores
dijn mijn en dijn
Dinarische
dinky dinky toy;dinky toys
diplomatique corps diplomatique
director managing director
disc compact disc
discs compact discs
doble paso doble
docking docking station
doeloe tempo doeloe
Dolder
dolfijnen- dolfijnen- en walvissenvlees;dolfijnen- en zeehondenshow
Domela
domo oratio pro domo;pro domo
d'orange jus d'orange
doubt benefit of the doubt
downs ups en downs
Doyle
draken- draken- en leeuwendansen;draken- en phoenixfiguren
dramatis dramatis personae
drie- drie- of vier-en-een-half;drie- tot vijf-en-een-half
dubio in dubio
duiven- duiven- en rattenoverlast;pluimvee- duiven- en kanarieverenigingen
duplo in duplo
dusverre tot dusverre
dusver tot dusver
Dutchman Flying Dutchman
Dyck
Dzjoengaarse
Eagles Go Ahead Eagles
earl earl grey
easy easy listening;easy rider;easy riders
eau eau de cologne;eau de parfum;eau de toilette;eau de toiletteje
eaux eaux de cologne;eaux de toilette;eaux de vie
ede onder ede
Edwardeiland Prins Edwardeiland
EEG- EEG- en EFTA-landen;EEG- en HACCP-normen;EEG- en de EGKS-procedures
EEG-Turkije Overeenkomst EEG-Turkije;Associatieraad EEG-Turkije
eenden- eenden- of ganzenlever;eenden- of ganzendons
eenre ter eenre zijde
Egadische
elektro- elektro- en besturingstechniek;elektro- en staalindustrie
elfder te elfder ure
Elizabetheilanden Koningin Elizabetheilanden
Elsschot
-e-mail bedrijfsinternetverbindingen en -e-mail
éminence éminence grise
endo- endo- en exogene vergiften;endo- en ectoparasieten
enenmale ten enenmale
enfant enfant terrible
enfants enfants terribles
erectus homo erectus
est hora est;id est
et hic et nunc;et cetera
Everest Mount Everest
extenso in extenso
extremis in extremis
face en face
facto de facto;ipso facto
fait fait accompli
fallopii tuba fallopii
familias pater familias;mater familias
fancy fancy fair;fancy fairs
fantastica pseudologia fantastica
Faso Burkina Faso
fatales femmes fatales
Fatra Kleine Fatra;Grote Fatra
faut comme il faut
faux faux pas
faveure ten faveure van
feite in feite
femme femme fatale
femmes femmes fatales
feng feng shui
Fe Santa Fe
fine fine fleur;in fine;ter fine van
fin fin de siècle
Fish
five high five
fixus numerus fixus
flier frequent flier
Flying Flying Dutchman
football american football
forma pro forma
fortiori a fortiori
fraîche crème fraîche
franca lingua franca
free free kick;free kicks
fundum ad fundum
Futuna Wallis en Futuna
Gagarin
garnalen- garnalen- en oestervissers;garnalen- en vishandel
Gaulle De Gaulle;Charles de Gaulle
Gautama Siddhartha Gautama Boeddha
gebreke in gebreke
gelde te gelde
gemeentehuize ten gemeentehuize
gemoede in gemoede
generis sui generis
geschenke ten geschenke
getale in groten getale
gevallend des gevallend
gevolge ten gevolge van
Gezelle
Godswege van Godswege
goeder te goeder trouw;te goeder naam en faam;te goeder ure
Gogh Van Gogh
gogo à gogo
goreng nasi goreng;pisang goreng;bami goreng
Graft-De Graft-De Rijp
Grajische
grand grand café;grand cafés;grand prix;grand seigneur;grand slam;grand slams
granny granny smith;granny smiths
grata persona non grata
gratin au gratin
Grenadines Saint Vincent en de Grenadines
grey earl grey
grijswater- grijswater- en regenwatersysteem;grijswater- of regenwatercircuit
grise éminence grise
grosso grosso modo
guerre nom de guerre
Guevara Che Guevara
gunter van hier tot gunter
Gutenberg
haars haars inziens
Haasse
habilis homo habilis
habitat- habitat- en vogelrichtlijn;habitat- of corridorfuncties
hagedissen- hagedissen- en kalfsleer;hagedissen- en krokodillenleer
Hampshire New Hampshire
Harer
harmony close harmony
Hartbonden
harten- harten- en schoppenaas;harten- of ruitenkleur
haute haute couture
Hecke
heinde van heinde en verre
heinen ijzeren heinen;magere heinen
Helmont van Helmont
helpe Zo waarlijk helpe mij God Almachtig;God helpe
hemelsnaam in hemelsnaam
hemelswil om 's hemelswil
Henck
hendrik brave hendrik
hendriken brave hendriken
henkies gekke henkies
Herenelderen 's Herenelderen
herin herin te richten;herin te voeren;herin te treden
herop herop te starten;herop te bouwen;herop te waarderen;herop te voeden
herten- herten- en zwijnenpopulaties;herten- en konijnenvlees
heruit heruit te zenden;heruit te vinden;heruit te geven
hic hic et nunc
Hinlopen
hippique concours hippique
Hippo Hippo Regius;Augustinus van Hippo
hoc ad hoc;post hoc
hocus hocus pocus
Hofmansaffaire
honden- honden- en kattenbezitter;honden- en kattenvoer
Honecker
honoris honoris causa
Hormuz
houderschaps- houderschaps- of aanschafbelastingen;houderschaps- en wegenbelastingen
hour happy hour;finest hour
huize van goeden huize;ten huize van
humaine condition humaine
huns huns inziens;huns ondanks;huns weegs;het zweet huns aanschijns
huwelijks- huwelijks- en relatieproblemen;huwelijks- en gezinsmigratie
Huygens
hystera hystera protera
hysteron hysteron proteron
Idi Idi Amin
idiot idiot savant
Ilp Den Ilp
inburgerings- inburgerings- en taalcursussen;inburgerings- en integratiebeleid
incognita terra incognita
Indisch-Antarctisch
in- in- en uitlaatopeningen;in- en uitvoer
ins ins en outs
Insolventieregister Centraal Insolventieregister
Instrumentum Novum Instrumentum
intellectualis auctor intellectualis
intensive intensive care
interruptus coitus interruptus
ipso ipso facto;eo ipso
Iskariot Judas Iskariot
Island Rhode Island
Jacques-Yves
Janeiro
Janszoon
jetje van jetje
jeune jeune premier
Joegor
jokes practical jokes;inside jokes;sick jokes
Jozefland Frans Jozefland
J.R.R. J.R.R. Tolkien
juan don juan
juicy juicy details
julifeesten 11 julifeesten
Julische
julivieringen 11 julivieringen;21 julivieringen
jure de jure
Justitia Vrouwe Justitia
K'ai-Sjek
Kampf Mein Kampf
kandidaatstellings- kandidaatstellings- en verkiezingsprocedures;kandidaatstellings- en verkiez
kantore ten kantore
Kapela
Karimata Straat Karimata
Karische
karnemelkse karnemelkse pap
Karnische
Kaspische
keel- keel- en neusamandelen;keel-, neus- en oorarts
keerlen der keerlen god
kerke ter kerke
Kerouac Jack Kerouac
Khomeini
Kieler
kilometergebied 20 kilometergebied;30 kilometergebied
kilometergebieden 20 kilometergebieden;30 kilometergebieden
Kitts
koeriers- koeriers- en transportbedrijven;koeriers- en expresmarkt
Koerse Waregem Koerse
koninklijken van koninklijken bloede
koopmans- koopmans- en domineesgeest;koopmans- en pakhuizen
Kra
krisjna hare krisjna
krisjna's hare krisjna's
Kuala Kuala Lumpur
laad- laad- en loswerkzaamheden;laad- en losopening
Lachmon
Laconische
Laet de Laet;van Laet
Lankaan
Lankaanse
Lankanen
lapis lapis lazuli
latin latin lover;latin lovers;latin rock;latin jazz
laude cum laude
Lawrencebaai Saint Lawrencebaai
lazuli lapis lazuli
League Champions League
Leaguewedstrijd Champions Leaguewedstrijd
Leeghwater
Leidsche
Lejzer
Leoner Sierra Leoner
Leoons
lettre avant la lettre
libris ex libris
librissen ex librissen
licet Quod licet Iovi non licet bovi;Aliis si licet, tibi non licet
liesje vlijtig liesje
lijve aan den lijve
likmevestje van likmevestje
Lindbergh
linea linea recta
lingua lingua franca
Liparische
listening easy listening
Lister
lognormale lognormale verdeling
longue chaise longue
longues chaises longues
lorraine quiche lorraine
lorraines quiche lorraines;quiches lorraines
lorum in de lorum
loss total loss
Luciaan
Luciaanse
Lumpur Kuala Lumpur
macabre danse macabre
macabres danses macabres
machina deus ex machina
machte bij machte
Magallanes Straat Magallanes
Magellaan
magnifici rectores magnifici
magnificus rector magnificus
Mahatma
mais- mais- en sojatreinen;mais- en zonnebloemvelden
Majesteits
male ten tweeden male;ten anderen male
managing managing director
Mandeb Bab el Mandeb
Mariaatje
Marinees
Martaban Golf van Martaban
mary bloody mary
masqué bal masqué
masqués bal masqués;bals masqués
masse en masse
Matapan
mates running mates
Mazurisch
mea mea culpa
Meern De Meern
meeuwen- meeuwen- en scholeksterpopulaties;meeuwen- en papegaaiduikersrotsen
meiherdenking 4 meiherdenking
meiviering 5 meiviering
mellitus diabetes mellitus
Melville
memoriam in memoriam
Merenplateau Mazurisch Merenplateau;Pommers Merenplateau
mer fruits de mer;mer à boire
metal heavy metal
meterbad 25 meterbad;50 meterbad
meterbaden 25 meterbaden;50 meterbaden
metergebied 16 metergebied
M.G. Annie M.G. Schmidt
mia mamma mia
Midden-Atlantische
Midden-Siberisch
middle middle class
mina dolle mina
mina's dolle mina's
Minhstad Ho Chi Minhstad
minute last minute
Miquelon Saint-Pierre en Miquelon
mitswa bar mitswa
mobilia perpetua mobilia
modo grosso modo
Monroe
Mont
Moresby
morgana fata morgana
mortale salto mortale
mortale's salto mortale's
mortales salto mortales
mortis rigor mortis
moskee- moskee- dan wel kerkbezoek;moskee- of synagogebezoek
Moslim-Kroatische
motion slow motion
Mount
mouton pied de mouton;pieds de mouton
muggen- muggen- en vliegenlarven;muggen- en wespensteken
mum in een mum van tijd
music minimal music
mutandis mutatis mutandis
mutatis mutatis mutandis
nachte bij nachte
najaars- najaars- en wintercollectie;najaars- en voorjaarscursussen
nano- nano-, bio- en informatietechnologieën;micro-, nano- en opto-elektronische
napoletano mastino napoletano
napoletano's mastino napoletano's
Nares Straat Nares
nascendi in statu nascendi
natalis dies natalis
natura in natura
nature van nature
neo- neo-liberalisme;neo-nazi's
ne rien ne va plus;ne bis in idem
nervosa anorexia nervosa;boulimia nervosa
Neusiedler Neusiedler Meer
neus- neus- en keelholte;keel-, neus- en oorarts
Nevis Saint Kitts en Nevis
New New Hampshire
niçoise salade niçoise;Niçoise
niet- niet- of laaggeletterd;niet- en spijkergaten
Nieuwkruisland
Nieuw-Siberische
noir film noir;café noir
noirs films noirs;cafés noirs
nolens nolens volens
nom nom de plume;nom de guerre
no no cure;no pay
Noord-Equatoriale
Noord-Tiroler
nouveau art nouveau
nouveaux nouveaux riches
novels graphic novels;gothic novels
Nubische
numerus numerus fixus;numerus clausus
nunc hic et nunc;ex nunc
Obeid
oblige noblesse oblige
obscura camera obscura
Ochotsk Zee van Ochotsk
ochtends 's ochtends
ofte nooit ofte nimmer
Oldenbarnevelt
om- om- of bijscholing
onderscheids des onderscheids
onheils plaats des onheils;dag des onheils
onov. onov. ww.
onpas te pas en te onpas
onrechte ten onrechte
onzentwege van onzentwege
Oost-Pacifische
Oost-Siberische
opinio communis opinio
opinion second opinion
opleidings- opleidings- en erkenningseisen;opleidings- en examenreglement
-opslag CO2-opslag;CO₂-opslag
oratio oratio pro domo
orbi urbi et orbi
Orwell
Ossip
Otranto
ouderschaps- ouderschaps- en levensloopverlof;ouderschaps- en zorgverlof
Oud- Oud- en Nieuwjaar;Oud- en Nieuw-Vossemeer
oudsher van oudsher
outs ins en outs
overvloede ten overvloede
Pacifisch-Antarctisch
paleize ten paleize
Palk
pampus voor pampus
pangang babi pangang;ajam pangang
paribus ceteris paribus
paso paso doble;paso dobles
passionnel crime passionnel
patates patates frites
Paulowna
pay no pay
peau peau de pêche
pêche pêche melba;peau de pêche
pectoris angina pectoris
Pelagische
Penninisch
perdue cire perdue
perpetua perpetua mobilia
perpetuum perpetuum mobile
perse ter perse
persona persona non grata
personeels- personeels- en organisatiebeleid;personeels- en cliëntgegevens
Petalia
petalia trapezites petalia
petit petit restaurant
petto in petto
pico pico bello
pièce pièce de résistance
Pieterszoon
Pilatus
pina pina colada
Pinte
plaatse ter plaatse
places of all places
plekke ter plekke
plume nom de plume
pocus hocus pocus
Poe
poly- poly- en perfluoralkylstoffen
Pommers
Pommerse
Pontisch
Pontius
poste poste restante
posteriori a posteriori
praecox dementia praecox
-principes basisbegrippen en -principes
prix grand prix
profile low profile
prophecy selffulfilling prophecy
protera hystera protera
proterons hysteron proterons
provocatus abortus provocatus
pseudologia pseudologia fantastica
Puerto
qualitate qualitate qua
Quichot Don Quichot
quid quid pro quo
quo a quo;casu quo;quid pro quo;quo vadis;status quo;status quo ante bellum
-quota visbestanden en -quota
Rading Hollandsche Rading
rames nasi rames
rarae rarae aves
rasa tabula rasa
reading close reading
reality virtual reality
rechter- rechter- en linkerkant;rechter- als linkerzijde
rectores rectores magnifici
reductio reductio ad absurdum
relations public relations
remedial remedial teacher;remedial teaching
remme ad remme
remote remote sensing
restante poste restante
retriever golden retriever
retrievers golden retrievers
Rhode Rhode Island
Ricaanse
Ricanen
riches nouveaux riches
-richtlijn
-richtlijnen beveiligingsinstructies en -richtlijnen;ontwerpfilosofieën en -richtlijnen
rider easy rider
riders easy riders
rigor rigor mortis
Rijswijck van Rijswijck
Road Abbey Road
roaring roaring twenties
rocks on the rocks
romeinse romeinse kaars;Romeinse
roodlicht- roodlicht- en snelheidscamera's;roodlicht- en gordelcontroles
running running gag;running gags;running mate;running mates
rupsen- rupsen- of slakkenplaag;rupsen- en wespenplaag
Rushdie
russells jack russells
Saksen-Coburg
sang pur sang
Sankt
sans sans rancune
Sao Sao Tomé en Principe
sapiens homo sapiens
sativa cannabis sativa
savant idiot savant
Savoyse
schapen- schapen- en geitenvlees;schapen- en geitensector
schrijve zegge en schrijve
second second opinion
secret top secret
seilde seilde ab
selecta capita selecta
sensing remote sensing
se per se;SE;s.e.
septemberaanslagen 11 septemberaanslagen
septic septic tank
Shaba
shui feng shui
sick sick joke;sick jokes
Sideros
Sidra
s'il s'il vous plaît
Simbel Aboe Simbel
sine conditio sine qua non
Sint-Vincent
slam grand slam
slams grand slams
slow slow motion
smith granny smith
smiths granny smiths
Soenda
Soenion Kaap Soenion
Soeren Hoog Soeren;Laag-Soeren
soleil coupe soleil
sotto sotto voce
Soubirous Bernadette Soubirous
source open source
speaking on speaking terms
spe in spe
spina spina bifida
's 's avonds;'s anderendaags
Staatsblad Belgisch Staatsblad;staatsblad
stade te stade
stadhuize ten stadhuize
stadswege van stadswege
Stallman
stante stante pede
statu in statu nascendi
Stede Stede Broec
Steinerschool steinerschool;Rudolf Steinerschool
sterling pond sterling
stikkens tot stikkens toe
Stones Rolling Stones
story's short story's
Strauss
stroomaf- stroomaf- en stroomopwaarts;stroomaf- en windopwaarts
stroomop- stroomop- en stroomafwaarts
studium studium generale
sudden sudden death
sui sui generis
suprême moment suprême
tabula tabula rasa
tai tai chi
tale tale Kanaäns
Tasman Abel Tasman
teacher remedial teacher
teachers remedial teachers
teaching remedial teaching
teeg teeg aan
temporeetje ex temporeetje
tempore ex tempore
Terblijt Berg en Terblijt
terminis contradictio in terminis
terrible enfant terrible
terribles enfants terribles
terzelfder terzelfder tijd
the benefit of the doubt
thinking wishful thinking
thomas ongelovige thomas
tijds de tand des tijds
Tobago
toilette eau de toilette
toiletteje eau de toiletteje
tolerance zero tolerance
Tolo
Tomini
tonele ten tonele
tongue tongue in cheek
Torvalds
total total loss
Town George Town;Road Town
toy dinky toy
toys dinky toys
-transport afvalverwerking en -transport;elektriciteitsproductie en -transport
Transsylvanische
Trasimeense Trasimeense Meer
Trudoplein Sint Trudoplein
Tsjang Tsjang Kai-Sjek
turkey cold turkey
Turks- Turks- en Caicoseilanden;Turks- en Arabischtalige
Twain
tweeliter- tweeliter- en elektromotor;tweeliter- en drieliterflessen
twenties roaring twenties
Tyrreense
universalis homo universalis
urbi urbi et orbi
uurjournaal 6 uurjournaal;8 uurjournaal;10 uurjournaal
uursbegeleiding 24 uursbegeleiding
uursbereikbaarheid 24 uursbereikbaarheid
uursbeschikbaarheid 24 uursbeschikbaarheid
uursbewaking 24 uursbewaking
uursconsultatie 24 uursconsultatie
uursdienst 24 uursdienst
uursdiensten 24 uursdiensten
uursdrempel 24 uursdrempel
uurseconomie 24 uurseconomie
uursgemiddelde 24 uursgemiddelde
uurshulp 24 uurshulp
uursopvang 24 uursopvang
uursprocedure 24 uursprocedure
uursprocedures 24 uursprocedures
uursservice 24 uursservice
uursstaking 24 uursstaking
uursstructuren 24 uursstructuren
uursstructuur 24 uursstructuur
uursverplichting 24 uursverplichting
uursverplichtingen 24 uursverplichtingen
uursvoorziening 24 uursvoorziening
uursvoorzieningen 24 uursvoorzieningen
uurszorg 24 uurszorg
uwent te uwent
Uyl
Vaartse Vaartse Rijn
Valley Silicon Valley
Ventoux
venture joint venture
ventures joint ventures
Verdebekken Kaap Verdebekken
Vereenigde
-verenigingen kunststichtingen en -verenigingen
-vereniging kunststichting of -vereniging
Verlatinghe Plakkaat van Verlatinghe
versa vice versa
verstande met dien verstande
vervelens tot vervelens toe
verve met verve
vervoers- vervoers- en accommodatiekosten;vervoers- en parkeermanagement
vervolge ten vervolge
vice vice versa
Victoriawoestijn Grote Victoriawoestijn
vie au de vie;aux de vie
viola viola da gamba
virtual virtual reality
vista a prima vista
vita attestatie de vita
vitae curriculum vitae
Vith Sankt Vith
vivant bon vivant
vivants bon vivants
vivo in vivo
Vleuten-De Vleuten-De Meern
vleze naar den vleze
voce sotto voce
voege in dier voege
Voigtstraat Dr. Voigtstraat
volens nolens volens
volente Deo volente
Volkske Ons Volkske;'t Volkske
voorbedachten met voorbedachten rade
voordele ten voordele van
voor- voor- en nadelen
voto ex voto
votootje ex votootje
vredesnaam in vredesnaam
vu déjà vu
Vuursche Lage Vuursche
vuutje déjà vuutje
-wallen geluidsschermen en -wallen;stadsmuren en -wallen
walvissen- walvissen- en dolfijnentours;walvissen- en robbenvangst
-wapens stadsnamen en -wapens;speelgoedauto's en -wapens
-water gemeentegrond of -water;afvalenergie en -water
-websites thematijdschriften en -websites;ondernemersmagazines en -websites
-website thematijdschrift en -website;ondernemersmagazine en -website
-weefsels donororganen en -weefsels
-weekend sportweek of -weekend
weerd wie het kleine niet eert is het grote niet weerd
-wegen werkterreinen en -wegen;asbestpaden en -wegen
-weg voorrangskruising of -weg;trekvaart en -weg
-welzijn diergezondheid en -welzijn;dierenrechten en -welzijn
-wensen leerdoelen en -wensen;huisvestingseisen en -wensen
-werken samenleven en -werken;meedenken en -werken
-werkgroep themalocatie en -werkgroep
-werking medeweten en -werking
-werk levensvisie en -werk;welzijnsbeleid en -werk
-werknemers overheidswerkgevers en -werknemers
West-Siberisch
wetens willens en wetens
-wet gemeenteklimaat en -wet;belastingtarief en -wet
-wetgeving gewasbeschermingstechnieken en -wetgeving;boekhoudprincipes en -wetgeving
-wetten belastingtarieven en -wetten;milieumaatregelen en -wetten
-wezen oorlogsweduwen en -wezen;fietswrakken en -wezen
W.F. W.F. Hermans
W.G. W.G. van de Hulst
wide world wide web
wiedes nogal wiedes
wiedeweerga als de wiedeweerga
wieven witte wieven
wijfjes- wijfjes- en mannetjesvaren;wijfjes- en mannetjesgroepen
-wijze bereidingstijd en -wijze;levensovertuiging of -wijze
wille ter wille van
willies stille willies
willie stille willie
wils voor elk wat wils
Windward Windward Passage
-winkels souterrainwoningen en -winkels;websites en -winkels
-winst topomzet en -winst;kwartaalomzet en -winst
wishful wishful thinking
Wolden De Wolden
Wolynisch-Podolisch Wolynisch-Podolisch Plateau
-woning nieuwbouwappartement of -woning;studentenkamer of -woning
Xiaoping Deng Xiaoping
XV-meubels Louis XV-meubels
Yorker New Yorker
Yorkse New Yorkse
Zadkine
zake ter zake;in zake
Zedong
zeehonden- zeehonden- en vogelopvang;zeehonden- en walvispoep
Zembla
zend- zend- of ontvanginrichtingen;zend- en ontvangststations
ziele ter ziele
zijd wijd en zijd
Zilk De Zilk
zonne- wind-, zonne- en kernenergie;zonne- en muziekgod
-zorg verslavingspreventie en -zorg
Zuid-Australisch
Zuid-Equatoriale
Zuid-Pacifisch
Zuid-Pacifische
Zuid-Schots
Zuid-West Zuid-West Nederland
-zuivering watervoorziening en -zuivering;luchtafzuiging en -zuivering
-zussen halfbroers en -zussen;stiefbroers en -zussen
-zusters gildebroeders en -zusters
-zus tweelingbroer of -zus
Zwaluwe Lage Zwaluwe;Hooge Zwaluwe
zwanen- zwanen- of ganzenveren;zwanen- en schildersmossels
zwang in zwang
zwartwitte zwartwitte franjeapen;zwartwitte veldridderzwam;zwartwitte snuitwapenvlieg
1 1 1 aprilgek;1 aprilmop;1 aprilgrap;1 aprilgrappen;1 meifeest;1 meifeesten
2 8 8 uurjournaal
3 aanschijns in het zweet des aanschijns;in het zweet zijns aanschijns
4 aanvals- aanvals- of gevechtshelikopter;aanvals- en verdedigingsbonus
5 ab ab initio
6 Abebiet
7 Abebitisch
8 abele abele spelen
9 abeundi consilium abeundi;judicium abeundi
10 Aboe
11 abstracto in abstracto
12 absurdum reductio ad absurdum;ad absurdum
13 Accijnswet Douane- en Accijnswet BES
14 accompli fait accompli
15 Acker
16 acte acte de présence;akte
17 actes actes de présence
18 Addis
19 Adriaenszoon
20 aequo ex aequo
21 Agnus
22 agreement gentleman's agreement
23 Agt
24 Ahead Go Ahead Eagles
25 aigu accent aigu
26 Airees Buenos Aires
27 Aires Buenos Aires
28 ajam ajam pangang;saté ajam
29 aleikum salam aleikum
30 algemenen ten algemenen nutte
31 Alighieri
32 alii et alii
33 all all right
34 alleenstaanden- alleenstaanden- en het gezinspensioen;alleenstaanden- of gezinsdekking
35 Amalie
36 ambtswege van ambtswege
37 americain filet americain
38 American
39 Amu Amu Darja
40 amused not amused
41 Andamanse
42 anderendaags 's anderendaags
43 Anthonis
44 aprilgek 1 aprilgek
45 Armoricaans
46 art art nouveau;art deco
47 assistent- assistent- en basisberoepsopleiding;assistent- en conditietrainer
48 Asturisch-Cantabrisch
49 Atlantisch-Indisch-Antarctisch
50 Atlantisch-Indische
51 Atos Atos Origin
52 australopithecus homo australopithecus
53 avant avant la lettre
54 avonds 's avonds
55 A-woning premie A-woning
56 Ayers Ayers Rock
57 Azov
58 Bab Bab el Mandeb
59 bain-marie au bain-marie
60 Baldur Baldur von Shirach
61 Banater Banater Bulgaars
62 Bandar
63 barrebiesjes naar de barrebiesjes
64 Bartjens
65 basso basso continuo
66 B-complex vitamine B-complex
67 Beatles
68 bedde te bedde
69 bedrage ten bedrage van
70 bedrijfs- bedrijfs- en inkomensschade;bedrijfs- en huisautomatiseringssystemen
71 Begawan Bandar Seri Begawan
72 beginne in den beginne
73 behoeve te zijnen behoeve;ten behoeve van
74 belastings- belastings- en uitkeringsstelsels;belastings- en douanestelsels
75 Belgicus
76 bello pico bello
77 belope ten belope van
78 benefit benefit of the doubt
79 bene nota bene
80 besluite ten besluite van
81 bestemder te bestemder plaatse;te bestemder tijd
82 bèta- bèta- en gammawetenschappen;bèta- en techniekonderwijs
83 betalings- betalings- en verrekeningssystemen;betalings- of leveringsvoorwaarden
84 beurre beurre blanc;crème au beurre
85 bewijze ten bewijze
86 bezakt bepakt en bezakt
87 bez. bez. vnw.
88 bifida spina bifida
89 bij- bij- of nevenzaken
90 Bildt
91 biloba ginkgo biloba
92 B-injecties vitamine B-injecties
93 binnenuit van binnenuit
94 bin Osama bin Laden
95 black black box;black metal
96 Blanc
97 blanche carte blanche;dame blanche
98 bloc en bloc
99 bloede in koelen bloede;van koninklijken bloede
100 bloedens tot bloedens toe
101 bloody bloody mary;bloody mary's
102 blue out of the blue;Danish blue;blue card
103 BNR BNR Nieuwsradio
104 boekentop boekentop 10;boekentop 3
105 Boergondisch
106 Boergondische
107 Boheems-Moravische
108 Bonaparte
109 Bonifacio
110 bonne bonne bouche
111 bonnes bonnes bouches
112 bono pro bono;cui bono
113 Borromeus
114 Botnische Botnische Golf
115 bouche bonne bouche;flux de bouche
116 bouches bonnes bouches
117 boules jeu de boules
118 Boven-Rijnse
119 Bradbury
120 brode om den brode
121 Broec Stede Broec
122 broge mazzel en broge
123 bron- bron- en contactonderzoek
124 Brugman praten als Brugman
125 Bruyn
126 btw- btw- en accijnsverhogingen;btw- en bni-betalingen
127 Buenos Buenos Aires
128 buikte buikte uit
129 burele ten burele
130 Burkina Burkina Faso
131 but last but not least
132 Cadier
133 Caicoseilanden Turks- en Caicoseilanden
134 Campanische
135 Campert
136 Canarische
137 Cantabrisch
138 capella a capella
139 capita per capita;capita selecta
140 care intensive care
141 carne chili con carne
142 carrière- carrière- en doorgroeimogelijkheden;carrière- of medezeggenschapsmogelijkhe
143 carte à la carte
144 catering- catering- en hoteldiensten;catering- en winkeltoepassing
145 cathedra ex cathedra
146 causa honoris causa
147 Centraal-Indische
148 Centraal-Pacifisch
149 Centraal-Pacifische
150 cetera et cetera
151 ceteris ceteris paribus
152 chaise chaise longue
153 chaises chaises longues
154 Champions Champions League
155 chantant café chantant
156 chantants café chantants
157 Charlotte-eilanden
158 cheek tongue in cheek
159 choice multiple choice
160 chow bunny chow;chow mein
161 -cijferig drieletterig of -cijferig;2-letterig en -cijferig
162 cire cire perdue
163 Civilis
164 clausus numerus clausus
165 Clercq De Clercq
166 Cockburn
167 Cockerill
168 Cocksdorp De Cocksdorp
169 co en co
170 coffee Irish coffee
171 coffees Irish coffees
172 coitus coitus interruptus
173 colada pina colada
174 cold cold case;cold turkey;cold calling
175 coli E. coli
176 collect collect call
177 collector's collector's item
178 cologne eau de cologne
179 colour full colour
180 coming coming out;coming man
181 commedia commedia dell'arte
182 common common sense
183 communis communis opinio
184 Compostela Santiago de Compostela
185 Compostella Santiago de Compostella
186 conceptual conceptual art
187 concreto in concreto
188 conditio conditio sine qua non
189 condition condition humaine
190 Coninck
191 constrictor boa constrictor
192 contrecoeur à contrecoeur
193 Cook
194 coq coq au vin
195 cordon cordon bleu;cordon sanitaire
196 correcten politiek correcten
197 Corstius
198 Coruña La Coruña;A Coruña
199 Cottische
200 couleur couleur locale
201 court tout court
202 Cove Lulworth Cove
203 culpa mea culpa
204 cum cum suis;cum laude
205 dage heden ten dage;ten eeuwigen dage
206 dalai dalai lama;dalai lama's
207 Dalmatische
208 danse danse macabre
209 danses danses macabres
210 d'anvers filet d'anvers
211 d'Arc Jeanne d'Arc
212 Darja
213 death sudden death;death metal
214 deco art deco
215 Dei
216 dele ten dele;in allen dele;in genen dele
217 Delhisch
218 delicious golden delicious
219 delicti corpus delicti;locus delicti
220 dell'arte commedia dell'arte
221 demand video on demand;printing on demand
222 dementia dementia praecox;dementia paralytica
223 dente al dente
224 d'équipe chef d'équipe
225 derde- derde- en vierdejaars;derde- tot vijfdegroeper;derde- of zesdeklasser
226 derdegeneratie- derdegeneratie- of 3G-netwerken
227 Desiderius
228 Deum
229 deus deus ex machina;deus ex machina's
230 deux pas de deux
231 Dhabi Abu Dhabi
232 d'hôtel maître d'hôtel
233 diene dienen
234 dienste ten dienste
235 dii dii menores
236 dijn mijn en dijn
237 Dinarische
238 dinky dinky toy;dinky toys
239 diplomatique corps diplomatique
240 director managing director
241 disc compact disc
242 discs compact discs
243 doble paso doble
244 docking docking station
245 doeloe tempo doeloe
246 Dolder
247 dolfijnen- dolfijnen- en walvissenvlees;dolfijnen- en zeehondenshow
248 Domela
249 domo oratio pro domo;pro domo
250 d'orange jus d'orange
251 doubt benefit of the doubt
252 downs ups en downs
253 Doyle
254 draken- draken- en leeuwendansen;draken- en phoenixfiguren
255 dramatis dramatis personae
256 drie- drie- of vier-en-een-half;drie- tot vijf-en-een-half
257 dubio in dubio
258 duiven- duiven- en rattenoverlast;pluimvee- duiven- en kanarieverenigingen
259 duplo in duplo
260 dusverre tot dusverre
261 dusver tot dusver
262 Dutchman Flying Dutchman
263 Dyck
264 Dzjoengaarse
265 Eagles Go Ahead Eagles
266 earl earl grey
267 easy easy listening;easy rider;easy riders
268 eau eau de cologne;eau de parfum;eau de toilette;eau de toiletteje
269 eaux eaux de cologne;eaux de toilette;eaux de vie
270 ede onder ede
271 Edwardeiland Prins Edwardeiland
272 EEG- EEG- en EFTA-landen;EEG- en HACCP-normen;EEG- en de EGKS-procedures
273 EEG-Turkije Overeenkomst EEG-Turkije;Associatieraad EEG-Turkije
274 eenden- eenden- of ganzenlever;eenden- of ganzendons
275 eenre ter eenre zijde
276 Egadische
277 elektro- elektro- en besturingstechniek;elektro- en staalindustrie
278 elfder te elfder ure
279 Elizabetheilanden Koningin Elizabetheilanden
280 Elsschot
281 -e-mail bedrijfsinternetverbindingen en -e-mail
282 éminence éminence grise
283 endo- endo- en exogene vergiften;endo- en ectoparasieten
284 enenmale ten enenmale
285 enfant enfant terrible
286 enfants enfants terribles
287 erectus homo erectus
288 est hora est;id est
289 et hic et nunc;et cetera
290 Everest Mount Everest
291 extenso in extenso
292 extremis in extremis
293 face en face
294 facto de facto;ipso facto
295 fait fait accompli
296 fallopii tuba fallopii
297 familias pater familias;mater familias
298 fancy fancy fair;fancy fairs
299 fantastica pseudologia fantastica
300 Faso Burkina Faso
301 fatales femmes fatales
302 Fatra Kleine Fatra;Grote Fatra
303 faut comme il faut
304 faux faux pas
305 faveure ten faveure van
306 feite in feite
307 femme femme fatale
308 femmes femmes fatales
309 feng feng shui
310 Fe Santa Fe
311 fine fine fleur;in fine;ter fine van
312 fin fin de siècle
313 Fish
314 five high five
315 fixus numerus fixus
316 flier frequent flier
317 Flying Flying Dutchman
318 football american football
319 forma pro forma
320 fortiori a fortiori
321 fraîche crème fraîche
322 franca lingua franca
323 free free kick;free kicks
324 fundum ad fundum
325 Futuna Wallis en Futuna
326 Gagarin
327 garnalen- garnalen- en oestervissers;garnalen- en vishandel
328 Gaulle De Gaulle;Charles de Gaulle
329 Gautama Siddhartha Gautama Boeddha
330 gebreke in gebreke
331 gelde te gelde
332 gemeentehuize ten gemeentehuize
333 gemoede in gemoede
334 generis sui generis
335 geschenke ten geschenke
336 getale in groten getale
337 gevallend des gevallend
338 gevolge ten gevolge van
339 Gezelle
340 Godswege van Godswege
341 goeder te goeder trouw;te goeder naam en faam;te goeder ure
342 Gogh Van Gogh
343 gogo à gogo
344 goreng nasi goreng;pisang goreng;bami goreng
345 Graft-De Graft-De Rijp
346 Grajische
347 grand grand café;grand cafés;grand prix;grand seigneur;grand slam;grand slams
348 granny granny smith;granny smiths
349 grata persona non grata
350 gratin au gratin
351 Grenadines Saint Vincent en de Grenadines
352 grey earl grey
353 grijswater- grijswater- en regenwatersysteem;grijswater- of regenwatercircuit
354 grise éminence grise
355 grosso grosso modo
356 guerre nom de guerre
357 Guevara Che Guevara
358 gunter van hier tot gunter
359 Gutenberg
360 haars haars inziens
361 Haasse
362 habilis homo habilis
363 habitat- habitat- en vogelrichtlijn;habitat- of corridorfuncties
364 hagedissen- hagedissen- en kalfsleer;hagedissen- en krokodillenleer
365 Hampshire New Hampshire
366 Harer
367 harmony close harmony
368 Hartbonden
369 harten- harten- en schoppenaas;harten- of ruitenkleur
370 haute haute couture
371 Hecke
372 heinde van heinde en verre
373 heinen ijzeren heinen;magere heinen
374 Helmont van Helmont
375 helpe Zo waarlijk helpe mij God Almachtig;God helpe
376 hemelsnaam in hemelsnaam
377 hemelswil om 's hemelswil
378 Henck
379 hendrik brave hendrik
380 hendriken brave hendriken
381 henkies gekke henkies
382 Herenelderen 's Herenelderen
383 herin herin te richten;herin te voeren;herin te treden
384 herop herop te starten;herop te bouwen;herop te waarderen;herop te voeden
385 herten- herten- en zwijnenpopulaties;herten- en konijnenvlees
386 heruit heruit te zenden;heruit te vinden;heruit te geven
387 hic hic et nunc
388 Hinlopen
389 hippique concours hippique
390 Hippo Hippo Regius;Augustinus van Hippo
391 hoc ad hoc;post hoc
392 hocus hocus pocus
393 Hofmansaffaire
394 honden- honden- en kattenbezitter;honden- en kattenvoer
395 Honecker
396 honoris honoris causa
397 Hormuz
398 houderschaps- houderschaps- of aanschafbelastingen;houderschaps- en wegenbelastingen
399 hour happy hour;finest hour
400 huize van goeden huize;ten huize van
401 humaine condition humaine
402 huns huns inziens;huns ondanks;huns weegs;het zweet huns aanschijns
403 huwelijks- huwelijks- en relatieproblemen;huwelijks- en gezinsmigratie
404 Huygens
405 hystera hystera protera
406 hysteron hysteron proteron
407 Idi Idi Amin
408 idiot idiot savant
409 Ilp Den Ilp
410 inburgerings- inburgerings- en taalcursussen;inburgerings- en integratiebeleid
411 incognita terra incognita
412 Indisch-Antarctisch
413 in- in- en uitlaatopeningen;in- en uitvoer
414 ins ins en outs
415 Insolventieregister Centraal Insolventieregister
416 Instrumentum Novum Instrumentum
417 intellectualis auctor intellectualis
418 intensive intensive care
419 interruptus coitus interruptus
420 ipso ipso facto;eo ipso
421 Iskariot Judas Iskariot
422 Island Rhode Island
423 Jacques-Yves
424 Janeiro
425 Janszoon
426 jetje van jetje
427 jeune jeune premier
428 Joegor
429 jokes practical jokes;inside jokes;sick jokes
430 Jozefland Frans Jozefland
431 J.R.R. J.R.R. Tolkien
432 juan don juan
433 juicy juicy details
434 julifeesten 11 julifeesten
435 Julische
436 julivieringen 11 julivieringen;21 julivieringen
437 jure de jure
438 Justitia Vrouwe Justitia
439 K'ai-Sjek
440 Kampf Mein Kampf
441 kandidaatstellings- kandidaatstellings- en verkiezingsprocedures;kandidaatstellings- en verkiez
442 kantore ten kantore
443 Kapela
444 Karimata Straat Karimata
445 Karische
446 karnemelkse karnemelkse pap
447 Karnische
448 Kaspische
449 keel- keel- en neusamandelen;keel-, neus- en oorarts
450 keerlen der keerlen god
451 kerke ter kerke
452 Kerouac Jack Kerouac
453 Khomeini
454 Kieler
455 kilometergebied 20 kilometergebied;30 kilometergebied
456 kilometergebieden 20 kilometergebieden;30 kilometergebieden
457 Kitts
458 koeriers- koeriers- en transportbedrijven;koeriers- en expresmarkt
459 Koerse Waregem Koerse
460 koninklijken van koninklijken bloede
461 koopmans- koopmans- en domineesgeest;koopmans- en pakhuizen
462 Kra
463 krisjna hare krisjna
464 krisjna's hare krisjna's
465 Kuala Kuala Lumpur
466 laad- laad- en loswerkzaamheden;laad- en losopening
467 Lachmon
468 Laconische
469 Laet de Laet;van Laet
470 Lankaan
471 Lankaanse
472 Lankanen
473 lapis lapis lazuli
474 latin latin lover;latin lovers;latin rock;latin jazz
475 laude cum laude
476 Lawrencebaai Saint Lawrencebaai
477 lazuli lapis lazuli
478 League Champions League
479 Leaguewedstrijd Champions Leaguewedstrijd
480 Leeghwater
481 Leidsche
482 Lejzer
483 Leoner Sierra Leoner
484 Leoons
485 lettre avant la lettre
486 libris ex libris
487 librissen ex librissen
488 licet Quod licet Iovi non licet bovi;Aliis si licet, tibi non licet
489 liesje vlijtig liesje
490 lijve aan den lijve
491 likmevestje van likmevestje
492 Lindbergh
493 linea linea recta
494 lingua lingua franca
495 Liparische
496 listening easy listening
497 Lister
498 lognormale lognormale verdeling
499 longue chaise longue
500 longues chaises longues
501 lorraine quiche lorraine
502 lorraines quiche lorraines;quiches lorraines
503 lorum in de lorum
504 loss total loss
505 Luciaan
506 Luciaanse
507 Lumpur Kuala Lumpur
508 macabre danse macabre
509 macabres danses macabres
510 machina deus ex machina
511 machte bij machte
512 Magallanes Straat Magallanes
513 Magellaan
514 magnifici rectores magnifici
515 magnificus rector magnificus
516 Mahatma
517 mais- mais- en sojatreinen;mais- en zonnebloemvelden
518 Majesteits
519 male ten tweeden male;ten anderen male
520 managing managing director
521 Mandeb Bab el Mandeb
522 Mariaatje
523 Marinees
524 Martaban Golf van Martaban
525 mary bloody mary
526 masqué bal masqué
527 masqués bal masqués;bals masqués
528 masse en masse
529 Matapan
530 mates running mates
531 Mazurisch
532 mea mea culpa
533 Meern De Meern
534 meeuwen- meeuwen- en scholeksterpopulaties;meeuwen- en papegaaiduikersrotsen
535 meiherdenking 4 meiherdenking
536 meiviering 5 meiviering
537 mellitus diabetes mellitus
538 Melville
539 memoriam in memoriam
540 Merenplateau Mazurisch Merenplateau;Pommers Merenplateau
541 mer fruits de mer;mer à boire
542 metal heavy metal
543 meterbad 25 meterbad;50 meterbad
544 meterbaden 25 meterbaden;50 meterbaden
545 metergebied 16 metergebied
546 M.G. Annie M.G. Schmidt
547 mia mamma mia
548 Midden-Atlantische
549 Midden-Siberisch
550 middle middle class
551 mina dolle mina
552 mina's dolle mina's
553 Minhstad Ho Chi Minhstad
554 minute last minute
555 Miquelon Saint-Pierre en Miquelon
556 mitswa bar mitswa
557 mobilia perpetua mobilia
558 modo grosso modo
559 Monroe
560 Mont
561 Moresby
562 morgana fata morgana
563 mortale salto mortale
564 mortale's salto mortale's
565 mortales salto mortales
566 mortis rigor mortis
567 moskee- moskee- dan wel kerkbezoek;moskee- of synagogebezoek
568 Moslim-Kroatische
569 motion slow motion
570 Mount
571 mouton pied de mouton;pieds de mouton
572 muggen- muggen- en vliegenlarven;muggen- en wespensteken
573 mum in een mum van tijd
574 music minimal music
575 mutandis mutatis mutandis
576 mutatis mutatis mutandis
577 nachte bij nachte
578 najaars- najaars- en wintercollectie;najaars- en voorjaarscursussen
579 nano- nano-, bio- en informatietechnologieën;micro-, nano- en opto-elektronische
580 napoletano mastino napoletano
581 napoletano's mastino napoletano's
582 Nares Straat Nares
583 nascendi in statu nascendi
584 natalis dies natalis
585 natura in natura
586 nature van nature
587 neo- neo-liberalisme;neo-nazi's
588 ne rien ne va plus;ne bis in idem
589 nervosa anorexia nervosa;boulimia nervosa
590 Neusiedler Neusiedler Meer
591 neus- neus- en keelholte;keel-, neus- en oorarts
592 Nevis Saint Kitts en Nevis
593 New New Hampshire
594 niçoise salade niçoise;Niçoise
595 niet- niet- of laaggeletterd;niet- en spijkergaten
596 Nieuwkruisland
597 Nieuw-Siberische
598 noir film noir;café noir
599 noirs films noirs;cafés noirs
600 nolens nolens volens
601 nom nom de plume;nom de guerre
602 no no cure;no pay
603 Noord-Equatoriale
604 Noord-Tiroler
605 nouveau art nouveau
606 nouveaux nouveaux riches
607 novels graphic novels;gothic novels
608 Nubische
609 numerus numerus fixus;numerus clausus
610 nunc hic et nunc;ex nunc
611 Obeid
612 oblige noblesse oblige
613 obscura camera obscura
614 Ochotsk Zee van Ochotsk
615 ochtends 's ochtends
616 ofte nooit ofte nimmer
617 Oldenbarnevelt
618 om- om- of bijscholing
619 onderscheids des onderscheids
620 onheils plaats des onheils;dag des onheils
621 onov. onov. ww.
622 onpas te pas en te onpas
623 onrechte ten onrechte
624 onzentwege van onzentwege
625 Oost-Pacifische
626 Oost-Siberische
627 opinio communis opinio
628 opinion second opinion
629 opleidings- opleidings- en erkenningseisen;opleidings- en examenreglement
630 -opslag CO2-opslag;CO₂-opslag
631 oratio oratio pro domo
632 orbi urbi et orbi
633 Orwell
634 Ossip
635 Otranto
636 ouderschaps- ouderschaps- en levensloopverlof;ouderschaps- en zorgverlof
637 Oud- Oud- en Nieuwjaar;Oud- en Nieuw-Vossemeer
638 oudsher van oudsher
639 outs ins en outs
640 overvloede ten overvloede
641 Pacifisch-Antarctisch
642 paleize ten paleize
643 Palk
644 pampus voor pampus
645 pangang babi pangang;ajam pangang
646 paribus ceteris paribus
647 paso paso doble;paso dobles
648 passionnel crime passionnel
649 patates patates frites
650 Paulowna
651 pay no pay
652 peau peau de pêche
653 pêche pêche melba;peau de pêche
654 pectoris angina pectoris
655 Pelagische
656 Penninisch
657 perdue cire perdue
658 perpetua perpetua mobilia
659 perpetuum perpetuum mobile
660 perse ter perse
661 persona persona non grata
662 personeels- personeels- en organisatiebeleid;personeels- en cliëntgegevens
663 Petalia
664 petalia trapezites petalia
665 petit petit restaurant
666 petto in petto
667 pico pico bello
668 pièce pièce de résistance
669 Pieterszoon
670 Pilatus
671 pina pina colada
672 Pinte
673 plaatse ter plaatse
674 places of all places
675 plekke ter plekke
676 plume nom de plume
677 pocus hocus pocus
678 Poe
679 poly- poly- en perfluoralkylstoffen
680 Pommers
681 Pommerse
682 Pontisch
683 Pontius
684 poste poste restante
685 posteriori a posteriori
686 praecox dementia praecox
687 -principes basisbegrippen en -principes
688 prix grand prix
689 profile low profile
690 prophecy selffulfilling prophecy
691 protera hystera protera
692 proterons hysteron proterons
693 provocatus abortus provocatus
694 pseudologia pseudologia fantastica
695 Puerto
696 qualitate qualitate qua
697 Quichot Don Quichot
698 quid quid pro quo
699 quo a quo;casu quo;quid pro quo;quo vadis;status quo;status quo ante bellum
700 -quota visbestanden en -quota
701 Rading Hollandsche Rading
702 rames nasi rames
703 rarae rarae aves
704 rasa tabula rasa
705 reading close reading
706 reality virtual reality
707 rechter- rechter- en linkerkant;rechter- als linkerzijde
708 rectores rectores magnifici
709 reductio reductio ad absurdum
710 relations public relations
711 remedial remedial teacher;remedial teaching
712 remme ad remme
713 remote remote sensing
714 restante poste restante
715 retriever golden retriever
716 retrievers golden retrievers
717 Rhode Rhode Island
718 Ricaanse
719 Ricanen
720 riches nouveaux riches
721 -richtlijn
722 -richtlijnen beveiligingsinstructies en -richtlijnen;ontwerpfilosofieën en -richtlijnen
723 rider easy rider
724 riders easy riders
725 rigor rigor mortis
726 Rijswijck van Rijswijck
727 Road Abbey Road
728 roaring roaring twenties
729 rocks on the rocks
730 romeinse romeinse kaars;Romeinse
731 roodlicht- roodlicht- en snelheidscamera's;roodlicht- en gordelcontroles
732 running running gag;running gags;running mate;running mates
733 rupsen- rupsen- of slakkenplaag;rupsen- en wespenplaag
734 Rushdie
735 russells jack russells
736 Saksen-Coburg
737 sang pur sang
738 Sankt
739 sans sans rancune
740 Sao Sao Tomé en Principe
741 sapiens homo sapiens
742 sativa cannabis sativa
743 savant idiot savant
744 Savoyse
745 schapen- schapen- en geitenvlees;schapen- en geitensector
746 schrijve zegge en schrijve
747 second second opinion
748 secret top secret
749 seilde seilde ab
750 selecta capita selecta
751 sensing remote sensing
752 se per se;SE;s.e.
753 septemberaanslagen 11 septemberaanslagen
754 septic septic tank
755 Shaba
756 shui feng shui
757 sick sick joke;sick jokes
758 Sideros
759 Sidra
760 s'il s'il vous plaît
761 Simbel Aboe Simbel
762 sine conditio sine qua non
763 Sint-Vincent
764 slam grand slam
765 slams grand slams
766 slow slow motion
767 smith granny smith
768 smiths granny smiths
769 Soenda
770 Soenion Kaap Soenion
771 Soeren Hoog Soeren;Laag-Soeren
772 soleil coupe soleil
773 sotto sotto voce
774 Soubirous Bernadette Soubirous
775 source open source
776 speaking on speaking terms
777 spe in spe
778 spina spina bifida
779 's 's avonds;'s anderendaags
780 Staatsblad Belgisch Staatsblad;staatsblad
781 stade te stade
782 stadhuize ten stadhuize
783 stadswege van stadswege
784 Stallman
785 stante stante pede
786 statu in statu nascendi
787 Stede Stede Broec
788 Steinerschool steinerschool;Rudolf Steinerschool
789 sterling pond sterling
790 stikkens tot stikkens toe
791 Stones Rolling Stones
792 story's short story's
793 Strauss
794 stroomaf- stroomaf- en stroomopwaarts;stroomaf- en windopwaarts
795 stroomop- stroomop- en stroomafwaarts
796 studium studium generale
797 sudden sudden death
798 sui sui generis
799 suprême moment suprême
800 tabula tabula rasa
801 tai tai chi
802 tale tale Kanaäns
803 Tasman Abel Tasman
804 teacher remedial teacher
805 teachers remedial teachers
806 teaching remedial teaching
807 teeg teeg aan
808 temporeetje ex temporeetje
809 tempore ex tempore
810 Terblijt Berg en Terblijt
811 terminis contradictio in terminis
812 terrible enfant terrible
813 terribles enfants terribles
814 terzelfder terzelfder tijd
815 the benefit of the doubt
816 thinking wishful thinking
817 thomas ongelovige thomas
818 tijds de tand des tijds
819 Tobago
820 toilette eau de toilette
821 toiletteje eau de toiletteje
822 tolerance zero tolerance
823 Tolo
824 Tomini
825 tonele ten tonele
826 tongue tongue in cheek
827 Torvalds
828 total total loss
829 Town George Town;Road Town
830 toy dinky toy
831 toys dinky toys
832 -transport afvalverwerking en -transport;elektriciteitsproductie en -transport
833 Transsylvanische
834 Trasimeense Trasimeense Meer
835 Trudoplein Sint Trudoplein
836 Tsjang Tsjang Kai-Sjek
837 turkey cold turkey
838 Turks- Turks- en Caicoseilanden;Turks- en Arabischtalige
839 Twain
840 tweeliter- tweeliter- en elektromotor;tweeliter- en drieliterflessen
841 twenties roaring twenties
842 Tyrreense
843 universalis homo universalis
844 urbi urbi et orbi
845 uurjournaal 6 uurjournaal;8 uurjournaal;10 uurjournaal
846 uursbegeleiding 24 uursbegeleiding
847 uursbereikbaarheid 24 uursbereikbaarheid
848 uursbeschikbaarheid 24 uursbeschikbaarheid
849 uursbewaking 24 uursbewaking
850 uursconsultatie 24 uursconsultatie
851 uursdienst 24 uursdienst
852 uursdiensten 24 uursdiensten
853 uursdrempel 24 uursdrempel
854 uurseconomie 24 uurseconomie
855 uursgemiddelde 24 uursgemiddelde
856 uurshulp 24 uurshulp
857 uursopvang 24 uursopvang
858 uursprocedure 24 uursprocedure
859 uursprocedures 24 uursprocedures
860 uursservice 24 uursservice
861 uursstaking 24 uursstaking
862 uursstructuren 24 uursstructuren
863 uursstructuur 24 uursstructuur
864 uursverplichting 24 uursverplichting
865 uursverplichtingen 24 uursverplichtingen
866 uursvoorziening 24 uursvoorziening
867 uursvoorzieningen 24 uursvoorzieningen
868 uurszorg 24 uurszorg
869 uwent te uwent
870 Uyl
871 Vaartse Vaartse Rijn
872 Valley Silicon Valley
873 Ventoux
874 venture joint venture
875 ventures joint ventures
876 Verdebekken Kaap Verdebekken
877 Vereenigde
878 -verenigingen kunststichtingen en -verenigingen
879 -vereniging kunststichting of -vereniging
880 Verlatinghe Plakkaat van Verlatinghe
881 versa vice versa
882 verstande met dien verstande
883 vervelens tot vervelens toe
884 verve met verve
885 vervoers- vervoers- en accommodatiekosten;vervoers- en parkeermanagement
886 vervolge ten vervolge
887 vice vice versa
888 Victoriawoestijn Grote Victoriawoestijn
889 vie au de vie;aux de vie
890 viola viola da gamba
891 virtual virtual reality
892 vista a prima vista
893 vita attestatie de vita
894 vitae curriculum vitae
895 Vith Sankt Vith
896 vivant bon vivant
897 vivants bon vivants
898 vivo in vivo
899 Vleuten-De Vleuten-De Meern
900 vleze naar den vleze
901 voce sotto voce
902 voege in dier voege
903 Voigtstraat Dr. Voigtstraat
904 volens nolens volens
905 volente Deo volente
906 Volkske Ons Volkske;'t Volkske
907 voorbedachten met voorbedachten rade
908 voordele ten voordele van
909 voor- voor- en nadelen
910 voto ex voto
911 votootje ex votootje
912 vredesnaam in vredesnaam
913 vu déjà vu
914 Vuursche Lage Vuursche
915 vuutje déjà vuutje
916 -wallen geluidsschermen en -wallen;stadsmuren en -wallen
917 walvissen- walvissen- en dolfijnentours;walvissen- en robbenvangst
918 -wapens stadsnamen en -wapens;speelgoedauto's en -wapens
919 -water gemeentegrond of -water;afvalenergie en -water
920 -websites thematijdschriften en -websites;ondernemersmagazines en -websites
921 -website thematijdschrift en -website;ondernemersmagazine en -website
922 -weefsels donororganen en -weefsels
923 -weekend sportweek of -weekend
924 weerd wie het kleine niet eert is het grote niet weerd
925 -wegen werkterreinen en -wegen;asbestpaden en -wegen
926 -weg voorrangskruising of -weg;trekvaart en -weg
927 -welzijn diergezondheid en -welzijn;dierenrechten en -welzijn
928 -wensen leerdoelen en -wensen;huisvestingseisen en -wensen
929 -werken samenleven en -werken;meedenken en -werken
930 -werkgroep themalocatie en -werkgroep
931 -werking medeweten en -werking
932 -werk levensvisie en -werk;welzijnsbeleid en -werk
933 -werknemers overheidswerkgevers en -werknemers
934 West-Siberisch
935 wetens willens en wetens
936 -wet gemeenteklimaat en -wet;belastingtarief en -wet
937 -wetgeving gewasbeschermingstechnieken en -wetgeving;boekhoudprincipes en -wetgeving
938 -wetten belastingtarieven en -wetten;milieumaatregelen en -wetten
939 -wezen oorlogsweduwen en -wezen;fietswrakken en -wezen
940 W.F. W.F. Hermans
941 W.G. W.G. van de Hulst
942 wide world wide web
943 wiedes nogal wiedes
944 wiedeweerga als de wiedeweerga
945 wieven witte wieven
946 wijfjes- wijfjes- en mannetjesvaren;wijfjes- en mannetjesgroepen
947 -wijze bereidingstijd en -wijze;levensovertuiging of -wijze
948 wille ter wille van
949 willies stille willies
950 willie stille willie
951 wils voor elk wat wils
952 Windward Windward Passage
953 -winkels souterrainwoningen en -winkels;websites en -winkels
954 -winst topomzet en -winst;kwartaalomzet en -winst
955 wishful wishful thinking
956 Wolden De Wolden
957 Wolynisch-Podolisch Wolynisch-Podolisch Plateau
958 -woning nieuwbouwappartement of -woning;studentenkamer of -woning
959 Xiaoping Deng Xiaoping
960 XV-meubels Louis XV-meubels
961 Yorker New Yorker
962 Yorkse New Yorkse
963 Zadkine
964 zake ter zake;in zake
965 Zedong
966 zeehonden- zeehonden- en vogelopvang;zeehonden- en walvispoep
967 Zembla
968 zend- zend- of ontvanginrichtingen;zend- en ontvangststations
969 ziele ter ziele
970 zijd wijd en zijd
971 Zilk De Zilk
972 zonne- wind-, zonne- en kernenergie;zonne- en muziekgod
973 -zorg verslavingspreventie en -zorg
974 Zuid-Australisch
975 Zuid-Equatoriale
976 Zuid-Pacifisch
977 Zuid-Pacifische
978 Zuid-Schots
979 Zuid-West Zuid-West Nederland
980 -zuivering watervoorziening en -zuivering;luchtafzuiging en -zuivering
981 -zussen halfbroers en -zussen;stiefbroers en -zussen
982 -zusters gildebroeders en -zusters
983 -zus tweelingbroer of -zus
984 Zwaluwe Lage Zwaluwe;Hooge Zwaluwe
985 zwanen- zwanen- of ganzenveren;zwanen- en schildersmossels
986 zwang in zwang
987 zwartwitte zwartwitte franjeapen;zwartwitte veldridderzwam;zwartwitte snuitwapenvlieg

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,901 @@
aaneen
aanstonds
abusievelijk
achter
achteraan
achteraf
achtereen
achtereenvolgens
achterelkaar
achteren
achterheen
achterin
achterlangs
achterna
achterom
achterop
achterover
achterstevoren
achteruit
achterwege
achtmaal
adagio
af
al
aldaar
aldus
algauw
alhier
alias
allang
alledag
allegretto
allegro
allemaal
allengs
allerwegen
allesbehalve
alleszins
allicht
allrisk
almaar
alom
alras
alreeds
alsmaar
alsnog
althans
altijd
alvast
alwaar
alweer
alzo
ambtshalve
amper
andante
andermaal
andersom
anderszins
anderzijds
anno
antikraak
barrevoets
begrijpelijkerwijs
belcanto
beneden
bergaf
bergop
beroepshalve
beurtelings
bij
bijeen
bijgevolg
bijkans
bijna
bijster
bijtijds
bijvoorbeeld
bijwijlen
binnen
binnenboord
binnengaats
binnenin
binnenkort
binnenshuis
binnenskamers
binnenstebuiten
bis
blindelings
blootsvoets
blootvoets
boudweg
boven
bovenaan
bovenaf
bovenal
bovendien
bovenin
bovenop
bovenuit
breeduit
bruto
buiten
buitenaf
buitenboord
buitengaats
buitenom
buitenshuis
buitenspel
circa
crescendo
daar
daaraan
daarachter
daaraf
daarbeneden
daarbij
daarbinnen
daarboven
daarbovenop
daarbovenuit
daarbuiten
daardoor
daardoorheen
daarenboven
daarentegen
daareven
daarginder
daarginds
daarheen
daarin
daarjuist
daarlangs
daarmee
daarna
daarnaar
daarnaartoe
daarnaast
daarnet
daarnevens
daarom
daaromheen
daaromtrent
daaronder
daarop
daarover
daaroverheen
daarrond
daarstraks
daartegen
daartegenaan
daartegenover
daartoe
daartussen
daartussendoor
daartussenin
daaruit
daarvan
daarvandaan
daarvoor
daarzonder
dan
dato
deels
derdeklas
derhalve
dermate
desalniettemin
desgevallend
desgevraagd
desgewenst
desniettemin
desnoods
desondanks
destijds
dienaangaande
dientengevolge
dikwijls
dinsdag
dinsdagavond
dinsdagmiddag
dinsdagmorgen
dinsdagnacht
dinsdagochtend
ditmaal
dito
donderdag
donderdagavond
donderdagmiddag
donderdagmorgen
donderdagnacht
donderdagochtend
dooreen
doorgaans
doorheen
doormidden
dra
driehoog
driemaal
driewerf
dubbelop
duizendmaal
dus
dusver
echter
edoch
eenmaal
eens
eensklaps
eenvoudigweg
eer
eerdaags
eergisteren
eerlang
eerlijkheidshalve
eerst
eerstdaags
eertijds
eindelijk
elders
elfmaal
enerzijds
enfin
enigermate
enigszins
enzovoort
enzovoorts
er
eraan
erachter
erachteraan
eraf
erbij
erbinnen
erboven
erbovenop
erbuiten
erdoor
erdoorheen
ergens
ergo
erheen
erin
erlangs
ermee
erna
ernaar
ernaartoe
ernaast
erom
eromheen
eronder
eronderdoor
eronderuit
erop
eropuit
erover
eroverheen
ertegen
ertegenaan
ertegenop
ertegenover
ertoe
ertussen
ertussendoor
ertussenin
ertussenuit
eruit
ervan
ervanaf
ervandaan
ervandoor
ervoor
eveneens
evengoed
evenmin
eventjes
evenwel
evenzeer
evenzo
evenzogoed
excelsior
exit
fiftyfifty
foetsie
gaandeweg
gaarne
geeneens
geenszins
gelijkelijk
gelijkertijd
gelukkiglijk
gemakkelijkheidshalve
gemakshalve
gemeenlijk
genoeg
gewoonweg
ginder
ginter
gister
gisteravond
gisteren
gisterenavond
gisterenmiddag
gisterenmorgen
gisterennacht
gisterennamiddag
gisterenochtend
gistermiddag
gistermorgen
gisternacht
gisterochtend
glissando
goedbeschouwd
goeddeels
goedschiks
grofweg
grotendeels
haast
halfacht
halfdrie
halfeen
halfelf
halfnegen
halfom
halfstok
halftien
halftwaalf
halftwee
halfvier
halfvijf
halfweg
halfzes
halfzeven
halsoverkop
halsreikend
halverwege
handenvol
hardop
hartstikke
heden
hedenavond
hedenmiddag
hedenmorgen
hedenochtend
heen
helaas
helemaal
her
hier
hieraan
hierachter
hieraf
hierbeneden
hierbij
hierbinnen
hierboven
hierbuiten
hierdoor
hierheen
hierin
hierlangs
hiermee
hierna
hiernaar
hiernaartoe
hiernaast
hiernevens
hierom
hieromheen
hieromtrent
hieronder
hierop
hierover
hiertegen
hiertegenover
hiertoe
hiertussen
hieruit
hiervan
hiervandaan
hiervoor
hoe
hoeveel
hoever
hoeverre
hoezeer
hogelijk
hogerop
holderdebolder
honderdmaal
honderduit
hoofdzakelijk
hooglijk
hoogst
hoogstens
hooguit
hopelijk
hunnentwege
hutjemutje
ibidem
idealiter
idem
ietsje
ietsjes
ietwat
ijlings
immer
immers
incognito
inderdaad
inderhaast
indertijd
ineen
ineens
infra
inmiddels
insgelijks
integendeel
intussen
inzonderheid
item
kortbij
kortelings
kortgeleden
kortheidshalve
kortom
kortweg
krek
kriskras
kwansuis
laatstelijk
lala
langs
langszij
languit
langzaamaan
langzamerhand
lento
lichtelijk
lichtjes
liefst
liever
linksachter
linksaf
linksboven
linksom
linksonder
loco
logischerwijs
logischerwijze
lou
luidop
maandag
maandagavond
maandagmiddag
maandagmorgen
maandagnacht
maandagnamiddag
maandagochtend
maandagvoormiddag
maar
maximum
mede
medio
mee
meer
meermaals
meermalen
meest
meestal
meestentijds
meesttijds
menigmaal
merendeels
meteen
mettertijd
metterwoon
midden
middendoor
middenin
mijnentwege
mijnerzijds
minimum
minstens
minus
misschien
mogelijkerwijs
mogelijkerwijze
mondjesmaat
mordicus
morgen
morgenavond
morgenmiddag
morgennamiddag
morgenochtend
morgenvoormiddag
morgenvroeg
naartoe
naderbij
naderhand
nadien
nagenoeg
namelijk
nauwelijks
neder
neer
negenmaal
nergens
netjes
netto
niemendal
niet
niets
niettemin
nimmer
nochtans
node
nog
nogal
nogmaals
noodzakelijkerwijs
noodzakelijkerwijze
nooit
noord
noordnoordoost
noordoost
noordwest
normalerwijs
normalerwijze
normaliter
nou
nu
offline
offside
omheen
omhoog
omlaag
omstreeks
omtrent
omver
omzeggens
onafgezien
onderaan
onderaf
onderdoor
ondereen
onderen
onderhand
onderin
onderlangs
onderop
ondersteboven
ondertussen
onderuit
onderweg
onderwijl
ongeacht
ongetwijfeld
ongeveer
onlangs
onzentwege
onzerzijds
ooit
ook
oost
opeen
opeens
opnieuw
opzij
out
over
overal
overboord
overdag
overeen
overeind
overhand
overheen
overhoop
overigens
overlangs
overmorgen
overnieuw
overstag
overzee
pakweg
pal
pardoes
pari
pas
pasgeleden
pertang
piano
pleite
plotsklaps
plus
plusminus
presto
primo
qua
quasi
quinto
recentelijk
rechtaan
rechtdoor
rechtop
rechtover
rechtovereind
rechtsachter
rechtsaf
rechtsboven
rechtsom
rechtsomkeer
rechtsomkeert
rechtsonder
rechtuit
recto
redelijkerwijs
reeds
reikhalzend
retour
reuze
rondom
ronduit
ruimschoots
ruwweg
saam
saampjes
samen
samsam
sans
schaak
schier
schrap
sedertdien
seffens
sic
simpelweg
sindsdien
slechts
smalletjes
soms
somtijds
sowieso
spelenderwijs
staccato
stapsgewijze
starnakel
steeds
stik
stilaan
stilletjes
straal
straks
subiet
tegelijk
tegelijkertijd
tegemoet
tegenaan
tegeneen
tegenop
tegenover
telkenmaal
telkenmale
telkens
temeer
tenminste
tenslotte
terdege
ternauwernood
terstond
terug
tevens
tevergeefs
tevoorschijn
tevoren
tezamen
tezelfdertijd
thans
thuis
tienmaal
tijdenlang
toch
toe
toen
toentertijd
trouwens
tussenbeide
tussendoor
tussenin
tussenuit
tutti
tweedeklas
tweehoog
tweemaal
überhaupt
uiteen
uitentreuren
uiteraard
uitermate
ultimo
ultra
unisono
unverfroren
uptempo
uwentwege
uwerzijds
vaaglijk
vaagweg
vagelijk
valselijk
vanachter
vanaf
vanavond
vanbinnen
vanboven
vanbuiten
vandaag
vandaan
vandaar
vaneen
vanhier
vanjewelste
vanmiddag
vanmorgen
vannacht
vanochtend
vanonder
vanouds
vanwaar
vanwaaruit
vanzelf
veel
veelal
veeleer
veraf
verderaf
verderop
verkeerdelijk
verre
verreweg
verso
veruit
vervolgens
vetjes
viermaal
vijfmaal
vivo
vlakaf
vlakbij
voetstoots
volledigheidshalve
volop
voluit
voor
vooraan
vooraf
voorafgaandelijk
vooral
vooralsnog
voorbij
voordien
vooreerst
voorgoed
voorheen
voorin
voorlangs
voornamelijk
voornemens
voorop
voorover
voorshands
voort
voortaan
voorts
vooruit
voorwaar
voorzeker
voren
vort
vrijdag
vrijdagavond
vrijdagmiddag
vrijdagmorgen
vrijdagnacht
vrijdagochtend
vrijelijk
vrijuit
vrijwel
waar
waaraan
waarachter
waaraf
waarbij
waarbinnen
waarboven
waarbuiten
waardoor
waardoorheen
waarheen
waarin
waarlangs
waarmee
waarna
waarnaar
waarnaartoe
waarnaast
waarom
waaromheen
waaromtrent
waaronder
waarop
waarover
waarrond
waartegen
waartegenover
waartoe
waartussen
waaruit
waarvan
waarvandaan
waarvoor
waarzonder
wanneer
warempel
wederom
weer
weeral
weerom
weg
weinig
wel
weldra
weleens
weleer
welgeteld
welhaast
weliswaar
wellicht
welteverstaan
welzeker
west
wiedes
wijselijk
willens
woensdag
woensdagavond
woensdagmiddag
woensdagmorgen
woensdagnacht
woensdagochtend
wonderwel
zachtjes
zachtjesaan
zaterdag
zaterdagavond
zaterdagmiddag
zaterdagmorgen
zaterdagnacht
zaterdagochtend
zaterdagsavonds
zeer
zegge
zekers
zelden
zelfs
zesmaal
zevenmaal
zienderogen
zigzag
zijnentwege
zo
zoal
zodoende
zoek
zoetjesaan
zogezegd
zohaast
zojuist
zolang
zomaar
zondag
zondagavond
zondagmiddag
zondagmorgen
zondagnacht
zondagochtend
zondagsavonds
zonet
zopas
zover
zoverre
zowaar
zowat
zozeer
zozo
zuid
zuidoost
zuidwest
zuidzuidwest
zus

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,416 @@
{
"been": [
"beenderen",
"benen"
],
"blad": [
"bladeren",
"bladen",
"blaren"
],
"ei": "eieren",
"gelid": "gelederen",
"gemoed": "gemoederen",
"goed": "goederen",
"hoen": "hoenderen",
"kalf": "kalveren",
"kind": "kinderen",
"kleed": [
"kleren",
"klederen",
"kleden"
],
"lam": "lammeren",
"lied": "liederen",
"rad": "raderen",
"rund": "runderen",
"volk": [
"volken",
"volkeren"
],
"koe": "koeien",
"vlo": "vlooien",
"leerrede": [
"leerredenen",
"leerredes"
],
"lende": [
"lendenen",
"lenden"
],
"kleinood": [
"kleinoden",
"kleinodiën"
],
"sieraad": [
"sieraden",
"sieradiën"
],
"epos": [
"epen",
"epossen"
],
"genius": "geniën",
"aanbod": "aanbiedingen",
"beleg": [
"beleggingen",
"belegeringen"
],
"dank": "dankbetuigingen",
"doel": [
"doeleinden",
"doelen"
],
"gedrag": "gedragingen",
"genot": "genietingen",
"lof": [
"loftuitingen",
"lofbetuigingen"
],
"onderzoek": [
"onderzoekingen",
"onderzoeken"
],
"raad": "raadgevingen",
"rede": [
"redevoeringen",
"redes"
],
"fotograaf": "fotografen",
"paragraaf": "paragrafen",
"telegraaf": "telegrafen",
"burggraaf": "burggraven",
"loopgraaf": "loopgraven",
"filosoof": "filosofen",
"theosoof": "theosofen",
"elf": "elfen",
"paraaf": "parafen",
"stad": "steden",
"bad": "baden",
"bedrag": "bedragen",
"dag": "dagen",
"dak": "daken",
"dal": "dalen",
"gat": "gaten",
"gelag": "gelagen",
"glas": "glazen",
"graf": "graven",
"pad": [
"paden",
"padden"
],
"slag": "slagen",
"staf": [
"staffen",
"staven"
],
"vat": "vaten",
"verdrag": "verdragen",
"handvat": [
"handvatten",
"handvaten"
],
"bevel": "bevelen",
"gebed": "gebeden",
"gebrek": "gebreken",
"gen": "genen",
"spel": "spelen",
"tred": "treden",
"weg": "wegen",
"gemet": "gemeten",
"bijzonderheid": "bijzonderheden",
"kleinigheid": "kleinigheden",
"moeilijkheid": "moeilijkheden",
"lid": "leden",
"rif": "reven",
"schip": "schepen",
"smid": "smeden",
"spit": [
"spitten",
"speten"
],
"alcohol": "alcoholen",
"elektron": "elektronen",
"neutron": "neutronen",
"proton": "protonen",
"gebod": "geboden",
"god": "goden",
"hertog": "hertogen",
"hof": "hoven",
"hol": "holen",
"kot": [
"kotten",
"koten"
],
"lot": "loten",
"oorlog": "oorlogen",
"schot": "schoten",
"slot": "sloten",
"verbod": "verboden",
"verlof": "verloven",
"kruis": "kruizen",
"pers": "persen",
"balans": "balansen",
"concours": "concoursen",
"dans": "dansen",
"diocees": "diocesen",
"eis": "eisen",
"forens": [
"forensen",
"forenzen"
],
"impuls": "impulsen",
"kaars": "kaarsen",
"kans": "kansen",
"kers": "kersen",
"kikvors": "kikvorsen",
"koers": "koersen",
"kous": "kousen",
"krans": "kransen",
"lans": "lansen",
"mars": "marsen",
"mens": "mensen",
"ons": [
"onsen",
"onzen"
],
"paus": "pausen",
"plons": [
"plonsen",
"plonzen"
],
"pols": "polsen",
"prins": "prinsen",
"pruis": "pruisen",
"saus": [
"sausen",
"sauzen"
],
"schans": "schansen",
"spons": [
"sponzen",
"sponsen"
],
"stimulans": "stimulansen",
"tendens": "tendensen",
"trans": "transen",
"wals": "walsen",
"wens": "wensen",
"zeis": "zeisen",
"einde": [
"eindes",
"einden"
],
"symbool": "symbolen",
"knop": [
"knopen",
"knoppen"
],
"edelman": "edellieden",
"krijgsman": "krijgslieden",
"landman": "landlieden",
"raadsman": "raadslieden",
"weidman": "weidlieden",
"akkerman": [
"akkerlui",
"akkerlieden"
],
"ambachtsman": [
"ambachtslui",
"ambachtslieden"
],
"baggerman": [
"baggerlui",
"baggerlieden"
],
"bootsman": [
"bootslui",
"bootslieden"
],
"buitenman": [
"buitenlui",
"buitenlieden"
],
"burgerman": [
"burgerlui",
"burgerlieden"
],
"buurman": [
"buurlui",
"buurlieden"
],
"handelsman": [
"handelslui",
"handelslieden"
],
"handwerksman": [
"handwerkslui",
"handwerkslieden"
],
"kooiman": [
"kooilui",
"kooilieden"
],
"koopman": [
"kooplui",
"kooplieden"
],
"scheepstimmerman": [
"scheepstimmerlui",
"scheepstimmerlieden"
],
"timmerman": [
"timmerlui",
"timmerlieden"
],
"schieman": [
"schielui",
"schielieden"
],
"sjouwerman": [
"sjouwerlui",
"sjouwerlieden"
],
"speelman": [
"speellui",
"speellieden"
],
"stadswerkman": [
"stadswerklui",
"stadswerklieden"
],
"werkman": [
"werklui",
"werklieden"
],
"stuurman": [
"stuurlui",
"stuurlieden"
],
"tuinman": [
"tuinlui",
"tuinlieden"
],
"vakman": [
"vakmannen",
"vaklieden",
"vaklui"
],
"varensman": [
"varenslui",
"varenslieden"
],
"veerman": [
"veerlui",
"veerlieden"
],
"voerman": [
"voerlui",
"voerlieden"
],
"zakenman": [
"zakenlui",
"zakenlieden"
],
"zeeman": [
"zeelui",
"zeelieden"
],
"zegsman": [
"zegslui",
"zegslieden"
],
"bewindsman": [
"bewindsmannen",
"bewindslieden"
],
"bruggeman": [
"bruggemannen",
"bruggelieden"
],
"cameraman": [
"cameramannen",
"cameralieden"
],
"hoofdman": [
"hoofdmannen",
"hoofdlieden"
],
"leidsman": [
"leidsmannen",
"leidslieden"
],
"ombudsman": [
"ombudsmannen",
"ombudslieden"
],
"staatsman": [
"staatsmannen",
"staatslieden"
],
"vertrouwensman": [
"vertrouwensmannen",
"vertrouwenslieden"
],
"verzetsman": [
"verzetsmannen",
"verzetslieden"
],
"voorman": [
"voormannen",
"voorlieden"
],
"brandweerman": [
"brandweermannen",
"brandweerlieden",
"brandweerlui"
],
"kantoorman": [
"kantoormannen",
"kantoorlieden",
"kantoorlui"
],
"opperman": [
"oppermannen",
"opperlieden",
"opperlui"
],
"sportsman": [
"sportsmannen",
"sportslieden",
"sportslui"
],
"barman": "barmannen",
"bosjesman": "bosjesmannen",
"boeman": "boemannen",
"dronkeman": "dronkemannen",
"ijscoman": "ijscomannen",
"jongeman": "jongemannen",
"kiesman": "kiesmannen",
"kikvorsman": "kikvorsmannen",
"krantenman": "krantenmannen",
"leenman": "leenmannen",
"medicijnman": "medicijnmannen",
"melkman": "melkmannen",
"muzelman": "muzelmannen",
"Noorman": "Noormannen",
"olieman": "oliemannen",
"onderwijsman": "onderwijsmannen",
"orgelman": "orgelmannen",
"partijman": "partijmannen",
"politieman": "politiemannen",
"sandwichman": "sandwichmannen",
"schillenman": "schillenmannen",
"spoorwegman": "spoorwegmannen",
"stroman": "stromannen",
"stuntman": "stuntmannen",
"vakbondsman": "vakbondsmannen",
"vuilnisman": "vuilnismannen",
"weerman": "weermannen",
"wetenschapsman": "wetenschapsmannen",
"wildeman": "wildemannen"
}

1
vocab/plurals/nouns.json Normal file

File diff suppressed because one or more lines are too long

413937
vocab/wordlist.txt Normal file

File diff suppressed because it is too large Load Diff

91871
word-list.txt Normal file

File diff suppressed because it is too large Load Diff