From 79d6ca8e27be5a034783f543fcfb28a6f22c1b65 Mon Sep 17 00:00:00 2001 From: mike Date: Fri, 19 Dec 2025 13:13:38 +0100 Subject: [PATCH] initial commit --- vocab/.custom/README.md | 9 ++ vocab/.custom/fetchPlurals.js | 145 ++++++++++++++++++ vocab/.custom/irregulars.js | 235 ++++++++++++++++++++++++++++++ vocab/.custom/language-helpers.js | 45 ++++++ 4 files changed, 434 insertions(+) create mode 100644 vocab/.custom/README.md create mode 100644 vocab/.custom/fetchPlurals.js create mode 100644 vocab/.custom/irregulars.js create mode 100644 vocab/.custom/language-helpers.js diff --git a/vocab/.custom/README.md b/vocab/.custom/README.md new file mode 100644 index 0000000..b2605d2 --- /dev/null +++ b/vocab/.custom/README.md @@ -0,0 +1,9 @@ +# Meervoudsvormen op basis van woordenlijst + +Op basis van [wordlist.txt](../wordlist.txt) worden een aantal meervoudsregels getest of dit meervoud voorkomt in [nouns-meervouden.txt](../experimenteel/nouns-meervouden.txt). + + +## Genereren van nieuwe lijst + +- Voer eerst ```node irregulars.js``` uit. Dit genereert een lijst met uitzonderingen van de meeste woorden. +- Daarna: ```node fetchPlurals.js```. Dit kan een tijdje duren, maar geeft een output: ```nouns.json```. Deze lijst kan je dan gebruiken. 
\ No newline at end of file
diff --git a/vocab/.custom/fetchPlurals.js b/vocab/.custom/fetchPlurals.js
new file mode 100644
index 0000000..6e6e019
--- /dev/null
+++ b/vocab/.custom/fetchPlurals.js
@@ -0,0 +1,145 @@
+// Builds nouns.json: for every singular noun in ../wordlist.txt, looks for a
+// plural that occurs in the plurals list, starting from irregular.json.
+
+var fs = require("fs");
+let path = require("path");
+var irregularFile = "irregular.json";
+var outputFile = "nouns.json";
+var helpers = require("./language-helpers");
+
+// ASCII letters plus Latin-1 accented letters; no digits, spaces or hyphens.
+var ALPHABETIC = /^[a-zA-ZÀ-ÖØ-öø-ÿ]+$/;
+
+// Start from a clean slate; a missing output file (first run) is not an error.
+try {
+  fs.unlinkSync(path.join(__dirname, outputFile));
+  console.log("Output file deleted.");
+} catch (error) {
+  if (error.code !== "ENOENT") {
+    console.log(error);
+  }
+}
+
+// Seed the result with the irregular plurals written by irregulars.js.
+var nouns = JSON.parse(fs.readFileSync(path.join(__dirname, irregularFile), "utf8"));
+
+// One word per line; accept CRLF line endings and drop blank lines.
+var words = fs
+  .readFileSync(path.join(__dirname, "../wordlist.txt"), "utf8")
+  .split(/\r?\n/)
+  .filter(function (line) {
+    return line !== "";
+  });
+
+/**
+ * Stores `candidate` as the plural of `word` if the plurals list contains it.
+ *
+ * @param {string} word - Singular noun used as the key in `nouns`.
+ * @param {string} candidate - Plural form to look up.
+ * @returns {boolean} True when the candidate was found and stored.
+ */
+var acceptIfKnown = function (word, candidate) {
+  if (!helpers.doesPluralExists(candidate)) {
+    return false;
+  }
+  nouns[word] = candidate;
+  return true;
+};
+
+/**
+ * Builds the "-en" plurals of a stem whose final consonant may become voiced.
+ *
+ * @param {string} stem - Part of the word that precedes the final character.
+ * @param {string} lastCharacter - Final character of the word.
+ * @returns {string[]} First the f -> v form, then that form with s -> z.
+ */
+var voicedPlurals = function (stem, lastCharacter) {
+  var voicedF = lastCharacter.replace("f", "v");
+  var voicedS = voicedF.replace("s", "z");
+  return [stem + voicedF + "en", stem + voicedS + "en"];
+};
+
+/**
+ * Finds the plural of a regular noun by generating candidates from spelling
+ * rules and keeping the first one that occurs in the plurals list.
+ *
+ * Rule order: "-erik" words, words ending in a vowel, words ending in two
+ * consonants, words ending in a consonant, and finally "'s" / "s".
+ * Candidates are tried in that fixed order, so an earlier rule wins when
+ * several generated forms exist. A hit is stored in `nouns`.
+ *
+ * @param {string} word - Singular noun from the word list.
+ * @returns {boolean} True when a plural was stored; false when the word is not
+ *   alphabetic, already has an entry, or no candidate is a known plural.
+ */
+var checkPlural = function (word) {
+  // The former pattern /^[a-zA-Z]*/ matched the empty prefix of every string,
+  // so nothing was ever rejected; anchor both ends and require one letter.
+  if (!ALPHABETIC.test(word)) {
+    console.log("niet alpha");
+    return false;
+  }
+
+  // Skip words that already have an entry (irregulars or an earlier result).
+  if (Object.prototype.hasOwnProperty.call(nouns, word)) {
+    console.log("irregular");
+    return false;
+  }
+
+  var lastCharacter = word.charAt(word.length - 1);
+  var candidates = [];
+
+  if (word.match(/\w*erik\b/)) {
+    candidates.push(word + "en");
+  }
+
+  if (helpers.endsWithVowel(word)) {
+    candidates.push(word + "ën", word + "ëen", word + "en");
+  }
+
+  if (helpers.endsWithDoubleConsonant(word)) {
+    if (helpers.endswithStemloos(word)) {
+      candidates = candidates.concat(voicedPlurals(word.slice(0, -1), lastCharacter));
+    }
+    candidates.push(word + "en", word + "s");
+  }
+
+  if (helpers.endsWithConsonant(word)) {
+    // Drop the letter before the final consonant ("boom" -> "bomen"), then
+    // double the final consonant ("bal" -> "ballen").
+    candidates.push(word.slice(0, -2) + lastCharacter + "en");
+    candidates.push(word + lastCharacter + "en");
+
+    if (helpers.endswithStemloosVowel(word)) {
+      var stem = word.slice(0, -1);
+      if (helpers.endsWithSingleConsonant(stem)) {
+        stem = word.slice(0, -2);
+      }
+      candidates = candidates.concat(voicedPlurals(stem, lastCharacter));
+    }
+  }
+
+  // Last resort for every word: apostrophe-s, then plain s.
+  candidates.push(word + "'s", word + "s");
+
+  var found = candidates.some(function (candidate) {
+    return acceptIfKnown(word, candidate);
+  });
+  if (!found) {
+    console.log("niet gevonden");
+  }
+  return found;
+};
+
+words.forEach(word => {
+  console.log(word);
+  checkPlural(word);
+});
+
+console.log(nouns);
+
+let data = JSON.stringify(nouns);
+fs.writeFile(path.join(__dirname, outputFile), data, err => {
+  if (err) throw err; // fail loudly: "done" must mean the file was written
+  console.log("done");
+});
diff --git a/vocab/.custom/irregulars.js b/vocab/.custom/irregulars.js
new file mode 100644
index 0000000..ac7b732
--- /dev/null
+++ b/vocab/.custom/irregulars.js
@@ -0,0 +1,235 @@
+var fs = require("fs");
+let path = require("path");
+var outputFile = "irregular.json";
+
+var nounString,
+  nouns;
+
+// Hand-maintained irregular plurals, keyed by the singular noun.
+// A value is either one plural (string) or all accepted plurals (array).
+// Keys must be unique: a repeated key silently overrides the earlier entry.
+var irregulars = {
+  been: [
+    "beenderen", "benen"
+  ],
+  blad: [
+    "bladeren", "bladen", "blaren"
+  ],
+  ei: "eieren",
+  gelid: "gelederen",
+  gemoed: "gemoederen",
+  goed: "goederen",
+  hoen: "hoenderen",
+  kalf: "kalveren",
+  kind: "kinderen",
+  kleed: [
+    "kleren", "klederen", "kleden"
+  ],
+  lam: "lammeren",
+  lied: "liederen",
+  rad: "raderen",
+  rund: "runderen",
+  volk: [
+    "volken", "volkeren"
+  ],
+  koe: "koeien",
+  vlo: "vlooien",
+  leerrede: [
+    "leerredenen", "leerredes"
+  ],
+  lende: [
+    "lendenen", "lenden"
+  ],
+  kleinood: [
+    "kleinoden", "kleinodiën"
+  ],
+  sieraad: [
+    "sieraden", "sieradiën"
+  ],
+  epos: [
+    "epen", "epossen"
+  ],
+  genius: "geniën",
+  aanbod: "aanbiedingen",
+  beleg: [
+    "beleggingen", "belegeringen"
+  ],
+  dank: "dankbetuigingen",
+  doel: [
+    "doeleinden", "doelen"
+  ],
+  gedrag: "gedragingen",
+  genot: "genietingen",
+  lof: [
+    "loftuitingen", "lofbetuigingen"
+  ],
+  onderzoek: [
+    "onderzoekingen", "onderzoeken"
+  ],
+  raad: "raadgevingen",
+  rede: [
+    "redevoeringen", "redes"
+  ],
+  fotograaf: "fotografen",
+  paragraaf: "paragrafen",
+  telegraaf: "telegrafen",
+  burggraaf: "burggraven",
+  loopgraaf: "loopgraven",
+  filosoof: "filosofen",
+  theosoof: "theosofen",
+  elf: "elfen",
+  paraaf: "parafen",
+  stad: "steden",
+  bad: "baden",
+  bedrag: "bedragen",
+  dag: "dagen",
+  dak: "daken",
+  dal: "dalen",
+  gat: "gaten",
+  gelag: "gelagen",
+  glas: "glazen",
+  graf: "graven",
+  pad: [
+    "paden", "padden"
+  ],
+  slag: "slagen",
+  staf: [
+    "staffen", "staven"
+  ],
+  vat: "vaten",
+  verdrag: "verdragen",
+  handvat: [
+    "handvatten", "handvaten"
+  ],
+  bevel: "bevelen",
+  gebed: "gebeden",
+  gebrek: "gebreken",
+  gen: "genen",
+  spel: "spelen",
+  tred: "treden",
+  weg: "wegen",
+  gemet: "gemeten",
+  bijzonderheid: "bijzonderheden",
+  kleinigheid: "kleinigheden",
+  moeilijkheid: "moeilijkheden",
+  lid: "leden",
+  rif: "reven",
+  schip: "schepen",
+  smid: "smeden",
+  spit: [
+    "spitten", "speten"
+  ],
+  alcohol: "alcoholen",
+  elektron: "elektronen",
+  neutron: "neutronen",
+  proton: "protonen",
+  gebod: "geboden",
+  god: "goden",
+  hertog: "hertogen",
+  hof: "hoven",
+  hol: "holen",
+  kot: [
+    "kotten", "koten"
+  ],
+  lot: "loten",
+  oorlog: "oorlogen",
+  schot: "schoten",
+  slot: "sloten",
+  verbod: "verboden",
+  verlof: "verloven",
+  kruis: "kruizen",
+  pers: "persen",
+  balans: "balansen",
+  concours: "concoursen",
+  dans: "dansen",
+  diocees: "diocesen",
+  eis: "eisen",
+  forens: [
+    "forensen", "forenzen"
+  ],
+  impuls: "impulsen",
+  kaars: "kaarsen",
+  kans: "kansen",
+  kers: "kersen",
+  kikvors: "kikvorsen",
+  koers: "koersen",
+  kous: "kousen",
+  krans: "kransen",
+  lans: "lansen",
+  mars: "marsen",
+  mens: "mensen",
+  ons: [
+    "onsen", "onzen"
+  ],
+  paus: "pausen",
+  plons: [
+    "plonsen", "plonzen"
+  ],
+  pols: "polsen",
+  prins: "prinsen",
+  pruis: "pruisen",
+  saus: [
+    "sausen", "sauzen"
+  ],
+  schans: "schansen",
+  spons: [
+    "sponzen", "sponsen"
+  ],
+  stimulans: "stimulansen",
+  tendens: "tendensen",
+  trans: "transen",
+  wals: "walsen",
+  wens: "wensen",
+  zeis: "zeisen",
+  einde: ["eindes", "einden"],
+  symbool: "symbolen",
+  knop: ["knopen", "knoppen"]
+};
+
+// https://e-ans.ivdnt.org/topics/pid/ans03050402lingtopic
+nounString = "edelman krijgsman landman raadsman weidman";
+nouns = nounString.split(" ");
+
+nouns.forEach(noun => {
+  irregulars[noun] = noun.replace(/man$/, "lieden");
+});
+
+nounString = "akkerman ambachtsman baggerman bootsman buitenman burgerman buurman handelsman handwerksman kooiman koopman scheepstimmerman timmerman schieman sjouwerman speelman stadswerkman werkman stuurman tuinman varensman veerman voerman zakenman zeeman zegsman";
+nouns = nounString.split(" ");
+nouns.forEach(noun => {
+  irregulars[noun] = [
+    noun.replace(/man$/, "lui"),
+    noun.replace(/man$/, "lieden")
+  ];
+});
+
+nounString = "bewindsman bruggeman cameraman hoofdman leidsman ombudsman staatsman vertrouwensman verzetsman voorman";
+nouns = nounString.split(" ");
+nouns.forEach(noun => {
+  irregulars[noun] = [
+    noun.replace(/man$/, "mannen"),
+    noun.replace(/man$/, "lieden")
+  ];
+});
+
+nounString = "brandweerman kantoorman opperman sportsman vakman";
+nouns = nounString.split(" ");
+nouns.forEach(noun => {
+  irregulars[noun] = [
+    noun.replace(/man$/, "mannen"),
+    noun.replace(/man$/, "lieden"),
+    noun.replace(/man$/, "lui")
+  ];
+});
+
+nounString = "barman bosjesman boeman dronkeman ijscoman jongeman kiesman kikvorsman krantenman leenman medicijnman melkman muzelman Noorman olieman onderwijsman orgelman partijman politieman sandwichman schillenman spoorwegman stroman stuntman vakbondsman vuilnisman weerman wetenschapsman wildeman";
+nouns = nounString.split(" ");
+nouns.forEach(noun => {
+  irregulars[noun] = noun.replace(/man$/, "mannen");
+});
+
+let data = JSON.stringify(irregulars);
+fs.writeFile(path.join(__dirname, outputFile), data, err => {
+  if (err) throw err; // fail loudly: "done" must mean the file was written
+  console.log("done");
+});
diff --git a/vocab/.custom/language-helpers.js b/vocab/.custom/language-helpers.js
new file mode 100644
index 0000000..c23a80b
--- /dev/null
+++ b/vocab/.custom/language-helpers.js
@@ -0,0 +1,45 @@
+var fs = require("fs");
+let path = require("path");
+var pluralsFile = "../experimenteel/nouns-meervouden.txt";
+
+// Known plural forms, one per line. They are kept in a Set because the caller
+// probes several candidates per word; Array#includes rescanned the whole list
+// on every probe. Splitting on \r?\n also accepts files with CRLF endings.
+var plurals = new Set(
+  fs.readFileSync(path.join(__dirname, pluralsFile), "utf8").split(/\r?\n/)
+);
+
+// The matchers below return the String#match result (array or null), so use
+// them for truthiness only. They know lowercase ASCII letters only, and "\b"
+// marks the end of a run of word characters, not necessarily the string end.
+
+// Truthy when the string contains a vowel (a, e, i, o, u).
+exports.isVowel = character => character.match(/[aeiou]/);
+
+// Truthy when the string contains a consonant.
+exports.isConsonant = character => character.match(/[bcdfghjklmnpqrstvwxyz]/);
+
+// Truthy when a vowel is directly followed by a word boundary.
+exports.endsWithVowel = word => word.match(/\w*[aeiou]\b/);
+
+// Truthy when a consonant is directly followed by a word boundary.
+exports.endsWithConsonant = word => word.match(/\w*[bcdfghjklmnpqrstvwxyz]\b/);
+
+// Truthy when "ee" is directly followed by a word boundary.
+exports.endsWithDoubleE = word => word.match(/\w*ee\b/);
+
+// Truthy when two consonants are directly followed by a word boundary.
+exports.endsWithDoubleConsonant = word =>
+  word.match(/\w*[bcdfghjklmnpqrstvwxyz][bcdfghjklmnpqrstvwxyz]\b/);
+
+// Truthy for vowel + consonant before a word boundary.
+exports.endsWithSingleConsonant = word => word.match(/\w*[aeiou][bcdfghjklmnpqrstvwxyz]\b/);
+
+// Truthy for two vowels + one of t, k, f, s, c, h, p before a word boundary.
+exports.endswithStemloosVowel = word => word.match(/\w*[aeiou][aeiou][tkfschp]\b/);
+
+// Truthy for a consonant + one of t, k, f, s, c, h, p before a word boundary.
+exports.endswithStemloos = word => word.match(/\w*[bcdfghjklmnpqrstvwxyz][tkfschp]\b/);
+
+// True when `plural` is one of the known plural forms.
+exports.doesPluralExists = plural => plurals.has(plural);