diff --git a/.gitignore b/.gitignore index 2a0e38f..f6e905c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .idea/ /vocab/.custom/ +**/.custom/ \ No newline at end of file diff --git a/vocab/.custom/README.md b/vocab/.custom/README.md deleted file mode 100644 index b2605d2..0000000 --- a/vocab/.custom/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# Meervoudsvormen op basis van woordenlijst - -Op basis van [wordlist.txt](../wordlist.txt) worden een aantal meervoudsregels getest of dit meervoud voorkomt in [nouns-meervouden.txt](../experimenteel/nouns-meervouden.txt). - - -## Genereren van nieuwe lijst - -- Voer eerst ```node irregulars.js``` uit. Dit genereert een lijst met uitzonderingen van de meeste woorden. -- Daarna: ```node fetchPlurals.js```. Dit kan een tijdje duren, maar geeft een output: ```nouns.json```. Deze lijst kan je dan gebruiken. \ No newline at end of file diff --git a/vocab/.custom/fetchPlurals.js b/vocab/.custom/fetchPlurals.js deleted file mode 100644 index 6e6e019..0000000 --- a/vocab/.custom/fetchPlurals.js +++ /dev/null @@ -1,145 +0,0 @@ -var fs = require("fs"); -let path = require("path"); -var irregularFile = "irregular.json"; -var outputFile = "nouns.json"; -var helpers = require("./language-helpers"); - -try { - fs.unlinkSync(path.join(__dirname, outputFile)); - console.log("Output file deleted."); -} catch (error) { - console.log(error); -} - -var nouns = JSON.parse(fs.readFileSync(path.join(__dirname, irregularFile), "utf8")); -var words = fs.readFileSync(path.join(__dirname, "../wordlist.txt"), "utf8").toString().split("\n"); - -var checkPlural = function (word) { - if (!word.match(/^[a-zA-Z]*/g)) { - console.log("niet alpha"); - return false; - } - - // if word isn't an irregular or doesn't exist yet - if (nouns.hasOwnProperty(word)) { - console.log("irregular"); - return false; - } - - // test if a pluralrule is listed in plurals list - if (word.match(/\w*erik\b/)) { - if (helpers.doesPluralExists(word + "en")) { - nouns[word] = word + "en"; - return true; - } - } - - if (helpers.endsWithVowel(word)) { - if (helpers.doesPluralExists(word + "ën")) { - nouns[word] = word + "ën"; - return true; - } - if (helpers.doesPluralExists(word + "ëen")) { - nouns[word] = word + "ëen"; - return true; - } - if (helpers.doesPluralExists(word + "en")) { - nouns[word] = word + "en"; - return true; - } - } - if (helpers.endsWithDoubleConsonant(word)) { - if (helpers.endswithStemloos(word)) { - var lastCharacter = word.charAt(word.length - 1); - basicword = word.substring(0, word.length - 1); - lastCharacter = lastCharacter.replace("f", "v"); - if (helpers.doesPluralExists(basicword + lastCharacter + "en")) { - nouns[word] = basicword + lastCharacter + "en"; - return true; - } - lastCharacter = lastCharacter.replace("s", "z"); - if (helpers.doesPluralExists(basicword + lastCharacter + "en")) { - nouns[word] = basicword + lastCharacter + "en"; - return true; - } - } - if (helpers.doesPluralExists(word + "en")) { - nouns[word] = word + "en"; - return true; - } - if (helpers.doesPluralExists(word + "s")) { - nouns[word] = word + "s"; - return true; - } - } - if (helpers.endsWithConsonant(word)) { - var lastCharacter = word.charAt(word.length - 1); - basicword = word.substring(0, word.length - 2); - if (helpers.doesPluralExists(basicword + lastCharacter + "en")) { - nouns[word] = basicword + lastCharacter + "en"; - return true; - } - - if (helpers.doesPluralExists(word + word.slice(-1) + "en")) { - nouns[word] = word + word.slice(-1) + "en"; - return true; - } - - if (helpers.endswithStemloosVowel(word)) { - var lastCharacter = word.charAt(word.length - 1); - basicword = word.substring(0, word.length - 1); - - if (helpers.endsWithSingleConsonant(basicword)) { - basicword = word.substring(0, word.length - 2); - lastCharacter = lastCharacter.replace("f", "v"); - console.log(basicword + lastCharacter + "en"); - if (helpers.doesPluralExists(basicword + lastCharacter + "en")) { - nouns[word] = basicword + lastCharacter + "en"; - return true; - } - lastCharacter = lastCharacter.replace("s", "z"); - if (helpers.doesPluralExists(basicword + lastCharacter + "en")) { - nouns[word] = basicword + lastCharacter + "en"; - return true; - } - } else { - lastCharacter = lastCharacter.replace("f", "v"); - if (helpers.doesPluralExists(basicword + lastCharacter + "en")) { - nouns[word] = basicword + lastCharacter + "en"; - return true; - } - lastCharacter = lastCharacter.replace("s", "z"); - if (helpers.doesPluralExists(basicword + lastCharacter + "en")) { - nouns[word] = basicword + lastCharacter + "en"; - return true; - } - } - } - } - - if (helpers.doesPluralExists(word + "'s")) { - nouns[word] = word + "'s"; - return true; - } - - if (helpers.doesPluralExists(word + "s")) { - nouns[word] = word + "s"; - return true; - } - - console.log("niet gevonden"); - return false; -}; - -words.forEach((word, index) => { - console.log(word); - checkPlural(word); -}); - -console.log(nouns); - - -let data = JSON.stringify(nouns); -fs.writeFile(path.join(__dirname, outputFile), data, err => { - console.log("done"); -}); diff --git a/vocab/.custom/irregulars.js b/vocab/.custom/irregulars.js deleted file mode 100644 index ac7b732..0000000 --- a/vocab/.custom/irregulars.js +++ /dev/null @@ -1,235 +0,0 @@ -var fs = require("fs"); -let path = require("path"); -var outputFile = "irregular.json"; - -var nounString, - nouns; - -var irregulars = { - been: [ - "beenderen", "benen" - ], - blad: [ - "bladeren", "bladen", "blaren" - ], - ei: "eieren", - gelid: "gelederen", - gemoed: "gemoederen", - goed: "goederen", - hoen: "hoenderen", - kalf: "kalveren", - kind: "kinderen", - kleed: [ - "kleren", "klederen", "kleden" - ], - lam: "lammeren", - lied: "liederen", - rad: "raderen", - rund: "runderen", - volk: [ - "volken", "volkeren" - ], - koe: "koeien", - vlo: "vlooien", - leerrede: [ - "leerredenen", "leerredes" - ], - lende: [ - "lendenen", "lenden" - ], - kleinood: [ - "kleinoden", "kleinodiën" - ], - sieraad: [ - "sieraden", "sieradiën" - ], - epos: [ - "epen", "epossen" - ], - genius: "geniën", - aanbod: "aanbiedingen", - beleg: [ - "beleggingen", "belegeringen" - ], - dank: "dankbetuigingen", - doel: [ - "doeleinden", "doelen" - ], - gedrag: "gedragingen", - genot: "genietingen", - lof: [ - "loftuitingen", "lofbetuigingen" - ], - onderzoek: [ - "onderzoekingen", "onderzoeken" - ], - raad: "raadgevingen", - rede: [ - "redevoeringen", "redes" - ], - fotograaf: "fotografen", - paragraaf: "paragrafen", - telegraaf: "telegrafen", - burggraaf: "burggraven", - loopgraaf: "loopgraven", - filosoof: "filosofen", - theosoof: "theosofen", - elf: "elfen", - paraaf: "parafen", - stad: "steden", - bad: "baden", - bedrag: "bedragen", - blad: [ - "bladeren", "bladen", "blaren" - ], - dag: "dagen", - dak: "daken", - dal: "dalen", - gat: "gaten", - gelag: "gelagen", - glas: "glazen", - graf: "graven", - pad: [ - "paden", "padden" - ], - slag: "slagen", - staf: [ - "staffen", "staven" - ], - vat: "vaten", - verdrag: "verdragen", - handvat: [ - "handvatten", "handvaten" - ], - bevel: "bevelen", - gebed: "gebeden", - gebrek: "gebreken", - gen: "genen", - spel: "spelen", - tred: "treden", - weg: "wegen", - gemet: "gemeten", - bijzonderheid: "bijzonderheden", - kleinigheid: "kleinigheden", - moeilijkheid: "moeilijkheden", - lid: "leden", - rif: "reven", - schip: "schepen", - smid: "smeden", - spit: [ - "spitten", "speten" - ], - alcohol: "alcoholen", - elektron: "elektronen", - neutron: "neutronen", - proton: "protonen", - gebod: "geboden", - god: "goden", - hertog: "hertogen", - hof: "hoven", - hol: "holen", - kot: [ - "kotten", "koten" - ], - lot: "loten", - oorlog: "oorlogen", - schot: "schoten", - slot: "sloten", - verbod: "verboden", - verlof: "verloven", - kruis: "kruizen", - pers: "persen", - balans: "balansen", - concours: "concoursen", - dans: "dansen", - diocees: "diocesen", - eis: "eisen", - forens: [ - "forensen", "forenzen" - ], - impuls: "impulsen", - kaars: "kaarsen", - kans: "kansen", - kers: "kersen", - kikvors: "kikvorsen", - koers: "koersen", - kous: "kousen", - krans: "kransen", - lans: "lansen", - mars: "marsen", - mens: "mensen", - ons: [ - "onsen", "onzen" - ], - paus: "pausen", - plons: [ - "plonsen", "plonzen" - ], - pols: "polsen", - prins: "prinsen", - pruis: "pruisen", - saus: [ - "sausen", "sauzen" - ], - schans: "schansen", - spons: [ - "sponzen", "sponsen" - ], - stimulans: "stimulansen", - tendens: "tendensen", - trans: "transen", - wals: "walsen", - wens: "wensen", - zeis: "zeisen", - einde: ["eindes", "einden"], - symbool: "symbolen", - knop: ["knopen", "knoppen"] -}; - -// https://e-ans.ivdnt.org/topics/pid/ans03050402lingtopic -nounString = "edelman krijgsman landman raadsman weidman"; -nouns = nounString.split(" "); - -nouns.forEach(noun => { - irregulars[noun] = noun.replace(/man$/, "lieden"); -}); - -nounString = "akkerman ambachtsman baggerman bootsman buitenman burgerman buurman handelsman handwerksman kooiman koopman scheepstimmerman timmerman schieman sjouwerman speelman stadswerkman werkman stuurman tuinman vakman varensman veerman voerman zakenman zeeman zegsman"; -nouns = nounString.split(" "); - -nouns.forEach(noun => { - irregulars[noun] = [ - noun.replace(/man$/, "lui"), - noun.replace(/man$/, "lieden") - ]; -}); - -nounString = "bewindsman bruggeman cameraman hoofdman leidsman ombudsman staatsman vertrouwensman verzetsman voorman"; -nouns = nounString.split(" "); -nouns.forEach(noun => { - irregulars[noun] = [ - noun.replace(/man$/, "mannen"), - noun.replace(/man$/, "lieden") - ]; -}); - -nounString = "brandweerman kantoorman opperman sportsman vakman"; -nouns = nounString.split(" "); -nouns.forEach(noun => { - irregulars[noun] = [ - noun.replace(/man$/, "mannen"), - noun.replace(/man$/, "lieden"), - noun.replace(/man$/, "lui") - ]; -}); - -nounString = "barman bosjesman boeman dronkeman ijscoman jongeman kiesman kikvorsman krantenman leenman medicijnman melkman muzelman Noorman olieman onderwijsman orgelman partijman politieman sandwichman schillenman spoorwegman stroman stuntman vakbondsman vuilnisman weerman wetenschapsman wildeman"; -nouns = nounString.split(" "); -nouns.forEach(noun => { - irregulars[noun] = noun.replace(/man$/, "mannen"); -}); - -let data = JSON.stringify(irregulars); -fs.writeFile(path.join(__dirname, outputFile), data, err => { - console.log("done"); -}); diff --git a/vocab/.custom/language-helpers.js b/vocab/.custom/language-helpers.js deleted file mode 100644 index c23a80b..0000000 --- a/vocab/.custom/language-helpers.js +++ /dev/null @@ -1,45 +0,0 @@ -var fs = require("fs"); -let path = require("path"); -var pluralsFile = "../experimenteel/nouns-meervouden.txt"; - -var plurals = fs.readFileSync(path.join(__dirname, pluralsFile), "utf8").toString().split("\n"); - -exports.isVowel = character => { - return character.match(/[aeiou]/); -}; - -exports.isConsonant = character => { - return character.match(/[bcdfghjklmnpqrstvwxyz]/); -}; - -exports.endsWithVowel = word => { - return word.match(/\w*[aeiou]\b/); -}; - -exports.endsWithConsonant = word => { - return word.match(/\w*[bcdfghjklmnpqrstvwxyz]\b/); -}; - -exports.endsWithDoubleE = word => { - return word.match(/\w*ee\b/); -}; - -exports.endsWithDoubleConsonant = word => { - return word.match(/\w*[bcdfghjklmnpqrstvwxyz][bcdfghjklmnpqrstvwxyz]\b/); -}; - -exports.endsWithSingleConsonant = word => { - return word.match(/\w*[aeiou][bcdfghjklmnpqrstvwxyz]\b/); -}; - -exports.endswithStemloosVowel = word => { - return word.match(/\w*[aeiou][aeiou][tkfschp]\b/); -}; - -exports.endswithStemloos = word => { - return word.match(/\w*[bcdfghjklmnpqrstvwxyz][tkfschp]\b/); -}; - -exports.doesPluralExists = plural => { - return plurals.includes(plural); -};