initial commit
This commit is contained in:
9
vocab/.custom/README.md
Normal file
9
vocab/.custom/README.md
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# Meervoudsvormen op basis van woordenlijst
|
||||||
|
|
||||||
|
Op basis van [wordlist.txt](../wordlist.txt) wordt voor een aantal meervoudsregels getest of het gevormde meervoud voorkomt in [nouns-meervouden.txt](../experimenteel/nouns-meervouden.txt).
|
||||||
|
|
||||||
|
|
||||||
|
## Genereren van nieuwe lijst
|
||||||
|
|
||||||
|
- Voer eerst ```node irregulars.js``` uit. Dit genereert ```irregular.json```, een lijst met de meeste onregelmatige meervouden (uitzonderingen).
|
||||||
|
- Daarna: ```node fetchPlurals.js```. Dit kan een tijdje duren, maar geeft een output: ```nouns.json```. Deze lijst kan je dan gebruiken.
|
||||||
145
vocab/.custom/fetchPlurals.js
Normal file
145
vocab/.custom/fetchPlurals.js
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
var fs = require("fs");
|
||||||
|
let path = require("path");
|
||||||
|
// Input produced by irregulars.js and the file this script generates.
const irregularFile = "irregular.json";
const outputFile = "nouns.json";
const helpers = require("./language-helpers");

// Start from a clean slate: remove the output of a previous run, if any.
try {
    fs.unlinkSync(path.join(__dirname, outputFile));
    console.log("Output file deleted.");
} catch (error) {
    // A missing output file is the normal first-run situation; only report
    // errors that indicate a real problem (permissions, locked file, ...).
    if (error.code !== "ENOENT") {
        console.log(error);
    }
}

// The result map is seeded with the hand-maintained irregular plurals, so that
// checkPlural never overwrites them with a rule-based guess.
const nouns = JSON.parse(fs.readFileSync(path.join(__dirname, irregularFile), "utf8"));

// One word per line. Splitting on /\r?\n/ keeps a CRLF word list from leaving
// a trailing "\r" on every word, which would make every lookup fail.
const words = fs.readFileSync(path.join(__dirname, "../wordlist.txt"), "utf8").split(/\r?\n/);
|
||||||
|
|
||||||
|
/**
 * Yields, in priority order, every plural spelling that should be looked up
 * for `word`. Generation is lazy, so the helper predicates are only evaluated
 * when the earlier candidates did not match.
 *
 * @param {string} word - singular noun consisting of ASCII letters
 * @returns {Generator<string>} candidate plural spellings
 */
function* pluralCandidates(word) {
    const lastCharacter = word.charAt(word.length - 1);
    const withoutLast = word.slice(0, -1);
    const withoutLastTwo = word.slice(0, -2);

    // Voiced variants of the final letter: first f -> v, then additionally s -> z.
    // For any other letter both entries equal the unchanged letter.
    const voiced = lastCharacter.replace("f", "v");
    const voicedEndings = [voiced, voiced.replace("s", "z")];

    // Words on -erik take a plain "en" without doubling the k.
    if (/\w*erik\b/.test(word)) {
        yield word + "en";
    }

    if (helpers.endsWithVowel(word)) {
        yield word + "ën";
        yield word + "ëen";
        yield word + "en";
    }

    if (helpers.endsWithDoubleConsonant(word)) {
        if (helpers.endswithStemloos(word)) {
            // consonant + voiceless consonant: wolf -> wolven, gans -> ganzen
            for (const ending of voicedEndings) {
                yield withoutLast + ending + "en";
            }
        }
        yield word + "en";
        yield word + "s";
    }

    if (helpers.endsWithConsonant(word)) {
        // Drop the second-to-last letter (long vowel written once): boom -> bomen
        yield withoutLastTwo + lastCharacter + "en";
        // Double the final consonant (short vowel): kat -> katten
        yield word + lastCharacter + "en";

        if (helpers.endswithStemloosVowel(word)) {
            let stem = withoutLast;
            if (helpers.endsWithSingleConsonant(withoutLast)) {
                // NOTE(review): with the helper patterns as written, a word ending in
                // two vowels + consonant never has a stem ending in vowel + consonant,
                // so this branch looks unreachable - confirm the intended condition.
                stem = withoutLastTwo;
                console.log(stem + voiced + "en");
            }
            // two vowels + voiceless consonant: duif -> duiven, huis -> huizen
            for (const ending of voicedEndings) {
                yield stem + ending + "en";
            }
        }
    }

    // Fallbacks that apply to every word.
    yield word + "'s";
    yield word + "s";
}

/**
 * Tries to derive the plural of `word` by generating rule-based candidates and
 * keeping the first one that occurs in the known plurals list. A match is
 * stored in the module-level `nouns` map as `nouns[word] = plural`.
 *
 * @param {string} word - singular noun from the word list
 * @returns {boolean} true when a plural was found and recorded; false when the
 *     word is not purely alphabetic, is already present in `nouns` (e.g. as an
 *     irregular), or no candidate is listed
 */
var checkPlural = function (word) {
    // The previous pattern /^[a-zA-Z]*/g also matched the empty string at the
    // start of any input, so this guard could never fire. Require the whole
    // word to consist of one or more ASCII letters.
    if (!/^[a-zA-Z]+$/.test(word)) {
        console.log("niet alpha");
        return false;
    }

    // Skip words that already have an entry (the irregulars seeded into `nouns`
    // or a word handled earlier). Called via the prototype so that a word such
    // as "hasOwnProperty" stored as a key cannot break the check.
    if (Object.prototype.hasOwnProperty.call(nouns, word)) {
        console.log("irregular");
        return false;
    }

    // The first candidate present in the plurals list wins.
    for (const candidate of pluralCandidates(word)) {
        if (helpers.doesPluralExists(candidate)) {
            nouns[word] = candidate;
            return true;
        }
    }

    console.log("niet gevonden");
    return false;
};
|
||||||
|
|
||||||
|
// Run every word from the word list through the plural rules; matches are
// accumulated in `nouns` by checkPlural.
for (const word of words) {
    console.log(word);
    checkPlural(word);
}

console.log(nouns);

// Persist the irregular plus derived plurals as JSON.
let data = JSON.stringify(nouns);
fs.writeFile(path.join(__dirname, outputFile), data, err => {
    if (err) {
        // Do not claim success when the file could not be written.
        console.error(err);
        process.exitCode = 1;
        return;
    }
    console.log("done");
});
|
||||||
235
vocab/.custom/irregulars.js
Normal file
235
vocab/.custom/irregulars.js
Normal file
@@ -0,0 +1,235 @@
|
|||||||
|
var fs = require("fs");
|
||||||
|
let path = require("path");
|
||||||
|
// File the finished table is written to.
var outputFile = "irregular.json";

// Scratch variables used by the "-man" compound sections of this script.
var nounString,
    nouns;

// Hand-maintained irregular plurals: singular -> plural, or singular -> list of
// accepted plurals. The table previously listed "blad" twice with the same
// value; the duplicate key has been removed (key order is unchanged).
var irregulars = {
    been: ["beenderen", "benen"],
    blad: ["bladeren", "bladen", "blaren"],
    ei: "eieren",
    gelid: "gelederen",
    gemoed: "gemoederen",
    goed: "goederen",
    hoen: "hoenderen",
    kalf: "kalveren",
    kind: "kinderen",
    kleed: ["kleren", "klederen", "kleden"],
    lam: "lammeren",
    lied: "liederen",
    rad: "raderen",
    rund: "runderen",
    volk: ["volken", "volkeren"],
    koe: "koeien",
    vlo: "vlooien",
    leerrede: ["leerredenen", "leerredes"],
    lende: ["lendenen", "lenden"],
    kleinood: ["kleinoden", "kleinodiën"],
    sieraad: ["sieraden", "sieradiën"],
    epos: ["epen", "epossen"],
    genius: "geniën",
    aanbod: "aanbiedingen",
    beleg: ["beleggingen", "belegeringen"],
    dank: "dankbetuigingen",
    doel: ["doeleinden", "doelen"],
    gedrag: "gedragingen",
    genot: "genietingen",
    lof: ["loftuitingen", "lofbetuigingen"],
    onderzoek: ["onderzoekingen", "onderzoeken"],
    raad: "raadgevingen",
    rede: ["redevoeringen", "redes"],
    fotograaf: "fotografen",
    paragraaf: "paragrafen",
    telegraaf: "telegrafen",
    burggraaf: "burggraven",
    loopgraaf: "loopgraven",
    filosoof: "filosofen",
    theosoof: "theosofen",
    elf: "elfen",
    paraaf: "parafen",
    stad: "steden",
    bad: "baden",
    bedrag: "bedragen",
    dag: "dagen",
    dak: "daken",
    dal: "dalen",
    gat: "gaten",
    gelag: "gelagen",
    glas: "glazen",
    graf: "graven",
    pad: ["paden", "padden"],
    slag: "slagen",
    staf: ["staffen", "staven"],
    vat: "vaten",
    verdrag: "verdragen",
    handvat: ["handvatten", "handvaten"],
    bevel: "bevelen",
    gebed: "gebeden",
    gebrek: "gebreken",
    gen: "genen",
    spel: "spelen",
    tred: "treden",
    weg: "wegen",
    gemet: "gemeten",
    bijzonderheid: "bijzonderheden",
    kleinigheid: "kleinigheden",
    moeilijkheid: "moeilijkheden",
    lid: "leden",
    rif: "reven",
    schip: "schepen",
    smid: "smeden",
    spit: ["spitten", "speten"],
    alcohol: "alcoholen",
    elektron: "elektronen",
    neutron: "neutronen",
    proton: "protonen",
    gebod: "geboden",
    god: "goden",
    hertog: "hertogen",
    hof: "hoven",
    hol: "holen",
    kot: ["kotten", "koten"],
    lot: "loten",
    oorlog: "oorlogen",
    schot: "schoten",
    slot: "sloten",
    verbod: "verboden",
    verlof: "verloven",
    kruis: "kruizen",
    pers: "persen",
    balans: "balansen",
    concours: "concoursen",
    dans: "dansen",
    diocees: "diocesen",
    eis: "eisen",
    forens: ["forensen", "forenzen"],
    impuls: "impulsen",
    kaars: "kaarsen",
    kans: "kansen",
    kers: "kersen",
    kikvors: "kikvorsen",
    koers: "koersen",
    kous: "kousen",
    krans: "kransen",
    lans: "lansen",
    mars: "marsen",
    mens: "mensen",
    ons: ["onsen", "onzen"],
    paus: "pausen",
    plons: ["plonsen", "plonzen"],
    pols: "polsen",
    prins: "prinsen",
    pruis: "pruisen",
    saus: ["sausen", "sauzen"],
    schans: "schansen",
    spons: ["sponzen", "sponsen"],
    stimulans: "stimulansen",
    tendens: "tendensen",
    trans: "transen",
    wals: "walsen",
    wens: "wensen",
    zeis: "zeisen",
    einde: ["eindes", "einden"],
    symbool: "symbolen",
    knop: ["knopen", "knoppen"]
};
|
||||||
|
|
||||||
|
// Plurals of compounds ending in "-man", grouped by the ending(s) that replace
// the final "man". Source: https://e-ans.ivdnt.org/topics/pid/ans03050402lingtopic
// A string ending yields a single plural, a list of endings yields a list of
// plurals. Groups are applied in order, so a word listed twice ("vakman")
// ends up with the value of the last group that contains it.
const manCompoundRules = [
    ["lieden", "edelman krijgsman landman raadsman weidman"],
    [
        ["lui", "lieden"],
        "akkerman ambachtsman baggerman bootsman buitenman burgerman buurman handelsman handwerksman kooiman koopman scheepstimmerman timmerman schieman sjouwerman speelman stadswerkman werkman stuurman tuinman vakman varensman veerman voerman zakenman zeeman zegsman"
    ],
    [
        ["mannen", "lieden"],
        "bewindsman bruggeman cameraman hoofdman leidsman ombudsman staatsman vertrouwensman verzetsman voorman"
    ],
    [
        ["mannen", "lieden", "lui"],
        "brandweerman kantoorman opperman sportsman vakman"
    ],
    [
        "mannen",
        "barman bosjesman boeman dronkeman ijscoman jongeman kiesman kikvorsman krantenman leenman medicijnman melkman muzelman Noorman olieman onderwijsman orgelman partijman politieman sandwichman schillenman spoorwegman stroman stuntman vakbondsman vuilnisman weerman wetenschapsman wildeman"
    ]
];

manCompoundRules.forEach(([endings, wordList]) => {
    nounString = wordList;
    nouns = nounString.split(" ");

    for (const noun of nouns) {
        if (typeof endings === "string") {
            irregulars[noun] = noun.replace(/man$/, endings);
        } else {
            irregulars[noun] = endings.map(ending => noun.replace(/man$/, ending));
        }
    }
});
|
||||||
|
|
||||||
|
// Write the completed table next to this script; fetchPlurals.js reads it.
let data = JSON.stringify(irregulars);
fs.writeFile(path.join(__dirname, outputFile), data, err => {
    if (err) {
        // Do not claim success when the file could not be written.
        console.error(err);
        process.exitCode = 1;
        return;
    }
    console.log("done");
});
|
||||||
45
vocab/.custom/language-helpers.js
Normal file
45
vocab/.custom/language-helpers.js
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
var fs = require("fs");
|
||||||
|
let path = require("path");
|
||||||
|
// List of known plural forms, one per line, relative to this script.
var pluralsFile = "../experimenteel/nouns-meervouden.txt";

// Loaded once when the module is required. Splitting on /\r?\n/ keeps a CRLF
// file from leaving a trailing "\r" on every entry, which would make every
// lookup fail.
var plurals = fs.readFileSync(path.join(__dirname, pluralsFile), "utf8").split(/\r?\n/);
|
||||||
|
|
||||||
|
exports.isVowel = character => {
|
||||||
|
return character.match(/[aeiou]/);
|
||||||
|
};
|
||||||
|
|
||||||
|
exports.isConsonant = character => {
|
||||||
|
return character.match(/[bcdfghjklmnpqrstvwxyz]/);
|
||||||
|
};
|
||||||
|
|
||||||
|
exports.endsWithVowel = word => {
|
||||||
|
return word.match(/\w*[aeiou]\b/);
|
||||||
|
};
|
||||||
|
|
||||||
|
exports.endsWithConsonant = word => {
|
||||||
|
return word.match(/\w*[bcdfghjklmnpqrstvwxyz]\b/);
|
||||||
|
};
|
||||||
|
|
||||||
|
exports.endsWithDoubleE = word => {
|
||||||
|
return word.match(/\w*ee\b/);
|
||||||
|
};
|
||||||
|
|
||||||
|
exports.endsWithDoubleConsonant = word => {
|
||||||
|
return word.match(/\w*[bcdfghjklmnpqrstvwxyz][bcdfghjklmnpqrstvwxyz]\b/);
|
||||||
|
};
|
||||||
|
|
||||||
|
exports.endsWithSingleConsonant = word => {
|
||||||
|
return word.match(/\w*[aeiou][bcdfghjklmnpqrstvwxyz]\b/);
|
||||||
|
};
|
||||||
|
|
||||||
|
exports.endswithStemloosVowel = word => {
|
||||||
|
return word.match(/\w*[aeiou][aeiou][tkfschp]\b/);
|
||||||
|
};
|
||||||
|
|
||||||
|
exports.endswithStemloos = word => {
|
||||||
|
return word.match(/\w*[bcdfghjklmnpqrstvwxyz][tkfschp]\b/);
|
||||||
|
};
|
||||||
|
|
||||||
|
exports.doesPluralExists = plural => {
|
||||||
|
return plurals.includes(plural);
|
||||||
|
};
|
||||||
Reference in New Issue
Block a user