all
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,2 +1,3 @@
|
|||||||
.idea/
|
.idea/
|
||||||
/vocab/.custom/
|
/vocab/.custom/
|
||||||
|
**/.custom/
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
# Meervoudsvormen op basis van woordenlijst
|
|
||||||
|
|
||||||
Op basis van [wordlist.txt](../wordlist.txt) wordt een aantal meervoudsregels getest: voor elk woord wordt gecontroleerd of het gevormde meervoud voorkomt in [nouns-meervouden.txt](../experimenteel/nouns-meervouden.txt).
|
|
||||||
|
|
||||||
|
|
||||||
## Genereren van nieuwe lijst
|
|
||||||
|
|
||||||
- Voer eerst ```node irregulars.js``` uit. Dit genereert een lijst met uitzonderingen van de meeste woorden.
|
|
||||||
- Daarna: ```node fetchPlurals.js```. Dit kan een tijdje duren, maar geeft een output: ```nouns.json```. Deze lijst kan je dan gebruiken.
|
|
||||||
@@ -1,145 +0,0 @@
|
|||||||
var fs = require("fs");
|
|
||||||
let path = require("path");
|
|
||||||
var irregularFile = "irregular.json";
|
|
||||||
var outputFile = "nouns.json";
|
|
||||||
var helpers = require("./language-helpers");
|
|
||||||
|
|
||||||
// Delete any previous output file before regenerating it.
try {
  fs.unlinkSync(path.join(__dirname, outputFile));
  console.log("Output file deleted.");
} catch (error) {
  if (error.code === "ENOENT") {
    // Nothing to delete on a first run; this is not an error.
    console.log("No previous output file to delete.");
  } else {
    console.log(error);
  }
}

// Start from the irregular plurals; checkPlural() adds the regular ones to this object.
const nouns = JSON.parse(fs.readFileSync(path.join(__dirname, irregularFile), "utf8"));

// One word per line. Accept both LF and CRLF line endings and skip blank lines,
// so no word carries a trailing "\r" and the final newline yields no empty word.
const words = fs
  .readFileSync(path.join(__dirname, "../wordlist.txt"), "utf8")
  .split(/\r?\n/)
  .filter(word => word.length > 0);
|
|
||||||
|
|
||||||
/**
 * Lazily yields the plural spellings to try for `word`, in priority order.
 *
 * The helper predicates are only evaluated when the consumer asks for the
 * next candidate, so nothing past the first existing plural is computed.
 *
 * @param {string} word - singular noun
 * @returns {Generator<string>} candidate plural spellings
 */
function* pluralCandidates(word) {
  const lastCharacter = word.charAt(word.length - 1);
  const withoutLast = word.substring(0, word.length - 1);
  const withoutLastTwo = word.substring(0, word.length - 2);
  // A final "f" is tried as "v" first; after that a final "s" is tried as "z".
  const voicedF = lastCharacter.replace("f", "v");
  const voiced = voicedF.replace("s", "z");

  if (word.match(/\w*erik\b/)) {
    yield `${word}en`;
  }

  if (helpers.endsWithVowel(word)) {
    yield `${word}ën`;
    yield `${word}ëen`;
    yield `${word}en`;
  }

  if (helpers.endsWithDoubleConsonant(word)) {
    if (helpers.endswithStemloos(word)) {
      yield `${withoutLast}${voicedF}en`;
      yield `${withoutLast}${voiced}en`;
    }
    yield `${word}en`;
    yield `${word}s`;
  }

  if (helpers.endsWithConsonant(word)) {
    // Drop the second-to-last letter (e.g. "boom" -> "bomen").
    yield `${withoutLastTwo}${lastCharacter}en`;
    // Double the final consonant (e.g. "kat" -> "katten").
    yield `${word}${lastCharacter}en`;

    if (helpers.endswithStemloosVowel(word)) {
      if (helpers.endsWithSingleConsonant(withoutLast)) {
        console.log(`${withoutLastTwo}${voicedF}en`);
        yield `${withoutLastTwo}${voicedF}en`;
        yield `${withoutLastTwo}${voiced}en`;
      } else {
        yield `${withoutLast}${voicedF}en`;
        yield `${withoutLast}${voiced}en`;
      }
    }
  }

  yield `${word}'s`;
  yield `${word}s`;
}

/**
 * Tries to find the plural of `word` and records it in `nouns`.
 *
 * Words that are not made up of letters only, and words already present in
 * `nouns` (the irregulars), are skipped. Otherwise the first candidate that
 * `helpers.doesPluralExists` confirms is stored as `nouns[word]`.
 *
 * @param {string} word - singular noun
 * @returns {boolean} true when a plural was found and stored, false otherwise
 */
const checkPlural = function (word) {
  // The previous pattern /^[a-zA-Z]*/g matched every string (an empty match
  // counts), so this guard never fired. Require one or more letters over the
  // whole word; \p{L} also accepts accented letters.
  if (!/^\p{L}+$/u.test(word)) {
    console.log("niet alpha");
    return false;
  }

  // Words already present (the irregulars) keep their existing entry.
  if (Object.prototype.hasOwnProperty.call(nouns, word)) {
    console.log("irregular");
    return false;
  }

  for (const candidate of pluralCandidates(word)) {
    if (helpers.doesPluralExists(candidate)) {
      nouns[word] = candidate;
      return true;
    }
  }

  console.log("niet gevonden");
  return false;
};
|
|
||||||
|
|
||||||
// Run every word through the plural rules; results are collected in `nouns`.
for (const word of words) {
  console.log(word);
  checkPlural(word);
}

console.log(nouns);

const data = JSON.stringify(nouns);
fs.writeFile(path.join(__dirname, outputFile), data, err => {
  if (err) {
    // Report the failure; previously "done" was printed even when the write failed.
    console.error(err);
    process.exitCode = 1;
    return;
  }
  console.log("done");
});
|
|
||||||
@@ -1,235 +0,0 @@
|
|||||||
var fs = require("fs");
|
|
||||||
let path = require("path");
|
|
||||||
var outputFile = "irregular.json";
|
|
||||||
|
|
||||||
var nounString,
|
|
||||||
nouns;
|
|
||||||
|
|
||||||
// Hand-maintained irregular plurals, keyed by the singular noun.
// A value is either one plural (string) or several accepted plurals (array).
// The key "blad" used to be listed twice with the same value; it is kept once.
const irregulars = {
  been: ["beenderen", "benen"],
  blad: ["bladeren", "bladen", "blaren"],
  ei: "eieren",
  gelid: "gelederen",
  gemoed: "gemoederen",
  goed: "goederen",
  hoen: "hoenderen",
  kalf: "kalveren",
  kind: "kinderen",
  kleed: ["kleren", "klederen", "kleden"],
  lam: "lammeren",
  lied: "liederen",
  rad: "raderen",
  rund: "runderen",
  volk: ["volken", "volkeren"],
  koe: "koeien",
  vlo: "vlooien",
  leerrede: ["leerredenen", "leerredes"],
  lende: ["lendenen", "lenden"],
  kleinood: ["kleinoden", "kleinodiën"],
  sieraad: ["sieraden", "sieradiën"],
  epos: ["epen", "epossen"],
  genius: "geniën",
  aanbod: "aanbiedingen",
  beleg: ["beleggingen", "belegeringen"],
  dank: "dankbetuigingen",
  doel: ["doeleinden", "doelen"],
  gedrag: "gedragingen",
  genot: "genietingen",
  lof: ["loftuitingen", "lofbetuigingen"],
  onderzoek: ["onderzoekingen", "onderzoeken"],
  raad: "raadgevingen",
  rede: ["redevoeringen", "redes"],
  fotograaf: "fotografen",
  paragraaf: "paragrafen",
  telegraaf: "telegrafen",
  burggraaf: "burggraven",
  loopgraaf: "loopgraven",
  filosoof: "filosofen",
  theosoof: "theosofen",
  elf: "elfen",
  paraaf: "parafen",
  stad: "steden",
  bad: "baden",
  bedrag: "bedragen",
  dag: "dagen",
  dak: "daken",
  dal: "dalen",
  gat: "gaten",
  gelag: "gelagen",
  glas: "glazen",
  graf: "graven",
  pad: ["paden", "padden"],
  slag: "slagen",
  staf: ["staffen", "staven"],
  vat: "vaten",
  verdrag: "verdragen",
  handvat: ["handvatten", "handvaten"],
  bevel: "bevelen",
  gebed: "gebeden",
  gebrek: "gebreken",
  gen: "genen",
  spel: "spelen",
  tred: "treden",
  weg: "wegen",
  gemet: "gemeten",
  bijzonderheid: "bijzonderheden",
  kleinigheid: "kleinigheden",
  moeilijkheid: "moeilijkheden",
  lid: "leden",
  rif: "reven",
  schip: "schepen",
  smid: "smeden",
  spit: ["spitten", "speten"],
  alcohol: "alcoholen",
  elektron: "elektronen",
  neutron: "neutronen",
  proton: "protonen",
  gebod: "geboden",
  god: "goden",
  hertog: "hertogen",
  hof: "hoven",
  hol: "holen",
  kot: ["kotten", "koten"],
  lot: "loten",
  oorlog: "oorlogen",
  schot: "schoten",
  slot: "sloten",
  verbod: "verboden",
  verlof: "verloven",
  kruis: "kruizen",
  pers: "persen",
  balans: "balansen",
  concours: "concoursen",
  dans: "dansen",
  diocees: "diocesen",
  eis: "eisen",
  forens: ["forensen", "forenzen"],
  impuls: "impulsen",
  kaars: "kaarsen",
  kans: "kansen",
  kers: "kersen",
  kikvors: "kikvorsen",
  koers: "koersen",
  kous: "kousen",
  krans: "kransen",
  lans: "lansen",
  mars: "marsen",
  mens: "mensen",
  ons: ["onsen", "onzen"],
  paus: "pausen",
  plons: ["plonsen", "plonzen"],
  pols: "polsen",
  prins: "prinsen",
  pruis: "pruisen",
  saus: ["sausen", "sauzen"],
  schans: "schansen",
  spons: ["sponzen", "sponsen"],
  stimulans: "stimulansen",
  tendens: "tendensen",
  trans: "transen",
  wals: "walsen",
  wens: "wensen",
  zeis: "zeisen",
  einde: ["eindes", "einden"],
  symbool: "symbolen",
  knop: ["knopen", "knoppen"],
};
|
|
||||||
|
|
||||||
// https://e-ans.ivdnt.org/topics/pid/ans03050402lingtopic
// Compounds ending in "-man", grouped by the endings that replace "man" in the plural.
// "vakman" used to be listed in the lui/lieden group as well, but that entry was
// overwritten by the mannen/lieden/lui group below, so it is listed only there.
const manCompoundGroups = [
  {
    endings: ["lieden"],
    compounds: "edelman krijgsman landman raadsman weidman",
  },
  {
    endings: ["lui", "lieden"],
    compounds: "akkerman ambachtsman baggerman bootsman buitenman burgerman buurman handelsman handwerksman kooiman koopman scheepstimmerman timmerman schieman sjouwerman speelman stadswerkman werkman stuurman tuinman varensman veerman voerman zakenman zeeman zegsman",
  },
  {
    endings: ["mannen", "lieden"],
    compounds: "bewindsman bruggeman cameraman hoofdman leidsman ombudsman staatsman vertrouwensman verzetsman voorman",
  },
  {
    endings: ["mannen", "lieden", "lui"],
    compounds: "brandweerman kantoorman opperman sportsman vakman",
  },
  {
    endings: ["mannen"],
    compounds: "barman bosjesman boeman dronkeman ijscoman jongeman kiesman kikvorsman krantenman leenman medicijnman melkman muzelman Noorman olieman onderwijsman orgelman partijman politieman sandwichman schillenman spoorwegman stroman stuntman vakbondsman vuilnisman weerman wetenschapsman wildeman",
  },
];

manCompoundGroups.forEach(({ endings, compounds }) => {
  compounds.split(" ").forEach(noun => {
    const pluralForms = endings.map(ending => noun.replace(/man$/, ending));
    // A single plural is stored as a string, several plurals as an array,
    // matching the shape of the entries in the `irregulars` literal.
    irregulars[noun] = pluralForms.length === 1 ? pluralForms[0] : pluralForms;
  });
});
|
|
||||||
|
|
||||||
// Serialise the complete irregulars table to the output file next to this script.
const data = JSON.stringify(irregulars);
fs.writeFile(path.join(__dirname, outputFile), data, err => {
  if (err) {
    // Report the failure; previously "done" was printed even when the write failed.
    console.error(err);
    process.exitCode = 1;
    return;
  }
  console.log("done");
});
|
|
||||||
@@ -1,45 +0,0 @@
|
|||||||
var fs = require("fs");
|
|
||||||
let path = require("path");
|
|
||||||
// Path of the plural word list, relative to this script.
const pluralsFile = "../experimenteel/nouns-meervouden.txt";

// One plural per line. Accept both LF and CRLF line endings and skip blank lines,
// so no entry carries a trailing "\r".
const plurals = fs
  .readFileSync(path.join(__dirname, pluralsFile), "utf8")
  .split(/\r?\n/)
  .filter(line => line.length > 0);
|
|
||||||
|
|
||||||
exports.isVowel = character => {
|
|
||||||
return character.match(/[aeiou]/);
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.isConsonant = character => {
|
|
||||||
return character.match(/[bcdfghjklmnpqrstvwxyz]/);
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.endsWithVowel = word => {
|
|
||||||
return word.match(/\w*[aeiou]\b/);
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.endsWithConsonant = word => {
|
|
||||||
return word.match(/\w*[bcdfghjklmnpqrstvwxyz]\b/);
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.endsWithDoubleE = word => {
|
|
||||||
return word.match(/\w*ee\b/);
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.endsWithDoubleConsonant = word => {
|
|
||||||
return word.match(/\w*[bcdfghjklmnpqrstvwxyz][bcdfghjklmnpqrstvwxyz]\b/);
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.endsWithSingleConsonant = word => {
|
|
||||||
return word.match(/\w*[aeiou][bcdfghjklmnpqrstvwxyz]\b/);
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.endswithStemloosVowel = word => {
|
|
||||||
return word.match(/\w*[aeiou][aeiou][tkfschp]\b/);
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.endswithStemloos = word => {
|
|
||||||
return word.match(/\w*[bcdfghjklmnpqrstvwxyz][tkfschp]\b/);
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.doesPluralExists = plural => {
|
|
||||||
return plurals.includes(plural);
|
|
||||||
};
|
|
||||||
Reference in New Issue
Block a user