all
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,2 +1,3 @@
|
||||
.idea/
|
||||
/vocab/.custom/
|
||||
**/.custom/
|
||||
@@ -1,9 +0,0 @@
|
||||
# Meervoudsvormen op basis van woordenlijst
|
||||
|
||||
Op basis van [wordlist.txt](../wordlist.txt) wordt voor elk woord een aantal meervoudsregels geprobeerd; een meervoud wordt alleen overgenomen als het voorkomt in [nouns-meervouden.txt](../experimenteel/nouns-meervouden.txt).
|
||||
|
||||
|
||||
## Genereren van nieuwe lijst
|
||||
|
||||
- Voer eerst `node irregulars.js` uit. Dit genereert een lijst met de meest voorkomende onregelmatige meervouden.
|
||||
- Daarna: `node fetchPlurals.js`. Dit kan een tijdje duren en levert als uitvoer `nouns.json` op. Die lijst kun je vervolgens gebruiken.
|
||||
@@ -1,145 +0,0 @@
|
||||
var fs = require("fs");
|
||||
let path = require("path");
|
||||
var irregularFile = "irregular.json";
|
||||
var outputFile = "nouns.json";
|
||||
var helpers = require("./language-helpers");
|
||||
|
||||
// Remove the output of a previous run before regenerating it.
// A missing file is the normal first-run situation, so only unexpected
// failures (permissions, path is a directory, ...) are reported.
try {
    fs.unlinkSync(path.join(__dirname, outputFile));
    console.log("Output file deleted.");
} catch (error) {
    if (error.code !== "ENOENT") {
        console.log(error);
    }
}
|
||||
|
||||
// Start from the irregular plurals; plurals found by checkPlural are added to
// this same object, and words already present in it are skipped there.
var nouns = JSON.parse(fs.readFileSync(path.join(__dirname, irregularFile), "utf8"));

// One candidate word per line. Accept both LF and CRLF line endings and drop
// empty lines (for instance the one produced by a trailing newline), so no
// empty or "\r"-terminated word is fed to the plural rules.
var words = fs
    .readFileSync(path.join(__dirname, "../wordlist.txt"), "utf8")
    .split(/\r?\n/)
    .filter(function (line) {
        return line.length > 0;
    });
|
||||
|
||||
/**
 * Tries to find the plural of a Dutch noun by generating candidate spellings
 * and keeping the first one that `helpers.doesPluralExists` confirms.
 * A confirmed plural is stored in `nouns[word]`.
 *
 * @param {string} word - Singular noun to look up.
 * @returns {boolean} true when a plural was found and stored; false when the
 *   word is not purely alphabetic, is already present in `nouns`, or none of
 *   the candidates is a known plural.
 */
var checkPlural = function (word) {
    // Stores `candidate` as the plural of `word` when the plural list knows it.
    var accept = function (candidate) {
        if (!helpers.doesPluralExists(candidate)) {
            return false;
        }
        nouns[word] = candidate;
        return true;
    };

    // Tries stem + final consonant + "en" with the consonant voiced:
    // first with f -> v applied, then additionally with s -> z applied.
    var acceptVoiced = function (stem, finalConsonant) {
        var voiced = finalConsonant.replace("f", "v");
        if (accept(stem + voiced + "en")) {
            return true;
        }
        voiced = voiced.replace("s", "z");
        return accept(stem + voiced + "en");
    };

    var lastCharacter;

    // Only words made of letters (accented Latin letters included) are
    // handled. The pattern must be anchored and non-empty: an unanchored
    // "zero or more letters" pattern matches every string.
    if (typeof word !== "string" ||
        !/^[a-zA-Z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u024F]+$/.test(word)) {
        console.log("niet alpha");
        return false;
    }

    // Skip words that already have an entry (the irregular plurals, or a
    // plural found earlier in this run).
    if (Object.prototype.hasOwnProperty.call(nouns, word)) {
        console.log("irregular");
        return false;
    }

    // Words on -erik simply take -en (leeuwerik -> leeuweriken).
    if (/erik$/.test(word) && accept(word + "en")) {
        return true;
    }

    if (helpers.endsWithVowel(word)) {
        if (accept(word + "ën") || accept(word + "ëen") || accept(word + "en")) {
            return true;
        }
    }

    if (helpers.endsWithDoubleConsonant(word)) {
        if (helpers.endswithStemloos(word)) {
            lastCharacter = word.charAt(word.length - 1);
            if (acceptVoiced(word.substring(0, word.length - 1), lastCharacter)) {
                return true;
            }
        }
        if (accept(word + "en") || accept(word + "s")) {
            return true;
        }
    }

    if (helpers.endsWithConsonant(word)) {
        lastCharacter = word.charAt(word.length - 1);

        // Drop the letter before the final consonant (boom -> bomen).
        if (accept(word.substring(0, word.length - 2) + lastCharacter + "en")) {
            return true;
        }

        // Double the final consonant (kat -> katten).
        if (accept(word + lastCharacter + "en")) {
            return true;
        }

        if (helpers.endswithStemloosVowel(word)) {
            // A doubled vowel is written single once the syllable opens, so
            // combine the shortened stem with the voiced consonant
            // (graaf -> graven). The previous guard for this step tested
            // whether the word minus its last letter ended in a consonant,
            // which is never the case for a word ending in two vowels plus a
            // consonant, so these plurals were never tried.
            var hasDoubledVowel =
                word.charAt(word.length - 2) === word.charAt(word.length - 3);
            if (hasDoubledVowel &&
                acceptVoiced(word.substring(0, word.length - 2), lastCharacter)) {
                return true;
            }

            // Other vowel pairs keep both vowels (huis -> huizen).
            if (acceptVoiced(word.substring(0, word.length - 1), lastCharacter)) {
                return true;
            }
        }
    }

    // Fallbacks for every word shape: -'s and plain -s.
    if (accept(word + "'s") || accept(word + "s")) {
        return true;
    }

    console.log("niet gevonden");
    return false;
};
|
||||
|
||||
// Run the plural rules over every word; checkPlural records each plural it
// finds in `nouns`.
words.forEach(word => {
    console.log(word);
    checkPlural(word);
});

console.log(nouns);

// Write the combined table next to this script.
let data = JSON.stringify(nouns);
fs.writeFile(path.join(__dirname, outputFile), data, err => {
    if (err) {
        // Never report success when the result could not be written.
        console.error(err);
        process.exitCode = 1;
        return;
    }
    console.log("done");
});
|
||||
@@ -1,235 +0,0 @@
|
||||
var fs = require("fs");
|
||||
let path = require("path");
|
||||
var outputFile = "irregular.json";
|
||||
|
||||
// Scratch variables: a space-separated list of nouns and the array obtained
// by splitting that list.
var nounString;
var nouns;
|
||||
|
||||
// Hand-maintained table of irregular Dutch plurals.
// Key: singular noun. Value: the plural, or an array when several plural
// forms are listed. Every key appears exactly once.
var irregulars = {
    been: ["beenderen", "benen"],
    blad: ["bladeren", "bladen", "blaren"],
    ei: "eieren",
    gelid: "gelederen",
    gemoed: "gemoederen",
    goed: "goederen",
    hoen: "hoenderen",
    kalf: "kalveren",
    kind: "kinderen",
    kleed: ["kleren", "klederen", "kleden"],
    lam: "lammeren",
    lied: "liederen",
    rad: "raderen",
    rund: "runderen",
    volk: ["volken", "volkeren"],
    koe: "koeien",
    vlo: "vlooien",
    leerrede: ["leerredenen", "leerredes"],
    lende: ["lendenen", "lenden"],
    kleinood: ["kleinoden", "kleinodiën"],
    sieraad: ["sieraden", "sieradiën"],
    epos: ["epen", "epossen"],
    genius: "geniën",
    aanbod: "aanbiedingen",
    beleg: ["beleggingen", "belegeringen"],
    dank: "dankbetuigingen",
    doel: ["doeleinden", "doelen"],
    gedrag: "gedragingen",
    genot: "genietingen",
    lof: ["loftuitingen", "lofbetuigingen"],
    onderzoek: ["onderzoekingen", "onderzoeken"],
    raad: "raadgevingen",
    rede: ["redevoeringen", "redes"],
    fotograaf: "fotografen",
    paragraaf: "paragrafen",
    telegraaf: "telegrafen",
    burggraaf: "burggraven",
    loopgraaf: "loopgraven",
    filosoof: "filosofen",
    theosoof: "theosofen",
    elf: "elfen",
    paraaf: "parafen",
    stad: "steden",
    bad: "baden",
    bedrag: "bedragen",
    dag: "dagen",
    dak: "daken",
    dal: "dalen",
    gat: "gaten",
    gelag: "gelagen",
    glas: "glazen",
    graf: "graven",
    pad: ["paden", "padden"],
    slag: "slagen",
    staf: ["staffen", "staven"],
    vat: "vaten",
    verdrag: "verdragen",
    handvat: ["handvatten", "handvaten"],
    bevel: "bevelen",
    gebed: "gebeden",
    gebrek: "gebreken",
    gen: "genen",
    spel: "spelen",
    tred: "treden",
    weg: "wegen",
    gemet: "gemeten",
    bijzonderheid: "bijzonderheden",
    kleinigheid: "kleinigheden",
    moeilijkheid: "moeilijkheden",
    lid: "leden",
    rif: "reven",
    schip: "schepen",
    smid: "smeden",
    spit: ["spitten", "speten"],
    alcohol: "alcoholen",
    elektron: "elektronen",
    neutron: "neutronen",
    proton: "protonen",
    gebod: "geboden",
    god: "goden",
    hertog: "hertogen",
    hof: "hoven",
    hol: "holen",
    kot: ["kotten", "koten"],
    lot: "loten",
    oorlog: "oorlogen",
    schot: "schoten",
    slot: "sloten",
    verbod: "verboden",
    verlof: "verloven",
    kruis: "kruizen",
    pers: "persen",
    balans: "balansen",
    concours: "concoursen",
    dans: "dansen",
    diocees: "diocesen",
    eis: "eisen",
    forens: ["forensen", "forenzen"],
    impuls: "impulsen",
    kaars: "kaarsen",
    kans: "kansen",
    kers: "kersen",
    kikvors: "kikvorsen",
    koers: "koersen",
    kous: "kousen",
    krans: "kransen",
    lans: "lansen",
    mars: "marsen",
    mens: "mensen",
    ons: ["onsen", "onzen"],
    paus: "pausen",
    plons: ["plonsen", "plonzen"],
    pols: "polsen",
    prins: "prinsen",
    pruis: "pruisen",
    saus: ["sausen", "sauzen"],
    schans: "schansen",
    spons: ["sponzen", "sponsen"],
    stimulans: "stimulansen",
    tendens: "tendensen",
    trans: "transen",
    wals: "walsen",
    wens: "wensen",
    zeis: "zeisen",
    einde: ["eindes", "einden"],
    symbool: "symbolen",
    knop: ["knopen", "knoppen"]
};
|
||||
|
||||
// https://e-ans.ivdnt.org/topics/pid/ans03050402lingtopic
|
||||
// Compounds ending in "man", grouped by the replacement(s) for that ending.
// Each entry is [space-separated nouns, replacement]. A string replacement
// stores a single plural; an array stores one plural per replacement, in the
// listed order. Groups are applied top to bottom, so a noun listed in more
// than one group keeps the value assigned by the last group that lists it.
const manCompoundGroups = [
    [
        "edelman krijgsman landman raadsman weidman",
        "lieden"
    ],
    [
        "akkerman ambachtsman baggerman bootsman buitenman burgerman buurman handelsman handwerksman kooiman koopman scheepstimmerman timmerman schieman sjouwerman speelman stadswerkman werkman stuurman tuinman vakman varensman veerman voerman zakenman zeeman zegsman",
        ["lui", "lieden"]
    ],
    [
        "bewindsman bruggeman cameraman hoofdman leidsman ombudsman staatsman vertrouwensman verzetsman voorman",
        ["mannen", "lieden"]
    ],
    [
        "brandweerman kantoorman opperman sportsman vakman",
        ["mannen", "lieden", "lui"]
    ],
    [
        "barman bosjesman boeman dronkeman ijscoman jongeman kiesman kikvorsman krantenman leenman medicijnman melkman muzelman Noorman olieman onderwijsman orgelman partijman politieman sandwichman schillenman spoorwegman stroman stuntman vakbondsman vuilnisman weerman wetenschapsman wildeman",
        "mannen"
    ]
];

for (const group of manCompoundGroups) {
    const replacement = group[1];

    nounString = group[0];
    nouns = nounString.split(" ");

    for (const noun of nouns) {
        if (typeof replacement === "string") {
            irregulars[noun] = noun.replace(/man$/, replacement);
        } else {
            irregulars[noun] = replacement.map(ending => noun.replace(/man$/, ending));
        }
    }
}
|
||||
|
||||
// Serialise the complete table and write it next to this script.
let data = JSON.stringify(irregulars);
fs.writeFile(path.join(__dirname, outputFile), data, err => {
    if (err) {
        // Never report success when the file could not be written.
        console.error(err);
        process.exitCode = 1;
        return;
    }
    console.log("done");
});
|
||||
@@ -1,45 +0,0 @@
|
||||
var fs = require("fs");
|
||||
let path = require("path");
|
||||
var pluralsFile = "../experimenteel/nouns-meervouden.txt";

// Known plural forms, one per line. Both LF and CRLF line endings are
// accepted so that entries never carry a trailing "\r", which would make
// every lookup fail.
var plurals = fs.readFileSync(path.join(__dirname, pluralsFile), "utf8").split(/\r?\n/);
|
||||
|
||||
exports.isVowel = character => {
|
||||
return character.match(/[aeiou]/);
|
||||
};
|
||||
|
||||
exports.isConsonant = character => {
|
||||
return character.match(/[bcdfghjklmnpqrstvwxyz]/);
|
||||
};
|
||||
|
||||
exports.endsWithVowel = word => {
|
||||
return word.match(/\w*[aeiou]\b/);
|
||||
};
|
||||
|
||||
exports.endsWithConsonant = word => {
|
||||
return word.match(/\w*[bcdfghjklmnpqrstvwxyz]\b/);
|
||||
};
|
||||
|
||||
exports.endsWithDoubleE = word => {
|
||||
return word.match(/\w*ee\b/);
|
||||
};
|
||||
|
||||
exports.endsWithDoubleConsonant = word => {
|
||||
return word.match(/\w*[bcdfghjklmnpqrstvwxyz][bcdfghjklmnpqrstvwxyz]\b/);
|
||||
};
|
||||
|
||||
exports.endsWithSingleConsonant = word => {
|
||||
return word.match(/\w*[aeiou][bcdfghjklmnpqrstvwxyz]\b/);
|
||||
};
|
||||
|
||||
exports.endswithStemloosVowel = word => {
|
||||
return word.match(/\w*[aeiou][aeiou][tkfschp]\b/);
|
||||
};
|
||||
|
||||
exports.endswithStemloos = word => {
|
||||
return word.match(/\w*[bcdfghjklmnpqrstvwxyz][tkfschp]\b/);
|
||||
};
|
||||
|
||||
exports.doesPluralExists = plural => {
|
||||
return plurals.includes(plural);
|
||||
};
|
||||
Reference in New Issue
Block a user