This commit is contained in:
mike
2025-12-19 13:15:09 +01:00
parent 79d6ca8e27
commit 69babe6e06
5 changed files with 1 additions and 434 deletions

1
.gitignore vendored
View File

@@ -1,2 +1,3 @@
.idea/
/vocab/.custom/
**/.custom/

View File

@@ -1,9 +0,0 @@
# Meervoudsvormen op basis van woordenlijst
Op basis van [wordlist.txt](../wordlist.txt) wordt voor elk woord met een aantal meervoudsregels getest of het gevormde meervoud voorkomt in [nouns-meervouden.txt](../experimenteel/nouns-meervouden.txt).
## Genereren van nieuwe lijst
- Voer eerst ```node irregulars.js``` uit. Dit genereert ```irregular.json```, een lijst met onregelmatige meervouden (uitzonderingen op de regels).
- Daarna: ```node fetchPlurals.js```. Dit kan een tijdje duren, maar geeft een output: ```nouns.json```. Deze lijst kan je dan gebruiken.

View File

@@ -1,145 +0,0 @@
var fs = require("fs");
let path = require("path");
var irregularFile = "irregular.json";
var outputFile = "nouns.json";
var helpers = require("./language-helpers");
// Remove the result of a previous run. This is best effort: any failure
// (for instance when the file does not exist yet) is logged and ignored.
try {
  const previousOutput = path.join(__dirname, outputFile);
  fs.unlinkSync(previousOutput);
  console.log("Output file deleted.");
} catch (error) {
  console.log(error);
}

// Dictionary singular -> plural(s), seeded with the irregular nouns read
// from `irregularFile`; checkPlural() adds the plurals it finds to it.
const irregularJson = fs.readFileSync(path.join(__dirname, irregularFile), "utf8");
const nouns = JSON.parse(irregularJson);

// Candidate singular words, one per line of the word list.
const wordlistText = fs.readFileSync(path.join(__dirname, "../wordlist.txt"), "utf8");
const words = wordlistText.split("\n");
/**
 * Tries a fixed cascade of plural-formation rules on `word` and stores the
 * first candidate that `helpers.doesPluralExists` confirms in `nouns[word]`.
 *
 * Words that do not start with an ASCII letter (including the empty string)
 * and words that already have an entry in `nouns` are skipped.
 *
 * @param {string} word - Singular noun to find a plural for.
 * @returns {boolean} `true` when a plural was found and stored, otherwise `false`.
 */
function checkPlural(word) {
  // Stores `candidate` as the plural of `word` when the plural list contains it.
  const accept = candidate => {
    if (!helpers.doesPluralExists(candidate)) {
      return false;
    }
    nouns[word] = candidate;
    return true;
  };

  // Tries stem + final letter + "en", first with a final "f" turned into "v",
  // then additionally with a final "s" turned into "z".
  const acceptVoiced = (stem, finalLetter) => {
    const withV = finalLetter.replace("f", "v");
    if (accept(stem + withV + "en")) {
      return true;
    }
    return accept(stem + withV.replace("s", "z") + "en");
  };

  // The original pattern used `*`, which also matches the empty string, so
  // the guard could never fire. `+` requires at least one leading letter.
  if (!/^[a-zA-Z]+/.test(word)) {
    console.log("niet alpha");
    return false;
  }

  // Skip words that already have an entry: the irregulars loaded up front
  // and anything resolved earlier in this run. `nouns` is a data dictionary,
  // so do not rely on a `hasOwnProperty` it might itself contain.
  if (Object.prototype.hasOwnProperty.call(nouns, word)) {
    console.log("irregular");
    return false;
  }

  // Words in "-erik" simply take "en".
  if (word.match(/\w*erik\b/) && accept(word + "en")) {
    return true;
  }

  if (helpers.endsWithVowel(word)) {
    if (accept(word + "ën") || accept(word + "ëen") || accept(word + "en")) {
      return true;
    }
  }

  if (helpers.endsWithDoubleConsonant(word)) {
    if (helpers.endswithStemloos(word)) {
      const stem = word.substring(0, word.length - 1);
      if (acceptVoiced(stem, word.charAt(word.length - 1))) {
        return true;
      }
    }
    if (accept(word + "en") || accept(word + "s")) {
      return true;
    }
  }

  if (helpers.endsWithConsonant(word)) {
    const lastCharacter = word.charAt(word.length - 1);
    const withoutLastTwo = word.substring(0, word.length - 2);

    // Drop the letter before the final consonant (e.g. "maan" -> "manen").
    if (accept(withoutLastTwo + lastCharacter + "en")) {
      return true;
    }
    // Double the final consonant (e.g. "kat" -> "katten").
    if (accept(word + lastCharacter + "en")) {
      return true;
    }

    if (helpers.endswithStemloosVowel(word)) {
      const withoutLast = word.substring(0, word.length - 1);
      const stem = helpers.endsWithSingleConsonant(withoutLast) ? withoutLastTwo : withoutLast;
      if (acceptVoiced(stem, lastCharacter)) {
        return true;
      }
    }
  }

  // Last resort: plain "'s" or "s".
  if (accept(word + "'s") || accept(word + "s")) {
    return true;
  }

  console.log("niet gevonden");
  return false;
}
// Run the rule cascade over every word of the word list; checkPlural()
// records each plural it finds in `nouns`.
words.forEach(word => {
  console.log(word);
  checkPlural(word);
});
console.log(nouns);

// Persist the combined dictionary (irregulars + found plurals) as JSON.
const data = JSON.stringify(nouns);
fs.writeFile(path.join(__dirname, outputFile), data, err => {
  if (err) {
    // Previously a failed write was ignored and "done" was printed anyway.
    console.error(err);
    process.exitCode = 1;
    return;
  }
  console.log("done");
});

View File

@@ -1,235 +0,0 @@
var fs = require("fs");
let path = require("path");
// Name of the JSON file the table below is written to.
const outputFile = "irregular.json";

// Irregular Dutch plurals, keyed by singular. A value is either a single
// plural (string) or a list of accepted plurals (array of strings).
// The key order is kept stable because it determines the JSON output order.
const irregulars = {
  been: ["beenderen", "benen"],
  // "blad" used to be listed twice with the same value; once is enough.
  blad: ["bladeren", "bladen", "blaren"],
  ei: "eieren",
  gelid: "gelederen",
  gemoed: "gemoederen",
  goed: "goederen",
  hoen: "hoenderen",
  kalf: "kalveren",
  kind: "kinderen",
  kleed: ["kleren", "klederen", "kleden"],
  lam: "lammeren",
  lied: "liederen",
  rad: "raderen",
  rund: "runderen",
  volk: ["volken", "volkeren"],
  koe: "koeien",
  vlo: "vlooien",
  leerrede: ["leerredenen", "leerredes"],
  lende: ["lendenen", "lenden"],
  kleinood: ["kleinoden", "kleinodiën"],
  sieraad: ["sieraden", "sieradiën"],
  epos: ["epen", "epossen"],
  genius: "geniën",
  aanbod: "aanbiedingen",
  beleg: ["beleggingen", "belegeringen"],
  dank: "dankbetuigingen",
  doel: ["doeleinden", "doelen"],
  gedrag: "gedragingen",
  genot: "genietingen",
  lof: ["loftuitingen", "lofbetuigingen"],
  onderzoek: ["onderzoekingen", "onderzoeken"],
  raad: "raadgevingen",
  rede: ["redevoeringen", "redes"],
  fotograaf: "fotografen",
  paragraaf: "paragrafen",
  telegraaf: "telegrafen",
  burggraaf: "burggraven",
  loopgraaf: "loopgraven",
  filosoof: "filosofen",
  theosoof: "theosofen",
  elf: "elfen",
  paraaf: "parafen",
  stad: "steden",
  bad: "baden",
  bedrag: "bedragen",
  dag: "dagen",
  dak: "daken",
  dal: "dalen",
  gat: "gaten",
  gelag: "gelagen",
  glas: "glazen",
  graf: "graven",
  pad: ["paden", "padden"],
  slag: "slagen",
  staf: ["staffen", "staven"],
  vat: "vaten",
  verdrag: "verdragen",
  handvat: ["handvatten", "handvaten"],
  bevel: "bevelen",
  gebed: "gebeden",
  gebrek: "gebreken",
  gen: "genen",
  spel: "spelen",
  tred: "treden",
  weg: "wegen",
  gemet: "gemeten",
  bijzonderheid: "bijzonderheden",
  kleinigheid: "kleinigheden",
  moeilijkheid: "moeilijkheden",
  lid: "leden",
  rif: "reven",
  schip: "schepen",
  smid: "smeden",
  spit: ["spitten", "speten"],
  alcohol: "alcoholen",
  elektron: "elektronen",
  neutron: "neutronen",
  proton: "protonen",
  gebod: "geboden",
  god: "goden",
  hertog: "hertogen",
  hof: "hoven",
  hol: "holen",
  kot: ["kotten", "koten"],
  lot: "loten",
  oorlog: "oorlogen",
  schot: "schoten",
  slot: "sloten",
  verbod: "verboden",
  verlof: "verloven",
  kruis: "kruizen",
  pers: "persen",
  balans: "balansen",
  concours: "concoursen",
  dans: "dansen",
  diocees: "diocesen",
  eis: "eisen",
  forens: ["forensen", "forenzen"],
  impuls: "impulsen",
  kaars: "kaarsen",
  kans: "kansen",
  kers: "kersen",
  kikvors: "kikvorsen",
  koers: "koersen",
  kous: "kousen",
  krans: "kransen",
  lans: "lansen",
  mars: "marsen",
  mens: "mensen",
  ons: ["onsen", "onzen"],
  paus: "pausen",
  plons: ["plonsen", "plonzen"],
  pols: "polsen",
  prins: "prinsen",
  pruis: "pruisen",
  saus: ["sausen", "sauzen"],
  schans: "schansen",
  spons: ["sponzen", "sponsen"],
  stimulans: "stimulansen",
  tendens: "tendensen",
  trans: "transen",
  wals: "walsen",
  wens: "wensen",
  zeis: "zeisen",
  einde: ["eindes", "einden"],
  symbool: "symbolen",
  knop: ["knopen", "knoppen"]
};

// https://e-ans.ivdnt.org/topics/pid/ans03050402lingtopic
// Compounds ending in "man", grouped by the endings that replace the final
// "man" in their plural(s). Groups are applied in order, so a noun listed in
// more than one group ends up with the forms of the last group that names it:
// "vakman" appears in the second and the fourth group and therefore gets
// the three forms of the fourth.
const manCompoundGroups = [
  {
    nouns: "edelman krijgsman landman raadsman weidman",
    endings: ["lieden"]
  },
  {
    nouns: "akkerman ambachtsman baggerman bootsman buitenman burgerman buurman handelsman handwerksman kooiman koopman scheepstimmerman timmerman schieman sjouwerman speelman stadswerkman werkman stuurman tuinman vakman varensman veerman voerman zakenman zeeman zegsman",
    endings: ["lui", "lieden"]
  },
  {
    nouns: "bewindsman bruggeman cameraman hoofdman leidsman ombudsman staatsman vertrouwensman verzetsman voorman",
    endings: ["mannen", "lieden"]
  },
  {
    nouns: "brandweerman kantoorman opperman sportsman vakman",
    endings: ["mannen", "lieden", "lui"]
  },
  {
    nouns: "barman bosjesman boeman dronkeman ijscoman jongeman kiesman kikvorsman krantenman leenman medicijnman melkman muzelman Noorman olieman onderwijsman orgelman partijman politieman sandwichman schillenman spoorwegman stroman stuntman vakbondsman vuilnisman weerman wetenschapsman wildeman",
    endings: ["mannen"]
  }
];
manCompoundGroups.forEach(group => {
  group.nouns.split(" ").forEach(noun => {
    const forms = group.endings.map(ending => noun.replace(/man$/, ending));
    // A single form is stored as a plain string, several forms as an array,
    // matching the shape used in the literal table above.
    irregulars[noun] = forms.length === 1 ? forms[0] : forms;
  });
});
// Serialize the table of irregular plurals and write it next to this script.
const data = JSON.stringify(irregulars);
fs.writeFile(path.join(__dirname, outputFile), data, err => {
  if (err) {
    // Previously a failed write was ignored and "done" was printed anyway.
    console.error(err);
    process.exitCode = 1;
    return;
  }
  console.log("done");
});

View File

@@ -1,45 +0,0 @@
var fs = require("fs");
let path = require("path");
// Location of the list of known plural forms, relative to this file.
const pluralsFile = "../experimenteel/nouns-meervouden.txt";

// Known plural forms, one entry per line of the file.
const pluralsText = fs.readFileSync(path.join(__dirname, pluralsFile), "utf8");
const plurals = pluralsText.split("\n");
exports.isVowel = character => {
return character.match(/[aeiou]/);
};
exports.isConsonant = character => {
return character.match(/[bcdfghjklmnpqrstvwxyz]/);
};
exports.endsWithVowel = word => {
return word.match(/\w*[aeiou]\b/);
};
exports.endsWithConsonant = word => {
return word.match(/\w*[bcdfghjklmnpqrstvwxyz]\b/);
};
exports.endsWithDoubleE = word => {
return word.match(/\w*ee\b/);
};
exports.endsWithDoubleConsonant = word => {
return word.match(/\w*[bcdfghjklmnpqrstvwxyz][bcdfghjklmnpqrstvwxyz]\b/);
};
exports.endsWithSingleConsonant = word => {
return word.match(/\w*[aeiou][bcdfghjklmnpqrstvwxyz]\b/);
};
exports.endswithStemloosVowel = word => {
return word.match(/\w*[aeiou][aeiou][tkfschp]\b/);
};
exports.endswithStemloos = word => {
return word.match(/\w*[bcdfghjklmnpqrstvwxyz][tkfschp]\b/);
};
exports.doesPluralExists = plural => {
return plurals.includes(plural);
};