88 lines
3.0 KiB
Python
88 lines
3.0 KiB
Python
import re
|
|
import os
|
|
from pathlib import Path
|
|
from cache import Cache
|
|
from models.location import Countrycode, GeonameLocation
|
|
|
|
|
|
def getLocationArray(countrycode: Countrycode):
|
|
cachename = 'locations_' + countrycode
|
|
|
|
res = Cache.get(cachename,672) #a month in the cache is long enough...
|
|
if(res):return res
|
|
|
|
base_dir = Path(os.path.dirname(__file__)).parent.absolute() #<-- absolute dir the script is in
|
|
#filename = "\data\locationfiles\\" + countrycode + ".txt"
|
|
abs_file_path = os.path.join(base_dir, "data", "locationfiles", countrycode + ".txt")
|
|
with open(abs_file_path, encoding='utf-8', errors='ignore') as f:
|
|
#datalines = f.readlines();
|
|
|
|
geonames = []
|
|
for line in f:
|
|
line = line.rstrip('\n');
|
|
data = line.split("\t")
|
|
alternatenames = []
|
|
if data[3] != "":
|
|
alternatenames = data[3].lower().split(",")
|
|
geoname = GeonameLocation(data[0], data[1].lower(), data[2].lower(), alternatenames, data[4], data[5], data[8], data[18])
|
|
geonames.append(geoname)
|
|
|
|
Cache.add(cachename,geonames)
|
|
return geonames
|
|
|
|
def getGeoLocationByCity(city = "", countrycode: Countrycode = Countrycode.NL ):
|
|
city = city.lower();
|
|
|
|
cityname = city
|
|
if(not "gemeente" in cityname):
|
|
cityname = "gemeente "+ cityname
|
|
|
|
geonames = getLocationArray(countrycode)
|
|
|
|
#first tries name with 'gemeente as prefix'
|
|
geo = list(filter(lambda g: g.name == cityname, geonames))
|
|
if(geo): geo = geo[0]
|
|
#print('first try' + repr(geo))
|
|
if (geo): return geo;
|
|
#also tries in the alternatenames
|
|
geo = list(filter(lambda g: inAlternatenames(g.alternatenames, cityname), geonames))
|
|
if(geo): geo = geo[0]
|
|
#print('alternatenames'+ repr( geo))
|
|
if (geo): return geo;
|
|
|
|
#then tries name without 'gemeente as prefix'
|
|
geo = list(filter(lambda g: g.name == city, geonames))
|
|
if(geo): geo = geo[0]
|
|
#print('without gemeente' + repr( geo))
|
|
if (geo): return geo;
|
|
#also tries in the alternatenames
|
|
geo = list(filter(lambda g: inAlternatenames(g.alternatenames, city), geonames))
|
|
if(geo): geo = geo[0]
|
|
#print('alternatenames without gemeente' + repr( geo))
|
|
if (geo): return geo;
|
|
|
|
#removes everything between () and then removes the leading and trailing spaces;
|
|
|
|
print('name before regex ' + city)
|
|
#city = re.sub('/\([^()]*\)/g', '', city)
|
|
city = re.sub("[\(].*?[\)]", "", city)
|
|
city = city.strip();
|
|
print('name after regex ' + city)
|
|
|
|
geo = list(filter(lambda g: g.name == city, geonames))
|
|
if(geo): geo = geo[0]
|
|
#print('without anything between ()' + repr( geo))
|
|
if (geo): return geo;
|
|
|
|
|
|
#also tries in the alternatenames
|
|
geo = list(filter(lambda g: inAlternatenames(g.alternatenames, city), geonames))
|
|
if(geo): geo = geo[0]
|
|
#print('alternatenames without ()'+ repr( geo))
|
|
if (geo): return geo;
|
|
|
|
print('city not found: '+ city)
|
|
return None;
|
|
|
|
def inAlternatenames(alternatenames = [], name = ""):
|
|
return name in alternatenames |