Files
aupi/utils/locationutils.py
2024-07-13 23:42:00 +02:00

92 lines
3.4 KiB
Python

import re
import os
from pathlib import Path
from cache import Cache, FileCache
from models.location import Countrycode, GeonameLocation, JsonEncoder
from utils.helperutils import log;
def getLocationArray(countrycode: Countrycode):
    """Return the list of GeonameLocation records for ``countrycode``.

    The result is looked up in the cache under ``locations_<countrycode>``
    first.  On a miss the tab-separated file
    ``data/locationfiles/<countrycode>.txt`` (relative to the parent of the
    directory containing this module) is parsed, stored in the cache and
    returned.

    The column positions used below look like the GeoNames dump layout
    (19 tab-separated columns) -- TODO confirm against the data files.

    Args:
        countrycode: Country identifier; it is concatenated into both the
            cache key and the file name.

    Returns:
        The truthy cached value if there is one, otherwise a freshly parsed
        list of ``GeonameLocation`` objects (one per well-formed line).

    Raises:
        OSError: If the location file cannot be opened.
    """
    # Plain concatenation is kept on purpose: formatting an enum-like value
    # through an f-string may render differently than ``+`` does.
    cachename = 'locations_' + countrycode

    # 672 is the maximum age handed to the cache; the original note calls it
    # "a month", so presumably hours (28 days) -- confirm against Cache.get.
    cached = Cache.get(cachename, 672)
    if cached:
        return cached

    # Absolute path of the parent of the directory this module lives in.
    base_dir = Path(os.path.dirname(__file__)).parent.absolute()
    abs_file_path = os.path.join(base_dir, "data", "locationfiles", countrycode + ".txt")

    # The highest column index read below is 18, so 19 columns are required.
    required_columns = 19

    geonames = []
    with open(abs_file_path, encoding='utf-8', errors='ignore') as f:
        for line in f:
            data = line.rstrip('\n').split("\t")
            if len(data) < required_columns:
                # Blank or truncated line: skipping it avoids an IndexError
                # that would otherwise abort loading the whole file.
                continue

            # Comma-separated alternate names become a lower-cased list with
            # surrounding whitespace removed; an empty column gives [].
            alternatenames = []
            if data[3] != "":
                alternatenames = [part.strip() for part in data[3].lower().split(",")]

            geonames.append(
                GeonameLocation(
                    data[0],
                    data[1].strip().lower(),
                    data[2].strip().lower(),
                    alternatenames,
                    data[4],
                    data[5],
                    data[8],
                    data[18],
                )
            )

    Cache.add(cachename, geonames)
    return geonames
def getGeoLocationByCity(city = "", countrycode: Countrycode = Countrycode.NL ):
    """Look up the location record that matches a city name.

    The name is stripped and lower-cased, then a series of candidate
    spellings is tried in order.  For every candidate the ``name`` of each
    record is compared first and its ``alternatenames`` second:

    1. the name prefixed with "gemeente " (Dutch for "municipality"), unless
       the name already contains "gemeente";
    2. the same prefixed name with curly single quotes replaced by ``'``;
    3. the name itself with curly single quotes replaced by ``'``;
    4. that name with every ``(...)`` group removed and re-stripped.

    Args:
        city: City name to search for.  ``None`` is treated like ``""``.
        countrycode: Selects the location list via ``getLocationArray``.

    Returns:
        The first matching record, or ``None`` when no candidate matches.
    """
    # Treat None like the empty default instead of failing on .strip().
    city = (city or "").strip().lower()

    cityname = city
    if "gemeente" not in cityname:
        cityname = "gemeente " + cityname

    geonames = getLocationArray(countrycode)

    # Typographic single quotes (U+2018 / U+2019) become a plain apostrophe.
    # The prefixed name is normalized as well, so that a curly-quoted input
    # gets the same "gemeente" preference as a straight-quoted one.
    curly_quotes = "[\u2018\u2019]"
    city = re.sub(curly_quotes, "'", city)
    normalized_cityname = re.sub(curly_quotes, "'", cityname)

    # dict.fromkeys keeps the order while dropping duplicate candidates, so
    # identical spellings are not searched twice.
    for candidate in dict.fromkeys((cityname, normalized_cityname, city)):
        geo = _findGeonameByName(geonames, candidate)
        if geo is not None:
            return geo

    # Last resort: drop everything between parentheses and trim again.
    log('name before regex ' + city)
    stripped = re.sub(r"\(.*?\)", "", city).strip()
    log('name after regex ' + stripped)

    # When nothing was removed this spelling has already been tried above.
    if stripped != city:
        geo = _findGeonameByName(geonames, stripped)
        if geo is not None:
            return geo

    log('city not found ' + stripped)
    return None


def _findGeonameByName(geonames, name):
    """Return the first record whose name, else whose alternate names, match ``name``; None if neither does."""
    by_name = next((g for g in geonames if g.name == name), None)
    if by_name is not None:
        return by_name
    return next(
        (g for g in geonames if inAlternatenames(g.alternatenames, name)),
        None,
    )
def inAlternatenames(alternatenames = None, name = ""):
    """Return True when ``name`` is contained in ``alternatenames``.

    Args:
        alternatenames: Collection of alternate names to search.  ``None``
            (the default) is treated as "no alternate names".
        name: The name to look for.

    Returns:
        The result of ``name in alternatenames``, or ``False`` when
        ``alternatenames`` is ``None``.
    """
    # A None default replaces the shared mutable [] default and also keeps a
    # missing collection from raising TypeError on the membership test.
    if alternatenames is None:
        return False
    return name in alternatenames