integrating with monitor app
This commit is contained in:
57
src/parse.py
57
src/parse.py
@@ -38,11 +38,36 @@ class DataParser:
|
||||
def format_timestamp(timestamp) -> str:
    """Convert a Unix timestamp to a 'YYYY-MM-DD HH:MM:SS' string (local time).

    Accepted inputs:
      * int/float epoch values, in seconds or milliseconds;
      * strings holding such a number;
      * any other string, which is returned unchanged unless it is one of
        the known placeholder texts (those yield '').

    Values above 1e12 are interpreted as milliseconds and divided by 1000.
    Numeric values that are not strictly positive are not treated as
    timestamps; they fall through to the generic fallback, which returns
    ``str(timestamp)`` for truthy values and '' otherwise.

    Returns:
        The formatted date, the original text, or '' when the value is
        empty, a placeholder, or cannot be converted.  This helper never
        raises: conversion failures are reported with a printed warning.
    """
    # Placeholder texts seen in the data where no real date is available.
    placeholders = ('gap', 'materieel wegens vereffening', 'tbd', 'n/a', 'unknown')

    try:
        seconds = None
        if isinstance(timestamp, (int, float)):
            seconds = timestamp
        elif isinstance(timestamp, str):
            # Keep the try body minimal: only float() decides whether the
            # text is numeric.  A ValueError raised later by
            # datetime.fromtimestamp (e.g. year out of range) must NOT be
            # mistaken for "not a number" and echoed back as a date.
            try:
                seconds = float(timestamp)
            except ValueError:
                if timestamp.lower().strip() in placeholders:
                    return ''
                # Not numeric and not a placeholder: assume it is already
                # a formatted date and hand it back untouched.
                return timestamp

        if seconds is not None and seconds > 0:
            # Unix timestamps are typically 10 digits (seconds) or
            # 13 digits (milliseconds).
            if seconds > 1e12:
                seconds = seconds / 1000
            return datetime.fromtimestamp(seconds).strftime('%Y-%m-%d %H:%M:%S')

        return str(timestamp) if timestamp else ''
    except Exception:
        # Best-effort formatter: out-of-range or otherwise unconvertible
        # values are logged for debugging and rendered as an empty string.
        if timestamp and str(timestamp).strip():
            print(f" ⚠️ Could not parse timestamp: {timestamp}")
        return ''
|
||||
|
||||
@staticmethod
|
||||
def format_currency(amount) -> str:
|
||||
@@ -226,15 +251,33 @@ class DataParser:
|
||||
r'(?:Current bid|Huidig bod)[:\s]*</?\w*>\s*(€[\d,.\s]+)',
|
||||
r'(?:Current bid|Huidig bod)[:\s]+(€[\d,.\s]+)',
|
||||
]
|
||||
|
||||
# Invalid bid texts that should be treated as "no bids"
|
||||
invalid_bid_texts = [
|
||||
'huidig bod',
|
||||
'current bid',
|
||||
'€huidig bod',
|
||||
'€huidig bod', # With zero-width spaces
|
||||
'huidig bod',
|
||||
]
|
||||
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, content, re.IGNORECASE)
|
||||
if match:
|
||||
bid = match.group(1).strip()
|
||||
if bid and bid.lower() not in ['huidig bod', 'current bid']:
|
||||
if not bid.startswith('€'):
|
||||
bid = f"€{bid}"
|
||||
return bid
|
||||
return "€0"
|
||||
# Remove zero-width spaces and other unicode whitespace
|
||||
bid = re.sub(r'[\u200b\u200c\u200d\u00a0]+', ' ', bid).strip()
|
||||
|
||||
# Check if it's a valid bid
|
||||
if bid:
|
||||
# Reject invalid bid texts
|
||||
bid_lower = bid.lower().replace(' ', '').replace('€', '')
|
||||
if bid_lower not in [t.lower().replace(' ', '').replace('€', '') for t in invalid_bid_texts]:
|
||||
if not bid.startswith('€'):
|
||||
bid = f"€{bid}"
|
||||
return bid
|
||||
|
||||
return "No bids"
|
||||
|
||||
def _extract_bid_count(self, content: str) -> int:
|
||||
"""Extract number of bids"""
|
||||
|
||||
Reference in New Issue
Block a user