Files
aupi/utils/__auction_items.py
2025-12-01 13:02:25 +01:00

482 lines
20 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Utility functions to fetch auction items (lots) from different providers.
This module defines three functions that make HTTP calls to the public APIs of
Troostwijk Auctions (TWK), AuctionPort (AP) and Online Veilingmeester (OVM)
and normalises their responses into Python dictionaries. Each function
returns a list of dictionaries where each dictionary represents an
individual lot and includes standardised keys: ``lot_number``, ``title``,
``description``, ``bids`` (the number of bids if available),
``current_bid`` (current price and currency if available), ``image_url``
and ``end_time``.
The implementations rely on the ``requests`` library for HTTP transport
and include basic error handling. They raise ``requests.HTTPError``
when the remote server responds with an HTTP error (4xx or 5xx) status code.
Note: the APIs these functions call are subject to change. Endpoints and
field names may differ depending on the auction status or provider version.
These functions are intended as a starting point for integrating with
multiple auction platforms; you may need to adjust query parameters,
header values or JSON field names if the provider updates their API.
Examples
--------
```
from auction_items import get_items_twk, get_items_ap, get_items_ovm
# Troostwijk Auctions (TWK): pass the visible auction identifier
lots = get_items_twk(display_id="35563")
for lot in lots:
    print(lot['lot_number'], lot['title'], lot['current_bid'])
# AuctionPort (AP): pass the auction ID from the AuctionPort website
ap_lots = get_items_ap(auction_id=1323)
# Online Veilingmeester (OVM): the country code is required to build the
# endpoint path (e.g. ``'nederland'`` or ``'belgie'``) along with the
# numeric auction ID.
ovm_lots = get_items_ovm(country="nederland", auction_id=7713)
```
"""
from __future__ import annotations

import json
import logging
import re
from typing import Any, Dict, List, Optional

import requests
logger = logging.getLogger(__name__)
def get_items_twk(
    display_id: str,
    *,
    page: int = 1,
    page_size: int = 200,
    locale: str = "nl",
    platform: str = "WEB",
    request_session: Optional[requests.Session] = None,
) -> List[Dict[str, Any]]:
    """Fetch lots (items) for a Troostwijk auction using the GraphQL API.

    A single ``auctionWithLotsV5`` query is POSTed to
    ``https://storefront.tbauctions.com/storefront/graphql`` and every lot in
    the response is normalised into a plain dictionary. Only the requested
    page is fetched; callers that need more than ``page_size`` lots must
    call the function again with a higher ``page``.

    Parameters
    ----------
    display_id: str
        The human-readable identifier of the auction (e.g. ``"35563"``).
        The value is converted with ``str`` before it is sent.
    page: int, optional
        The 1-based page number of results (defaults to 1).
    page_size: int, optional
        The maximum number of lots to fetch per page (defaults to 200).
    locale: str, optional
        Language/locale code for the content (defaults to ``"nl"``).
    platform: str, optional
        Platform enumeration value sent as the ``$platform`` variable
        (defaults to ``"WEB"``).
    request_session: Optional[requests.Session], optional
        An existing requests session to reuse connections. If omitted, a
        temporary session is created and closed again before returning.
        A session supplied by the caller is never closed here.

    Returns
    -------
    List[Dict[str, Any]]
        One dictionary per lot with the keys ``lot_number``, ``title``,
        ``description``, ``bids``, ``current_bid`` (the ``currentBid``
        object as returned by the API, or ``None``), ``image_url`` (URL of
        the first image, or ``None``) and ``end_time`` (the ``endDateISO``
        value). A ``null`` lot list yields an empty list.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    Exception
        If the GraphQL payload contains ``errors`` or does not have the
        expected ``data.auctionWithLotsV5.lots`` structure. JSON decoding
        errors from ``response.json()`` propagate unchanged.
    """
    url = "https://storefront.tbauctions.com/storefront/graphql"
    # The query text is sent verbatim to the server, so the literal is kept
    # exactly as it was (including its flush-left layout).
    graphql_query = """
query AuctionWithLots($request: AuctionWithLotsInputV3!, $platform: Platform!) {
auctionWithLotsV5(request: $request, platform: $platform) {
lots {
lotNumber
id
title
description
numberOfBids
currentBid {
amount
currency
}
endDateISO
images {
url
}
}
}
}
"""
    variables = {
        "request": {
            "displayId": str(display_id),
            "locale": locale,
            "pageNumber": page,
            "pageSize": page_size,
            # Empty facet lists mean "no filters".
            "rangeFacetInputs": [],
            "valueFacetInputs": [],
        },
        "platform": platform,
    }
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        # Sent empty on purpose; the original author notes this usually
        # works for public queries.
        "x-csrf-token": "",
    }

    # Only close the session when this call created it; a caller-supplied
    # session stays open so its connection pool can be reused.
    owns_session = request_session is None
    session = requests.Session() if owns_session else request_session
    try:
        response = session.post(
            url,
            json={"query": graphql_query, "variables": variables},
            headers=headers,
            timeout=30,
        )
    finally:
        if owns_session:
            session.close()

    try:
        response.raise_for_status()
    except requests.HTTPError:
        logger.error("Troostwijk API returned status %s: %s", response.status_code, response.text)
        raise

    data = response.json()

    # GraphQL reports failures inside a 200 response via an "errors" list.
    if isinstance(data, dict) and data.get("errors"):
        message = data["errors"]
        logger.error("GraphQL returned errors: %s", message)
        raise Exception(f"GraphQL returned errors: {message}")

    # Navigate to the lot list; the path mirrors the selection set above.
    try:
        lot_items = data["data"]["auctionWithLotsV5"]["lots"]
    except (KeyError, TypeError) as exc:
        logger.error("Unexpected response structure from Troostwijk API: %s", data)
        raise Exception(f"Unexpected response structure: {exc}") from exc

    lots: List[Dict[str, Any]] = []
    # ``lots`` may legitimately be null; treat that as "no lots".
    for item in lot_items or []:
        images = item.get("images") or []
        first_image = images[0] if images else None
        # An image entry without a ``url`` must not abort the whole call.
        image_url = first_image.get("url") if isinstance(first_image, dict) else None
        lots.append(
            {
                "lot_number": item.get("lotNumber"),
                "title": item.get("title"),
                "description": item.get("description"),
                "bids": item.get("numberOfBids"),
                "current_bid": item.get("currentBid"),
                "image_url": image_url,
                "end_time": item.get("endDateISO"),
            }
        )
    return lots
def _ap_first_value(item: Dict[str, Any], *keys: str) -> Any:
    """Return the first usable value found under ``keys``, else ``None``.

    Behaves like chaining ``item.get(a) or item.get(b)`` except that a
    numeric zero counts as a real value, so "0 bids" or a price of ``0`` is
    not mistaken for "missing".
    """
    for key in keys:
        value = item.get(key)
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if value or is_number:
            return value
    return None


def _ap_parse_text_lots(text: str) -> List[Dict[str, Any]]:
    """Heuristically extract lots from a non-JSON (text/XML/HTML) body.

    Every occurrence of ``Lot <number>`` starts a lot; the title is the
    first ``:``/``-`` introduced sentence within the next 300 characters,
    or the whole stripped slice when no such sentence is found.
    """
    label_pattern = re.compile(r"\bLot\s+(\d+)\b", re.IGNORECASE)
    title_pattern = re.compile(r"[:\-]\s*(.*?)\.(?=\s|<)")
    lots: List[Dict[str, Any]] = []
    for match in label_pattern.finditer(text):
        start = match.end()
        segment = text[start:start + 300]  # arbitrary look-ahead window
        title_match = title_pattern.search(segment)
        title = title_match.group(1).strip() if title_match else segment.strip()
        lots.append(
            {
                "lot_number": match.group(1),
                "title": title,
                "description": None,
                "bids": None,
                "current_bid": None,
                "image_url": None,
                "end_time": None,
            }
        )
    return lots


def _ap_extract_lot_list(data: Any) -> Optional[List[Any]]:
    """Locate the list of lots in a decoded JSON payload, or return ``None``."""
    if isinstance(data, dict):
        for key in ("lots", "data", "items"):
            if isinstance(data.get(key), list):
                return data[key]
        return None
    if isinstance(data, list):
        return data
    return None


def _ap_normalise_lot(item: Dict[str, Any]) -> Dict[str, Any]:
    """Map one raw AuctionPort lot onto the module's standard keys."""
    # The current bid may be an object or a bare number.
    raw_bid = _ap_first_value(item, "currentBid", "currentPrice")
    current_bid: Optional[Dict[str, Any]] = None
    if isinstance(raw_bid, dict):
        current_bid = {
            "amount": raw_bid.get("amount"),
            "currency": raw_bid.get("currency"),
        }
    elif raw_bid is not None:
        current_bid = {"amount": raw_bid, "currency": None}

    image_url = None
    images = item.get("images")
    if isinstance(images, list) and images:
        first = images[0]
        # Entries may be ``{"url": ...}`` objects or plain URL strings.
        image_url = first.get("url") if isinstance(first, dict) else first
    elif isinstance(item.get("image"), str):
        image_url = item.get("image")

    return {
        "lot_number": _ap_first_value(item, "lotNumber", "lotnumber", "id"),
        "title": _ap_first_value(item, "title", "naam"),
        "description": _ap_first_value(item, "description", "beschrijving"),
        "bids": _ap_first_value(item, "numberOfBids", "bidCount"),
        "current_bid": current_bid,
        "image_url": image_url,
        "end_time": _ap_first_value(item, "endDateISO", "closingDateISO", "closingDate"),
    }


def get_items_ap(
    auction_id: int,
    *,
    request_session: Optional[requests.Session] = None,
) -> List[Dict[str, Any]]:
    """Retrieve items (lots) from an AuctionPort auction.

    Two candidate endpoints on ``https://api.auctionport.be`` are tried in
    order: ``/auctions/{id}/lots`` and ``/lots?auctionId={id}``. The first
    one that answers successfully decides the result:

    * a JSON body is searched for a lot list (under ``lots``, ``data`` or
      ``items``, or the body itself when it is a list) and each entry is
      normalised;
    * a non-JSON body is scanned heuristically for ``Lot <number>`` labels;
      if nothing is found the next endpoint is tried.

    Parameters
    ----------
    auction_id: int
        The numeric identifier of the auction on AuctionPort.
    request_session: Optional[requests.Session], optional
        An existing requests session. If omitted, a temporary session is
        created and closed again before returning; a caller-supplied
        session is left open.

    Returns
    -------
    List[Dict[str, Any]]
        A list of lot dictionaries with the keys ``lot_number``, ``title``,
        ``description``, ``bids``, ``current_bid`` (``amount``/``currency``
        dictionary or ``None``), ``image_url`` and ``end_time``. An empty
        list is returned when a JSON body has no recognisable lot list.
        Entries that are not JSON objects are skipped.

    Raises
    ------
    requests.HTTPError
        If no endpoint could be fetched (for example all returned 404).
    Exception
        The last recorded failure (request error, HTTP error, or
        unparseable non-JSON body) when every candidate failed.
    """
    # The first URL follows the pattern used by the AuctionPort website; the
    # second is a query by parameter.
    url_candidates = [
        f"https://api.auctionport.be/auctions/{auction_id}/lots",
        f"https://api.auctionport.be/lots?auctionId={auction_id}",
    ]
    # Only close the session when this call created it.
    owns_session = request_session is None
    session = requests.Session() if owns_session else request_session
    last_error: Optional[Exception] = None
    try:
        for url in url_candidates:
            try:
                response = session.get(url, headers={"Accept": "application/json"}, timeout=30)
            except requests.RequestException as exc:
                # Connection problems: remember them and try the next endpoint.
                last_error = exc
                continue
            if response.status_code == 404:
                continue
            if response.status_code >= 400:
                last_error = requests.HTTPError(
                    f"AuctionPort API error {response.status_code} for {url}",
                    response=response,
                )
                continue
            try:
                data = response.json()
            except ValueError:
                # ``ValueError`` is the common base of the JSON decode errors
                # the different decoder back-ends can raise.
                text_lots = _ap_parse_text_lots(response.text)
                if text_lots:
                    return text_lots
                last_error = Exception("Unable to parse AuctionPort lots from nonJSON response")
                continue
            lot_list = _ap_extract_lot_list(data)
            if lot_list is None:
                # Unknown structure; report "no lots" rather than failing.
                return []
            return [_ap_normalise_lot(item) for item in lot_list if isinstance(item, dict)]
    finally:
        if owns_session:
            session.close()

    # All candidates failed.
    if last_error is not None:
        raise last_error
    raise requests.HTTPError(f"Could not fetch lots for AuctionPort auction {auction_id}")
def _ovm_first_value(item: Dict[str, Any], *keys: str) -> Any:
    """Return the first usable value found under ``keys``, else ``None``.

    Behaves like chaining ``item.get(a) or item.get(b)`` except that a
    numeric zero counts as a real value, so "0 bids" or a bid amount of
    ``0`` is not mistaken for "missing".
    """
    for key in keys:
        value = item.get(key)
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if value or is_number:
            return value
    return None


def _ovm_normalise_lot(item: Dict[str, Any]) -> Dict[str, Any]:
    """Map one raw Online Veilingmeester lot onto the module's standard keys."""
    # The current bid is read from ``hoogsteBod`` or ``currentBid``; it may
    # be an object or a bare amount.
    raw_bid = _ovm_first_value(item, "hoogsteBod", "currentBid")
    current_bid: Optional[Dict[str, Any]] = None
    if isinstance(raw_bid, dict):
        current_bid = {
            "amount": _ovm_first_value(raw_bid, "bodBedrag", "amount"),
            "currency": _ovm_first_value(raw_bid, "valuta", "currency"),
        }
    elif raw_bid is not None:
        # Same convention as the AuctionPort mapping for scalar prices.
        current_bid = {"amount": raw_bid, "currency": None}

    # Images are read from ``afbeeldingen`` first, then ``images``; entries
    # may be plain URL strings or ``{"url": ...}`` objects.
    image_url = None
    for key in ("afbeeldingen", "images"):
        candidates = item.get(key)
        if isinstance(candidates, list) and candidates:
            first = candidates[0]
            image_url = first.get("url") if isinstance(first, dict) else first
            break

    return {
        "lot_number": _ovm_first_value(item, "kavelnummer", "lotNumber", "id"),
        "title": _ovm_first_value(item, "naam", "title"),
        "description": _ovm_first_value(item, "beschrijving", "description"),
        "bids": _ovm_first_value(item, "aantalBiedingen", "numberOfBids"),
        "current_bid": current_bid,
        "image_url": image_url,
        "end_time": _ovm_first_value(item, "eindDatumISO", "endDateISO", "eindDatum"),
    }


def get_items_ovm(
    country: str,
    auction_id: int,
    *,
    request_session: Optional[requests.Session] = None,
) -> List[Dict[str, Any]]:
    """Fetch lots from an Online Veilingmeester auction.

    The lots are requested from
    ``https://onlineveilingmeester.nl/rest/nl/{country}/veilingen/{auction_id}/kavels``
    and each entry is normalised into a plain dictionary. The body may be
    either a list of lots or an object holding that list under ``kavels``.

    Parameters
    ----------
    country: str
        Lowercase country name used in the API path (for example
        ``"nederland"`` or ``"belgie"``). It is inserted into the URL as
        given.
    auction_id: int
        The numeric identifier of the auction.
    request_session: Optional[requests.Session], optional
        A ``requests`` session to reuse connections. If omitted, a
        temporary session is created and closed again before returning; a
        caller-supplied session is left open.

    Returns
    -------
    List[Dict[str, Any]]
        A list of lot dictionaries with keys ``lot_number``, ``title``,
        ``description``, ``bids``, ``current_bid`` (``amount``/``currency``
        dictionary or ``None``), ``image_url`` and ``end_time``. If the
        endpoint returns no items, an empty list is returned. Entries that
        are not JSON objects are skipped.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    Exception
        If the decoded body is neither a list nor an object with a
        ``kavels`` list. JSON decoding errors from ``response.json()``
        propagate unchanged.
    """
    base_url = "https://onlineveilingmeester.nl/rest/nl"
    url = f"{base_url}/{country}/veilingen/{auction_id}/kavels"

    # Only close the session when this call created it.
    owns_session = request_session is None
    session = requests.Session() if owns_session else request_session
    try:
        response = session.get(url, headers={"Accept": "application/json"}, timeout=30)
    finally:
        if owns_session:
            session.close()

    try:
        response.raise_for_status()
    except requests.HTTPError:
        logger.error("OVM API returned status %s: %s", response.status_code, response.text)
        raise

    data = response.json()
    if isinstance(data, dict) and isinstance(data.get("kavels"), list):
        lot_list = data["kavels"]
    elif isinstance(data, list):
        lot_list = data
    else:
        logger.error("Unexpected response structure from OVM API: %s", data)
        raise Exception("Unexpected response structure for OVM lots")

    return [_ovm_normalise_lot(item) for item in lot_list if isinstance(item, dict)]