Source code for czech_air_quality.api

#  Provides a python client for simply retrieving
#  and processing air quality data from the CHMI OpenData portal.
#  Copyright (C) 2025 chickendrop89

#  This library is free software; you can redistribute it and/or modify it
#  under the terms of the GNU Lesser General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.

#  This library is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Lesser General Public License for more details.

"""
Public API.
"""

import logging
from datetime import datetime
from functools import wraps

from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

from .processing import AirQualityCalculations
from . import const, _warn
from . import data_manager
from . import (
    StationNotFoundError,
    PollutantNotReportedError,
    DataDownloadError
)

_LOGGER = logging.getLogger(__name__)


[docs] class AirQuality(AirQualityCalculations): """ A client for retrieving air quality data from CHMI. """ def __init__( self, auto_load=True, region_filter=None, use_nominatim=True, neighbour_station_limit=const.CHMI_NEIGHBOUR_LIMIT, nominatim_timeout=const.NOMINATIM_TIMEOUT, request_timeout=const.REQUEST_TIMEOUT, disable_caching=False ): """ Initialize the Air Quality client. :param auto_load: If True, load/download data immediately during initialization. If False, data loads on first method call. :type auto_load: bool :param region_filter: Limit stations to specific region (case-insensitive). :type region_filter: str, optional :param use_nominatim: If True, enable Nominatim geocoding for city name lookups. If False, only exact station name matches are accepted. :type use_nominatim: bool :param neighbour_station_limit: Maximum number of nearby stations to check when searching for pollutant data. Useful with use_nominatim=True to find data if nearest station doesn't report requested pollutant. :type neighbour_station_limit: int :param nominatim_timeout: Timeout in seconds for Nominatim geocoding requests :type nominatim_timeout: int :param request_timeout: Timeout in seconds for CHMI data download requests :type request_timeout: int :param disable_caching: If True, skip all caching and always download fresh data :type disable_caching: bool Caching Strategy: 1. Cache Hit (Recent): If cached data is < 20 minutes old, use it immediately 2. ETag Validation: If cache is > 20 minutes old, perform HTTP-HEAD request with ETag - If server returns 304 (Not Modified), trust cache for another 20 minutes - If server returns 200 (Modified), download full fresh data 3. Network Error: If network unavailable but cache exists, use stale cache with warning 4. Force Refresh: `force_fetch_fresh()` bypasses age check but respects ETags """ super().__init__() self._region_filter = region_filter.lower() if region_filter else None self._data = {} self._all_stations = [] self._component_lookup = {} self._id_registration_to_component = {} self._locality_code_to_station = {} self._city_coordinate_cache = {} self._data_manager = data_manager.DataManager( disable_caching=disable_caching, request_timeout=request_timeout ) self._use_nominatim = use_nominatim self._neighbour_station_limit = neighbour_station_limit self._nominatim_timeout = nominatim_timeout if self._use_nominatim: self._geolocator = Nominatim( user_agent=const.USER_AGENT ) self._rate_limited_geocode = RateLimiter( self._geolocator.geocode, min_delay_seconds=1.0, max_retries=0 ) else: self._geolocator = None self._rate_limited_geocode = None if auto_load: self._data_manager.ensure_latest_data() self._load_and_parse_data()
[docs] @classmethod def get_all_station_names(cls) -> list[str | None]: """ Get all known air quality station names by creating temporary a client instance. :return: List of station names, or empty list if data cannot be retrieved :rtype: ``list[str | None]`` """ try: temp_instance = cls() return [station["Name"] for station in temp_instance.all_stations] except (DataDownloadError, StationNotFoundError) as e: _warn(f"Failed to get all station names: {e}") return []
@staticmethod def _ensure_loaded(func): """ Ensure data is fresh and loaded before executing a public method. """ @wraps(func) def wrapper(self, *args, **kwargs): if not self._data_manager.raw_data_json: self._data_manager.ensure_latest_data() self._load_and_parse_data() return func(self, *args, **kwargs) return wrapper @property def actualized_time(self) -> datetime: """ Timestamp when data was last updated by the CHMI source. :rtype: ``datetime`` """ return self._data_manager.actualized_time @property def is_data_fresh(self) -> bool: """ Check if cached data is still valid via ETag validation. :return: ``True`` if cached data is current; ``False`` if needs refresh :rtype: ``bool`` """ return self._data_manager.is_data_fresh() @property def all_stations(self) -> list[dict]: """ Get all available air quality stations. :return: List of station dictionaries, filtered by region if set :rtype: ``list[dict]`` """ return self._all_stations @property def component_lookup(self) -> dict[str, tuple[str, str, str]]: """ Map of pollutant codes to (code, name, unit) tuples. :return: Dictionary with pollutant code as key :rtype: ``dict[str, tuple[str, str, str]]`` """ return self._component_lookup @property def raw_data(self) -> dict: """ Raw parsed data from the JSON source. :return: Dictionary containing localities and measurements :rtype: ``dict`` """ return self._data
[docs] @_ensure_loaded def find_nearest_station(self, city_name: str) -> tuple[dict, float]: """ Find air quality station nearest to a city. If ``use_nominatim=True``, geocodes the city name to coordinates and calculates distances to all stations. Otherwise, matches exact station names only (e.g. "Prague - Letná"). :param city_name: Name of the city or exact station name :type city_name: str :return: Tuple of (``station_dict``, ``distance_km``) with station metadata and distance :rtype: ``tuple[dict, float]`` :raises StationNotFoundError: If city/station not found or no nearby stations exist """ return self._get_nearest_station_to_city(city_name)
[docs] @_ensure_loaded def get_air_quality_report(self, city_name: str) -> dict: """ Get comprehensive air quality report with EAQI (European Air Quality Index) for a city. :param city_name: City name to search for :type city_name: str :return: Air quality report dictionary with keys: - **city_searched (str)**: Original search term - **station_name (str)**: Name of station providing data - **station_code (str)**: Station locality code - **region (str)**: Region name - **distance_km (str)**: Distance from city to station in kilometers - **air_quality_index_code (int)**: EAQI level (0-6, 0 if no data) - **air_quality_index_description (str)**: Human description (e.g., 'Good', 'Poor') - **actualized_time_utc (str)**: ISO format UTC timestamp of data - **measurements (list[dict])**: List of pollutant measurements: - **pollutant_code (str)**: Code like 'PM10', 'O3' - **pollutant_name (str)**: Full name - **unit (str)**: Unit of measurement - **value (float|None)**: Numeric value - **sub_aqi (int)**: Sub-index level for this pollutant - **formatted_measurement (str)**: Display string - ``Error (str)``: Error message if lookup failed :rtype: ``dict`` """ try: station_data, distance_km = self._get_nearest_station_to_city(city_name) except StationNotFoundError as exc: return {"city_searched": city_name, "Error": str(exc)} if not self._station_has_valid_data(station_data): _LOGGER.debug( "Station %s has no valid data. Attempting to find alternative station...", station_data.get("Name"), ) nearby_stations = self._get_nearby_stations_sorted(city_name) for alt_station, alt_distance in nearby_stations[1:]: if self._station_has_valid_data(alt_station): _LOGGER.debug( "Fallback: Using station %s at %.2f km (1st station had no valid data).", alt_station.get("Name"), alt_distance, ) station_data = alt_station distance_km = alt_distance break return self._format_station_data(station_data, distance_km, city_name)
[docs] @_ensure_loaded def get_pollutant_measurement(self, city_name: str, pollutant_code: str) -> dict: """ Get measurement data for a specific pollutant at the nearest station. :param city_name: City name to search for :type city_name: str :param pollutant_code: Pollutant code to retrieve (case-insensitive): - 'PM10': Particulate matter < 10 µm - 'PM2_5': Fine particulate matter < 2.5 µm - 'O3': Ozone - 'NO2': Nitrogen dioxide - 'SO2': Sulfur dioxide :type pollutant_code: str :return: Measurement dictionary with keys: - **city_searched (str)**: Original search term - **station_name (str)**: Station name(s) that provided the measurement - **pollutant_code (str)**: Normalized pollutant code - **pollutant_name (str)**: Full pollutant name - **unit (str)**: Unit of measurement (e.g., 'µg/m³') - **value (float|None)**: Numeric measurement value - **measurement_status (str)**: Status string (e.g., 'Measured', 'No Data') - **formatted_measurement (str)**: Display string (e.g., '12.5 µg/m³') :rtype: ``dict`` :raises StationNotFoundError: If city not found or no nearby stations exist :raises PollutantNotReportedError: If pollutant is not measured at any station """ nearby_stations = self._get_nearby_stations_sorted(city_name) station_data, _ = self._get_nearest_station_to_city(city_name) pollutant_code_upper = pollutant_code.upper() stations_tried: list[str] = [station_data.get("Name") or ""] result = self._try_get_pollutant_from_station( station_data, station_data, pollutant_code_upper, city_name, stations_tried ) if result: return result for alt_station, _ in nearby_stations: if alt_station.get("Name") in stations_tried: continue alt_result = self._try_get_pollutant_from_station( alt_station, station_data, pollutant_code_upper, city_name, stations_tried ) if alt_result: return alt_result stations_tried.append( alt_station.get("Name") or "" ) raise PollutantNotReportedError( f"Pollutant code '{pollutant_code_upper}' is not being measured" f" at any available station near '{city_name}'." )
[docs] @_ensure_loaded def get_air_quality_index(self, city_name: str) -> tuple[int, str]: """ Get EAQI for a city using the 0-6 scale. Returns the highest sub-index across all measured pollutants (PM10, PM2_5, O3, NO2, SO2). :param city_name: Name of the city :type city_name: str :return: Tuple of (EAQI level 0-6, description) :rtype: ``tuple[int, str]`` """ aqi_level = self._get_aqi(city_name) return aqi_level, const.EAQI_LEVELS.get(aqi_level, "Error/N/A")
[docs] def force_fetch_fresh(self) -> None: """ Force fetching fresh data from the source without waiting for the internal cache timer. Bypasses the normal 20-minute cache timeout and immediately requests fresh data from CHMI. Still uses cached data if server returns 304 (Not Modified) via ETag validation. :raises DataDownloadError: If network error occurs and no cache is available """ self._data_manager.ensure_latest_data(force_fetch=True) self._load_and_parse_data()