From 70552459ce741850db1f4f696fcc7747ec488226 Mon Sep 17 00:00:00 2001 From: Julian Pawlowski <75446+jpawlowski@users.noreply.github.com> Date: Mon, 22 Dec 2025 21:05:30 +0000 Subject: [PATCH] fix(periods): protect daily extremes from outlier smoothing The outlier filter was incorrectly smoothing daily minimum/maximum prices, causing best/peak price periods to miss their most important intervals. Root cause: When the daily minimum (e.g., 0.5535 kr at 05:00) was surrounded by higher prices, the trend-based prediction calculated an "expected" price (0.6372 kr) that exceeded the flex threshold (0.6365 kr), causing the interval to be excluded from the best price period. Solution: Daily extremes are now protected from smoothing. Before applying any outlier detection, we calculate daily min/max prices and skip smoothing for any interval at or within 0.1% of these values. Changes: - Added _calculate_daily_extremes() to compute daily min/max - Added _is_daily_extreme() to check if price should be protected - Added EXTREMES_PROTECTION_TOLERANCE constant (0.1%) - Updated filter_price_outliers() to skip extremes before analysis - Enhanced logging to show protected interval count Impact: Best price periods now correctly include daily minimum intervals, and peak price periods correctly include daily maximum intervals. The period for 2024-12-23 now extends from 03:15-05:30 (10 intervals) instead of incorrectly stopping at 05:00 (7 intervals). 
from datetime import datetime

EXTREMES_PROTECTION_TOLERANCE = 0.001  # Protect prices within 0.1% of daily min/max from smoothing


def _interval_date_key(interval: dict) -> str | None:
    """
    Return the calendar-day key ("YYYY-MM-DD") of an interval, or None.

    Shared by _calculate_daily_extremes() and _is_daily_extreme() so both
    always bucket an interval into the same day.

    Args:
        interval: Price interval dict; 'startsAt' may be a datetime object,
            an ISO 8601 string, or missing/None.

    Returns:
        The date key derived from 'startsAt' (in the timestamp's own offset),
        or None when the interval has no 'startsAt'.

    """
    starts_at = interval.get("startsAt")
    if starts_at is None:
        return None

    # Handle both datetime objects and ISO strings
    dt = datetime.fromisoformat(starts_at) if isinstance(starts_at, str) else starts_at
    return dt.strftime("%Y-%m-%d")


def _calculate_daily_extremes(intervals: list[dict]) -> dict[str, tuple[float, float]]:
    """
    Calculate daily min/max prices for each day in the interval list.

    These extremes are used to protect reference prices from being smoothed.
    The daily minimum is the reference for best_price periods, and the daily
    maximum is the reference for peak_price periods - smoothing these would
    break period detection.

    Args:
        intervals: List of price intervals with 'startsAt' and 'total' keys.
            Intervals without 'startsAt' are ignored.

    Returns:
        Dict mapping date strings ("YYYY-MM-DD") to (min_price, max_price) tuples

    """
    daily_extremes: dict[str, tuple[float, float]] = {}

    for interval in intervals:
        date_key = _interval_date_key(interval)
        if date_key is None:
            continue

        price = float(interval["total"])
        # Track running min/max per day instead of collecting every price
        low, high = daily_extremes.get(date_key, (price, price))
        daily_extremes[date_key] = (min(low, price), max(high, price))

    return daily_extremes


def _is_daily_extreme(
    interval: dict,
    daily_extremes: dict[str, tuple[float, float]],
    tolerance: float = EXTREMES_PROTECTION_TOLERANCE,
) -> bool:
    """
    Check if an interval's price is at or very near a daily extreme.

    Prices at daily extremes should never be smoothed because:
    - Daily minimum is the reference for best_price period detection
    - Daily maximum is the reference for peak_price period detection
    - Smoothing these would cause periods to miss their most important intervals

    Args:
        interval: Price interval dict with 'startsAt' and 'total' keys
        daily_extremes: Dict from _calculate_daily_extremes()
        tolerance: Relative tolerance for matching (default 0.1%)

    Returns:
        True if the price is at or very near a daily min or max. False if the
        interval has no 'startsAt' or its day is not in daily_extremes.

    """
    date_key = _interval_date_key(interval)
    if date_key is None or date_key not in daily_extremes:
        return False

    price = float(interval["total"])
    daily_min, daily_max = daily_extremes[date_key]

    # The protection band is scaled by the extreme's magnitude. Using
    # abs() keeps the band on the correct side for negative prices:
    # daily_min * (1 + tolerance) would move a negative threshold BELOW the
    # minimum, so the minimum itself would no longer be protected (and
    # likewise for a negative maximum).
    min_threshold = daily_min + abs(daily_min) * tolerance
    max_threshold = daily_max - abs(daily_max) * tolerance

    return price <= min_threshold or price >= max_threshold