mirror of
https://github.com/jpawlowski/hass.tibber_prices.git
synced 2026-03-30 05:13:40 +00:00
fix(periods): protect daily extremes from outlier smoothing
The outlier filter was incorrectly smoothing daily minimum/maximum prices, causing best/peak price periods to miss their most important intervals.

Root cause: When the daily minimum (e.g., 0.5535 kr at 05:00) was surrounded by higher prices, the trend-based prediction calculated an "expected" price (0.6372 kr) that exceeded the flex threshold (0.6365 kr), causing the interval to be excluded from the best price period.

Solution: Daily extremes are now protected from smoothing. Before applying any outlier detection, we calculate daily min/max prices and skip smoothing for any interval at or within 0.1% of these values.

Changes:
- Added _calculate_daily_extremes() to compute daily min/max
- Added _is_daily_extreme() to check whether a price should be protected
- Added EXTREMES_PROTECTION_TOLERANCE constant (0.1%)
- Updated filter_price_outliers() to skip extremes before analysis
- Enhanced logging to show the protected interval count

Impact: Best price periods now correctly include daily minimum intervals, and peak price periods correctly include daily maximum intervals. The period for 2024-12-23 now extends from 03:15-05:30 (10 intervals) instead of incorrectly stopping at 05:00 (7 intervals).
This commit is contained in:
parent
11d4cbfd09
commit
70552459ce
1 changed files with 102 additions and 4 deletions
|
|
@ -15,6 +15,7 @@ Uses statistical methods:
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
from typing import NamedTuple
|
from typing import NamedTuple
|
||||||
|
|
||||||
_LOGGER = logging.getLogger(__name__)
|
_LOGGER = logging.getLogger(__name__)
|
||||||
|
|
@ -28,6 +29,7 @@ SYMMETRY_THRESHOLD = 1.5 # Max std dev difference for symmetric spike
|
||||||
RELATIVE_VOLATILITY_THRESHOLD = 2.0 # Window volatility vs context (cluster detection)
|
RELATIVE_VOLATILITY_THRESHOLD = 2.0 # Window volatility vs context (cluster detection)
|
||||||
ASYMMETRY_TAIL_WINDOW = 6 # Skip asymmetry check for last ~1.5h (6 intervals) of available data
|
ASYMMETRY_TAIL_WINDOW = 6 # Skip asymmetry check for last ~1.5h (6 intervals) of available data
|
||||||
ZIGZAG_TAIL_WINDOW = 6 # Skip zigzag/cluster detection for last ~1.5h (6 intervals)
|
ZIGZAG_TAIL_WINDOW = 6 # Skip zigzag/cluster detection for last ~1.5h (6 intervals)
|
||||||
|
EXTREMES_PROTECTION_TOLERANCE = 0.001 # Protect prices within 0.1% of daily min/max from smoothing
|
||||||
|
|
||||||
# Module-local log indentation (each module starts at level 0)
|
# Module-local log indentation (each module starts at level 0)
|
||||||
INDENT_L0 = "" # All logs in this module (no indentation needed)
|
INDENT_L0 = "" # All logs in this module (no indentation needed)
|
||||||
|
|
@ -233,6 +235,84 @@ def _validate_spike_candidate(
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _calculate_daily_extremes(intervals: list[dict]) -> dict[str, tuple[float, float]]:
|
||||||
|
"""
|
||||||
|
Calculate daily min/max prices for each day in the interval list.
|
||||||
|
|
||||||
|
These extremes are used to protect reference prices from being smoothed.
|
||||||
|
The daily minimum is the reference for best_price periods, and the daily
|
||||||
|
maximum is the reference for peak_price periods - smoothing these would
|
||||||
|
break period detection.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
intervals: List of price intervals with 'startsAt' and 'total' keys
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict mapping date strings to (min_price, max_price) tuples
|
||||||
|
|
||||||
|
"""
|
||||||
|
daily_prices: dict[str, list[float]] = {}
|
||||||
|
|
||||||
|
for interval in intervals:
|
||||||
|
starts_at = interval.get("startsAt")
|
||||||
|
if starts_at is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Handle both datetime objects and ISO strings
|
||||||
|
dt = datetime.fromisoformat(starts_at) if isinstance(starts_at, str) else starts_at
|
||||||
|
|
||||||
|
date_key = dt.strftime("%Y-%m-%d")
|
||||||
|
price = float(interval["total"])
|
||||||
|
daily_prices.setdefault(date_key, []).append(price)
|
||||||
|
|
||||||
|
# Calculate min/max for each day
|
||||||
|
return {date_key: (min(prices), max(prices)) for date_key, prices in daily_prices.items()}
|
||||||
|
|
||||||
|
|
||||||
|
def _is_daily_extreme(
|
||||||
|
interval: dict,
|
||||||
|
daily_extremes: dict[str, tuple[float, float]],
|
||||||
|
tolerance: float = EXTREMES_PROTECTION_TOLERANCE,
|
||||||
|
) -> bool:
|
||||||
|
"""
|
||||||
|
Check if an interval's price is at or very near a daily extreme.
|
||||||
|
|
||||||
|
Prices at daily extremes should never be smoothed because:
|
||||||
|
- Daily minimum is the reference for best_price period detection
|
||||||
|
- Daily maximum is the reference for peak_price period detection
|
||||||
|
- Smoothing these would cause periods to miss their most important intervals
|
||||||
|
|
||||||
|
Args:
|
||||||
|
interval: Price interval dict with 'startsAt' and 'total' keys
|
||||||
|
daily_extremes: Dict from _calculate_daily_extremes()
|
||||||
|
tolerance: Relative tolerance for matching (default 0.1%)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if the price is at or very near a daily min or max
|
||||||
|
|
||||||
|
"""
|
||||||
|
starts_at = interval.get("startsAt")
|
||||||
|
if starts_at is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Handle both datetime objects and ISO strings
|
||||||
|
dt = datetime.fromisoformat(starts_at) if isinstance(starts_at, str) else starts_at
|
||||||
|
|
||||||
|
date_key = dt.strftime("%Y-%m-%d")
|
||||||
|
if date_key not in daily_extremes:
|
||||||
|
return False
|
||||||
|
|
||||||
|
price = float(interval["total"])
|
||||||
|
daily_min, daily_max = daily_extremes[date_key]
|
||||||
|
|
||||||
|
# Check if price is within tolerance of daily min or max
|
||||||
|
# Using relative tolerance: |price - extreme| <= extreme * tolerance
|
||||||
|
min_threshold = daily_min * (1 + tolerance)
|
||||||
|
max_threshold = daily_max * (1 - tolerance)
|
||||||
|
|
||||||
|
return price <= min_threshold or price >= max_threshold
|
||||||
|
|
||||||
|
|
||||||
def filter_price_outliers(
|
def filter_price_outliers(
|
||||||
intervals: list[dict],
|
intervals: list[dict],
|
||||||
flexibility_pct: float,
|
flexibility_pct: float,
|
||||||
|
|
@ -270,12 +350,31 @@ def filter_price_outliers(
|
||||||
# Convert percentage to ratio once for all comparisons (e.g., 15.0 → 0.15)
|
# Convert percentage to ratio once for all comparisons (e.g., 15.0 → 0.15)
|
||||||
flexibility_ratio = flexibility_pct / 100
|
flexibility_ratio = flexibility_pct / 100
|
||||||
|
|
||||||
|
# Calculate daily extremes to protect reference prices from smoothing
|
||||||
|
# Daily min is the reference for best_price, daily max for peak_price
|
||||||
|
daily_extremes = _calculate_daily_extremes(intervals)
|
||||||
|
protected_count = 0
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
smoothed_count = 0
|
smoothed_count = 0
|
||||||
|
|
||||||
for i, current in enumerate(intervals):
|
for i, current in enumerate(intervals):
|
||||||
current_price = current["total"]
|
current_price = current["total"]
|
||||||
|
|
||||||
|
# CRITICAL: Never smooth daily extremes - they are the reference prices!
|
||||||
|
# Smoothing the daily min would break best_price period detection,
|
||||||
|
# smoothing the daily max would break peak_price period detection.
|
||||||
|
if _is_daily_extreme(current, daily_extremes):
|
||||||
|
result.append(current)
|
||||||
|
protected_count += 1
|
||||||
|
_LOGGER_DETAILS.debug(
|
||||||
|
"%sProtected daily extreme at %s: %.2f ct/kWh (not smoothed)",
|
||||||
|
INDENT_L0,
|
||||||
|
current.get("startsAt", f"index {i}"),
|
||||||
|
current_price * 100,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
# Get context windows (3 intervals before and after)
|
# Get context windows (3 intervals before and after)
|
||||||
context_before = intervals[max(0, i - MIN_CONTEXT_SIZE) : i]
|
context_before = intervals[max(0, i - MIN_CONTEXT_SIZE) : i]
|
||||||
context_after = intervals[i + 1 : min(len(intervals), i + 1 + MIN_CONTEXT_SIZE)]
|
context_after = intervals[i + 1 : min(len(intervals), i + 1 + MIN_CONTEXT_SIZE)]
|
||||||
|
|
@ -342,13 +441,12 @@ def filter_price_outliers(
|
||||||
stats["trend_slope"] * 100,
|
stats["trend_slope"] * 100,
|
||||||
)
|
)
|
||||||
|
|
||||||
if smoothed_count > 0:
|
if smoothed_count > 0 or protected_count > 0:
|
||||||
_LOGGER.info(
|
_LOGGER.info(
|
||||||
"%sPrice outlier smoothing complete: %d/%d intervals smoothed (%.1f%%)",
|
"%sPrice outlier smoothing complete: %d smoothed, %d protected (daily extremes)",
|
||||||
INDENT_L0,
|
INDENT_L0,
|
||||||
smoothed_count,
|
smoothed_count,
|
||||||
len(intervals),
|
protected_count,
|
||||||
(smoothed_count / len(intervals)) * 100,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue