feat(outliers): adaptive confidence based on daily volatility

Outlier smoothing now adapts to daily price volatility (CV):
- Flat days (CV≤10%): conservative (confidence=2.5), fewer false positives
- Volatile days (CV≥30%): aggressive (confidence=1.5), catch more spikes
- Linear interpolation between thresholds

Uses calculate_coefficient_of_variation() for consistency with volatility sensors.

Impact: Better outlier detection that respects natural price variation patterns.
Flat days preserve more structure, volatile days get stronger smoothing.
This commit is contained in:
Julian Pawlowski 2025-12-22 23:21:44 +00:00
parent 325d855997
commit 7ee013daf2

View file

@ -18,12 +18,13 @@ import logging
from datetime import datetime from datetime import datetime
from typing import NamedTuple from typing import NamedTuple
from custom_components.tibber_prices.utils.price import calculate_coefficient_of_variation
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
_LOGGER_DETAILS = logging.getLogger(__name__ + ".details") _LOGGER_DETAILS = logging.getLogger(__name__ + ".details")
# Outlier filtering constants # Outlier filtering constants
MIN_CONTEXT_SIZE = 3 # Minimum intervals needed before/after for analysis MIN_CONTEXT_SIZE = 3 # Minimum intervals needed before/after for analysis
CONFIDENCE_LEVEL = 2.0 # Standard deviations for 95% confidence interval
VOLATILITY_THRESHOLD = 0.05 # 5% max relative std dev for zigzag detection VOLATILITY_THRESHOLD = 0.05 # 5% max relative std dev for zigzag detection
SYMMETRY_THRESHOLD = 1.5 # Max std dev difference for symmetric spike SYMMETRY_THRESHOLD = 1.5 # Max std dev difference for symmetric spike
RELATIVE_VOLATILITY_THRESHOLD = 2.0 # Window volatility vs context (cluster detection) RELATIVE_VOLATILITY_THRESHOLD = 2.0 # Window volatility vs context (cluster detection)
@ -31,6 +32,18 @@ ASYMMETRY_TAIL_WINDOW = 6 # Skip asymmetry check for last ~1.5h (6 intervals) o
ZIGZAG_TAIL_WINDOW = 6 # Skip zigzag/cluster detection for last ~1.5h (6 intervals) ZIGZAG_TAIL_WINDOW = 6 # Skip zigzag/cluster detection for last ~1.5h (6 intervals)
EXTREMES_PROTECTION_TOLERANCE = 0.001 # Protect prices within 0.1% of daily min/max from smoothing EXTREMES_PROTECTION_TOLERANCE = 0.001 # Protect prices within 0.1% of daily min/max from smoothing
# Adaptive confidence level constants
# Uses coefficient of variation (CV) from utils/price.py for consistency with volatility sensors
# The confidence level is the multiplier applied to the local std_dev to obtain the
# spike tolerance, so a larger value means fewer intervals are flagged and smoothed.
# On flat days (low CV), we're more conservative (higher confidence = fewer smoothed)
# On volatile days (high CV), we're more aggressive (lower confidence = more smoothed)
CONFIDENCE_LEVEL_MIN = 1.5 # Minimum confidence (volatile days: smooth more aggressively)
CONFIDENCE_LEVEL_MAX = 2.5 # Maximum confidence (flat days: smooth more conservatively)
# Returned when an interval carries no 'startsAt' and its day cannot be determined
CONFIDENCE_LEVEL_DEFAULT = 2.0 # Default: 95% confidence interval (2 std devs)
# CV thresholds for adaptive confidence (align with volatility sensor defaults)
# These are in percentage points (e.g., 10.0 = 10% CV)
# Between the two thresholds the confidence is interpolated linearly from MAX down to MIN
DAILY_CV_LOW = 10.0 # ≤10% CV = flat day (use max confidence)
DAILY_CV_HIGH = 30.0 # ≥30% CV = volatile day (use min confidence)
# Module-local log indentation (each module starts at level 0) # Module-local log indentation (each module starts at level 0)
INDENT_L0 = "" # All logs in this module (no indentation needed) INDENT_L0 = "" # All logs in this module (no indentation needed)
@ -269,6 +282,88 @@ def _calculate_daily_extremes(intervals: list[dict]) -> dict[str, tuple[float, f
return {date_key: (min(prices), max(prices)) for date_key, prices in daily_prices.items()} return {date_key: (min(prices), max(prices)) for date_key, prices in daily_prices.items()}
def _calculate_daily_cv(intervals: list[dict]) -> dict[str, float]:
"""
Calculate daily coefficient of variation (CV) for each day.
Uses the same CV calculation as volatility sensors for consistency.
CV = (std_dev / mean) * 100, expressed as percentage.
Used to adapt the confidence level for outlier detection:
- Flat days (low CV): Higher confidence fewer false positives
- Volatile days (high CV): Lower confidence catch more real outliers
Args:
intervals: List of price intervals with 'startsAt' and 'total' keys
Returns:
Dict mapping date strings to CV percentage (e.g., 15.0 for 15% CV)
"""
daily_prices: dict[str, list[float]] = {}
for interval in intervals:
starts_at = interval.get("startsAt")
if starts_at is None:
continue
dt = datetime.fromisoformat(starts_at) if isinstance(starts_at, str) else starts_at
date_key = dt.strftime("%Y-%m-%d")
price = float(interval["total"])
daily_prices.setdefault(date_key, []).append(price)
# Calculate CV using the shared function from utils/price.py
result = {}
for date_key, prices in daily_prices.items():
cv = calculate_coefficient_of_variation(prices)
result[date_key] = cv if cv is not None else 0.0
return result
def _get_adaptive_confidence_level(
    interval: dict,
    daily_cv: dict[str, float],
) -> float:
    """
    Pick the std-dev multiplier for an interval from its day's volatility.

    The interval's day is looked up in ``daily_cv`` and the CV is mapped to a
    confidence level:
    - CV ≤ DAILY_CV_LOW (10%): CONFIDENCE_LEVEL_MAX (2.5) → conservative, fewer smoothed
    - CV ≥ DAILY_CV_HIGH (30%): CONFIDENCE_LEVEL_MIN (1.5) → aggressive, more smoothed
    - In between: linear interpolation from MAX down to MIN

    Args:
        interval: Price interval dict with a 'startsAt' key (ISO-8601 string
            or datetime).
        daily_cv: Dict from _calculate_daily_cv(), keyed by YYYY-MM-DD, with
            CV values in percentage points.

    Returns:
        Confidence level multiplier for the std_dev threshold.
        CONFIDENCE_LEVEL_DEFAULT is returned when the interval has no
        'startsAt'. A day missing from ``daily_cv`` is treated as CV 0.0 and
        therefore yields CONFIDENCE_LEVEL_MAX.

    """
    start = interval.get("startsAt")
    if start is None:
        return CONFIDENCE_LEVEL_DEFAULT

    moment = start if not isinstance(start, str) else datetime.fromisoformat(start)
    day_cv = daily_cv.get(moment.strftime("%Y-%m-%d"), 0.0)

    # Clamp at both ends: flat days get the widest tolerance, volatile days the narrowest
    if day_cv <= DAILY_CV_LOW:
        return CONFIDENCE_LEVEL_MAX
    if day_cv >= DAILY_CV_HIGH:
        return CONFIDENCE_LEVEL_MIN

    # Position of the CV inside the [LOW, HIGH] band, 0.0 at LOW and 1.0 at HIGH
    cv_span = DAILY_CV_HIGH - DAILY_CV_LOW
    confidence_span = CONFIDENCE_LEVEL_MAX - CONFIDENCE_LEVEL_MIN
    position = (day_cv - DAILY_CV_LOW) / cv_span

    # Confidence falls linearly as CV rises
    return CONFIDENCE_LEVEL_MAX - (position * confidence_span)
def _is_daily_extreme( def _is_daily_extreme(
interval: dict, interval: dict,
daily_extremes: dict[str, tuple[float, float]], daily_extremes: dict[str, tuple[float, float]],
@ -340,19 +435,28 @@ def filter_price_outliers(
Intervals with smoothed prices (marked with _smoothed flag) Intervals with smoothed prices (marked with _smoothed flag)
""" """
_LOGGER.info(
"%sSmoothing price outliers: %d intervals, flex=%.1f%%",
INDENT_L0,
len(intervals),
flexibility_pct,
)
# Convert percentage to ratio once for all comparisons (e.g., 15.0 → 0.15) # Convert percentage to ratio once for all comparisons (e.g., 15.0 → 0.15)
flexibility_ratio = flexibility_pct / 100 flexibility_ratio = flexibility_pct / 100
# Calculate daily extremes to protect reference prices from smoothing # Calculate daily extremes to protect reference prices from smoothing
# Daily min is the reference for best_price, daily max for peak_price # Daily min is the reference for best_price, daily max for peak_price
daily_extremes = _calculate_daily_extremes(intervals) daily_extremes = _calculate_daily_extremes(intervals)
# Calculate daily coefficient of variation (CV) for adaptive confidence levels
# Uses same CV calculation as volatility sensors for consistency
# Flat days → conservative smoothing, volatile days → aggressive smoothing
daily_cv = _calculate_daily_cv(intervals)
# Log CV info for debugging (CV is in percentage points, e.g., 15.0 = 15%)
cv_info = ", ".join(f"{date}: {cv:.1f}%" for date, cv in sorted(daily_cv.items()))
_LOGGER.info(
"%sSmoothing price outliers: %d intervals, flex=%.1f%%, daily CV: %s",
INDENT_L0,
len(intervals),
flexibility_pct,
cv_info,
)
protected_count = 0 protected_count = 0
result = [] result = []
@ -396,8 +500,11 @@ def filter_price_outliers(
# Calculate how far current price deviates from expected # Calculate how far current price deviates from expected
residual = abs(current_price - expected_price) residual = abs(current_price - expected_price)
# Tolerance based on statistical confidence (2 std dev = 95% confidence) # Adaptive confidence level based on daily CV:
tolerance = stats["std_dev"] * CONFIDENCE_LEVEL # - Flat days (low CV): higher confidence (2.5) → fewer false positives
# - Volatile days (high CV): lower confidence (1.5) → catch more real spikes
confidence_level = _get_adaptive_confidence_level(current, daily_cv)
tolerance = stats["std_dev"] * confidence_level
# Not a spike if within tolerance # Not a spike if within tolerance
if residual <= tolerance: if residual <= tolerance:
@ -431,14 +538,14 @@ def filter_price_outliers(
smoothed_count += 1 smoothed_count += 1
_LOGGER_DETAILS.debug( _LOGGER_DETAILS.debug(
"%sSmoothed spike at %s: %.2f%.2f ct/kWh (residual: %.2f, tolerance: %.2f, trend_slope: %.4f)", "%sSmoothed spike at %s: %.2f%.2f ct/kWh (residual: %.2f, tolerance: %.2f, confidence: %.2f)",
INDENT_L0, INDENT_L0,
current.get("startsAt", f"index {i}"), current.get("startsAt", f"index {i}"),
current_price * 100, current_price * 100,
expected_price * 100, expected_price * 100,
residual * 100, residual * 100,
tolerance * 100, tolerance * 100,
stats["trend_slope"] * 100, confidence_level,
) )
if smoothed_count > 0 or protected_count > 0: if smoothed_count > 0 or protected_count > 0: