feat(outliers): adaptive confidence based on daily volatility

Outlier smoothing now adapts to daily price volatility (CV): - Flat days (CV≤10%): conservative (confidence=2.5), fewer false positives - Volatile days (CV≥30%): aggressive (confidence=1.5), catch more spikes - Linear interpolation between thresholds Uses calculate_coefficient_of_variation() for consistency with volatility sensors. Impact: Better outlier detection that respects natural price variation patterns. Flat days preserve more structure, volatile days get stronger smoothing.
2026-05-30 11:13:39 +00:00 · 2025-12-22 23:21:44 +00:00 · 2025-12-22 23:21:44 +00:00 · 7ee013daf2
commit 7ee013daf2
parent 325d855997
1 changed files with 119 additions and 12 deletions
--- a/custom_components/tibber_prices/coordinator/period_handlers/outlier_filtering.py
+++ b/custom_components/tibber_prices/coordinator/period_handlers/outlier_filtering.py
@ -18,12 +18,13 @@ import logging
 from datetime import datetime
 from typing import NamedTuple
 from custom_components.tibber_prices.utils.price import calculate_coefficient_of_variation
 _LOGGER = logging.getLogger(__name__)
 _LOGGER_DETAILS = logging.getLogger(__name__ + ".details")
 # Outlier filtering constants
 MIN_CONTEXT_SIZE = 3  # Minimum intervals needed before/after for analysis
 CONFIDENCE_LEVEL = 2.0  # Standard deviations for 95% confidence interval
 VOLATILITY_THRESHOLD = 0.05  # 5% max relative std dev for zigzag detection
 SYMMETRY_THRESHOLD = 1.5  # Max std dev difference for symmetric spike
 RELATIVE_VOLATILITY_THRESHOLD = 2.0  # Window volatility vs context (cluster detection)
@ -31,6 +32,18 @@ ASYMMETRY_TAIL_WINDOW = 6  # Skip asymmetry check for last ~1.5h (6 intervals) o
 ZIGZAG_TAIL_WINDOW = 6  # Skip zigzag/cluster detection for last ~1.5h (6 intervals)
 EXTREMES_PROTECTION_TOLERANCE = 0.001  # Protect prices within 0.1% of daily min/max from smoothing
 # Adaptive confidence level constants
 # Uses coefficient of variation (CV) from utils/price.py for consistency with volatility sensors
 # On flat days (low CV), we're more conservative (higher confidence = fewer smoothed)
 # On volatile days (high CV), we're more aggressive (lower confidence = more smoothed)
 CONFIDENCE_LEVEL_MIN = 1.5  # Minimum confidence (volatile days: smooth more aggressively)
 CONFIDENCE_LEVEL_MAX = 2.5  # Maximum confidence (flat days: smooth more conservatively)
 CONFIDENCE_LEVEL_DEFAULT = 2.0  # Default: 95% confidence interval (2 std devs)
 # CV thresholds for adaptive confidence (align with volatility sensor defaults)
 # These are in percentage points (e.g., 10.0 = 10% CV)
 DAILY_CV_LOW = 10.0  # ≤10% CV = flat day (use max confidence)
 DAILY_CV_HIGH = 30.0  # ≥30% CV = volatile day (use min confidence)
 # Module-local log indentation (each module starts at level 0)
 INDENT_L0 = ""  # All logs in this module (no indentation needed)
@ -269,6 +282,88 @@ def _calculate_daily_extremes(intervals: list[dict]) -> dict[str, tuple[float, f
    return {date_key: (min(prices), max(prices)) for date_key, prices in daily_prices.items()}
 def _calculate_daily_cv(intervals: list[dict]) -> dict[str, float]:
    """
    Calculate daily coefficient of variation (CV) for each day.
    Uses the same CV calculation as volatility sensors for consistency.
    CV = (std_dev / mean) * 100, expressed as percentage.
    Used to adapt the confidence level for outlier detection:
    - Flat days (low CV): Higher confidence → fewer false positives
    - Volatile days (high CV): Lower confidence → catch more real outliers
    Args:
        intervals: List of price intervals with 'startsAt' and 'total' keys
    Returns:
        Dict mapping date strings to CV percentage (e.g., 15.0 for 15% CV)
    """
    daily_prices: dict[str, list[float]] = {}
    for interval in intervals:
        starts_at = interval.get("startsAt")
        if starts_at is None:
            continue
        dt = datetime.fromisoformat(starts_at) if isinstance(starts_at, str) else starts_at
        date_key = dt.strftime("%Y-%m-%d")
        price = float(interval["total"])
        daily_prices.setdefault(date_key, []).append(price)
    # Calculate CV using the shared function from utils/price.py
    result = {}
    for date_key, prices in daily_prices.items():
        cv = calculate_coefficient_of_variation(prices)
        result[date_key] = cv if cv is not None else 0.0
    return result
 def _get_adaptive_confidence_level(
    interval: dict,
    daily_cv: dict[str, float],
 ) -> float:
    """
    Get adaptive confidence level based on daily coefficient of variation (CV).
    Maps daily CV to confidence level:
    - Low CV (≤10%): High confidence (2.5) → conservative, fewer smoothed
    - High CV (≥30%): Low confidence (1.5) → aggressive, more smoothed
    - Between: Linear interpolation
    Uses the same CV calculation as volatility sensors for consistency.
    Args:
        interval: Price interval dict with 'startsAt' key
        daily_cv: Dict from _calculate_daily_cv()
    Returns:
        Confidence level multiplier for std_dev threshold
    """
    starts_at = interval.get("startsAt")
    if starts_at is None:
        return CONFIDENCE_LEVEL_DEFAULT
    dt = datetime.fromisoformat(starts_at) if isinstance(starts_at, str) else starts_at
    date_key = dt.strftime("%Y-%m-%d")
    cv = daily_cv.get(date_key, 0.0)
    # Linear interpolation between LOW and HIGH CV
    # Low CV → high confidence (conservative)
    # High CV → low confidence (aggressive)
    if cv <= DAILY_CV_LOW:
        return CONFIDENCE_LEVEL_MAX
    if cv >= DAILY_CV_HIGH:
        return CONFIDENCE_LEVEL_MIN
    # Linear interpolation: as CV increases, confidence decreases
    ratio = (cv - DAILY_CV_LOW) / (DAILY_CV_HIGH - DAILY_CV_LOW)
    return CONFIDENCE_LEVEL_MAX - (ratio * (CONFIDENCE_LEVEL_MAX - CONFIDENCE_LEVEL_MIN))
 def _is_daily_extreme(
    interval: dict,
    daily_extremes: dict[str, tuple[float, float]],
@ -340,19 +435,28 @@ def filter_price_outliers(
        Intervals with smoothed prices (marked with _smoothed flag)
    """
    _LOGGER.info(
        "%sSmoothing price outliers: %d intervals, flex=%.1f%%",
        INDENT_L0,
        len(intervals),
        flexibility_pct,
    )
    # Convert percentage to ratio once for all comparisons (e.g., 15.0 → 0.15)
    flexibility_ratio = flexibility_pct / 100
    # Calculate daily extremes to protect reference prices from smoothing
    # Daily min is the reference for best_price, daily max for peak_price
    daily_extremes = _calculate_daily_extremes(intervals)
    # Calculate daily coefficient of variation (CV) for adaptive confidence levels
    # Uses same CV calculation as volatility sensors for consistency
    # Flat days → conservative smoothing, volatile days → aggressive smoothing
    daily_cv = _calculate_daily_cv(intervals)
    # Log CV info for debugging (CV is in percentage points, e.g., 15.0 = 15%)
    cv_info = ", ".join(f"{date}: {cv:.1f}%" for date, cv in sorted(daily_cv.items()))
    _LOGGER.info(
        "%sSmoothing price outliers: %d intervals, flex=%.1f%%, daily CV: %s",
        INDENT_L0,
        len(intervals),
        flexibility_pct,
        cv_info,
    )
    protected_count = 0
    result = []
@ -396,8 +500,11 @@ def filter_price_outliers(
        # Calculate how far current price deviates from expected
        residual = abs(current_price - expected_price)
-        # Tolerance based on statistical confidence (2 std dev = 95% confidence)
+        # Adaptive confidence level based on daily CV:
-        tolerance = stats["std_dev"] * CONFIDENCE_LEVEL
+        # - Flat days (low CV): higher confidence (2.5) → fewer false positives
        # - Volatile days (high CV): lower confidence (1.5) → catch more real spikes
        confidence_level = _get_adaptive_confidence_level(current, daily_cv)
        tolerance = stats["std_dev"] * confidence_level
        # Not a spike if within tolerance
        if residual <= tolerance:
@ -431,14 +538,14 @@ def filter_price_outliers(
        smoothed_count += 1
        _LOGGER_DETAILS.debug(
-            "%sSmoothed spike at %s: %.2f → %.2f ct/kWh (residual: %.2f, tolerance: %.2f, trend_slope: %.4f)",
+            "%sSmoothed spike at %s: %.2f → %.2f ct/kWh (residual: %.2f, tolerance: %.2f, confidence: %.2f)",
            INDENT_L0,
            current.get("startsAt", f"index {i}"),
            current_price * 100,
            expected_price * 100,
            residual * 100,
            tolerance * 100,
-            stats["trend_slope"] * 100,
+            confidence_level,
        )
    if smoothed_count > 0 or protected_count > 0: