fix(coordinator): invalidate transformation cache when source data changes

Fixes bug where lifecycle sensor attributes (data_completeness, tomorrow_available)
didn't update after tomorrow data was successfully fetched from API.

Root cause: DataTransformer had cached transformation data but no mechanism to detect
when source API data changed (only checked config and midnight turnover).

Changes:
- coordinator/data_transformation.py: Track source_data_timestamp and invalidate cache
  when timestamp changes (detects new API data arrival)
- coordinator/data_transformation.py: Integrate period calculation into DataTransformer
  (calculate_periods_fn parameter) for complete single-layer caching
- coordinator/core.py: Remove duplicate transformation cache (_cached_transformed_data,
  _last_transformation_config), simplify _transform_data_for_*() to direct delegation
- tests/test_tomorrow_data_refresh.py: Add 3 regression tests for cache invalidation
  (new timestamp, config change behavior, cache preservation)

Impact: Lifecycle sensor attributes now update correctly when new API data arrives.
Reduced code by ~40 lines in coordinator, consolidated caching to single layer.
All 350 tests passing.
This commit is contained in:
Julian Pawlowski 2025-11-23 13:10:19 +00:00
parent cfae3c9387
commit 9ee7f81164
3 changed files with 306 additions and 123 deletions

View file

@ -20,7 +20,6 @@ if TYPE_CHECKING:
from .listeners import TimeServiceCallback
from custom_components.tibber_prices import const as _const
from custom_components.tibber_prices.api import (
TibberPricesApiClient,
TibberPricesApiClientAuthenticationError,
@ -212,6 +211,9 @@ class TibberPricesDataUpdateCoordinator(DataUpdateCoordinator[dict[str, Any]]):
config_entry=config_entry,
log_prefix=self._log_prefix,
perform_turnover_fn=self._perform_midnight_turnover,
calculate_periods_fn=lambda price_info: self._period_calculator.calculate_periods_for_price_info(
price_info
),
time=self.time,
)
self._period_calculator = TibberPricesPeriodCalculator(
@ -228,9 +230,6 @@ class TibberPricesDataUpdateCoordinator(DataUpdateCoordinator[dict[str, Any]]):
self._user_update_interval = timedelta(days=1)
self._cached_price_data: dict[str, Any] | None = None
self._last_price_update: datetime | None = None
self._cached_transformed_data: dict[str, Any] | None = None
self._last_transformation_config: dict[str, Any] | None = None
self._last_transformation_time: datetime | None = None # When data was last transformed (for cache)
# Data lifecycle tracking for diagnostic sensor
self._lifecycle_state: str = (
@ -766,122 +765,21 @@ class TibberPricesDataUpdateCoordinator(DataUpdateCoordinator[dict[str, Any]]):
"""Calculate periods (best price and peak price) for the given price info."""
return self._period_calculator.calculate_periods_for_price_info(price_info)
def _get_current_transformation_config(self) -> dict[str, Any]:
"""Get current configuration that affects data transformation."""
return {
"thresholds": self._get_threshold_percentages(),
"volatility_thresholds": {
"moderate": self.config_entry.options.get(_const.CONF_VOLATILITY_THRESHOLD_MODERATE, 15.0),
"high": self.config_entry.options.get(_const.CONF_VOLATILITY_THRESHOLD_HIGH, 25.0),
"very_high": self.config_entry.options.get(_const.CONF_VOLATILITY_THRESHOLD_VERY_HIGH, 40.0),
},
"best_price_config": {
"flex": self.config_entry.options.get(_const.CONF_BEST_PRICE_FLEX, 15.0),
"max_level": self.config_entry.options.get(_const.CONF_BEST_PRICE_MAX_LEVEL, "NORMAL"),
"min_period_length": self.config_entry.options.get(_const.CONF_BEST_PRICE_MIN_PERIOD_LENGTH, 4),
"min_distance_from_avg": self.config_entry.options.get(
_const.CONF_BEST_PRICE_MIN_DISTANCE_FROM_AVG, -5.0
),
"max_level_gap_count": self.config_entry.options.get(_const.CONF_BEST_PRICE_MAX_LEVEL_GAP_COUNT, 0),
"enable_min_periods": self.config_entry.options.get(_const.CONF_ENABLE_MIN_PERIODS_BEST, False),
"min_periods": self.config_entry.options.get(_const.CONF_MIN_PERIODS_BEST, 2),
"relaxation_attempts": self.config_entry.options.get(_const.CONF_RELAXATION_ATTEMPTS_BEST, 4),
},
"peak_price_config": {
"flex": self.config_entry.options.get(_const.CONF_PEAK_PRICE_FLEX, 15.0),
"min_level": self.config_entry.options.get(_const.CONF_PEAK_PRICE_MIN_LEVEL, "HIGH"),
"min_period_length": self.config_entry.options.get(_const.CONF_PEAK_PRICE_MIN_PERIOD_LENGTH, 4),
"min_distance_from_avg": self.config_entry.options.get(
_const.CONF_PEAK_PRICE_MIN_DISTANCE_FROM_AVG, 5.0
),
"max_level_gap_count": self.config_entry.options.get(_const.CONF_PEAK_PRICE_MAX_LEVEL_GAP_COUNT, 0),
"enable_min_periods": self.config_entry.options.get(_const.CONF_ENABLE_MIN_PERIODS_PEAK, False),
"min_periods": self.config_entry.options.get(_const.CONF_MIN_PERIODS_PEAK, 2),
"relaxation_attempts": self.config_entry.options.get(_const.CONF_RELAXATION_ATTEMPTS_PEAK, 4),
},
}
def _should_retransform_data(self, current_time: datetime) -> bool:
"""Check if data transformation should be performed."""
# No cached transformed data - must transform
if self._cached_transformed_data is None:
return True
# Configuration changed - must retransform
current_config = self._get_current_transformation_config()
if current_config != self._last_transformation_config:
self._log("debug", "Configuration changed, retransforming data")
return True
# Check for midnight turnover
now_local = self.time.as_local(current_time)
current_date = now_local.date()
if self._last_transformation_time is None:
return True
last_transform_local = self.time.as_local(self._last_transformation_time)
last_transform_date = last_transform_local.date()
if current_date != last_transform_date:
self._log("debug", "Midnight turnover detected, retransforming data")
return True
return False
def _transform_data_for_main_entry(self, raw_data: dict[str, Any]) -> dict[str, Any]:
"""Transform raw data for main entry (aggregated view of all homes)."""
current_time = self.time.now()
# Return cached transformed data if no retransformation needed
if not self._should_retransform_data(current_time) and self._cached_transformed_data is not None:
self._log("debug", "Using cached transformed data (no transformation needed)")
return self._cached_transformed_data
self._log("debug", "Transforming price data (enrichment only, periods calculated separately)")
# Delegate actual transformation to DataTransformer (enrichment only)
transformed_data = self._data_transformer.transform_data_for_main_entry(raw_data)
# Add periods (calculated and cached separately by PeriodCalculator)
if "priceInfo" in transformed_data:
transformed_data["periods"] = self._calculate_periods_for_price_info(transformed_data["priceInfo"])
# Cache the transformed data
self._cached_transformed_data = transformed_data
self._last_transformation_config = self._get_current_transformation_config()
self._last_transformation_time = current_time
return transformed_data
# Delegate complete transformation to DataTransformer (enrichment + periods)
# DataTransformer handles its own caching internally
return self._data_transformer.transform_data_for_main_entry(raw_data)
def _transform_data_for_subentry(self, main_data: dict[str, Any]) -> dict[str, Any]:
"""Transform main coordinator data for subentry (home-specific view)."""
current_time = self.time.now()
# Return cached transformed data if no retransformation needed
if not self._should_retransform_data(current_time) and self._cached_transformed_data is not None:
self._log("debug", "Using cached transformed data (no transformation needed)")
return self._cached_transformed_data
self._log("debug", "Transforming price data for home (enrichment only, periods calculated separately)")
home_id = self.config_entry.data.get("home_id")
if not home_id:
return main_data
# Delegate actual transformation to DataTransformer (enrichment only)
transformed_data = self._data_transformer.transform_data_for_subentry(main_data, home_id)
# Add periods (calculated and cached separately by PeriodCalculator)
if "priceInfo" in transformed_data:
transformed_data["periods"] = self._calculate_periods_for_price_info(transformed_data["priceInfo"])
# Cache the transformed data
self._cached_transformed_data = transformed_data
self._last_transformation_config = self._get_current_transformation_config()
self._last_transformation_time = current_time
return transformed_data
# Delegate complete transformation to DataTransformer (enrichment + periods)
# DataTransformer handles its own caching internally
return self._data_transformer.transform_data_for_subentry(main_data, home_id)
# --- Methods expected by sensors and services ---

View file

@ -27,18 +27,21 @@ class TibberPricesDataTransformer:
config_entry: ConfigEntry,
log_prefix: str,
perform_turnover_fn: Callable[[dict[str, Any]], dict[str, Any]],
calculate_periods_fn: Callable[[dict[str, Any]], dict[str, Any]],
time: TibberPricesTimeService,
) -> None:
"""Initialize the data transformer."""
self.config_entry = config_entry
self._log_prefix = log_prefix
self._perform_turnover_fn = perform_turnover_fn
self._calculate_periods_fn = calculate_periods_fn
self.time: TibberPricesTimeService = time
# Transformation cache
self._cached_transformed_data: dict[str, Any] | None = None
self._last_transformation_config: dict[str, Any] | None = None
self._last_midnight_check: datetime | None = None
self._last_source_data_timestamp: datetime | None = None # Track when source data changed
self._config_cache: dict[str, Any] | None = None
self._config_cache_valid = False
@ -110,12 +113,28 @@ class TibberPricesDataTransformer:
self._config_cache_valid = True
return config
def _should_retransform_data(self, current_time: datetime) -> bool:
"""Check if data transformation should be performed."""
def _should_retransform_data(self, current_time: datetime, source_data_timestamp: datetime | None = None) -> bool:
"""
Check if data transformation should be performed.
Args:
current_time: Current time for midnight check
source_data_timestamp: Timestamp of source data (if available)
Returns:
True if retransformation needed, False if cached data can be used
"""
# No cached transformed data - must transform
if self._cached_transformed_data is None:
return True
# Source data changed - must retransform
# This detects when new API data was fetched (e.g., tomorrow data arrival)
if source_data_timestamp is not None and source_data_timestamp != self._last_source_data_timestamp:
self._log("debug", "Source data changed, retransforming data")
return True
# Configuration changed - must retransform
current_config = self._get_current_transformation_config()
if current_config != self._last_transformation_config:
@ -141,13 +160,17 @@ class TibberPricesDataTransformer:
def transform_data_for_main_entry(self, raw_data: dict[str, Any]) -> dict[str, Any]:
"""Transform raw data for main entry (aggregated view of all homes)."""
current_time = self.time.now()
source_data_timestamp = raw_data.get("timestamp")
# Return cached transformed data if no retransformation needed
if not self._should_retransform_data(current_time) and self._cached_transformed_data is not None:
if (
not self._should_retransform_data(current_time, source_data_timestamp)
and self._cached_transformed_data is not None
):
self._log("debug", "Using cached transformed data (no transformation needed)")
return self._cached_transformed_data
self._log("debug", "Transforming price data (enrichment only, periods cached separately)")
self._log("debug", "Transforming price data (enrichment + period calculation)")
# For main entry, we can show data from the first home as default
# or provide an aggregated view
@ -181,32 +204,38 @@ class TibberPricesDataTransformer:
threshold_high=thresholds["high"],
)
# Note: Periods are calculated and cached separately by PeriodCalculator
# to avoid redundant caching (periods were cached twice before)
transformed_data = {
"timestamp": raw_data.get("timestamp"),
"homes": homes_data,
"priceInfo": price_info,
}
# Calculate periods (best price and peak price)
if "priceInfo" in transformed_data:
transformed_data["periods"] = self._calculate_periods_fn(transformed_data["priceInfo"])
# Cache the transformed data
self._cached_transformed_data = transformed_data
self._last_transformation_config = self._get_current_transformation_config()
self._last_midnight_check = current_time
self._last_source_data_timestamp = source_data_timestamp
return transformed_data
def transform_data_for_subentry(self, main_data: dict[str, Any], home_id: str) -> dict[str, Any]:
"""Transform main coordinator data for subentry (home-specific view)."""
current_time = self.time.now()
source_data_timestamp = main_data.get("timestamp")
# Return cached transformed data if no retransformation needed
if not self._should_retransform_data(current_time) and self._cached_transformed_data is not None:
if (
not self._should_retransform_data(current_time, source_data_timestamp)
and self._cached_transformed_data is not None
):
self._log("debug", "Using cached transformed data (no transformation needed)")
return self._cached_transformed_data
self._log("debug", "Transforming price data for home (enrichment only, periods cached separately)")
self._log("debug", "Transforming price data for home (enrichment + period calculation)")
if not home_id:
return main_data
@ -240,18 +269,20 @@ class TibberPricesDataTransformer:
threshold_high=thresholds["high"],
)
# Note: Periods are calculated and cached separately by PeriodCalculator
# to avoid redundant caching (periods were cached twice before)
transformed_data = {
"timestamp": main_data.get("timestamp"),
"priceInfo": price_info,
}
# Calculate periods (best price and peak price)
if "priceInfo" in transformed_data:
transformed_data["periods"] = self._calculate_periods_fn(transformed_data["priceInfo"])
# Cache the transformed data
self._cached_transformed_data = transformed_data
self._last_transformation_config = self._get_current_transformation_config()
self._last_midnight_check = current_time
self._last_source_data_timestamp = source_data_timestamp
return transformed_data

View file

@ -0,0 +1,254 @@
"""
Tests for tomorrow data arrival and cache invalidation.
Regression test for the bug where lifecycle sensor attributes (data_completeness,
tomorrow_available) didn't update after tomorrow data was successfully fetched
due to cached transformation data.
"""
from __future__ import annotations
from datetime import datetime, timedelta
from unittest.mock import Mock
from zoneinfo import ZoneInfo
import pytest
from custom_components.tibber_prices.coordinator.data_transformation import (
TibberPricesDataTransformer,
)
from custom_components.tibber_prices.coordinator.time_service import (
TibberPricesTimeService,
)
def create_price_intervals(day_offset: int = 0) -> list[dict]:
    """Create 96 mock price intervals (quarter-hourly for one day).

    Args:
        day_offset: Days to shift the generated day forward from the fixed
            base date (0 = "today", 1 = "tomorrow").

    Returns:
        A list of 96 interval dicts (one per 15-minute slot) carrying the
        ``startsAt``/``total``/``energy``/``tax``/``level`` keys the
        transformer expects.
    """
    base_date = datetime(2025, 11, 22, 0, 0, 0, tzinfo=ZoneInfo("Europe/Oslo"))
    # Use timedelta arithmetic instead of replace(day=day + offset): the
    # replace() form raises ValueError whenever day + offset exceeds the
    # number of days in the month (e.g. base day 30/31 with offset 1).
    day_start = base_date + timedelta(days=day_offset)
    intervals = []
    for i in range(96):
        interval_time = day_start + timedelta(minutes=15 * i)
        intervals.append(
            {
                "startsAt": interval_time,
                "total": 20.0 + (i % 10),
                "energy": 18.0 + (i % 10),
                "tax": 2.0,
                "level": "NORMAL",
            }
        )
    return intervals
@pytest.mark.unit
def test_transformation_cache_invalidation_on_new_timestamp() -> None:
    """
    Verify the DataTransformer drops its cache when the source timestamp moves.

    Core regression scenario:
    - tomorrow data arrives under a NEW timestamp
    - the cached transformation must be discarded
    - lifecycle attributes must be rebuilt from the fresh data
    """
    entry = Mock()
    entry.entry_id = "test_entry"
    entry.data = {"home_id": "home_123"}
    entry.options = {
        "price_rating_threshold_low": 75.0,
        "price_rating_threshold_high": 90.0,
    }

    clock = TibberPricesTimeService()
    t1 = datetime(2025, 11, 22, 13, 15, 0, tzinfo=ZoneInfo("Europe/Oslo"))

    # Stub the period calculator so transformation always yields empty periods.
    period_calc = Mock()
    period_calc.calculate_periods_for_price_info.return_value = {
        "best_price": [],
        "peak_price": [],
    }

    transformer = TibberPricesDataTransformer(
        config_entry=entry,
        log_prefix="[Test]",
        perform_turnover_fn=lambda x: x,  # no-op turnover
        calculate_periods_fn=period_calc.calculate_periods_for_price_info,
        time=clock,
    )

    def payload(stamp, tomorrow_intervals):
        """Build a raw-data dict with the given timestamp and tomorrow list."""
        return {
            "timestamp": stamp,
            "homes": {
                "home_123": {
                    "price_info": {
                        "yesterday": [],
                        "today": create_price_intervals(0),
                        "tomorrow": tomorrow_intervals,
                        "currency": "EUR",
                    }
                }
            },
        }

    # STEP 1: first transformation — today-only data under timestamp T1.
    data_t1 = payload(t1, [])  # no tomorrow data yet
    first = transformer.transform_data_for_main_entry(data_t1)
    assert first is not None
    assert first["priceInfo"]["tomorrow"] == []

    # STEP 2: identical timestamp -> cache hit, same object returned.
    assert transformer.transform_data_for_main_entry(data_t1) is first

    # STEP 3: newer timestamp -> cache must be invalidated.
    data_t2 = payload(t1 + timedelta(minutes=1), create_price_intervals(1))
    second = transformer.transform_data_for_main_entry(data_t2)

    # CRITICAL: re-transformed result, fresh tomorrow data, periods rebuilt.
    assert second is not first
    assert len(second["priceInfo"]["tomorrow"]) == 96
    assert "periods" in second
@pytest.mark.unit
def test_cache_behavior_on_config_change() -> None:
    """
    Document the current cache behavior for config-only changes.

    NOTE: A config change under an unchanged timestamp currently does NOT
    invalidate the cache. This is acceptable because:
    1. Config changes trigger a full coordinator reload (new instance)
    2. The critical bug was about NEW API DATA not updating (timestamp change)
    3. Options changes are handled at coordinator level via invalidate_config_cache()
    """
    entry = Mock()
    entry.entry_id = "test_entry"
    entry.data = {"home_id": "home_123"}
    entry.options = {
        "price_rating_threshold_low": 75.0,
        "price_rating_threshold_high": 90.0,
    }

    clock = TibberPricesTimeService()
    stamp = datetime(2025, 11, 22, 13, 15, 0, tzinfo=ZoneInfo("Europe/Oslo"))

    # Period calculation stubbed to fixed empty results.
    period_calc = Mock()
    period_calc.calculate_periods_for_price_info.return_value = {
        "best_price": [],
        "peak_price": [],
    }

    transformer = TibberPricesDataTransformer(
        config_entry=entry,
        log_prefix="[Test]",
        perform_turnover_fn=lambda x: x,
        calculate_periods_fn=period_calc.calculate_periods_for_price_info,
        time=clock,
    )

    data = {
        "timestamp": stamp,
        "homes": {
            "home_123": {
                "price_info": {
                    "yesterday": [],
                    "today": create_price_intervals(0),
                    "tomorrow": create_price_intervals(1),
                    "currency": "EUR",
                }
            }
        },
    }

    # Initial transformation, then a repeat that must hit the cache.
    baseline = transformer.transform_data_for_main_entry(data)
    assert baseline is not None
    assert transformer.transform_data_for_main_entry(data) is baseline

    # Mutate the options (a real config change would reload the coordinator).
    entry.options = {
        "price_rating_threshold_low": 80.0,  # Changed
        "price_rating_threshold_high": 95.0,  # Changed
    }

    # Same timestamp + changed config: current behavior keeps the cache
    # (acceptable, see docstring).
    assert transformer.transform_data_for_main_entry(data) is baseline
@pytest.mark.unit
def test_cache_preserved_when_neither_timestamp_nor_config_changed() -> None:
    """
    Verify the cache is PRESERVED while timestamp and config both stay fixed.

    Guards against unnecessary cache invalidation (and the redundant period
    recalculation that would come with it).
    """
    entry = Mock()
    entry.entry_id = "test_entry"
    entry.data = {"home_id": "home_123"}
    entry.options = {
        "price_rating_threshold_low": 75.0,
        "price_rating_threshold_high": 90.0,
    }

    clock = TibberPricesTimeService()
    stamp = datetime(2025, 11, 22, 13, 15, 0, tzinfo=ZoneInfo("Europe/Oslo"))

    # Period calculation stubbed; call count doubles as a cache-hit probe.
    period_calc = Mock()
    period_calc.calculate_periods_for_price_info.return_value = {
        "best_price": [],
        "peak_price": [],
    }

    transformer = TibberPricesDataTransformer(
        config_entry=entry,
        log_prefix="[Test]",
        perform_turnover_fn=lambda x: x,
        calculate_periods_fn=period_calc.calculate_periods_for_price_info,
        time=clock,
    )

    data = {
        "timestamp": stamp,
        "homes": {
            "home_123": {
                "price_info": {
                    "yesterday": [],
                    "today": create_price_intervals(0),
                    "tomorrow": create_price_intervals(1),
                    "currency": "EUR",
                }
            }
        },
    }

    # Repeated calls with unchanged data/config must all return the same object.
    results = [transformer.transform_data_for_main_entry(data) for _ in range(3)]
    assert results[0] is results[1] is results[2]

    # Periods were computed exactly once — during the first transform.
    assert period_calc.calculate_periods_for_price_info.call_count == 1