fix(coordinator): invalidate transformation cache when source data changes

Fixes bug where lifecycle sensor attributes (data_completeness, tomorrow_available)
didn't update after tomorrow data was successfully fetched from API.

Root cause: DataTransformer had cached transformation data but no mechanism to detect
when source API data changed (only checked config and midnight turnover).

Changes:
- coordinator/data_transformation.py: Track source_data_timestamp and invalidate cache
  when timestamp changes (detects new API data arrival)
- coordinator/data_transformation.py: Integrate period calculation into DataTransformer
  (calculate_periods_fn parameter) for complete single-layer caching
- coordinator/core.py: Remove duplicate transformation cache (_cached_transformed_data,
  _last_transformation_config), simplify _transform_data_for_*() to direct delegation
- tests/test_tomorrow_data_refresh.py: Add 3 regression tests for cache invalidation
  (new timestamp, config change behavior, cache preservation)

Impact: Lifecycle sensor attributes now update correctly when new API data arrives.
Reduced code by ~40 lines in coordinator, consolidated caching to single layer.
All 350 tests passing.
This commit is contained in:
Julian Pawlowski 2025-11-23 13:10:19 +00:00
parent cfae3c9387
commit 9ee7f81164
3 changed files with 306 additions and 123 deletions

View file

@ -20,7 +20,6 @@ if TYPE_CHECKING:
from .listeners import TimeServiceCallback
from custom_components.tibber_prices import const as _const
from custom_components.tibber_prices.api import (
TibberPricesApiClient,
TibberPricesApiClientAuthenticationError,
@ -212,6 +211,9 @@ class TibberPricesDataUpdateCoordinator(DataUpdateCoordinator[dict[str, Any]]):
config_entry=config_entry,
log_prefix=self._log_prefix,
perform_turnover_fn=self._perform_midnight_turnover,
calculate_periods_fn=lambda price_info: self._period_calculator.calculate_periods_for_price_info(
price_info
),
time=self.time,
)
self._period_calculator = TibberPricesPeriodCalculator(
@ -228,9 +230,6 @@ class TibberPricesDataUpdateCoordinator(DataUpdateCoordinator[dict[str, Any]]):
self._user_update_interval = timedelta(days=1)
self._cached_price_data: dict[str, Any] | None = None
self._last_price_update: datetime | None = None
self._cached_transformed_data: dict[str, Any] | None = None
self._last_transformation_config: dict[str, Any] | None = None
self._last_transformation_time: datetime | None = None # When data was last transformed (for cache)
# Data lifecycle tracking for diagnostic sensor
self._lifecycle_state: str = (
@ -766,122 +765,21 @@ class TibberPricesDataUpdateCoordinator(DataUpdateCoordinator[dict[str, Any]]):
"""Calculate periods (best price and peak price) for the given price info."""
return self._period_calculator.calculate_periods_for_price_info(price_info)
def _get_current_transformation_config(self) -> dict[str, Any]:
"""Get current configuration that affects data transformation."""
return {
"thresholds": self._get_threshold_percentages(),
"volatility_thresholds": {
"moderate": self.config_entry.options.get(_const.CONF_VOLATILITY_THRESHOLD_MODERATE, 15.0),
"high": self.config_entry.options.get(_const.CONF_VOLATILITY_THRESHOLD_HIGH, 25.0),
"very_high": self.config_entry.options.get(_const.CONF_VOLATILITY_THRESHOLD_VERY_HIGH, 40.0),
},
"best_price_config": {
"flex": self.config_entry.options.get(_const.CONF_BEST_PRICE_FLEX, 15.0),
"max_level": self.config_entry.options.get(_const.CONF_BEST_PRICE_MAX_LEVEL, "NORMAL"),
"min_period_length": self.config_entry.options.get(_const.CONF_BEST_PRICE_MIN_PERIOD_LENGTH, 4),
"min_distance_from_avg": self.config_entry.options.get(
_const.CONF_BEST_PRICE_MIN_DISTANCE_FROM_AVG, -5.0
),
"max_level_gap_count": self.config_entry.options.get(_const.CONF_BEST_PRICE_MAX_LEVEL_GAP_COUNT, 0),
"enable_min_periods": self.config_entry.options.get(_const.CONF_ENABLE_MIN_PERIODS_BEST, False),
"min_periods": self.config_entry.options.get(_const.CONF_MIN_PERIODS_BEST, 2),
"relaxation_attempts": self.config_entry.options.get(_const.CONF_RELAXATION_ATTEMPTS_BEST, 4),
},
"peak_price_config": {
"flex": self.config_entry.options.get(_const.CONF_PEAK_PRICE_FLEX, 15.0),
"min_level": self.config_entry.options.get(_const.CONF_PEAK_PRICE_MIN_LEVEL, "HIGH"),
"min_period_length": self.config_entry.options.get(_const.CONF_PEAK_PRICE_MIN_PERIOD_LENGTH, 4),
"min_distance_from_avg": self.config_entry.options.get(
_const.CONF_PEAK_PRICE_MIN_DISTANCE_FROM_AVG, 5.0
),
"max_level_gap_count": self.config_entry.options.get(_const.CONF_PEAK_PRICE_MAX_LEVEL_GAP_COUNT, 0),
"enable_min_periods": self.config_entry.options.get(_const.CONF_ENABLE_MIN_PERIODS_PEAK, False),
"min_periods": self.config_entry.options.get(_const.CONF_MIN_PERIODS_PEAK, 2),
"relaxation_attempts": self.config_entry.options.get(_const.CONF_RELAXATION_ATTEMPTS_PEAK, 4),
},
}
def _should_retransform_data(self, current_time: datetime) -> bool:
"""Check if data transformation should be performed."""
# No cached transformed data - must transform
if self._cached_transformed_data is None:
return True
# Configuration changed - must retransform
current_config = self._get_current_transformation_config()
if current_config != self._last_transformation_config:
self._log("debug", "Configuration changed, retransforming data")
return True
# Check for midnight turnover
now_local = self.time.as_local(current_time)
current_date = now_local.date()
if self._last_transformation_time is None:
return True
last_transform_local = self.time.as_local(self._last_transformation_time)
last_transform_date = last_transform_local.date()
if current_date != last_transform_date:
self._log("debug", "Midnight turnover detected, retransforming data")
return True
return False
def _transform_data_for_main_entry(self, raw_data: dict[str, Any]) -> dict[str, Any]:
"""Transform raw data for main entry (aggregated view of all homes)."""
current_time = self.time.now()
# Return cached transformed data if no retransformation needed
if not self._should_retransform_data(current_time) and self._cached_transformed_data is not None:
self._log("debug", "Using cached transformed data (no transformation needed)")
return self._cached_transformed_data
self._log("debug", "Transforming price data (enrichment only, periods calculated separately)")
# Delegate actual transformation to DataTransformer (enrichment only)
transformed_data = self._data_transformer.transform_data_for_main_entry(raw_data)
# Add periods (calculated and cached separately by PeriodCalculator)
if "priceInfo" in transformed_data:
transformed_data["periods"] = self._calculate_periods_for_price_info(transformed_data["priceInfo"])
# Cache the transformed data
self._cached_transformed_data = transformed_data
self._last_transformation_config = self._get_current_transformation_config()
self._last_transformation_time = current_time
return transformed_data
# Delegate complete transformation to DataTransformer (enrichment + periods)
# DataTransformer handles its own caching internally
return self._data_transformer.transform_data_for_main_entry(raw_data)
def _transform_data_for_subentry(self, main_data: dict[str, Any]) -> dict[str, Any]:
"""Transform main coordinator data for subentry (home-specific view)."""
current_time = self.time.now()
# Return cached transformed data if no retransformation needed
if not self._should_retransform_data(current_time) and self._cached_transformed_data is not None:
self._log("debug", "Using cached transformed data (no transformation needed)")
return self._cached_transformed_data
self._log("debug", "Transforming price data for home (enrichment only, periods calculated separately)")
home_id = self.config_entry.data.get("home_id")
if not home_id:
return main_data
# Delegate actual transformation to DataTransformer (enrichment only)
transformed_data = self._data_transformer.transform_data_for_subentry(main_data, home_id)
# Add periods (calculated and cached separately by PeriodCalculator)
if "priceInfo" in transformed_data:
transformed_data["periods"] = self._calculate_periods_for_price_info(transformed_data["priceInfo"])
# Cache the transformed data
self._cached_transformed_data = transformed_data
self._last_transformation_config = self._get_current_transformation_config()
self._last_transformation_time = current_time
return transformed_data
# Delegate complete transformation to DataTransformer (enrichment + periods)
# DataTransformer handles its own caching internally
return self._data_transformer.transform_data_for_subentry(main_data, home_id)
# --- Methods expected by sensors and services ---

View file

@ -27,18 +27,21 @@ class TibberPricesDataTransformer:
config_entry: ConfigEntry,
log_prefix: str,
perform_turnover_fn: Callable[[dict[str, Any]], dict[str, Any]],
calculate_periods_fn: Callable[[dict[str, Any]], dict[str, Any]],
time: TibberPricesTimeService,
) -> None:
"""Initialize the data transformer."""
self.config_entry = config_entry
self._log_prefix = log_prefix
self._perform_turnover_fn = perform_turnover_fn
self._calculate_periods_fn = calculate_periods_fn
self.time: TibberPricesTimeService = time
# Transformation cache
self._cached_transformed_data: dict[str, Any] | None = None
self._last_transformation_config: dict[str, Any] | None = None
self._last_midnight_check: datetime | None = None
self._last_source_data_timestamp: datetime | None = None # Track when source data changed
self._config_cache: dict[str, Any] | None = None
self._config_cache_valid = False
@ -110,12 +113,28 @@ class TibberPricesDataTransformer:
self._config_cache_valid = True
return config
def _should_retransform_data(self, current_time: datetime) -> bool:
"""Check if data transformation should be performed."""
def _should_retransform_data(self, current_time: datetime, source_data_timestamp: datetime | None = None) -> bool:
"""
Check if data transformation should be performed.
Args:
current_time: Current time for midnight check
source_data_timestamp: Timestamp of source data (if available)
Returns:
True if retransformation needed, False if cached data can be used
"""
# No cached transformed data - must transform
if self._cached_transformed_data is None:
return True
# Source data changed - must retransform
# This detects when new API data was fetched (e.g., tomorrow data arrival)
if source_data_timestamp is not None and source_data_timestamp != self._last_source_data_timestamp:
self._log("debug", "Source data changed, retransforming data")
return True
# Configuration changed - must retransform
current_config = self._get_current_transformation_config()
if current_config != self._last_transformation_config:
@ -141,13 +160,17 @@ class TibberPricesDataTransformer:
def transform_data_for_main_entry(self, raw_data: dict[str, Any]) -> dict[str, Any]:
"""Transform raw data for main entry (aggregated view of all homes)."""
current_time = self.time.now()
source_data_timestamp = raw_data.get("timestamp")
# Return cached transformed data if no retransformation needed
if not self._should_retransform_data(current_time) and self._cached_transformed_data is not None:
if (
not self._should_retransform_data(current_time, source_data_timestamp)
and self._cached_transformed_data is not None
):
self._log("debug", "Using cached transformed data (no transformation needed)")
return self._cached_transformed_data
self._log("debug", "Transforming price data (enrichment only, periods cached separately)")
self._log("debug", "Transforming price data (enrichment + period calculation)")
# For main entry, we can show data from the first home as default
# or provide an aggregated view
@ -181,32 +204,38 @@ class TibberPricesDataTransformer:
threshold_high=thresholds["high"],
)
# Note: Periods are calculated and cached separately by PeriodCalculator
# to avoid redundant caching (periods were cached twice before)
transformed_data = {
"timestamp": raw_data.get("timestamp"),
"homes": homes_data,
"priceInfo": price_info,
}
# Calculate periods (best price and peak price)
if "priceInfo" in transformed_data:
transformed_data["periods"] = self._calculate_periods_fn(transformed_data["priceInfo"])
# Cache the transformed data
self._cached_transformed_data = transformed_data
self._last_transformation_config = self._get_current_transformation_config()
self._last_midnight_check = current_time
self._last_source_data_timestamp = source_data_timestamp
return transformed_data
def transform_data_for_subentry(self, main_data: dict[str, Any], home_id: str) -> dict[str, Any]:
"""Transform main coordinator data for subentry (home-specific view)."""
current_time = self.time.now()
source_data_timestamp = main_data.get("timestamp")
# Return cached transformed data if no retransformation needed
if not self._should_retransform_data(current_time) and self._cached_transformed_data is not None:
if (
not self._should_retransform_data(current_time, source_data_timestamp)
and self._cached_transformed_data is not None
):
self._log("debug", "Using cached transformed data (no transformation needed)")
return self._cached_transformed_data
self._log("debug", "Transforming price data for home (enrichment only, periods cached separately)")
self._log("debug", "Transforming price data for home (enrichment + period calculation)")
if not home_id:
return main_data
@ -240,18 +269,20 @@ class TibberPricesDataTransformer:
threshold_high=thresholds["high"],
)
# Note: Periods are calculated and cached separately by PeriodCalculator
# to avoid redundant caching (periods were cached twice before)
transformed_data = {
"timestamp": main_data.get("timestamp"),
"priceInfo": price_info,
}
# Calculate periods (best price and peak price)
if "priceInfo" in transformed_data:
transformed_data["periods"] = self._calculate_periods_fn(transformed_data["priceInfo"])
# Cache the transformed data
self._cached_transformed_data = transformed_data
self._last_transformation_config = self._get_current_transformation_config()
self._last_midnight_check = current_time
self._last_source_data_timestamp = source_data_timestamp
return transformed_data

View file

@ -0,0 +1,254 @@
"""
Tests for tomorrow data arrival and cache invalidation.
Regression test for the bug where lifecycle sensor attributes (data_completeness,
tomorrow_available) didn't update after tomorrow data was successfully fetched
due to cached transformation data.
"""
from __future__ import annotations
from datetime import datetime, timedelta
from unittest.mock import Mock
from zoneinfo import ZoneInfo
import pytest
from custom_components.tibber_prices.coordinator.data_transformation import (
TibberPricesDataTransformer,
)
from custom_components.tibber_prices.coordinator.time_service import (
TibberPricesTimeService,
)
def create_price_intervals(day_offset: int = 0) -> list[dict]:
    """Create 96 mock price intervals (quarter-hourly for one day).

    Args:
        day_offset: Days to shift the generated day forward from the fixed
            base date (0 = "today", 1 = "tomorrow").

    Returns:
        A list of 96 interval dicts (one per 15-minute slot) carrying the
        ``startsAt``/``total``/``energy``/``tax``/``level`` keys the
        transformer expects.
    """
    base_date = datetime(2025, 11, 22, 0, 0, 0, tzinfo=ZoneInfo("Europe/Oslo"))
    # Use timedelta arithmetic instead of replace(day=day + offset): the
    # replace() form raises ValueError whenever day + offset exceeds the
    # number of days in the month (e.g. base day 30/31 with offset 1).
    day_start = base_date + timedelta(days=day_offset)
    intervals = []
    for i in range(96):
        interval_time = day_start + timedelta(minutes=15 * i)
        intervals.append(
            {
                "startsAt": interval_time,
                "total": 20.0 + (i % 10),
                "energy": 18.0 + (i % 10),
                "tax": 2.0,
                "level": "NORMAL",
            }
        )
    return intervals
@pytest.mark.unit
def test_transformation_cache_invalidation_on_new_timestamp() -> None:
    """
    Verify the DataTransformer drops its cache when the source timestamp moves.

    Core regression scenario:
    - tomorrow data arrives under a NEW timestamp
    - the cached transformation must be discarded
    - lifecycle attributes must be rebuilt from the fresh data
    """
    entry = Mock()
    entry.entry_id = "test_entry"
    entry.data = {"home_id": "home_123"}
    entry.options = {
        "price_rating_threshold_low": 75.0,
        "price_rating_threshold_high": 90.0,
    }

    clock = TibberPricesTimeService()
    t1 = datetime(2025, 11, 22, 13, 15, 0, tzinfo=ZoneInfo("Europe/Oslo"))

    # Stub the period calculator so transformation always yields empty periods.
    period_calc = Mock()
    period_calc.calculate_periods_for_price_info.return_value = {
        "best_price": [],
        "peak_price": [],
    }

    transformer = TibberPricesDataTransformer(
        config_entry=entry,
        log_prefix="[Test]",
        perform_turnover_fn=lambda x: x,  # no-op turnover
        calculate_periods_fn=period_calc.calculate_periods_for_price_info,
        time=clock,
    )

    def payload(stamp, tomorrow_intervals):
        """Build a raw-data dict with the given timestamp and tomorrow list."""
        return {
            "timestamp": stamp,
            "homes": {
                "home_123": {
                    "price_info": {
                        "yesterday": [],
                        "today": create_price_intervals(0),
                        "tomorrow": tomorrow_intervals,
                        "currency": "EUR",
                    }
                }
            },
        }

    # STEP 1: first transformation — today-only data under timestamp T1.
    data_t1 = payload(t1, [])  # no tomorrow data yet
    first = transformer.transform_data_for_main_entry(data_t1)
    assert first is not None
    assert first["priceInfo"]["tomorrow"] == []

    # STEP 2: identical timestamp -> cache hit, same object returned.
    assert transformer.transform_data_for_main_entry(data_t1) is first

    # STEP 3: newer timestamp -> cache must be invalidated.
    data_t2 = payload(t1 + timedelta(minutes=1), create_price_intervals(1))
    second = transformer.transform_data_for_main_entry(data_t2)

    # CRITICAL: re-transformed result, fresh tomorrow data, periods rebuilt.
    assert second is not first
    assert len(second["priceInfo"]["tomorrow"]) == 96
    assert "periods" in second
@pytest.mark.unit
def test_cache_behavior_on_config_change() -> None:
    """
    Document the current cache behavior for config-only changes.

    NOTE: A config change under an unchanged timestamp currently does NOT
    invalidate the cache. This is acceptable because:
    1. Config changes trigger a full coordinator reload (new instance)
    2. The critical bug was about NEW API DATA not updating (timestamp change)
    3. Options changes are handled at coordinator level via invalidate_config_cache()
    """
    entry = Mock()
    entry.entry_id = "test_entry"
    entry.data = {"home_id": "home_123"}
    entry.options = {
        "price_rating_threshold_low": 75.0,
        "price_rating_threshold_high": 90.0,
    }

    clock = TibberPricesTimeService()
    stamp = datetime(2025, 11, 22, 13, 15, 0, tzinfo=ZoneInfo("Europe/Oslo"))

    # Period calculation stubbed to fixed empty results.
    period_calc = Mock()
    period_calc.calculate_periods_for_price_info.return_value = {
        "best_price": [],
        "peak_price": [],
    }

    transformer = TibberPricesDataTransformer(
        config_entry=entry,
        log_prefix="[Test]",
        perform_turnover_fn=lambda x: x,
        calculate_periods_fn=period_calc.calculate_periods_for_price_info,
        time=clock,
    )

    data = {
        "timestamp": stamp,
        "homes": {
            "home_123": {
                "price_info": {
                    "yesterday": [],
                    "today": create_price_intervals(0),
                    "tomorrow": create_price_intervals(1),
                    "currency": "EUR",
                }
            }
        },
    }

    # Initial transformation, then a repeat that must hit the cache.
    baseline = transformer.transform_data_for_main_entry(data)
    assert baseline is not None
    assert transformer.transform_data_for_main_entry(data) is baseline

    # Mutate the options (a real config change would reload the coordinator).
    entry.options = {
        "price_rating_threshold_low": 80.0,  # Changed
        "price_rating_threshold_high": 95.0,  # Changed
    }

    # Same timestamp + changed config: current behavior keeps the cache
    # (acceptable, see docstring).
    assert transformer.transform_data_for_main_entry(data) is baseline
@pytest.mark.unit
def test_cache_preserved_when_neither_timestamp_nor_config_changed() -> None:
    """
    Verify the cache is PRESERVED while timestamp and config both stay fixed.

    Guards against unnecessary cache invalidation (and the redundant period
    recalculation that would come with it).
    """
    entry = Mock()
    entry.entry_id = "test_entry"
    entry.data = {"home_id": "home_123"}
    entry.options = {
        "price_rating_threshold_low": 75.0,
        "price_rating_threshold_high": 90.0,
    }

    clock = TibberPricesTimeService()
    stamp = datetime(2025, 11, 22, 13, 15, 0, tzinfo=ZoneInfo("Europe/Oslo"))

    # Period calculation stubbed; call count doubles as a cache-hit probe.
    period_calc = Mock()
    period_calc.calculate_periods_for_price_info.return_value = {
        "best_price": [],
        "peak_price": [],
    }

    transformer = TibberPricesDataTransformer(
        config_entry=entry,
        log_prefix="[Test]",
        perform_turnover_fn=lambda x: x,
        calculate_periods_fn=period_calc.calculate_periods_for_price_info,
        time=clock,
    )

    data = {
        "timestamp": stamp,
        "homes": {
            "home_123": {
                "price_info": {
                    "yesterday": [],
                    "today": create_price_intervals(0),
                    "tomorrow": create_price_intervals(1),
                    "currency": "EUR",
                }
            }
        },
    }

    # Repeated calls with unchanged data/config must all return the same object.
    results = [transformer.transform_data_for_main_entry(data) for _ in range(3)]
    assert results[0] is results[1] is results[2]

    # Periods were computed exactly once — during the first transform.
    assert period_calc.calculate_periods_for_price_info.call_count == 1