Skip to content

Commit

Permalink
cache_df is not just for read_excel Klimatbyran#682
Browse files Browse the repository at this point in the history
Do not mention Excel in cache_df.

Also clarify why column names are cached separately.
  • Loading branch information
joakimbits committed Sep 22, 2024
1 parent e1b86bc commit 7516b67
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions data/issues/emissions/cache_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ def caching_f(*args, **kwargs):
raise ValueError("Path parameter is required either as a decorator argument or function argument.")

# Create a hash of the path for the cache file
excel_hash = hashlib.md5(input_path.encode()).hexdigest()
df_file = f'cache_df_{f.__name__}_{excel_hash}.feather'
columns_file = f'cache_df_{f.__name__}_{excel_hash}.pkl'
path_hash = hashlib.md5(input_path.encode()).hexdigest()
df_file = f'cache_df_{f.__name__}_{path_hash}.feather'
columns_file = f'cache_df_{f.__name__}_{path_hash}.pkl'

# Check if the cached file and the columns file exist and are in the same period as now
if os.path.exists(df_file):
Expand All @@ -95,10 +95,10 @@ def caching_f(*args, **kwargs):

# Process and cache the data
df = f(*args, **kwargs)
# Save the original column names separately
original_columns = df.columns
feather.write_feather(df, df_file)
pd.to_pickle(original_columns, columns_file)

# Save the original column names separately since feather does not support different heading types
pd.to_pickle(df.columns, columns_file)

return df

Expand Down

0 comments on commit 7516b67

Please sign in to comment.