Files
bussines_case_automation/update_excel_xlsxwriter.py
2025-10-01 06:37:47 +00:00

488 lines
25 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
import json
import os
import re
import openpyxl
CURRENCY_LOCALE_MAP = {
'$': '-409', # English (US)
'': '-2', # English (Euro)
}
CURRENCY_BRACKET_PATTERN = re.compile(r'\[\$([^\]-]*)(-[^\]]*)?\]')
def apply_currency_symbol(workbook, symbol):
"""Propagate the selected currency symbol across formats, labels, and charts."""
target_symbol = (symbol or '$').strip() or '$'
def replace_currency_token(fmt):
if not isinstance(fmt, str) or ('$' not in fmt and '' not in fmt):
return fmt
def _replace_match(match):
locale_hint = match.group(2) or ''
if target_symbol in CURRENCY_LOCALE_MAP:
locale_hint = f"-{CURRENCY_LOCALE_MAP[target_symbol].lstrip('-')}" if match.group(2) or locale_hint else ''
return f'[${target_symbol}{locale_hint}]'
updated = CURRENCY_BRACKET_PATTERN.sub(_replace_match, fmt)
updated = updated.replace('"$"', f'"{target_symbol}"')
return updated
for worksheet in workbook.worksheets:
for row in worksheet.iter_rows():
for cell in row:
fmt = cell.number_format
updated_format = replace_currency_token(fmt)
if updated_format != fmt:
cell.number_format = updated_format
value = cell.value
if isinstance(value, str) and not value.startswith('=') and ('' in value or '$' in value):
new_value = value.replace('', target_symbol).replace('$', target_symbol)
if new_value != value:
cell.value = new_value
for chart in getattr(worksheet, '_charts', []):
axes = [getattr(chart, 'y_axis', None), getattr(chart, 'secondary_y_axis', None)]
for axis in axes:
if not axis or not getattr(axis, 'number_format', None):
continue
fmt_obj = axis.number_format
format_code = getattr(fmt_obj, 'formatCode', None)
if not isinstance(format_code, str):
continue
updated_code = replace_currency_token(format_code)
if updated_code != format_code:
fmt_obj.formatCode = updated_code
def update_excel_variables(excel_path):
"""
Update the Variables sheet in the Excel file with values from config.json
and hide forecast sheets that aren't in the calculated years array.
This version uses openpyxl exclusively to preserve all formatting, formulas,
and Excel features that xlsxwriter cannot handle when modifying existing files.
While this is named "xlsxwriter", it actually uses openpyxl for the best
approach to modify existing Excel files while preserving all features.
Args:
excel_path (str): Path to the Excel file to update
Returns:
bool: True if successful, False otherwise
"""
# Define paths
script_dir = os.path.dirname(os.path.abspath(__file__))
config_path = os.path.join(script_dir, 'config.json')
try:
# Load config.json
with open(config_path, 'r') as f:
config = json.load(f)
user_data = config.get('user_data', {})
# Load Excel workbook
print(f"Opening Excel file: {excel_path}")
wb = openpyxl.load_workbook(excel_path)
# Break any external links to prevent unsafe external sources error
print("Breaking any external links...")
try:
# Clear external links if they exist
if hasattr(wb, '_external_links'):
wb._external_links.clear()
if hasattr(wb, 'external_links'):
wb.external_links.clear()
# Remove any defined names that might contain external references
names_to_remove = []
for name in wb.defined_names:
if name.value and ('[' in str(name.value) or 'store_name' in str(name.value)):
names_to_remove.append(name.name)
print(f"Removing potentially problematic defined name: {name.name}")
for name_to_remove in names_to_remove:
del wb.defined_names[name_to_remove]
# Set calculation mode to manual to prevent external link issues
if hasattr(wb, 'calculation') and hasattr(wb.calculation, 'calcMode'):
wb.calculation.calcMode = 'manual'
print("Set calculation mode to manual")
print("External links handling completed")
except Exception as e:
print(f"Warning during external links handling: {e}")
# Try to access the Variables sheet
try:
# First try by name
sheet = wb['Variables']
except KeyError:
# If not found by name, try to access the last sheet
sheet_names = wb.sheetnames
if sheet_names:
print(f"Variables sheet not found by name. Using last sheet: {sheet_names[-1]}")
sheet = wb[sheet_names[-1]]
else:
print("No sheets found in the workbook")
return False
# Map config variables to Excel cells based on the provided mapping
cell_mappings = {
'B2': user_data.get('store_name', ''),
'B31': user_data.get('starting_date', ''),
'B32': user_data.get('duration', 36),
'B37': user_data.get('currency_symbol', ''),
# Convenience store type
'H37': user_data.get('convenience_store_type', {}).get('stores_number', 0),
'C37': user_data.get('convenience_store_type', {}).get('monthly_transactions', 0),
# Convert boolean to 1/0 for has_digital_screens
'I37': 1 if user_data.get('convenience_store_type', {}).get('has_digital_screens', False) else 0,
'J37': user_data.get('convenience_store_type', {}).get('screen_count', 0),
'K37': user_data.get('convenience_store_type', {}).get('screen_percentage', 0),
# Convert boolean to 1/0 for has_in_store_radio
'M37': 1 if user_data.get('convenience_store_type', {}).get('has_in_store_radio', False) else 0,
'N37': user_data.get('convenience_store_type', {}).get('radio_percentage', 0),
# Minimarket store type
'H38': user_data.get('minimarket_store_type', {}).get('stores_number', 0),
'C38': user_data.get('minimarket_store_type', {}).get('monthly_transactions', 0),
# Convert boolean to 1/0 for has_digital_screens
'I38': 1 if user_data.get('minimarket_store_type', {}).get('has_digital_screens', False) else 0,
'J38': user_data.get('minimarket_store_type', {}).get('screen_count', 0),
'K38': user_data.get('minimarket_store_type', {}).get('screen_percentage', 0),
# Convert boolean to 1/0 for has_in_store_radio
'M38': 1 if user_data.get('minimarket_store_type', {}).get('has_in_store_radio', False) else 0,
'N38': user_data.get('minimarket_store_type', {}).get('radio_percentage', 0),
# Supermarket store type
'H39': user_data.get('supermarket_store_type', {}).get('stores_number', 0),
'C39': user_data.get('supermarket_store_type', {}).get('monthly_transactions', 0),
# Convert boolean to 1/0 for has_digital_screens
'I39': 1 if user_data.get('supermarket_store_type', {}).get('has_digital_screens', False) else 0,
'J39': user_data.get('supermarket_store_type', {}).get('screen_count', 0),
'K39': user_data.get('supermarket_store_type', {}).get('screen_percentage', 0),
# Convert boolean to 1/0 for has_in_store_radio
'M39': 1 if user_data.get('supermarket_store_type', {}).get('has_in_store_radio', False) else 0,
'N39': user_data.get('supermarket_store_type', {}).get('radio_percentage', 0),
# Hypermarket store type
'H40': user_data.get('hypermarket_store_type', {}).get('stores_number', 0),
'C40': user_data.get('hypermarket_store_type', {}).get('monthly_transactions', 0),
# Convert boolean to 1/0 for has_digital_screens
'I40': 1 if user_data.get('hypermarket_store_type', {}).get('has_digital_screens', False) else 0,
'J40': user_data.get('hypermarket_store_type', {}).get('screen_count', 0),
'K40': user_data.get('hypermarket_store_type', {}).get('screen_percentage', 0),
# Convert boolean to 1/0 for has_in_store_radio
'M40': 1 if user_data.get('hypermarket_store_type', {}).get('has_in_store_radio', False) else 0,
'N40': user_data.get('hypermarket_store_type', {}).get('radio_percentage', 0),
# On-site channels
'B43': user_data.get('website_visitors', 0),
'B44': user_data.get('app_users', 0),
'B45': user_data.get('loyalty_users', 0),
# Off-site channels
'B49': user_data.get('facebook_followers', 0),
'B50': user_data.get('instagram_followers', 0),
'B51': user_data.get('google_views', 0),
'B52': user_data.get('email_subscribers', 0),
'B53': user_data.get('sms_users', 0),
'B54': user_data.get('whatsapp_contacts', 0)
}
# Update the cells
for cell_ref, value in cell_mappings.items():
try:
# Force the value to be set, even if the cell is protected or has data validation
cell = sheet[cell_ref]
cell.value = value
print(f"Updated {cell_ref} with value: {value}")
except Exception as e:
print(f"Error updating cell {cell_ref}: {e}")
apply_currency_symbol(wb, user_data.get('currency_symbol', ''))
# Force formula recalculation before saving
print("Forcing formula recalculation...")
wb.calculation.calcMode = 'auto'
wb.calculation.fullCalcOnLoad = True
wb.calculation.fullPrecision = True
# Save the workbook with variables updated
print("Saving workbook with updated variables...")
wb.save(excel_path)
# Rename sheets containing store_name placeholder with actual store name
store_name = user_data.get('store_name', 'Your Store')
if store_name and store_name.strip():
print(f"Renaming sheets with store_name placeholder to: {store_name}")
# Create a list of sheets to rename to avoid modifying during iteration
sheets_to_rename = []
for sheet_name in wb.sheetnames:
if 'store_name' in sheet_name:
new_sheet_name = sheet_name.replace('store_name', store_name)
sheets_to_rename.append((sheet_name, new_sheet_name))
# Update all formulas and references before renaming sheets
if sheets_to_rename:
print("Updating formulas to prevent external link errors...")
# Go through all sheets and update any formulas that reference sheets with store_name
for ws in wb.worksheets:
for row in ws.iter_rows():
for cell in row:
if cell.value and isinstance(cell.value, str) and cell.value.startswith('='):
original_formula = cell.value
updated_formula = original_formula
# Replace sheet references in formulas
for old_name, new_name in sheets_to_rename:
# Handle different formula reference patterns
patterns_to_replace = [
f"'{old_name}'!", # 'Sheet Name'!
f"{old_name}!", # SheetName! (if no spaces)
f"'{old_name}'.", # 'Sheet Name'. (alternative reference)
]
for pattern in patterns_to_replace:
if pattern in updated_formula:
new_pattern = pattern.replace(old_name, new_name)
updated_formula = updated_formula.replace(pattern, new_pattern)
# Also replace ALL store_name placeholder variations within formula content
store_name_variations = ['store_name', '{store_name}', 'store_name}', '{store_name']
for variation in store_name_variations:
if variation in updated_formula:
updated_formula = updated_formula.replace(variation, store_name)
# Update the cell if formula changed
if updated_formula != original_formula:
try:
cell.value = updated_formula
print(f"Updated formula in {ws.title}!{cell.coordinate}")
except Exception as e:
print(f"Warning: Could not update formula in {ws.title}!{cell.coordinate}: {e}")
# Also check for store_name in regular cell values (non-formula)
elif cell.value and isinstance(cell.value, str) and 'store_name' in cell.value:
try:
original_value = cell.value
updated_value = original_value.replace('store_name', store_name)
cell.value = updated_value
print(f"Updated cell value in {ws.title}!{cell.coordinate}: '{original_value}' -> '{updated_value}'")
except Exception as e:
print(f"Warning: Could not update cell value in {ws.title}!{cell.coordinate}: {e}")
# Now safely rename each sheet
for old_name, new_name in sheets_to_rename:
try:
sheet_obj = wb[old_name]
sheet_obj.title = new_name
print(f"Renamed sheet '{old_name}' to '{new_name}'")
except Exception as e:
print(f"Error renaming sheet '{old_name}' to '{new_name}': {e}")
# COMPREHENSIVE pass: Replace store_name in ALL cells throughout the workbook
print("=== COMPREHENSIVE SCAN: Checking all sheets for store_name placeholders ===")
total_formulas_updated = 0
total_text_updated = 0
for ws in wb.worksheets:
print(f"Scanning sheet: {ws.title}")
sheet_formulas_updated = 0
sheet_text_updated = 0
for row in ws.iter_rows():
for cell in row:
if cell.value:
# Handle ArrayFormula objects specially
if hasattr(cell.value, 'text'): # ArrayFormula
formula_text = cell.value.text
store_name_variations = ['store_name', '{store_name}', 'store_name}', '{store_name']
has_store_name = any(variation in formula_text for variation in store_name_variations)
if has_store_name:
original_formula = formula_text
updated_formula = original_formula
# Replace all variations
for variation in store_name_variations:
if variation in updated_formula:
updated_formula = updated_formula.replace(variation, store_name)
if updated_formula != original_formula:
try:
cell.value.text = updated_formula
sheet_formulas_updated += 1
total_formulas_updated += 1
print(f" ✓ Updated store_name in ArrayFormula {cell.coordinate}: {original_formula}")
except Exception as e:
print(f" ✗ Could not update ArrayFormula {cell.coordinate}: {e}")
elif isinstance(cell.value, str):
# Handle regular string cells
store_name_variations = ['store_name', '{store_name}', 'store_name}', '{store_name']
has_store_name = any(variation in cell.value for variation in store_name_variations)
if has_store_name:
if cell.value.startswith('='):
# Formula with store_name variations
original_formula = cell.value
updated_formula = original_formula
# Replace all variations
for variation in store_name_variations:
if variation in updated_formula:
updated_formula = updated_formula.replace(variation, store_name)
if updated_formula != original_formula:
try:
cell.value = updated_formula
sheet_formulas_updated += 1
total_formulas_updated += 1
print(f" ✓ Updated store_name in formula {cell.coordinate}: {original_formula[:50]}...")
except Exception as e:
print(f" ✗ Could not update formula {cell.coordinate}: {e}")
else:
# Regular text with store_name variations
original_value = cell.value
updated_value = original_value
# Replace all variations
for variation in store_name_variations:
if variation in updated_value:
updated_value = updated_value.replace(variation, store_name)
if updated_value != original_value:
try:
cell.value = updated_value
sheet_text_updated += 1
total_text_updated += 1
print(f" ✓ Updated store_name in text {cell.coordinate}: '{original_value}' -> '{updated_value}'")
except Exception as e:
print(f" ✗ Could not update text {cell.coordinate}: {e}")
if sheet_formulas_updated > 0 or sheet_text_updated > 0:
print(f" → Sheet {ws.title}: {sheet_formulas_updated} formulas, {sheet_text_updated} text cells updated")
else:
print(f" → Sheet {ws.title}: No store_name placeholders found")
print(f"=== TOTAL UPDATES: {total_formulas_updated} formulas, {total_text_updated} text cells ===")
# Save after sheet renaming and formula updates
if sheets_to_rename:
print("Saving workbook after sheet renaming and formula updates...")
wb.save(excel_path)
# Get the calculated years array from config
starting_date = user_data.get('starting_date', '')
duration = user_data.get('duration', 36)
calculated_years = []
# Import datetime at the module level to avoid scope issues
import datetime
from dateutil.relativedelta import relativedelta
# Calculate years array based on starting_date and duration
try:
# Try to parse the date, supporting both dd/mm/yyyy and dd.mm.yyyy formats
if starting_date:
if '/' in str(starting_date):
day, month, year = map(int, str(starting_date).split('/'))
elif '.' in str(starting_date):
day, month, year = map(int, str(starting_date).split('.'))
elif '-' in str(starting_date):
# Handle ISO format (yyyy-mm-dd)
date_parts = str(starting_date).split('-')
if len(date_parts) == 3:
year, month, day = map(int, date_parts)
else:
# Default to current date if format is not recognized
current_date = datetime.datetime.now()
year, month, day = current_date.year, current_date.month, current_date.day
elif isinstance(starting_date, datetime.datetime):
day, month, year = starting_date.day, starting_date.month, starting_date.year
else:
# Default to current date if format is not recognized
current_date = datetime.datetime.now()
year, month, day = current_date.year, current_date.month, current_date.day
# Create datetime object for starting date
start_date = datetime.datetime(year, month, day)
# Calculate end date (starting date + duration months - 1 day)
end_date = start_date + relativedelta(months=duration-1)
# Create a set of years (to avoid duplicates)
years_set = set()
# Add starting year
years_set.add(start_date.year)
# Add ending year
years_set.add(end_date.year)
# If there are years in between, add those too
for y in range(start_date.year + 1, end_date.year):
years_set.add(y)
# Convert set to sorted list
calculated_years = sorted(list(years_set))
print(f"Calculated years for sheet visibility: {calculated_years}")
else:
# Default to current year if no starting date
calculated_years = [datetime.datetime.now().year]
except Exception as e:
print(f"Error calculating years for sheet visibility: {e}")
calculated_years = [datetime.datetime.now().year]
# Hide forecast sheets that aren't in the calculated years array
# No sheet renaming - just check existing sheet names
for sheet_name in wb.sheetnames:
# Check if this is a forecast sheet
# Forecast sheets have names like "2025 Forecast"
if "Forecast" in sheet_name:
# Extract the year from the sheet name
try:
sheet_year = int(sheet_name.split()[0])
# Hide the sheet if its year is not in the calculated years
if sheet_year not in calculated_years:
sheet_obj = wb[sheet_name]
sheet_obj.sheet_state = 'hidden'
print(f"Hiding sheet '{sheet_name}' as year {sheet_year} is not in calculated years {calculated_years}")
except Exception as e:
print(f"Error extracting year from sheet name '{sheet_name}': {e}")
# Ensure formulas are marked for recalculation before final save
print("Ensuring formulas are marked for recalculation...")
wb.calculation.calcMode = 'auto'
wb.calculation.fullCalcOnLoad = True
wb.calculation.fullPrecision = True
# Save the workbook with updated variables and hidden sheets
print("Saving workbook with all updates...")
wb.save(excel_path)
print(f"Excel file updated successfully: {excel_path}")
return True
except Exception as e:
print(f"Error updating Excel file: {e}")
return False
if __name__ == "__main__":
# For testing purposes
import sys
if len(sys.argv) > 1:
excel_path = sys.argv[1]
update_excel_variables(excel_path)
else:
print("Please provide the path to the Excel file as an argument")