Files
bussines_case_automation/create_excel_xlwings.py

382 lines
16 KiB
Python
Executable File

#!/usr/bin/env python3
import json
import os
import shutil
import datetime
import re
import traceback
from pathlib import Path
from dateutil.relativedelta import relativedelta
import sys
import unicodedata
from openpyxl import load_workbook
import zipfile
from xml.etree import ElementTree as ET
def create_excel_from_template() -> bool:
    """
    Copy the Excel template to the output folder and fill it from config.json.

    The output file name is the template name with ``{store_name}`` replaced by
    the configured store name, followed by the year range covered by the
    business case. After copying, sheet titles containing ``{store_name}`` are
    renamed and the config values are injected via ``inject_variables``.

    Returns:
        bool: True when the file was copied and the variables were injected,
        False on any failure (missing config/template, unreadable config,
        copy error, or injection failure).
    """
    # Define paths (everything is resolved relative to this script)
    script_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(script_dir, 'config.json')
    template_path = os.path.join(script_dir, 'template', 'Footprints AI for {store_name} - Retail Media Business Case Calculations.xlsx')
    output_dir = os.path.join(script_dir, 'output')
    print(f"[DEBUG] script_dir={script_dir}")
    print(f"[DEBUG] config_path={config_path}")
    print(f"[DEBUG] template_path={template_path}")
    print(f"[DEBUG] output_dir={output_dir}")
    print(f"[DEBUG] cwd={os.getcwd()}")

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    if not os.path.exists(config_path):
        print(f"[ERROR] config.json not found at: {config_path}")
        return False

    # Read config.json to get store_name, starting_date, and duration
    try:
        # JSON is UTF-8 by specification; do not depend on the platform locale,
        # otherwise non-ASCII store names can be mis-decoded.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
        user_data = config.get('user_data', {})
        store_name = user_data.get('store_name', '')
        starting_date = user_data.get('starting_date', '')
        duration = user_data.get('duration', 36)
        # If store_name is empty, use a default value
        if not store_name:
            store_name = "Your Store"
        # Calculate years array based on starting_date and duration
        years = calculate_years(starting_date, duration)
        print(f"Years in the period: {years}")
    except Exception as e:
        print(f"Error reading config file: {e}")
        print(traceback.format_exc())
        return False

    # Use first and last years from the array in the filename
    if not years:
        # Fallback to current year if years array is empty
        year_range = f"{datetime.datetime.now().year}"
    elif len(years) == 1:
        year_range = f"{years[0]}"
    else:
        year_range = f"{years[0]}-{years[-1]}"

    # The store name comes from user-supplied config. Path separators (and NUL)
    # must not reach the file name, or the copy either fails or lands outside
    # output_dir. The unmodified store_name is still used for sheet titles.
    file_safe_store_name = re.sub(r'[\\/\x00]', '_', str(store_name))

    # Create output filename with store_name and year range
    output_filename = f"Footprints AI for {file_safe_store_name} - Retail Media Business Case Calculations {year_range}.xlsx"
    output_path = os.path.join(output_dir, output_filename)
    print(f"[DEBUG] output_path={output_path}")

    if not os.path.exists(template_path):
        print(f"[ERROR] Template not found at: {template_path}")
        return False

    # Copy the template to the output directory with the new name
    try:
        shutil.copy2(template_path, output_path)
        if not os.path.exists(output_path):
            print(f"[ERROR] Copy reported success but file missing: {output_path}")
            return False
        print(f"Excel file created successfully: {output_path}")

        # Rename any sheets that contain the {store_name} token.
        # Best effort: a rename failure must not prevent variable injection.
        try:
            renamed_count = rename_store_placeholders(output_path, store_name)
            print(f"[RENAME] Sheets renamed: {renamed_count}")
        except Exception as e:
            print(f"[RENAME] Unexpected error while renaming sheets: {e}")

        # Now inject variables from config.json into the Variables sheet
        if not inject_variables(output_path, config):
            print("[ERROR] inject_variables failed.")
            return False
        return True
    except Exception as e:
        print(f"Error creating Excel file: {e}")
        print(traceback.format_exc())
        return False
def calculate_years(starting_date, duration):
    """
    Calculate the list of calendar years touched by a period of ``duration``
    months that begins in the month of ``starting_date``.

    Args:
        starting_date (str): Date in format dd/mm/yyyy, dd.mm.yyyy, or yyyy-mm-dd
        duration (int): Number of months, including the starting month.
            Numeric strings (e.g. "36") are accepted; values below 1 are
            treated as 1 so the period never ends before it starts.
    Returns:
        list: Sorted, consecutive years in the period [year1, year2, ...].
        Falls back to [current year] when the date is empty, has an
        unrecognized format, or cannot be parsed.
    """
    # Default result if we can't parse the date
    default_years = [datetime.datetime.now().year]

    # If starting_date is empty, return current year
    if not starting_date:
        return default_years

    try:
        # Try to parse the date, supporting multiple formats
        if '/' in starting_date:
            day, month, year = map(int, starting_date.split('/'))
        elif '.' in starting_date:
            day, month, year = map(int, starting_date.split('.'))
        elif '-' in starting_date:
            # Handle yyyy-mm-dd format (from HTML date input)
            parts = starting_date.split('-')
            if len(parts) != 3:
                return default_years
            year, month, day = map(int, parts)
        else:
            # If format is not recognized, return default
            return default_years

        # Building the datetime validates the calendar date (e.g. rejects 31/02).
        start_date = datetime.datetime(year, month, day)

        # The period covers `duration` months including the starting one, so
        # the last month is (duration - 1) months after the start. Only the
        # year matters here, so plain month arithmetic is sufficient.
        months = max(int(duration), 1)
        end_year = start_date.year + (start_date.month - 1 + months - 1) // 12
        if end_year > datetime.MAXYEAR:
            raise ValueError(f"period ends after year {datetime.MAXYEAR}")

        return list(range(start_date.year, end_year + 1))
    except Exception as e:
        # Deliberately best-effort: any parsing problem yields the default.
        print(f"Error calculating years: {e}")
        return default_years
def _normalize_name(s: str) -> str:
"""Normalize sheet names to avoid issues with en-dash/nbsp/casing."""
if s is None:
return ""
s = unicodedata.normalize("NFKC", s)
return s.replace("\u2013", "-").replace("\u00A0", " ").strip().lower()
def _diagnose_xlsx(path: str):
"""Inspect the XLSX container to list sheets and their types when openpyxl sees none."""
try:
with zipfile.ZipFile(path, 'r') as z:
print("[DIAG] ZIP entries:", len(z.namelist()))
# Workbook relationships and workbook xml
if 'xl/workbook.xml' in z.namelist():
xml = z.read('xl/workbook.xml')
root = ET.fromstring(xml)
ns = {'ns': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
sheets = root.findall('.//ns:sheets/ns:sheet', ns)
if not sheets:
print("[DIAG] No <sheet> nodes found in xl/workbook.xml")
for s in sheets:
print(f"[DIAG] sheet name={s.get('name')!r} id={s.get('sheetId')} r:id={s.get('{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id')}")
# Check for 'fileVersion' and workbookPr flags that sometimes confuse parsers
wbpr = root.find('.//ns:workbookPr', ns)
if wbpr is not None:
print("[DIAG] workbookPr attrs:", wbpr.attrib)
else:
print("[DIAG] Missing xl/workbook.xml (file may be corrupted or not an xlsx).")
# Look for worksheet vs chartsheet parts
worksheet_parts = [n for n in z.namelist() if n.startswith('xl/worksheets/sheet') and n.endswith('.xml')]
chartsheet_parts = [n for n in z.namelist() if n.startswith('xl/chartsheets/sheet') and n.endswith('.xml')]
dialogsheets = [n for n in z.namelist() if n.startswith('xl/dialogsheets/') and n.endswith('.xml')]
print(f"[DIAG] worksheets={len(worksheet_parts)}, chartsheets={len(chartsheet_parts)}, dialogsheets={len(dialogsheets)}")
if chartsheet_parts and not worksheet_parts:
print("[DIAG] This workbook appears to contain only chart sheets (no worksheets). openpyxl will show zero sheetnames.")
except Exception as e:
print(f"[DIAG] Failed to inspect xlsx: {e}")
print(traceback.format_exc())
def _sanitize_sheet_title(title: str) -> str:
"""
Make a worksheet title Excel-safe:
- Replace invalid characters : \ / ? * [ ]
- Trim to 31 chars
"""
invalid = r'[:\\/\?\*\[\]]'
safe = re.sub(invalid, ' ', title).strip()
if len(safe) > 31:
safe = safe[:31]
return safe
def rename_store_placeholders(excel_path: str, store_name: str) -> int:
    """
    Rename any worksheet whose title contains '{store_name}' by replacing the token
    with the provided store_name, enforcing Excel naming rules and uniqueness.

    Args:
        excel_path: Path of the workbook to modify in place.
        store_name: Replacement text; an empty value falls back to "Your Store".
    Returns:
        The number of sheets renamed and saved. Returns 0 when the workbook
        cannot be opened, nothing needed renaming, or saving the renamed
        workbook failed (in which case nothing was persisted).
    """
    try:
        wb = load_workbook(excel_path, data_only=False)
    except Exception as e:
        print(f"[RENAME] Could not open workbook for renaming: {e}")
        return 0

    # NOTE(review): this only changes sheet titles; formulas or defined names
    # in the template that reference the old title are presumably not
    # rewritten by openpyxl. Confirm the template has no such references.
    replacement = store_name or "Your Store"
    renamed = 0
    existing = {ws.title for ws in wb.worksheets}
    for ws in wb.worksheets:
        old = ws.title
        if "{store_name}" not in old:
            continue
        new_title = _sanitize_sheet_title(old.replace("{store_name}", replacement))

        # Ensure uniqueness by appending (2), (3), ...
        candidate = new_title
        suffix = 2
        while candidate in existing and candidate != old:
            # leave room for " (nn)" within the 31-character limit
            max_base = 31 - (len(str(suffix)) + 3)
            candidate = f"{new_title[:max_base]} ({suffix})"
            suffix += 1

        if candidate == old:
            continue
        try:
            ws.title = candidate
        except Exception as e:
            print(f"[RENAME] Failed to rename '{old}' to '{candidate}': {e}")
            continue
        existing.discard(old)
        existing.add(candidate)
        renamed += 1
        print(f"[RENAME] '{old}' -> '{candidate}'")

    if renamed == 0:
        print("[RENAME] No sheets contained '{store_name}'.")
        return 0

    try:
        wb.save(excel_path)
    except Exception as e:
        # The renames only exist in memory; report that nothing was persisted.
        print(f"[RENAME] Failed to save workbook after renames: {e}")
        return 0
    print(f"[RENAME] Saved workbook after renaming {renamed} sheet(s).")
    return renamed
def _store_type_cells(user_data, section_key, row):
    """Return the ordered (cell_ref, value) pairs for one store-type row."""
    # `or {}` tolerates a section that is missing or explicitly null in JSON.
    section = user_data.get(section_key) or {}
    return [
        (f"H{row}", section.get("stores_number", 0)),
        (f"C{row}", section.get("monthly_transactions", 0)),
        (f"I{row}", 1 if section.get("has_digital_screens", False) else 0),
        (f"J{row}", section.get("screen_count", 0)),
        (f"K{row}", section.get("screen_percentage", 0)),
        (f"M{row}", 1 if section.get("has_in_store_radio", False) else 0),
        (f"N{row}", section.get("radio_percentage", 0)),
    ]


def inject_variables(excel_path, config):
    """
    Inject variables from config.json into the Variables sheet of the Excel file.
    Linux-only path: uses openpyxl (no Excel required). This reads/writes .xlsx safely; .xlsm VBA projects are not preserved if you re-save them.

    Args:
        excel_path (str): Workbook to update in place. A path ending in .xlsm
            is saved under the same name with an .xlsx extension instead.
        config (dict): Parsed config.json; values are read from its
            "user_data" section, with defaults for missing keys.
    Returns:
        bool: True when the values were written and the workbook saved,
        False when the workbook has no sheets or any error occurred.
    """
    user_data = config.get("user_data") or {}

    # Map cell references to config values. Insertion order is the write order.
    cell_mappings = {
        "B2": user_data.get("store_name", ""),
        "B31": user_data.get("starting_date", ""),
        "B32": user_data.get("duration", 36),
        "B37": user_data.get("open_days_per_month", 0),
    }
    # One spreadsheet row per store type, all sharing the same column layout.
    for section_key, row in (
        ("convenience_store_type", 37),
        ("supermarket_store_type", 38),
        ("hypermarket_store_type", 39),
    ):
        cell_mappings.update(_store_type_cells(user_data, section_key, row))
    cell_mappings.update({
        "B43": user_data.get("website_visitors", 0),
        "B44": user_data.get("app_users", 0),
        "B45": user_data.get("loyalty_users", 0),
        "B49": user_data.get("facebook_followers", 0),
        "B50": user_data.get("instagram_followers", 0),
        "B51": user_data.get("google_views", 0),
        "B53": user_data.get("sms_users", 0),
    })

    # Warn if trying to process a macro-enabled workbook: openpyxl will not preserve VBA
    if excel_path.lower().endswith(".xlsm"):
        print("Warning: .xlsm detected. openpyxl cannot preserve VBA projects; consider switching to a .xlsx template or running this step on Windows/Excel.")

    # ---- openpyxl fallback (works on Linux, no Excel required) ----
    try:
        wb = load_workbook(excel_path, data_only=False)
        sheet_names = wb.sheetnames
        if not sheet_names:
            print("[WARN] openpyxl reports no worksheets. Running container diagnostics…")
            _diagnose_xlsx(excel_path)
        print("Available sheets (openpyxl):", [repr(s) for s in sheet_names])

        # Find Variables sheet (case-insensitive, normalized); first match wins
        target_name = next(
            (name for name in sheet_names if "variable" in _normalize_name(name)),
            None,
        )
        if target_name is None:
            if not sheet_names:
                print("Suggestion: Ensure the template has at least one normal worksheet (not only chartsheets). Open and 'Save As' a regular .xlsx in Excel.")
                print("Warning: Workbook has no sheets. No variables were injected.")
                return False
            # No sheet matched by name: fall back to the last sheet.
            target_name = sheet_names[-1]
            print(f"Variables sheet not found by name; using last sheet: {target_name}")

        ws = wb[target_name]

        # Write values; a single bad cell is reported but does not abort the rest
        for cell_ref, value in cell_mappings.items():
            try:
                ws[cell_ref].value = value
                print(f"[openpyxl] Set {cell_ref} = {value}")
            except Exception as e:
                print(f"Warning: Could not set value for cell {cell_ref}: {e}")

        # Ensure we're saving to .xlsx path to avoid accidental macro loss if template was .xlsm
        save_path = excel_path
        if save_path.lower().endswith(".xlsm"):
            save_path = save_path[:-5] + ".xlsx"
            print(f"Saving as {save_path} to avoid stripping VBA from .xlsm.")
        wb.save(save_path)
        print(f"Variables successfully injected into {save_path} using openpyxl")
        return True
    except Exception as e:
        print(f"Error in openpyxl fallback: {e}")
        print(traceback.format_exc())
        return False
if __name__ == "__main__":
    # Exit codes: 0 = success, 1 = reported failure, 2 = unhandled exception.
    exit_code = 2
    try:
        exit_code = 0 if create_excel_from_template() else 1
    except Exception as e:
        print(f"[FATAL] Unhandled exception: {e}")
        print(traceback.format_exc())
    sys.exit(exit_code)