# bussines_case_automation/create_excel_xlwings.py

#!/usr/bin/env python3
import json
import os
import shutil
import datetime
import re
import traceback
from pathlib import Path
from dateutil.relativedelta import relativedelta
import sys
import unicodedata
from openpyxl import load_workbook
import zipfile
from xml.etree import ElementTree as ET

def _safe_filename_component(text):
    """
    Return *text* with path separators and control characters replaced by spaces.

    Used for values that are embedded in an output file name, so that a value
    such as "A/B" or "..\\x" cannot point the output path at another directory.
    """
    return re.sub(r'[/\\\x00-\x1f]', ' ', str(text))


def create_excel_from_template():
    """
    Create a copy of the Excel template, replacing {store_name} with the value from config.json
    and save it to the output folder.

    Steps:
    1. Read ``config.json`` (next to this script) for store_name, starting_date and duration.
    2. Copy the template workbook into ``output/`` under a name that contains the
       store name and the year range returned by ``calculate_years``.
    3. Rename sheets whose title contains the ``{store_name}`` token (best effort).
    4. Inject the configured values via ``inject_variables``.

    Returns:
        bool: True when the file was created and variables were injected,
        False on any failure (the reason is printed).
    """
    # Define paths
    script_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(script_dir, 'config.json')
    template_path = os.path.join(script_dir, 'template', 'Footprints AI for {store_name} - Retail Media Business Case Calculations.xlsx')
    output_dir = os.path.join(script_dir, 'output')

    print(f"[DEBUG] script_dir={script_dir}")
    print(f"[DEBUG] config_path={config_path}")
    print(f"[DEBUG] template_path={template_path}")
    print(f"[DEBUG] output_dir={output_dir}")
    print(f"[DEBUG] cwd={os.getcwd()}")

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    if not os.path.exists(config_path):
        print(f"[ERROR] config.json not found at: {config_path}")
        return False

    # Read config.json to get store_name, starting_date, and duration
    try:
        # JSON text is UTF-8; relying on the locale default would garble
        # non-ASCII store names on platforms whose default is not UTF-8.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)

        # "user_data": null must behave like a missing section.
        user_data = config.get('user_data') or {}
        # If store_name is empty, use a default value
        store_name = user_data.get('store_name', '') or "Your Store"
        starting_date = user_data.get('starting_date', '')
        duration = user_data.get('duration', 36)

        # Calculate years array based on starting_date and duration
        years = calculate_years(starting_date, duration)
        print(f"Years in the period: {years}")
    except Exception as e:
        print(f"Error reading config file: {e}")
        print(traceback.format_exc())
        return False

    # Use first and last years from the array in the filename;
    # fall back to the current year if the array is empty.
    if not years:
        years = [datetime.datetime.now().year]
    if len(years) == 1:
        year_range = f"{years[0]}"
    else:
        year_range = f"{years[0]}-{years[-1]}"

    # Create output filename with store_name and year range. Only the file
    # name is sanitized; the sheet titles and the injected cell keep the
    # store name exactly as configured.
    file_store_name = _safe_filename_component(store_name)
    output_filename = f"Footprints AI for {file_store_name} - Retail Media Business Case Calculations {year_range}.xlsx"
    output_path = os.path.join(output_dir, output_filename)

    print(f"[DEBUG] output_path={output_path}")

    if not os.path.exists(template_path):
        print(f"[ERROR] Template not found at: {template_path}")
        return False

    # Copy the template to the output directory with the new name
    try:
        shutil.copy2(template_path, output_path)
        if not os.path.exists(output_path):
            print(f"[ERROR] Copy reported success but file missing: {output_path}")
            return False
        print(f"Excel file created successfully: {output_path}")

        # Rename any sheets that contain the {store_name} token.
        # This step is best effort: a failure is reported but does not abort.
        try:
            renamed_count = rename_store_placeholders(output_path, store_name)
            print(f"[RENAME] Sheets renamed: {renamed_count}")
        except Exception as e:
            print(f"[RENAME] Unexpected error while renaming sheets: {e}")

        # Now inject variables from config.json into the Variables sheet
        if not inject_variables(output_path, config):
            print("[ERROR] inject_variables failed.")
            return False

        return True
    except Exception as e:
        print(f"Error creating Excel file: {e}")
        print(traceback.format_exc())
        return False

def calculate_years(starting_date, duration):
    """
    Calculate an array of years that appear in the period from starting_date for duration months.

    The period covers ``duration`` calendar months and the starting month counts
    as the first one, so the last month is ``duration - 1`` months after the start.

    Args:
        starting_date (str): Date in format dd/mm/yyyy, dd.mm.yyyy, or yyyy-mm-dd
        duration (int | str): Number of months, including the starting month.
            Numeric strings such as "36" are accepted; values below 1 are
            treated as 1 so the result never reaches back before the start.

    Returns:
        list: Sorted array of years in the period [year1, year2, ...]. When the
        date is empty, unparseable or invalid (or the duration is not a
        number) the current year is returned as a single-element list.
    """
    # Default result if we can't parse the date
    default_years = [datetime.datetime.now().year]

    # If starting_date is empty, return current year
    if not starting_date:
        return default_years

    try:
        # Try to parse the date, supporting multiple formats
        if '/' in starting_date:
            day, month, year = map(int, starting_date.split('/'))
        elif '.' in starting_date:
            day, month, year = map(int, starting_date.split('.'))
        elif '-' in starting_date:
            # Handle yyyy-mm-dd format (from HTML date input)
            parts = starting_date.split('-')
            if len(parts) != 3:
                return default_years
            year, month, day = map(int, parts)
        else:
            # If format is not recognized, return default
            return default_years

        # Building the datetime validates the components (e.g. rejects 31/02).
        start_date = datetime.datetime(year, month, day)

        # Config values may arrive as strings; a period is at least one month.
        months = max(int(duration), 1)

        # Only the year of the last month matters, so plain month arithmetic is
        # enough: count months from January of the starting year.
        last_month_index = (start_date.month - 1) + (months - 1)
        end_year = start_date.year + last_month_index // 12
        if end_year > datetime.MAXYEAR:
            raise ValueError(f"period ends after year {datetime.MAXYEAR}")

        return list(range(start_date.year, end_year + 1))

    except Exception as e:
        print(f"Error calculating years: {e}")
        return default_years

def _normalize_name(s: str) -> str:
    """Normalize sheet names to avoid issues with en-dash/nbsp/casing."""
    if s is None:
        return ""
    s = unicodedata.normalize("NFKC", s)
    return s.replace("\u2013", "-").replace("\u00A0", " ").strip().lower()

def _diagnose_xlsx(path: str):
    """Inspect the XLSX container to list sheets and their types when openpyxl sees none."""
    try:
        with zipfile.ZipFile(path, 'r') as z:
            print("[DIAG] ZIP entries:", len(z.namelist()))
            # Workbook relationships and workbook xml
            if 'xl/workbook.xml' in z.namelist():
                xml = z.read('xl/workbook.xml')
                root = ET.fromstring(xml)
                ns = {'ns': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
                sheets = root.findall('.//ns:sheets/ns:sheet', ns)
                if not sheets:
                    print("[DIAG] No <sheet> nodes found in xl/workbook.xml")
                for s in sheets:
                    print(f"[DIAG] sheet name={s.get('name')!r} id={s.get('sheetId')} r:id={s.get('{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id')}")
                # Check for 'fileVersion' and workbookPr flags that sometimes confuse parsers
                wbpr = root.find('.//ns:workbookPr', ns)
                if wbpr is not None:
                    print("[DIAG] workbookPr attrs:", wbpr.attrib)
            else:
                print("[DIAG] Missing xl/workbook.xml (file may be corrupted or not an xlsx).")

            # Look for worksheet vs chartsheet parts
            worksheet_parts = [n for n in z.namelist() if n.startswith('xl/worksheets/sheet') and n.endswith('.xml')]
            chartsheet_parts = [n for n in z.namelist() if n.startswith('xl/chartsheets/sheet') and n.endswith('.xml')]
            dialogsheets = [n for n in z.namelist() if n.startswith('xl/dialogsheets/') and n.endswith('.xml')]
            print(f"[DIAG] worksheets={len(worksheet_parts)}, chartsheets={len(chartsheet_parts)}, dialogsheets={len(dialogsheets)}")
            if chartsheet_parts and not worksheet_parts:
                print("[DIAG] This workbook appears to contain only chart sheets (no worksheets). openpyxl will show zero sheetnames.")
    except Exception as e:
        print(f"[DIAG] Failed to inspect xlsx: {e}")
        print(traceback.format_exc())

def _sanitize_sheet_title(title: str) -> str:
    """
    Make a worksheet title Excel-safe:
    - Replace invalid characters : \ / ? * [ ]
    - Trim to 31 chars
    """
    invalid = r'[:\\/\?\*\[\]]'
    safe = re.sub(invalid, ' ', title).strip()
    if len(safe) > 31:
        safe = safe[:31]
    return safe

def rename_store_placeholders(excel_path: str, store_name: str) -> int:
    """
    Rename any worksheet whose title contains '{store_name}' by replacing the token
    with the provided store_name, enforcing Excel naming rules and uniqueness.

    Uniqueness is checked case-insensitively against every sheet title in the
    workbook, and " (2)", " (3)", ... is appended (within the 31-character
    limit) until the title is free. The workbook is saved in place only when
    at least one sheet was renamed.

    Args:
        excel_path: Path of the workbook to modify in place.
        store_name: Replacement for the token; "Your Store" is used when empty.

    Returns:
        The number of sheets renamed and saved. 0 is returned when the
        workbook cannot be opened, nothing needed renaming, or the save failed.
    """
    # NOTE(review): only sheet titles are changed here. It looks like openpyxl
    # does not rewrite formulas or defined names that mention the old title;
    # confirm the template has no references to placeholder-named sheets.
    token = "{store_name}"

    try:
        wb = load_workbook(excel_path, data_only=False)
    except Exception as e:
        print(f"[RENAME] Could not open workbook for renaming: {e}")
        return 0

    renamed = 0
    # Sheet names must be unique regardless of case, and chartsheets share the
    # same namespace as worksheets, so track every title in folded form.
    taken = {name.casefold() for name in wb.sheetnames}

    for ws in wb.worksheets:
        old = ws.title
        if token not in old:
            continue
        new_title = _sanitize_sheet_title(old.replace(token, store_name or "Your Store"))

        # The sheet's own current title never conflicts with itself.
        others = taken - {old.casefold()}

        # Ensure uniqueness by appending (2), (3), ...
        candidate = new_title
        suffix = 2
        while candidate.casefold() in others:
            tail = f" ({suffix})"
            # leave room for " (nn)" inside the 31-character limit
            candidate = f"{new_title[:31 - len(tail)].rstrip()}{tail}"
            suffix += 1

        if candidate == old:
            continue

        try:
            ws.title = candidate
        except Exception as e:
            print(f"[RENAME] Failed to rename '{old}' to '{candidate}': {e}")
            continue

        # openpyxl may adjust the title itself (e.g. to avoid a duplicate), so
        # record and report the title that was actually assigned.
        actual = ws.title
        taken.discard(old.casefold())
        taken.add(actual.casefold())
        renamed += 1
        print(f"[RENAME] '{old}' → '{actual}'")

    if renamed == 0:
        print("[RENAME] No sheets contained '{store_name}'.")
        return 0

    try:
        wb.save(excel_path)
    except Exception as e:
        print(f"[RENAME] Failed to save workbook after renames: {e}")
        # Nothing reached the file on disk, so no rename took effect.
        return 0

    print(f"[RENAME] Saved workbook after renaming {renamed} sheet(s).")
    return renamed

def inject_variables(excel_path: str, config: dict) -> bool:
    """
    Inject variables from config.json into the Variables sheet of the Excel file.
    Linux-only path: uses openpyxl (no Excel required). This reads/writes .xlsx safely; .xlsm VBA projects are not preserved if you re-save them.

    The target is the first worksheet whose normalized title contains
    "variable"; when none matches, the last worksheet is used. Chart sheets
    are never selected because they have no cells to write to.

    Args:
        excel_path: Path of the workbook to update. A ``.xlsm`` path is saved
            to a sibling ``.xlsx`` file instead of being overwritten.
        config: Parsed config.json; values are read from its "user_data" section.

    Returns:
        True when the workbook was saved, False when no worksheet is available
        or opening/saving the workbook failed. A failure to set an individual
        cell is reported as a warning and does not change the result.
    """
    # A JSON null section must behave like a missing one.
    user_data = config.get("user_data") or {}

    # Map cell references to config values. Insertion order is the order in
    # which cells are written and logged.
    cell_mappings = {
        "B2": user_data.get("store_name", ""),
        "B31": user_data.get("starting_date", ""),
        "B32": user_data.get("duration", 36),
        "B37": user_data.get("open_days_per_month", 0),
    }

    # Each store type fills the same columns of its own row.
    store_rows = (
        (37, "convenience_store_type"),
        (38, "supermarket_store_type"),
        (39, "hypermarket_store_type"),
    )
    for row, section in store_rows:
        store = user_data.get(section) or {}
        cell_mappings[f"H{row}"] = store.get("stores_number", 0)
        cell_mappings[f"C{row}"] = store.get("monthly_transactions", 0)
        cell_mappings[f"I{row}"] = 1 if store.get("has_digital_screens", False) else 0
        cell_mappings[f"J{row}"] = store.get("screen_count", 0)
        cell_mappings[f"K{row}"] = store.get("screen_percentage", 0)
        cell_mappings[f"M{row}"] = 1 if store.get("has_in_store_radio", False) else 0
        cell_mappings[f"N{row}"] = store.get("radio_percentage", 0)

    cell_mappings.update({
        "B43": user_data.get("website_visitors", 0),
        "B44": user_data.get("app_users", 0),
        "B45": user_data.get("loyalty_users", 0),
        "B49": user_data.get("facebook_followers", 0),
        "B50": user_data.get("instagram_followers", 0),
        "B51": user_data.get("google_views", 0),
        "B53": user_data.get("sms_users", 0),
    })

    # Warn if trying to process a macro-enabled workbook: openpyxl will not preserve VBA
    if excel_path.lower().endswith(".xlsm"):
        print("Warning: .xlsm detected. openpyxl cannot preserve VBA projects; consider switching to a .xlsx template or running this step on Windows/Excel.")

    # ---- openpyxl fallback (works on Linux, no Excel required) ----
    try:
        wb = load_workbook(excel_path, data_only=False)
        worksheets = wb.worksheets
        if not worksheets:
            print("[WARN] openpyxl reports no worksheets. Running container diagnostics…")
            _diagnose_xlsx(excel_path)
        print("Available sheets (openpyxl):", [repr(s) for s in wb.sheetnames])

        # Find Variables sheet (case-insensitive, normalized)
        ws = next(
            (sheet for sheet in worksheets if "variable" in _normalize_name(sheet.title)),
            None,
        )
        if ws is None:
            if not worksheets:
                print("Suggestion: Ensure the template has at least one normal worksheet (not only chartsheets). Open and 'Save As' a regular .xlsx in Excel.")
                print("Warning: Workbook has no sheets. No variables were injected.")
                return False
            ws = worksheets[-1]
            print(f"Variables sheet not found by name; using last sheet: {ws.title}")

        # Write values; a cell that cannot be set is reported and skipped.
        for cell_ref, value in cell_mappings.items():
            try:
                ws[cell_ref].value = value
                print(f"[openpyxl] Set {cell_ref} = {value}")
            except Exception as e:
                print(f"Warning: Could not set value for cell {cell_ref}: {e}")

        # Ensure we're saving to .xlsx path to avoid accidental macro loss if template was .xlsm
        save_path = excel_path
        if save_path.lower().endswith(".xlsm"):
            save_path = save_path[:-5] + ".xlsx"
            print(f"Saving as {save_path} to avoid stripping VBA from .xlsm.")

        wb.save(save_path)
        print(f"Variables successfully injected into {save_path} using openpyxl")
        return True
    except Exception as e:
        print(f"Error in openpyxl fallback: {e}")
        print(traceback.format_exc())
        return False

if __name__ == "__main__":
    # Exit status: 0 = workbook created, 1 = reported failure, 2 = unexpected error.
    try:
        exit_code = 0 if create_excel_from_template() else 1
    except Exception as e:
        print(f"[FATAL] Unhandled exception: {e}")
        print(traceback.format_exc())
        exit_code = 2
    sys.exit(exit_code)