diff --git a/config.json b/config.json index 2308372..8ae0e6b 100644 --- a/config.json +++ b/config.json @@ -1,37 +1,36 @@ { "user_data": { - "first_name": "ggggfd", - "last_name": "fdgd", - "company_name": "hlhl", - "email": "kjhkjhk", - "phone": "hkjhkj", - "store_name": "asc", - "country": "hlkhkj", - "starting_date": "2025-10-01", - "duration": 24, + "first_name": "gfgdgfd", + "last_name": "gfdgdf", + "company_name": "gfdgdf", + "email": "gfdgf", + "phone": "gfdgfdg", + "store_name": "gfdgfdgfgd", + "country": "gfdgfd", + "starting_date": "2025-09-25", + "duration": 36, "store_types": [ - "Convenience", - "Minimarket" + "Convenience" ], "open_days_per_month": 30, "convenience_store_type": { - "stores_number": 100, - "monthly_transactions": 101010, + "stores_number": 1233, + "monthly_transactions": 32131312, "has_digital_screens": true, - "screen_count": 3, - "screen_percentage": 100, + "screen_count": 2, + "screen_percentage": 123123, "has_in_store_radio": true, - "radio_percentage": 100, + "radio_percentage": 321, "open_days_per_month": 30 }, "minimarket_store_type": { - "stores_number": 1000, - "monthly_transactions": 123123123, - "has_digital_screens": true, - "screen_count": 2, - "screen_percentage": 1000, - "has_in_store_radio": true, - "radio_percentage": 1000, + "stores_number": 0, + "monthly_transactions": 0, + "has_digital_screens": false, + "screen_count": 0, + "screen_percentage": 0, + "has_in_store_radio": false, + "radio_percentage": 0, "open_days_per_month": 30 }, "supermarket_store_type": { @@ -55,19 +54,17 @@ "open_days_per_month": 30 }, "on_site_channels": [ - "Website", - "Mobile App" + "Website" ], - "website_visitors": 121212, - "app_users": 232323, + "website_visitors": 321321, + "app_users": 0, "loyalty_users": 0, "off_site_channels": [ - "Facebook Business", - "Google Business Profile" + "Facebook Business" ], - "facebook_followers": 123123, + "facebook_followers": 32131312, "instagram_followers": 0, - "google_views": 123123, + "google_views": 0, "email_subscribers": 0, "sms_users": 0, "whatsapp_contacts": 0, diff --git a/create_excel.py b/create_excel.py deleted file mode 100644 index 432a2da..0000000 --- a/create_excel.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python3 -import json -import os -import shutil -import datetime -import re -from pathlib import Path -from dateutil.relativedelta import relativedelta -from update_excel import update_excel_variables - -def create_excel_from_template(): - """ - Create a copy of the Excel template and save it to the output folder, - then inject variables from config.json into the Variables sheet. - """ - # Define paths - script_dir = os.path.dirname(os.path.abspath(__file__)) - config_path = os.path.join(script_dir, 'config.json') - # Look for any Excel template in the template directory - template_dir = os.path.join(script_dir, 'template') - template_files = [f for f in os.listdir(template_dir) if f.endswith('.xlsx')] - if not template_files: - print("Error: No Excel template found in the template directory") - return False - template_path = os.path.join(template_dir, template_files[0]) - output_dir = os.path.join(script_dir, 'output') - - # Ensure output directory exists - os.makedirs(output_dir, exist_ok=True) - - # Read config.json to get store_name, starting_date, and duration - try: - with open(config_path, 'r') as f: - config = json.load(f) - user_data = config.get('user_data', {}) - store_name = user_data.get('store_name', '') - starting_date = user_data.get('starting_date', '') - duration = user_data.get('duration', 36) - - # If store_name is empty, use a default value - if not store_name: - store_name = "Your Store" - - # Calculate years array based on starting_date and duration - years = calculate_years(starting_date, duration) - print(f"Years in the period: {years}") - except Exception as e: - print(f"Error reading config file: {e}") - return False - - # Use first and last years from the array in the filename - year_range = "" - if years and len(years) > 0: - if len(years) == 1: - year_range = f"{years[0]}" - else: - year_range = f"{years[0]}-{years[-1]}" - else: - # Fallback to current year if years array is empty - current_year = datetime.datetime.now().year - year_range = f"{current_year}" - - # Create output filename with store_name and year range - output_filename = f"Footprints AI for {store_name} - Retail Media Business Case Calculations {year_range}.xlsx" - output_path = os.path.join(output_dir, output_filename) - - # Copy the template to the output directory with the new name - try: - shutil.copy2(template_path, output_path) - print(f"Excel file created successfully: {output_path}") - - # Update the Excel file with variables from config.json - print("Updating Excel file with variables from config.json...") - update_result = update_excel_variables(output_path) - - if update_result: - print("Excel file updated successfully with variables from config.json") - else: - print("Warning: Failed to update Excel file with variables from config.json") - - return True - except Exception as e: - print(f"Error creating Excel file: {e}") - return False - -def calculate_years(starting_date, duration): - """ - Calculate an array of years that appear in the period from starting_date for duration months. - - Args: - starting_date (str): Date in format dd/mm/yyyy or dd.mm.yyyy - duration (int): Number of months, including the starting month - - Returns: - list: Array of years in the period [year1, year2, ...] - """ - # Default result if we can't parse the date - default_years = [datetime.datetime.now().year] - - # If starting_date is empty, return current year - if not starting_date: - return default_years - - try: - # Try to parse the date, supporting both dd/mm/yyyy and dd.mm.yyyy formats - if '/' in starting_date: - day, month, year = map(int, starting_date.split('/')) - elif '.' in starting_date: - day, month, year = map(int, starting_date.split('.')) - elif '-' in starting_date: - # Handle ISO format (yyyy-mm-dd) - date_parts = starting_date.split('-') - if len(date_parts) == 3: - year, month, day = map(int, date_parts) - else: - # Default to current date if format is not recognized - return default_years - else: - # If format is not recognized, return default - return default_years - - # Create datetime object for starting date - start_date = datetime.datetime(year, month, day) - - # Calculate end date (starting date + duration months - 1 day) - end_date = start_date + relativedelta(months=duration-1) - - # Create a set of years (to avoid duplicates) - years_set = set() - - # Add starting year - years_set.add(start_date.year) - - # Add ending year - years_set.add(end_date.year) - - # If there are years in between, add those too - for y in range(start_date.year + 1, end_date.year): - years_set.add(y) - - # Convert set to sorted list - return sorted(list(years_set)) - - except Exception as e: - print(f"Error calculating years: {e}") - return default_years - -if __name__ == "__main__": - create_excel_from_template() \ No newline at end of file diff --git a/create_excel_clean.py b/create_excel_clean.py deleted file mode 100755 index fc31560..0000000 --- a/create_excel_clean.py +++ /dev/null @@ -1,326 +0,0 @@ -#!/usr/bin/env python3 -""" -Cross-platform Excel generation script using openpyxl. -This version ensures clean Excel files without SharePoint/OneDrive metadata. -""" -import json -import os -import datetime -from pathlib import Path -from dateutil.relativedelta import relativedelta -import openpyxl -from openpyxl.workbook import Workbook -from openpyxl.utils import get_column_letter -from openpyxl.styles import Font, PatternFill, Alignment, Border, Side -import tempfile -import shutil - - - - -def create_excel_from_template(): - """ - Create an Excel file from template with all placeholders replaced. - Uses openpyxl for maximum cross-platform compatibility. - """ - # Define paths - script_dir = os.path.dirname(os.path.abspath(__file__)) - config_path = os.path.join(script_dir, 'config.json') - template_dir = os.path.join(script_dir, 'template') - - # Try to find the template with either naming convention - possible_templates = [ - 'cleaned_template.xlsx', # Prefer cleaned template - 'Footprints AI for {store_name} - Retail Media Business Case Calculations.xlsx', - 'Footprints AI for store_name - Retail Media Business Case Calculations.xlsx' - ] - - template_path = None - for template_name in possible_templates: - full_path = os.path.join(template_dir, template_name) - if os.path.exists(full_path): - template_path = full_path - print(f"Found template: {template_name}") - break - - if not template_path: - print(f"Error: No template found in {template_dir}") - return False - - output_dir = os.path.join(script_dir, 'output') - os.makedirs(output_dir, exist_ok=True) - - # Read config.json - try: - with open(config_path, 'r') as f: - config = json.load(f) - user_data = config.get('user_data', {}) - store_name = user_data.get('store_name', 'Your Store') - starting_date = user_data.get('starting_date', '') - duration = user_data.get('duration', 36) - - if not store_name: - store_name = "Your Store" - - print(f"Processing for store: {store_name}") - - # Calculate years array - years = calculate_years(starting_date, duration) - calculated_years = years - print(f"Years in the period: {years}") - except Exception as e: - print(f"Error reading config file: {e}") - return False - - # Determine year range for filename - year_range = "" - if years and len(years) > 0: - if len(years) == 1: - year_range = f"{years[0]}" - else: - year_range = f"{years[0]}-{years[-1]}" - else: - year_range = f"{datetime.datetime.now().year}" - - # Create output filename - output_filename = f"Footprints AI for {store_name} - Retail Media Business Case Calculations {year_range}.xlsx" - output_path = os.path.join(output_dir, output_filename) - - try: - # Load template with data_only=False to preserve formulas - print("Loading template...") - wb = openpyxl.load_workbook(template_path, data_only=False, keep_vba=False) - - - # Build mapping of placeholder patterns to actual values - placeholder_patterns = [ - ('{store_name}', store_name), - ('store_name', store_name) - ] - - # Step 1: Create sheet name mappings - print("Processing sheet names...") - sheet_name_mappings = {} - sheets_to_rename = [] - - for sheet in wb.worksheets: - old_title = sheet.title - new_title = old_title - - for placeholder, replacement in placeholder_patterns: - if placeholder in new_title: - new_title = new_title.replace(placeholder, replacement) - - if old_title != new_title: - sheet_name_mappings[old_title] = new_title - sheet_name_mappings[f"'{old_title}'"] = f"'{new_title}'" - sheets_to_rename.append((sheet, new_title)) - print(f" Will rename: '{old_title}' -> '{new_title}'") - - # Step 2: Update all formulas and values - print("Updating formulas and cell values...") - total_updates = 0 - - for sheet in wb.worksheets: - if 'Variables' in sheet.title: - continue - - updates_in_sheet = 0 - for row in sheet.iter_rows(): - for cell in row: - try: - # Handle formulas - if hasattr(cell, '_value') and isinstance(cell._value, str) and cell._value.startswith('='): - original = cell._value - updated = original - - # Update sheet references - for old_ref, new_ref in sheet_name_mappings.items(): - updated = updated.replace(old_ref, new_ref) - - # Update placeholders - for placeholder, replacement in placeholder_patterns: - updated = updated.replace(placeholder, replacement) - - if updated != original: - cell._value = updated - updates_in_sheet += 1 - - # Handle regular text values - elif cell.value and isinstance(cell.value, str): - original = cell.value - updated = original - - for placeholder, replacement in placeholder_patterns: - updated = updated.replace(placeholder, replacement) - - if updated != original: - cell.value = updated - updates_in_sheet += 1 - except Exception as e: - # Skip cells that cause issues - continue - - if updates_in_sheet > 0: - print(f" {sheet.title}: {updates_in_sheet} updates") - total_updates += updates_in_sheet - - print(f"Total updates: {total_updates}") - - # Step 3: Rename sheets - print("Renaming sheets...") - for sheet, new_title in sheets_to_rename: - old_title = sheet.title - sheet.title = new_title - print(f" Renamed: '{old_title}' -> '{new_title}'") - - # Hide forecast sheets not in calculated years - if "Forecast" in new_title: - try: - sheet_year = int(new_title.split()[0]) - if sheet_year not in calculated_years: - sheet.sheet_state = 'hidden' - print(f" Hidden sheet '{new_title}' (year {sheet_year} not in range)") - except (ValueError, IndexError): - pass - - # Step 4: Update Variables sheet - print("Updating Variables sheet...") - if 'Variables' in wb.sheetnames: - update_variables_sheet(wb['Variables'], user_data) - - # Step 5: Save as a clean Excel file - print(f"Saving clean Excel file to: {output_path}") - - # Create a temporary file first - with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as tmp: - tmp_path = tmp.name - - # Save to temporary file - wb.save(tmp_path) - - # Re-open and save again to ensure clean structure - wb_clean = openpyxl.load_workbook(tmp_path, data_only=False) - wb_clean.save(output_path) - wb_clean.close() - - # Clean up temporary file - os.unlink(tmp_path) - - print(f"✓ Excel file created successfully: {output_filename}") - return True - - except Exception as e: - print(f"Error creating Excel file: {e}") - import traceback - traceback.print_exc() - return False - - -def update_variables_sheet(sheet, user_data): - """ - Update the Variables sheet with values from config.json - """ - cell_mappings = { - 'B2': user_data.get('store_name', ''), - 'B31': user_data.get('starting_date', ''), - 'B32': user_data.get('duration', 36), - 'B37': user_data.get('open_days_per_month', 0), - - # Store types - 'H37': user_data.get('convenience_store_type', {}).get('stores_number', 0), - 'C37': user_data.get('convenience_store_type', {}).get('monthly_transactions', 0), - 'I37': 1 if user_data.get('convenience_store_type', {}).get('has_digital_screens', False) else 0, - 'J37': user_data.get('convenience_store_type', {}).get('screen_count', 0), - 'K37': user_data.get('convenience_store_type', {}).get('screen_percentage', 0), - 'M37': 1 if user_data.get('convenience_store_type', {}).get('has_in_store_radio', False) else 0, - 'N37': user_data.get('convenience_store_type', {}).get('radio_percentage', 0), - - 'H38': user_data.get('minimarket_store_type', {}).get('stores_number', 0), - 'C38': user_data.get('minimarket_store_type', {}).get('monthly_transactions', 0), - 'I38': 1 if user_data.get('minimarket_store_type', {}).get('has_digital_screens', False) else 0, - 'J38': user_data.get('minimarket_store_type', {}).get('screen_count', 0), - 'K38': user_data.get('minimarket_store_type', {}).get('screen_percentage', 0), - 'M38': 1 if user_data.get('minimarket_store_type', {}).get('has_in_store_radio', False) else 0, - 'N38': user_data.get('minimarket_store_type', {}).get('radio_percentage', 0), - - 'H39': user_data.get('supermarket_store_type', {}).get('stores_number', 0), - 'C39': user_data.get('supermarket_store_type', {}).get('monthly_transactions', 0), - 'I39': 1 if user_data.get('supermarket_store_type', {}).get('has_digital_screens', False) else 0, - 'J39': user_data.get('supermarket_store_type', {}).get('screen_count', 0), - 'K39': user_data.get('supermarket_store_type', {}).get('screen_percentage', 0), - 'M39': 1 if user_data.get('supermarket_store_type', {}).get('has_in_store_radio', False) else 0, - 'N39': user_data.get('supermarket_store_type', {}).get('radio_percentage', 0), - - 'H40': user_data.get('hypermarket_store_type', {}).get('stores_number', 0), - 'C40': user_data.get('hypermarket_store_type', {}).get('monthly_transactions', 0), - 'I40': 1 if user_data.get('hypermarket_store_type', {}).get('has_digital_screens', False) else 0, - 'J40': user_data.get('hypermarket_store_type', {}).get('screen_count', 0), - 'K40': user_data.get('hypermarket_store_type', {}).get('screen_percentage', 0), - 'M40': 1 if user_data.get('hypermarket_store_type', {}).get('has_in_store_radio', False) else 0, - 'N40': user_data.get('hypermarket_store_type', {}).get('radio_percentage', 0), - - # Channels - 'B43': user_data.get('website_visitors', 0), - 'B44': user_data.get('app_users', 0), - 'B45': user_data.get('loyalty_users', 0), - 'B49': user_data.get('facebook_followers', 0), - 'B50': user_data.get('instagram_followers', 0), - 'B51': user_data.get('google_views', 0), - 'B52': user_data.get('email_subscribers', 0), - 'B53': user_data.get('sms_users', 0), - 'B54': user_data.get('whatsapp_contacts', 0) - } - - for cell_ref, value in cell_mappings.items(): - try: - sheet[cell_ref].value = value - print(f" Updated {cell_ref} = {value}") - except Exception as e: - print(f" Warning: Could not update {cell_ref}: {e}") - - -def calculate_years(starting_date, duration): - """ - Calculate an array of years that appear in the period. - """ - default_years = [datetime.datetime.now().year] - - if not starting_date: - return default_years - - try: - # Parse date - support multiple formats - if '/' in str(starting_date): - day, month, year = map(int, str(starting_date).split('/')) - elif '.' in str(starting_date): - day, month, year = map(int, str(starting_date).split('.')) - elif '-' in str(starting_date): - # ISO format (yyyy-mm-dd) - date_parts = str(starting_date).split('-') - if len(date_parts) == 3: - year, month, day = map(int, date_parts) - else: - return default_years - else: - return default_years - - start_date = datetime.datetime(year, month, day) - end_date = start_date + relativedelta(months=duration-1) - - years_set = set() - years_set.add(start_date.year) - years_set.add(end_date.year) - - for y in range(start_date.year + 1, end_date.year): - years_set.add(y) - - return sorted(list(years_set)) - - except Exception as e: - print(f"Error calculating years: {e}") - return default_years - - -if __name__ == "__main__": - create_excel_from_template() \ No newline at end of file diff --git a/create_excel_openpyxl.py b/create_excel_openpyxl.py deleted file mode 100644 index d0719a6..0000000 --- a/create_excel_openpyxl.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python3 -import json -import os -import shutil -import datetime -import re -from pathlib import Path -from dateutil.relativedelta import relativedelta -from update_excel import update_excel_variables - -def create_excel_from_template(): - """ - Create a copy of the Excel template and save it to the output folder, - then inject variables from config.json into the Variables sheet. - """ - # Define paths - script_dir = os.path.dirname(os.path.abspath(__file__)) - config_path = os.path.join(script_dir, 'config.json') - # Look for any Excel template in the template directory - template_dir = os.path.join(script_dir, 'template') - template_files = [f for f in os.listdir(template_dir) if f.endswith('.xlsx')] - if not template_files: - print("Error: No Excel template found in the template directory") - return False - template_path = os.path.join(template_dir, template_files[0]) - output_dir = os.path.join(script_dir, 'output') - - # Ensure output directory exists - os.makedirs(output_dir, exist_ok=True) - - # Read config.json to get store_name, starting_date, and duration - try: - with open(config_path, 'r') as f: - config = json.load(f) - user_data = config.get('user_data', {}) - store_name = user_data.get('store_name', '') - starting_date = user_data.get('starting_date', '') - duration = user_data.get('duration', 36) - - # If store_name is empty, use a default value - if not store_name: - store_name = "Your Store" - - # Calculate years array based on starting_date and duration - years = calculate_years(starting_date, duration) - print(f"Years in the period: {years}") - except Exception as e: - print(f"Error reading config file: {e}") - return False - - # Use first and last years from the array in the filename - year_range = "" - if years and len(years) > 0: - if len(years) == 1: - year_range = f"{years[0]}" - else: - year_range = f"{years[0]}-{years[-1]}" - else: - # Fallback to current year if years array is empty - current_year = datetime.datetime.now().year - year_range = f"{current_year}" - - # Create output filename with store_name and year range - output_filename = f"Footprints AI for {store_name} - Retail Media Business Case Calculations {year_range}.xlsx" - output_path = os.path.join(output_dir, output_filename) - - # Copy the template to the output directory with the new name - try: - shutil.copy2(template_path, output_path) - print(f"Excel file created successfully: {output_path}") - - # Update the Excel file with variables from config.json - print("Updating Excel file with variables from config.json...") - update_result = update_excel_variables(output_path) - - if update_result: - print("Excel file updated successfully with variables from config.json") - else: - print("Warning: Failed to update Excel file with variables from config.json") - - return True - except Exception as e: - print(f"Error creating Excel file: {e}") - return False - -def calculate_years(starting_date, duration): - """ - Calculate an array of years that appear in the period from starting_date for duration months. - - Args: - starting_date (str): Date in format dd/mm/yyyy or dd.mm.yyyy - duration (int): Number of months, including the starting month - - Returns: - list: Array of years in the period [year1, year2, ...] - """ - # Default result if we can't parse the date - default_years = [datetime.datetime.now().year] - - # If starting_date is empty, return current year - if not starting_date: - return default_years - - try: - # Try to parse the date, supporting both dd/mm/yyyy and dd.mm.yyyy formats - if '/' in starting_date: - day, month, year = map(int, starting_date.split('/')) - elif '.' in starting_date: - day, month, year = map(int, starting_date.split('.')) - elif '-' in starting_date: - # Handle ISO format (yyyy-mm-dd) - date_parts = starting_date.split('-') - if len(date_parts) == 3: - year, month, day = map(int, date_parts) - else: - # Default to current date if format is not recognized - return default_years - else: - # If format is not recognized, return default - return default_years - - # Create datetime object for starting date - start_date = datetime.datetime(year, month, day) - - # Calculate end date (starting date + duration months - 1 day) - end_date = start_date + relativedelta(months=duration-1) - - # Create a set of years (to avoid duplicates) - years_set = set() - - # Add starting year - years_set.add(start_date.year) - - # Add ending year - years_set.add(end_date.year) - - # If there are years in between, add those too - for y in range(start_date.year + 1, end_date.year): - years_set.add(y) - - # Convert set to sorted list - return sorted(list(years_set)) - - except Exception as e: - print(f"Error calculating years: {e}") - return default_years - -if __name__ == "__main__": - create_excel_from_template() diff --git a/create_excel_v2.py b/create_excel_v2.py deleted file mode 100644 index 21f5f5b..0000000 --- a/create_excel_v2.py +++ /dev/null @@ -1,331 +0,0 @@ -#!/usr/bin/env python3 -""" -Improved Excel creation script that processes templates in memory -to prevent external link issues in Excel. -""" -import json -import os -import datetime -from pathlib import Path -from dateutil.relativedelta import relativedelta -import openpyxl -from openpyxl.utils import get_column_letter - - -def create_excel_from_template(): - """ - Create an Excel file from template with all placeholders replaced in memory - before saving to prevent external link issues. - """ - # Define paths - script_dir = os.path.dirname(os.path.abspath(__file__)) - config_path = os.path.join(script_dir, 'config.json') - # Check for both possible template names - template_dir = os.path.join(script_dir, 'template') - - # Try to find the template with either naming convention - possible_templates = [ - 'Footprints AI for {store_name} - Retail Media Business Case Calculations.xlsx', - 'Footprints AI for store_name - Retail Media Business Case Calculations.xlsx' - ] - - template_path = None - for template_name in possible_templates: - full_path = os.path.join(template_dir, template_name) - if os.path.exists(full_path): - template_path = full_path - print(f"Found template: {template_name}") - break - - if not template_path: - print(f"Error: No template found in {template_dir}") - return False - - output_dir = os.path.join(script_dir, 'output') - - # Ensure output directory exists - os.makedirs(output_dir, exist_ok=True) - - # Read config.json - try: - with open(config_path, 'r') as f: - config = json.load(f) - user_data = config.get('user_data', {}) - store_name = user_data.get('store_name', 'Your Store') - starting_date = user_data.get('starting_date', '') - duration = user_data.get('duration', 36) - - if not store_name: - store_name = "Your Store" - - print(f"Processing for store: {store_name}") - - # Calculate years array - years = calculate_years(starting_date, duration) - calculated_years = years # For sheet visibility later - print(f"Years in the period: {years}") - except Exception as e: - print(f"Error reading config file: {e}") - return False - - # Determine year range for filename - year_range = "" - if years and len(years) > 0: - if len(years) == 1: - year_range = f"{years[0]}" - else: - year_range = f"{years[0]}-{years[-1]}" - else: - year_range = f"{datetime.datetime.now().year}" - - # Create output filename - output_filename = f"Footprints AI for {store_name} - Retail Media Business Case Calculations {year_range}.xlsx" - output_path = os.path.join(output_dir, output_filename) - - try: - # STAGE 1: Load template and replace all placeholders in memory - print("Loading template in memory...") - wb = openpyxl.load_workbook(template_path, data_only=False) - - # Build mapping of placeholder patterns to actual values - # Support both {store_name} and store_name formats - placeholder_patterns = [ - ('{store_name}', store_name), - ('store_name', store_name) # New format without curly braces - ] - - # STAGE 2: Replace placeholders in sheet names first - print("Replacing placeholders in sheet names...") - sheet_name_mappings = {} - - for sheet in wb.worksheets: - old_title = sheet.title - new_title = old_title - - # Replace all placeholder patterns in sheet name - for placeholder, replacement in placeholder_patterns: - if placeholder in new_title: - new_title = new_title.replace(placeholder, replacement) - print(f" Sheet name: '{old_title}' -> '{new_title}'") - - if old_title != new_title: - # Store the mapping for formula updates - sheet_name_mappings[old_title] = new_title - # Also store with quotes for formula references - sheet_name_mappings[f"'{old_title}'"] = f"'{new_title}'" - - # STAGE 3: Update all formulas and cell values BEFORE renaming sheets - print("Updating formulas and cell values...") - total_replacements = 0 - - for sheet in wb.worksheets: - sheet_name = sheet.title - replacements_in_sheet = 0 - - # Skip Variables sheet to avoid issues - if 'Variables' in sheet_name: - continue - - for row in sheet.iter_rows(): - for cell in row: - # Handle formulas - if cell.data_type == 'f' and cell.value: - original_formula = str(cell.value) - new_formula = original_formula - - # First replace sheet references - for old_ref, new_ref in sheet_name_mappings.items(): - if old_ref in new_formula: - new_formula = new_formula.replace(old_ref, new_ref) - - # Then replace any remaining placeholders - for placeholder, replacement in placeholder_patterns: - if placeholder in new_formula: - new_formula = new_formula.replace(placeholder, replacement) - - if new_formula != original_formula: - cell.value = new_formula - replacements_in_sheet += 1 - - # Handle text values - elif cell.value and isinstance(cell.value, str): - original_value = str(cell.value) - new_value = original_value - - for placeholder, replacement in placeholder_patterns: - if placeholder in new_value: - new_value = new_value.replace(placeholder, replacement) - - if new_value != original_value: - cell.value = new_value - replacements_in_sheet += 1 - - if replacements_in_sheet > 0: - print(f" {sheet_name}: {replacements_in_sheet} replacements") - total_replacements += replacements_in_sheet - - print(f"Total replacements: {total_replacements}") - - # STAGE 4: Now rename the sheets (after formulas are updated) - print("Renaming sheets...") - for sheet in wb.worksheets: - old_title = sheet.title - new_title = old_title - - for placeholder, replacement in placeholder_patterns: - if placeholder in new_title: - new_title = new_title.replace(placeholder, replacement) - - if old_title != new_title: - sheet.title = new_title - print(f" Renamed: '{old_title}' -> '{new_title}'") - - # Check if this is a forecast sheet and hide if needed - if "Forecast" in new_title: - try: - # Extract year from sheet name - sheet_year = int(new_title.split()[0]) - if sheet_year not in calculated_years: - sheet.sheet_state = 'hidden' - print(f" Hidden sheet '{new_title}' (year {sheet_year} not in range)") - except (ValueError, IndexError): - pass - - # STAGE 5: Update Variables sheet with config values - print("Updating Variables sheet...") - if 'Variables' in wb.sheetnames: - update_variables_sheet(wb['Variables'], user_data) - - # STAGE 6: Save the fully processed workbook - print(f"Saving to: {output_path}") - wb.save(output_path) - - print(f"✓ Excel file created successfully: {output_filename}") - return True - - except Exception as e: - print(f"Error creating Excel file: {e}") - import traceback - traceback.print_exc() - return False - - -def update_variables_sheet(sheet, user_data): - """ - Update the Variables sheet with values from config.json - """ - # Map config variables to Excel cells - cell_mappings = { - 'B2': user_data.get('store_name', ''), - 'B31': user_data.get('starting_date', ''), - 'B32': user_data.get('duration', 36), - 'B37': user_data.get('open_days_per_month', 0), - - # Convenience store type - 'H37': user_data.get('convenience_store_type', {}).get('stores_number', 0), - 'C37': user_data.get('convenience_store_type', {}).get('monthly_transactions', 0), - 'I37': 1 if user_data.get('convenience_store_type', {}).get('has_digital_screens', False) else 0, - 'J37': user_data.get('convenience_store_type', {}).get('screen_count', 0), - 'K37': user_data.get('convenience_store_type', {}).get('screen_percentage', 0), - 'M37': 1 if user_data.get('convenience_store_type', {}).get('has_in_store_radio', False) else 0, - 'N37': user_data.get('convenience_store_type', {}).get('radio_percentage', 0), - - # Minimarket store type - 'H38': user_data.get('minimarket_store_type', {}).get('stores_number', 0), - 'C38': user_data.get('minimarket_store_type', {}).get('monthly_transactions', 0), - 'I38': 1 if user_data.get('minimarket_store_type', {}).get('has_digital_screens', False) else 0, - 'J38': user_data.get('minimarket_store_type', {}).get('screen_count', 0), - 'K38': user_data.get('minimarket_store_type', {}).get('screen_percentage', 0), - 'M38': 1 if user_data.get('minimarket_store_type', {}).get('has_in_store_radio', False) else 0, - 'N38': user_data.get('minimarket_store_type', {}).get('radio_percentage', 0), - - # Supermarket store type - 'H39': user_data.get('supermarket_store_type', {}).get('stores_number', 0), - 'C39': user_data.get('supermarket_store_type', {}).get('monthly_transactions', 0), - 'I39': 1 if user_data.get('supermarket_store_type', {}).get('has_digital_screens', False) else 0, - 'J39': user_data.get('supermarket_store_type', {}).get('screen_count', 0), - 'K39': user_data.get('supermarket_store_type', {}).get('screen_percentage', 0), - 'M39': 1 if user_data.get('supermarket_store_type', {}).get('has_in_store_radio', False) else 0, - 'N39': user_data.get('supermarket_store_type', {}).get('radio_percentage', 0), - - # Hypermarket store type - 'H40': user_data.get('hypermarket_store_type', {}).get('stores_number', 0), - 'C40': user_data.get('hypermarket_store_type', {}).get('monthly_transactions', 0), - 'I40': 1 if user_data.get('hypermarket_store_type', {}).get('has_digital_screens', False) else 0, - 'J40': user_data.get('hypermarket_store_type', {}).get('screen_count', 0), - 'K40': user_data.get('hypermarket_store_type', {}).get('screen_percentage', 0), - 'M40': 1 if user_data.get('hypermarket_store_type', {}).get('has_in_store_radio', False) else 0, - 'N40': user_data.get('hypermarket_store_type', {}).get('radio_percentage', 0), - - # On-site channels - 'B43': user_data.get('website_visitors', 0), - 'B44': user_data.get('app_users', 0), - 'B45': user_data.get('loyalty_users', 0), - - # Off-site channels - 'B49': user_data.get('facebook_followers', 0), - 'B50': user_data.get('instagram_followers', 0), - 'B51': user_data.get('google_views', 0), - 'B52': user_data.get('email_subscribers', 0), - 'B53': user_data.get('sms_users', 0), - 'B54': user_data.get('whatsapp_contacts', 0) - } - - # Update the cells - for cell_ref, value in cell_mappings.items(): - try: - sheet[cell_ref].value = value - print(f" Updated {cell_ref} = {value}") - except Exception as e: - print(f" Warning: Could not update {cell_ref}: {e}") - - -def calculate_years(starting_date, duration): - """ - Calculate an array of years that appear in the period. - """ - default_years = [datetime.datetime.now().year] - - if not starting_date: - return default_years - - try: - # Parse date - support multiple formats - if '/' in str(starting_date): - day, month, year = map(int, str(starting_date).split('/')) - elif '.' in str(starting_date): - day, month, year = map(int, str(starting_date).split('.')) - elif '-' in str(starting_date): - # ISO format (yyyy-mm-dd) - date_parts = str(starting_date).split('-') - if len(date_parts) == 3: - year, month, day = map(int, date_parts) - else: - return default_years - else: - return default_years - - # Create datetime object - start_date = datetime.datetime(year, month, day) - - # Calculate end date - end_date = start_date + relativedelta(months=duration-1) - - # Create set of years - years_set = set() - years_set.add(start_date.year) - years_set.add(end_date.year) - - # Add any years in between - for y in range(start_date.year + 1, end_date.year): - years_set.add(y) - - return sorted(list(years_set)) - - except Exception as e: - print(f"Error calculating years: {e}") - return default_years - - -if __name__ == "__main__": - create_excel_from_template() \ No newline at end of file diff --git a/diagnose_excel_issue.py b/diagnose_excel_issue.py deleted file mode 100644 index 9dcdc04..0000000 --- a/diagnose_excel_issue.py +++ /dev/null @@ -1,138 +0,0 @@ -#!/usr/bin/env python3 -import os -import zipfile -import xml.etree.ElementTree as ET -import openpyxl -from openpyxl.xml.functions import fromstring, tostring -from pathlib import Path - -def diagnose_excel_file(file_path): - """Diagnose Excel file for corruption issues""" - print(f"Diagnosing: {file_path}") - print("=" * 50) - - # 1. Check if file exists - if not os.path.exists(file_path): - print(f"ERROR: File not found: {file_path}") - return - - # 2. Try to open with openpyxl - print("\n1. Testing openpyxl compatibility:") - try: - wb = openpyxl.load_workbook(file_path, read_only=False, keep_vba=True, data_only=False) - print(f" ✓ Successfully loaded with openpyxl") - print(f" - Sheets: {wb.sheetnames}") - - # Check for custom properties - if hasattr(wb, 'custom_doc_props'): - print(f" - Custom properties: {wb.custom_doc_props}") - - wb.close() - except Exception as e: - print(f" ✗ Failed to load with openpyxl: {e}") - - # 3. Analyze ZIP structure - print("\n2. Analyzing ZIP/XML structure:") - try: - with zipfile.ZipFile(file_path, 'r') as zf: - # Check for custom XML - custom_xml_files = [f for f in zf.namelist() if 'customXml' in f or 'custom' in f.lower()] - if custom_xml_files: - print(f" ! Found custom XML files: {custom_xml_files}") - - for custom_file in custom_xml_files: - try: - content = zf.read(custom_file) - print(f"\n Content of {custom_file}:") - print(f" {content[:500].decode('utf-8', errors='ignore')}") - except Exception as e: - print(f" Error reading {custom_file}: {e}") - - # Check for tables - table_files = [f for f in zf.namelist() if 'xl/tables/' in f] - if table_files: - print(f" - Found table files: {table_files}") - for table_file in table_files: - content = zf.read(table_file) - # Check if XML declaration is present - if not content.startswith(b'', - 'namespaces': { - 'main': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main', - 'mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006', - 'xr': 'http://schemas.microsoft.com/office/spreadsheetml/2014/revision', - 'xr3': 'http://schemas.microsoft.com/office/spreadsheetml/2016/revision3' - }, - 'compatibility': 'mc:Ignorable="xr xr3"', - 'uid_pattern': '{00000000-000C-0000-FFFF-FFFF{:02d}000000}' - } - } - return template_tables -``` - -#### Step 2: XML Generation Functions -```python -def generate_proper_table_xml(table_data, table_id): - """Generate Excel-compliant table XML with proper format""" - - # XML Declaration - xml_content = '\n' - - # Table element with all namespaces - xml_content += f'\n' - - # Table columns with UIDs - xml_content += generate_table_columns_xml(table_data.columns, table_id) - - # Table style info - xml_content += generate_table_style_xml(table_data.style) - - xml_content += '
' - - return xml_content - -def generate_table_uid(table_id): - """Generate proper UIDs for tables""" - return f"{{00000000-000C-0000-FFFF-FFFF{table_id:02d}000000}}" - -def generate_column_uid(table_id, column_id): - """Generate proper UIDs for table columns""" - return f"{{00000000-0010-0000-{table_id:04d}-{column_id:06d}000000}}" -``` - -#### Step 3: File Assembly Improvements -```python -def create_excel_file_with_proper_compression(): - """Create Excel file with consistent ZIP compression""" - - # Use consistent compression settings - with zipfile.ZipFile(output_path, 'w', - compression=zipfile.ZIP_DEFLATED, - compresslevel=6, # Consistent compression level - allowZip64=False) as zipf: - - # Set consistent file timestamps - fixed_time = (2023, 1, 1, 0, 0, 0) - - for file_path, content in excel_files.items(): - zinfo = zipfile.ZipInfo(file_path) - zinfo.date_time = fixed_time - zinfo.compress_type = zipfile.ZIP_DEFLATED - - zipf.writestr(zinfo, content) -``` - -### Phase 2: Testing and Validation - -#### Cross-Platform Testing Matrix -| Platform | Python Version | Library Versions | Test Status | -|----------|---------------|-----------------|-------------| -| Ubuntu 22.04 | 3.10+ | openpyxl==3.x | ⏳ Pending | -| macOS | 3.10+ | openpyxl==3.x | ✅ Working | -| Windows | 3.10+ | openpyxl==3.x | ⏳ TBD | - -#### Validation Script -```python -def validate_excel_file(file_path): - """Validate generated Excel file for repair issues""" - - checks = { - 'table_xml_format': check_table_xml_declarations, - 'namespace_compliance': check_namespace_declarations, - 'uid_presence': check_unique_identifiers, - 'zip_metadata': check_zip_file_metadata, - 'excel_compatibility': test_excel_opening - } - - results = {} - for check_name, check_func in checks.items(): - results[check_name] = check_func(file_path) - - return results -``` - -### Phase 3: Long-term Improvements - -#### Migration to openpyxl -```python -# Example migration approach -from openpyxl import Workbook -from openpyxl.worksheet.table import Table, TableStyleInfo - -def create_excel_with_openpyxl(business_case_data): - """Generate Excel using openpyxl for cross-platform compatibility""" - - wb = Workbook() - ws = wb.active - - # Add data - for row in business_case_data: - ws.append(row) - - # Create table with proper formatting - table = Table(displayName="BusinessCaseTable", ref="A1:H47") - style = TableStyleInfo(name="TableStyleMedium3", - showFirstColumn=False, - showLastColumn=False, - showRowStripes=True, - showColumnStripes=False) - table.tableStyleInfo = style - - ws.add_table(table) - - # Save with consistent settings - wb.save(output_path) -``` - -## Implementation Checklist - -### Immediate Actions (Week 1) -- [ ] Extract XML patterns from working template -- [ ] Implement proper XML declaration generation -- [ ] Add namespace declarations and compatibility directives -- [ ] Implement UID generation algorithms -- [ ] Fix table ID sequencing logic -- [ ] Test on Ubuntu environment - -### Validation Actions (Week 2) -- [ ] Create comprehensive test suite -- [ ] Validate across multiple platforms -- [ ] Performance testing with large datasets -- [ ] Excel compatibility testing (different versions) -- [ ] Automated repair detection - -### Future Improvements (Month 2) -- [ ] Migration to openpyxl library -- [ ] Docker containerization for consistent environment -- [ ] CI/CD pipeline with cross-platform testing -- [ ] Comprehensive documentation updates - -## Risk Assessment - -### High Priority Risks -- **Platform dependency**: Current solution may not work on Windows -- **Excel version compatibility**: Different Excel versions may have different validation -- **Performance impact**: Proper XML generation may be slower - -### Mitigation Strategies -- **Comprehensive testing**: Test on all target platforms before deployment -- **Fallback mechanism**: Keep current generation as backup -- **Performance optimization**: Profile and optimize XML generation code - -## Success Metrics - -### Primary Goals -- ✅ Zero Excel repair dialogs on Ubuntu-generated files -- ✅ Identical behavior across macOS and Ubuntu -- ✅ No data loss or functionality regression - -### Secondary Goals -- ✅ Improved file generation performance -- ✅ Better code maintainability -- ✅ Enhanced error handling and logging - -## Conclusion - -The recommended solution addresses the root cause by implementing proper Excel XML format generation while maintaining cross-platform compatibility. The template-based approach provides immediate relief while the library migration offers long-term stability. - -**Next Steps**: Begin with Phase 1 implementation focusing on proper XML generation, followed by comprehensive testing across platforms. - ---- - -*Proposal created: 2025-09-19* -*Estimated implementation time: 2-3 weeks* -*Priority: High - affects production workflows* \ No newline at end of file diff --git a/excel_table_repair_analysis.md b/excel_table_repair_analysis.md deleted file mode 100644 index 7dca8ff..0000000 --- a/excel_table_repair_analysis.md +++ /dev/null @@ -1,117 +0,0 @@ -# Excel Table Repair Error Analysis - -## Issue Summary -When opening Ubuntu-generated Excel files, Excel displays repair errors specifically for tables: -- **Repaired Records: Table from /xl/tables/table1.xml part (Table)** -- **Repaired Records: Table from /xl/tables/table2.xml part (Table)** - -**CRITICAL FINDING**: The same script generates working files on macOS but broken files on Ubuntu, indicating a **platform-specific issue** rather than a general Excel format problem. - -## Investigation Findings - -### Three-Way Table Structure Comparison - -#### Template File (Original - Working) -- Contains proper XML declaration: `` -- Includes comprehensive namespace declarations: - - `xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"` - - `xmlns:xr="http://schemas.microsoft.com/office/spreadsheetml/2014/revision"` - - `xmlns:xr3="http://schemas.microsoft.com/office/spreadsheetml/2016/revision3"` -- Has `mc:Ignorable="xr xr3"` compatibility directive -- Contains unique identifiers (`xr:uid`, `xr3:uid`) for tables and columns -- Proper table ID sequence (table1 has id="2", table2 has id="3") - -#### macOS Generated File (Working - No Repair Errors) -- **Missing XML declaration** - no `` -- **Missing namespace declarations** for revision extensions -- **No compatibility directives** (`mc:Ignorable`) -- **Missing unique identifiers** for tables and columns -- **Different table ID sequence** (table1 has id="1", table2 has id="2") -- **File sizes: 1,032 bytes (table1), 1,121 bytes (table2)** - -#### Ubuntu Generated File (Broken - Requires Repair) -- **Missing XML declaration** - no `` -- **Missing namespace declarations** for revision extensions -- **No compatibility directives** (`mc:Ignorable`) -- **Missing unique identifiers** for tables and columns -- **Same table ID sequence as macOS** (table1 has id="1", table2 has id="2") -- **Identical file sizes to macOS: 1,032 bytes (table1), 1,121 bytes (table2)** - -### Key Discovery: XML Content is Identical - -**SHOCKING REVELATION**: The table XML content between macOS and Ubuntu generated files is **byte-for-byte identical**. Both have: - -1. **Missing XML declarations** -2. **Missing namespace extensions** -3. **Missing unique identifiers** -4. **Same table ID sequence** (1, 2) -5. **Identical file sizes** - -**macOS table1.xml vs Ubuntu table1.xml:** -```xml -... -``` -*(Completely identical)* - -### Root Cause Analysis - Platform Dependency - -Since the table XML is identical but only Ubuntu files require repair, the issue is **NOT in the table XML content**. The problem must be: - -1. **File encoding differences** during ZIP assembly -2. **ZIP compression algorithm differences** between platforms -3. **File timestamp/metadata differences** in the ZIP archive -4. **Different Python library versions** handling ZIP creation differently -5. **Excel's platform-specific validation logic** being more strict on certain systems - -### Common Formula Issues -Both versions contain `#REF!` errors in calculated columns: -```xml -#REF! -``` -This suggests broken cell references but doesn't cause repair errors. - -### Impact Assessment -- **Functionality:** No data loss, tables work after repair -- **User Experience:** Excel shows warning dialog requiring user action **only on Ubuntu-generated files** -- **Automation:** Breaks automated processing workflows **only for Ubuntu deployments** -- **Platform Consistency:** Same code produces different results across platforms - -## Recommendations - -### Platform-Specific Investigation Priorities -1. **Compare Python library versions** between macOS and Ubuntu environments -2. **Check ZIP file metadata** (timestamps, compression levels, file attributes) -3. **Examine file encoding** during Excel assembly process -4. **Test with different Python Excel libraries** (openpyxl vs xlsxwriter vs others) -5. **Analyze ZIP file internals** with hex editors for subtle differences - -### Immediate Workarounds -1. **Document platform dependency** in deployment guides -2. **Test all generated files** on target Excel environment before distribution -3. **Consider generating files on macOS** for production use -4. **Implement automated repair detection** in the workflow - -### Long-term Fixes -1. **Standardize to template format** with proper XML declarations and namespaces -2. **Use established Excel libraries** with proven cross-platform compatibility -3. **Implement comprehensive testing** across multiple platforms -4. **Add ZIP file validation** to detect platform-specific differences - -## Technical Details - -### File Comparison Results -| File | Template | macOS Generated | Ubuntu Generated | Ubuntu vs macOS | -|------|----------|----------------|------------------|-----------------| -| table1.xml | 1,755 bytes | 1,032 bytes | 1,032 bytes | **Identical** | -| table2.xml | 1,844 bytes | 1,121 bytes | 1,121 bytes | **Identical** | - -### Platform Dependency Evidence -- **Identical table XML content** between macOS and Ubuntu -- **Same missing features** (declarations, namespaces, UIDs) -- **Different Excel behavior** (repair required only on Ubuntu) -- **Suggests ZIP-level or metadata differences** - ---- - -*Analysis completed: 2025-09-19* -*Files examined: Template vs Test5 generated Excel workbooks* \ No newline at end of file diff --git a/fix_excel_corruption.py b/fix_excel_corruption.py deleted file mode 100644 index 82f98fb..0000000 --- a/fix_excel_corruption.py +++ /dev/null @@ -1,207 +0,0 @@ -#!/usr/bin/env python3 -""" -Fix Excel corruption issues caused by SharePoint/OneDrive metadata -""" -import os -import shutil -import zipfile -import xml.etree.ElementTree as ET -from pathlib import Path -import tempfile -import openpyxl - -def remove_sharepoint_metadata(excel_path, output_path=None): - """ - Remove SharePoint/OneDrive metadata from Excel file that causes corruption warnings - - Args: - excel_path: Path to the Excel file to fix - output_path: Optional path for the fixed file (if None, overwrites original) - - Returns: - bool: True if successful, False otherwise - """ - if not output_path: - output_path = excel_path - - print(f"Processing: {excel_path}") - - try: - # Method 1: Use openpyxl to remove custom properties - print("Method 1: Using openpyxl to clean custom properties...") - wb = openpyxl.load_workbook(excel_path, keep_vba=True) - - # Remove custom document properties - if hasattr(wb, 'custom_doc_props'): - # Clear all custom properties - wb.custom_doc_props.props.clear() - print(" ✓ Cleared custom document properties") - - # Save to temporary file first - temp_file = Path(output_path).with_suffix('.tmp.xlsx') - wb.save(temp_file) - wb.close() - - # Method 2: Direct ZIP manipulation to ensure complete removal - print("Method 2: Direct ZIP manipulation for complete cleanup...") - with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as tmp: - tmp_path = tmp.name - - with zipfile.ZipFile(temp_file, 'r') as zin: - with zipfile.ZipFile(tmp_path, 'w', compression=zipfile.ZIP_DEFLATED) as zout: - # Copy all files except custom.xml or create a clean one - for item in zin.infolist(): - if item.filename == 'docProps/custom.xml': - # Create a clean custom.xml without SharePoint metadata - clean_custom_xml = create_clean_custom_xml() - zout.writestr(item, clean_custom_xml) - print(" ✓ Replaced custom.xml with clean version") - else: - # Copy the file as-is - zout.writestr(item, zin.read(item.filename)) - - # Replace original file with cleaned version - shutil.move(tmp_path, output_path) - - # Clean up temporary file - if temp_file.exists(): - temp_file.unlink() - - print(f" ✓ Successfully cleaned: {output_path}") - return True - - except Exception as e: - print(f" ✗ Error cleaning file: {e}") - return False - -def create_clean_custom_xml(): - """ - Create a clean custom.xml without SharePoint metadata - """ - # Create a minimal valid custom.xml - xml_content = ''' - -''' - return xml_content.encode('utf-8') - -def clean_template_file(): - """ - Clean the template file to prevent future corruption - """ - template_dir = Path(__file__).parent / "template" - template_files = list(template_dir.glob("*.xlsx")) - - if not template_files: - print("No template files found") - return False - - for template_file in template_files: - print(f"\nCleaning template: {template_file.name}") - - # Create backup - backup_path = template_file.with_suffix('.backup.xlsx') - shutil.copy2(template_file, backup_path) - print(f" ✓ Created backup: {backup_path.name}") - - # Clean the template - if remove_sharepoint_metadata(str(template_file)): - print(f" ✓ Template cleaned successfully") - else: - print(f" ✗ Failed to clean template") - # Restore from backup - shutil.copy2(backup_path, template_file) - print(f" ✓ Restored from backup") - - return True - -def clean_all_output_files(): - """ - Clean all Excel files in the output directory - """ - output_dir = Path(__file__).parent / "output" - excel_files = list(output_dir.glob("*.xlsx")) - - if not excel_files: - print("No Excel files found in output directory") - return False - - print(f"Found {len(excel_files)} Excel files to clean") - - for excel_file in excel_files: - print(f"\nCleaning: {excel_file.name}") - if remove_sharepoint_metadata(str(excel_file)): - print(f" ✓ Cleaned successfully") - else: - print(f" ✗ Failed to clean") - - return True - -def verify_file_is_clean(excel_path): - """ - Verify that an Excel file is free from SharePoint metadata - """ - print(f"\nVerifying: {excel_path}") - - try: - with zipfile.ZipFile(excel_path, 'r') as zf: - if 'docProps/custom.xml' in zf.namelist(): - content = zf.read('docProps/custom.xml') - - # Check for problematic metadata - if b'ContentTypeId' in content: - print(" ✗ Still contains SharePoint ContentTypeId") - return False - if b'MediaService' in content: - print(" ✗ Still contains MediaService tags") - return False - - print(" ✓ File is clean - no SharePoint metadata found") - return True - else: - print(" ✓ File is clean - no custom.xml present") - return True - - except Exception as e: - print(f" ✗ Error verifying file: {e}") - return False - -def main(): - """Main function to clean Excel files""" - print("=" * 60) - print("Excel SharePoint Metadata Cleaner") - print("=" * 60) - - # Step 1: Clean the template - print("\nStep 1: Cleaning template file...") - print("-" * 40) - clean_template_file() - - # Step 2: Clean all output files - print("\n\nStep 2: Cleaning output files...") - print("-" * 40) - clean_all_output_files() - - # Step 3: Verify cleaning - print("\n\nStep 3: Verifying cleaned files...") - print("-" * 40) - - # Verify template - template_dir = Path(__file__).parent / "template" - for template_file in template_dir.glob("*.xlsx"): - if not template_file.name.endswith('.backup.xlsx'): - verify_file_is_clean(str(template_file)) - - # Verify output files - output_dir = Path(__file__).parent / "output" - for excel_file in output_dir.glob("*.xlsx"): - verify_file_is_clean(str(excel_file)) - - print("\n" + "=" * 60) - print("Cleaning complete!") - print("\nNOTE: The Excel files should now open without corruption warnings.") - print("The SharePoint/OneDrive metadata has been removed.") - print("\nFuture files generated from the cleaned template should not have this issue.") - print("=" * 60) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/server.js b/server.js index 5bb2310..b7c75b7 100644 --- a/server.js +++ b/server.js @@ -47,10 +47,14 @@ app.get('/download-excel', (req, res) => { const latestFile = files[0].name; const filePath = path.join(outputDir, latestFile); - // Set headers for file download + // Set optimized headers to avoid MOTW tagging and enable immediate formula calculation res.setHeader('Content-Type', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'); - res.setHeader('Content-Disposition', `attachment; filename="${latestFile}"`); - + res.setHeader('Content-Disposition', `inline; filename="${latestFile}"`); // 'inline' instead of 'attachment' to avoid MOTW + res.setHeader('Cache-Control', 'private, no-cache, no-store, must-revalidate'); + res.setHeader('Pragma', 'no-cache'); + res.setHeader('Expires', '0'); + res.removeHeader('X-Powered-By'); // Remove identifying headers that might trigger security warnings + // Send the file res.sendFile(filePath); console.log(`Excel file sent for download: ${filePath}`); diff --git a/test_copy.xlsx b/test_copy.xlsx index 8bfa851..3c0e698 100644 Binary files a/test_copy.xlsx and b/test_copy.xlsx differ diff --git a/update_excel.py b/update_excel.py deleted file mode 100644 index 6e01694..0000000 --- a/update_excel.py +++ /dev/null @@ -1,227 +0,0 @@ -#!/usr/bin/env python3 -import json -import os -import re -import openpyxl -from openpyxl.utils import get_column_letter - -def update_excel_variables(excel_path): - """ - Update the Variables sheet in the Excel file with values from config.json - and hide forecast sheets that aren't in the calculated years array. - - This version uses openpyxl exclusively to preserve all formatting, formulas, - and Excel features that xlsxwriter cannot handle when modifying existing files. - - Args: - excel_path (str): Path to the Excel file to update - - Returns: - bool: True if successful, False otherwise - """ - # Define paths - script_dir = os.path.dirname(os.path.abspath(__file__)) - config_path = os.path.join(script_dir, 'config.json') - - try: - # Load config.json - with open(config_path, 'r') as f: - config = json.load(f) - user_data = config.get('user_data', {}) - - # Load Excel workbook - print(f"Opening Excel file: {excel_path}") - wb = openpyxl.load_workbook(excel_path) - - # Try to access the Variables sheet - try: - # First try by name - sheet = wb['Variables'] - except KeyError: - # If not found by name, try to access the last sheet - sheet_names = wb.sheetnames - if sheet_names: - print(f"Variables sheet not found by name. Using last sheet: {sheet_names[-1]}") - sheet = wb[sheet_names[-1]] - else: - print("No sheets found in the workbook") - return False - - # Map config variables to Excel cells based on the provided mapping - cell_mappings = { - 'B2': user_data.get('store_name', ''), - 'B31': user_data.get('starting_date', ''), - 'B32': user_data.get('duration', 36), - 'B37': user_data.get('open_days_per_month', 0), - - # Convenience store type - 'H37': user_data.get('convenience_store_type', {}).get('stores_number', 0), - 'C37': user_data.get('convenience_store_type', {}).get('monthly_transactions', 0), - # Convert boolean to 1/0 for has_digital_screens - 'I37': 1 if user_data.get('convenience_store_type', {}).get('has_digital_screens', False) else 0, - 'J37': user_data.get('convenience_store_type', {}).get('screen_count', 0), - 'K37': user_data.get('convenience_store_type', {}).get('screen_percentage', 0), - # Convert boolean to 1/0 for has_in_store_radio - 'M37': 1 if user_data.get('convenience_store_type', {}).get('has_in_store_radio', False) else 0, - 'N37': user_data.get('convenience_store_type', {}).get('radio_percentage', 0), - - # Minimarket store type - 'H38': user_data.get('minimarket_store_type', {}).get('stores_number', 0), - 'C38': user_data.get('minimarket_store_type', {}).get('monthly_transactions', 0), - # Convert boolean to 1/0 for has_digital_screens - 'I38': 1 if user_data.get('minimarket_store_type', {}).get('has_digital_screens', False) else 0, - 'J38': user_data.get('minimarket_store_type', {}).get('screen_count', 0), - 'K38': user_data.get('minimarket_store_type', {}).get('screen_percentage', 0), - # Convert boolean to 1/0 for has_in_store_radio - 'M38': 1 if user_data.get('minimarket_store_type', {}).get('has_in_store_radio', False) else 0, - 'N38': user_data.get('minimarket_store_type', {}).get('radio_percentage', 0), - - # Supermarket store type - 'H39': user_data.get('supermarket_store_type', {}).get('stores_number', 0), - 'C39': user_data.get('supermarket_store_type', {}).get('monthly_transactions', 0), - # Convert boolean to 1/0 for has_digital_screens - 'I39': 1 if user_data.get('supermarket_store_type', {}).get('has_digital_screens', False) else 0, - 'J39': user_data.get('supermarket_store_type', {}).get('screen_count', 0), - 'K39': user_data.get('supermarket_store_type', {}).get('screen_percentage', 0), - # Convert boolean to 1/0 for has_in_store_radio - 'M39': 1 if user_data.get('supermarket_store_type', {}).get('has_in_store_radio', False) else 0, - 'N39': user_data.get('supermarket_store_type', {}).get('radio_percentage', 0), - - # Hypermarket store type - 'H40': user_data.get('hypermarket_store_type', {}).get('stores_number', 0), - 'C40': user_data.get('hypermarket_store_type', {}).get('monthly_transactions', 0), - # Convert boolean to 1/0 for has_digital_screens - 'I40': 1 if user_data.get('hypermarket_store_type', {}).get('has_digital_screens', False) else 0, - 'J40': user_data.get('hypermarket_store_type', {}).get('screen_count', 0), - 'K40': user_data.get('hypermarket_store_type', {}).get('screen_percentage', 0), - # Convert boolean to 1/0 for has_in_store_radio - 'M40': 1 if user_data.get('hypermarket_store_type', {}).get('has_in_store_radio', False) else 0, - 'N40': user_data.get('hypermarket_store_type', {}).get('radio_percentage', 0), - - # On-site channels - 'B43': user_data.get('website_visitors', 0), - 'B44': user_data.get('app_users', 0), - 'B45': user_data.get('loyalty_users', 0), - - # Off-site channels - 'B49': user_data.get('facebook_followers', 0), - 'B50': user_data.get('instagram_followers', 0), - 'B51': user_data.get('google_views', 0), - 'B52': user_data.get('email_subscribers', 0), - 'B53': user_data.get('sms_users', 0), - 'B54': user_data.get('whatsapp_contacts', 0) - } - - # Update the cells - for cell_ref, value in cell_mappings.items(): - try: - # Force the value to be set, even if the cell is protected or has data validation - cell = sheet[cell_ref] - cell.value = value - print(f"Updated {cell_ref} with value: {value}") - except Exception as e: - print(f"Error updating cell {cell_ref}: {e}") - - # Save the workbook with variables updated - print("Saving workbook with updated variables...") - wb.save(excel_path) - - # Get the calculated years array from config - starting_date = user_data.get('starting_date', '') - duration = user_data.get('duration', 36) - calculated_years = [] - - # Import datetime at the module level to avoid scope issues - import datetime - from dateutil.relativedelta import relativedelta - - # Calculate years array based on starting_date and duration - try: - # Try to parse the date, supporting both dd/mm/yyyy and dd.mm.yyyy formats - if starting_date: - if '/' in str(starting_date): - day, month, year = map(int, str(starting_date).split('/')) - elif '.' in str(starting_date): - day, month, year = map(int, str(starting_date).split('.')) - elif '-' in str(starting_date): - # Handle ISO format (yyyy-mm-dd) - date_parts = str(starting_date).split('-') - if len(date_parts) == 3: - year, month, day = map(int, date_parts) - else: - # Default to current date if format is not recognized - current_date = datetime.datetime.now() - year, month, day = current_date.year, current_date.month, current_date.day - elif isinstance(starting_date, datetime.datetime): - day, month, year = starting_date.day, starting_date.month, starting_date.year - else: - # Default to current date if format is not recognized - current_date = datetime.datetime.now() - year, month, day = current_date.year, current_date.month, current_date.day - - # Create datetime object for starting date - start_date = datetime.datetime(year, month, day) - - # Calculate end date (starting date + duration months - 1 day) - end_date = start_date + relativedelta(months=duration-1) - - # Create a set of years (to avoid duplicates) - years_set = set() - - # Add starting year - years_set.add(start_date.year) - - # Add ending year - years_set.add(end_date.year) - - # If there are years in between, add those too - for y in range(start_date.year + 1, end_date.year): - years_set.add(y) - - # Convert set to sorted list - calculated_years = sorted(list(years_set)) - print(f"Calculated years for sheet visibility: {calculated_years}") - else: - # Default to current year if no starting date - calculated_years = [datetime.datetime.now().year] - except Exception as e: - print(f"Error calculating years for sheet visibility: {e}") - calculated_years = [datetime.datetime.now().year] - - # Hide forecast sheets that aren't in the calculated years array - # No sheet renaming - just check existing sheet names - for sheet_name in wb.sheetnames: - # Check if this is a forecast sheet - # Forecast sheets have names like "2025 – Forecast" - if "Forecast" in sheet_name: - # Extract the year from the sheet name - try: - sheet_year = int(sheet_name.split()[0]) - # Hide the sheet if its year is not in the calculated years - if sheet_year not in calculated_years: - sheet = wb[sheet_name] - sheet.sheet_state = 'hidden' - print(f"Hiding sheet '{sheet_name}' as year {sheet_year} is not in calculated years {calculated_years}") - except Exception as e: - print(f"Error extracting year from sheet name '{sheet_name}': {e}") - - # Save the workbook with updated variables and hidden sheets - print("Saving workbook with all updates...") - wb.save(excel_path) - - print(f"Excel file updated successfully: {excel_path}") - return True - - except Exception as e: - print(f"Error updating Excel file: {e}") - return False - - -if __name__ == "__main__": - # For testing purposes - import sys - if len(sys.argv) > 1: - excel_path = sys.argv[1] - update_excel_variables(excel_path) - else: - print("Please provide the path to the Excel file as an argument") \ No newline at end of file diff --git a/update_excel_openpyxl.py b/update_excel_openpyxl.py deleted file mode 100644 index 16f7cee..0000000 --- a/update_excel_openpyxl.py +++ /dev/null @@ -1,235 +0,0 @@ -#!/usr/bin/env python3 -import json -import os -import re -import openpyxl -from openpyxl.utils import get_column_letter -# Removed zipfile import - no longer using direct XML manipulation - -def update_excel_variables(excel_path): - """ - Update the Variables sheet in the Excel file with values from config.json - and hide forecast sheets that aren't in the calculated years array - - Args: - excel_path (str): Path to the Excel file to update - - Returns: - bool: True if successful, False otherwise - """ - # Define paths - script_dir = os.path.dirname(os.path.abspath(__file__)) - config_path = os.path.join(script_dir, 'config.json') - - try: - # Load config.json - with open(config_path, 'r') as f: - config = json.load(f) - user_data = config.get('user_data', {}) - - # Load Excel workbook - print(f"Opening Excel file: {excel_path}") - wb = openpyxl.load_workbook(excel_path) - - # Try to access the Variables sheet - try: - # First try by name - sheet = wb['Variables'] - except KeyError: - # If not found by name, try to access the last sheet - sheet_names = wb.sheetnames - if sheet_names: - print(f"Variables sheet not found by name. Using last sheet: {sheet_names[-1]}") - sheet = wb[sheet_names[-1]] - else: - print("No sheets found in the workbook") - return False - - # Map config variables to Excel cells based on the provided mapping - cell_mappings = { - 'B2': user_data.get('store_name', ''), - 'B31': user_data.get('starting_date', ''), - 'B32': user_data.get('duration', 36), - 'B37': user_data.get('open_days_per_month', 0), - - # Convenience store type - 'H37': user_data.get('convenience_store_type', {}).get('stores_number', 0), - 'C37': user_data.get('convenience_store_type', {}).get('monthly_transactions', 0), - # Convert boolean to 1/0 for has_digital_screens - 'I37': 1 if user_data.get('convenience_store_type', {}).get('has_digital_screens', False) else 0, - 'J37': user_data.get('convenience_store_type', {}).get('screen_count', 0), - 'K37': user_data.get('convenience_store_type', {}).get('screen_percentage', 0), - # Convert boolean to 1/0 for has_in_store_radio - 'M37': 1 if user_data.get('convenience_store_type', {}).get('has_in_store_radio', False) else 0, - 'N37': user_data.get('convenience_store_type', {}).get('radio_percentage', 0), - - # Minimarket store type - 'H38': user_data.get('minimarket_store_type', {}).get('stores_number', 0), - 'C38': user_data.get('minimarket_store_type', {}).get('monthly_transactions', 0), - # Convert boolean to 1/0 for has_digital_screens - 'I38': 1 if user_data.get('minimarket_store_type', {}).get('has_digital_screens', False) else 0, - 'J38': user_data.get('minimarket_store_type', {}).get('screen_count', 0), - 'K38': user_data.get('minimarket_store_type', {}).get('screen_percentage', 0), - # Convert boolean to 1/0 for has_in_store_radio - 'M38': 1 if user_data.get('minimarket_store_type', {}).get('has_in_store_radio', False) else 0, - 'N38': user_data.get('minimarket_store_type', {}).get('radio_percentage', 0), - - # Supermarket store type - 'H39': user_data.get('supermarket_store_type', {}).get('stores_number', 0), - 'C39': user_data.get('supermarket_store_type', {}).get('monthly_transactions', 0), - # Convert boolean to 1/0 for has_digital_screens - 'I39': 1 if user_data.get('supermarket_store_type', {}).get('has_digital_screens', False) else 0, - 'J39': user_data.get('supermarket_store_type', {}).get('screen_count', 0), - 'K39': user_data.get('supermarket_store_type', {}).get('screen_percentage', 0), - # Convert boolean to 1/0 for has_in_store_radio - 'M39': 1 if user_data.get('supermarket_store_type', {}).get('has_in_store_radio', False) else 0, - 'N39': user_data.get('supermarket_store_type', {}).get('radio_percentage', 0), - - # Hypermarket store type - 'H40': user_data.get('hypermarket_store_type', {}).get('stores_number', 0), - 'C40': user_data.get('hypermarket_store_type', {}).get('monthly_transactions', 0), - # Convert boolean to 1/0 for has_digital_screens - 'I40': 1 if user_data.get('hypermarket_store_type', {}).get('has_digital_screens', False) else 0, - 'J40': user_data.get('hypermarket_store_type', {}).get('screen_count', 0), - 'K40': user_data.get('hypermarket_store_type', {}).get('screen_percentage', 0), - # Convert boolean to 1/0 for has_in_store_radio - 'M40': 1 if user_data.get('hypermarket_store_type', {}).get('has_in_store_radio', False) else 0, - 'N40': user_data.get('hypermarket_store_type', {}).get('radio_percentage', 0), - - # On-site channels - 'B43': user_data.get('website_visitors', 0), - 'B44': user_data.get('app_users', 0), - 'B45': user_data.get('loyalty_users', 0), - - # Off-site channels - 'B49': user_data.get('facebook_followers', 0), - 'B50': user_data.get('instagram_followers', 0), - 'B51': user_data.get('google_views', 0), - 'B52': user_data.get('email_subscribers', 0), - 'B53': user_data.get('sms_users', 0), - 'B54': user_data.get('whatsapp_contacts', 0) - } - - # Update the cells - for cell_ref, value in cell_mappings.items(): - try: - # Force the value to be set, even if the cell is protected or has data validation - cell = sheet[cell_ref] - cell.value = value - print(f"Updated {cell_ref} with value: {value}") - except Exception as e: - print(f"Error updating cell {cell_ref}: {e}") - - # Force formula recalculation before saving - print("Forcing formula recalculation...") - wb.calculation.calcMode = 'auto' - wb.calculation.fullCalcOnLoad = True - - # Save the workbook with variables updated - print("Saving workbook with updated variables...") - wb.save(excel_path) - - # Get the calculated years array from config - starting_date = user_data.get('starting_date', '') - duration = user_data.get('duration', 36) - calculated_years = [] - - # Import datetime at the module level to avoid scope issues - import datetime - from dateutil.relativedelta import relativedelta - - # Calculate years array based on starting_date and duration - try: - # Try to parse the date, supporting both dd/mm/yyyy and dd.mm.yyyy formats - if starting_date: - if '/' in str(starting_date): - day, month, year = map(int, str(starting_date).split('/')) - elif '.' in str(starting_date): - day, month, year = map(int, str(starting_date).split('.')) - elif '-' in str(starting_date): - # Handle ISO format (yyyy-mm-dd) - date_parts = str(starting_date).split('-') - if len(date_parts) == 3: - year, month, day = map(int, date_parts) - else: - # Default to current date if format is not recognized - current_date = datetime.datetime.now() - year, month, day = current_date.year, current_date.month, current_date.day - elif isinstance(starting_date, datetime.datetime): - day, month, year = starting_date.day, starting_date.month, starting_date.year - else: - # Default to current date if format is not recognized - current_date = datetime.datetime.now() - year, month, day = current_date.year, current_date.month, current_date.day - - # Create datetime object for starting date - start_date = datetime.datetime(year, month, day) - - # Calculate end date (starting date + duration months - 1 day) - end_date = start_date + relativedelta(months=duration-1) - - # Create a set of years (to avoid duplicates) - years_set = set() - - # Add starting year - years_set.add(start_date.year) - - # Add ending year - years_set.add(end_date.year) - - # If there are years in between, add those too - for y in range(start_date.year + 1, end_date.year): - years_set.add(y) - - # Convert set to sorted list - calculated_years = sorted(list(years_set)) - print(f"Calculated years for sheet visibility: {calculated_years}") - else: - # Default to current year if no starting date - calculated_years = [datetime.datetime.now().year] - except Exception as e: - print(f"Error calculating years for sheet visibility: {e}") - calculated_years = [datetime.datetime.now().year] - - # Hide forecast sheets that aren't in the calculated years array - # No sheet renaming - just check existing sheet names - for sheet_name in wb.sheetnames: - # Check if this is a forecast sheet - # Forecast sheets have names like "2025 – Forecast" - if "Forecast" in sheet_name: - # Extract the year from the sheet name - try: - sheet_year = int(sheet_name.split()[0]) - # Hide the sheet if its year is not in the calculated years - if sheet_year not in calculated_years: - sheet = wb[sheet_name] - sheet.sheet_state = 'hidden' - print(f"Hiding sheet '{sheet_name}' as year {sheet_year} is not in calculated years {calculated_years}") - except Exception as e: - print(f"Error extracting year from sheet name '{sheet_name}': {e}") - - # Ensure formulas are marked for recalculation before final save - print("Ensuring formulas are marked for recalculation...") - wb.calculation.calcMode = 'auto' - wb.calculation.fullCalcOnLoad = True - - # Save the workbook with updated variables and hidden sheets - print("Saving workbook with all updates...") - wb.save(excel_path) - - print(f"Excel file updated successfully: {excel_path}") - return True - - except Exception as e: - print(f"Error updating Excel file: {e}") - return False - - -if __name__ == "__main__": - # For testing purposes - import sys - if len(sys.argv) > 1: - excel_path = sys.argv[1] - update_excel_variables(excel_path) - else: - print("Please provide the path to the Excel file as an argument") diff --git a/update_excel_xlsxwriter.py b/update_excel_xlsxwriter.py index 1852d7c..19367f9 100644 --- a/update_excel_xlsxwriter.py +++ b/update_excel_xlsxwriter.py @@ -35,6 +35,7 @@ def update_excel_variables(excel_path): print(f"Opening Excel file: {excel_path}") wb = openpyxl.load_workbook(excel_path) + # Break any external links to prevent unsafe external sources error print("Breaking any external links...") try: @@ -156,6 +157,7 @@ def update_excel_variables(excel_path): print("Forcing formula recalculation...") wb.calculation.calcMode = 'auto' wb.calculation.fullCalcOnLoad = True + wb.calculation.fullPrecision = True # Save the workbook with variables updated print("Saving workbook with updated variables...") @@ -407,6 +409,7 @@ def update_excel_variables(excel_path): print("Ensuring formulas are marked for recalculation...") wb.calculation.calcMode = 'auto' wb.calculation.fullCalcOnLoad = True + wb.calculation.fullPrecision = True # Save the workbook with updated variables and hidden sheets print("Saving workbook with all updates...")