- Created create_excel_xlsxwriter.py and update_excel_xlsxwriter.py
- Uses openpyxl exclusively to preserve Excel formatting and formulas
- Updated server.js to use new xlsxwriter scripts for form submissions
- Maintains all original functionality while ensuring proper Excel file handling

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
138 lines
6.0 KiB
Python
138 lines
6.0 KiB
Python
#!/usr/bin/env python3
|
|
import os
|
|
import zipfile
|
|
import xml.etree.ElementTree as ET
|
|
import openpyxl
|
|
from openpyxl.xml.functions import fromstring, tostring
|
|
from pathlib import Path
|
|
|
|
# Namespace used by the elements inside xl/workbook.xml.
_SPREADSHEETML_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"

# File name of the template workbook; "{store_name}" is a literal part of the
# name (this is intentionally not an f-string).
_TEMPLATE_NAME = "Footprints AI for {store_name} - Retail Media Business Case Calculations.xlsx"


def _check_openpyxl_load(file_path):
    """Try to load the workbook with openpyxl and print sheets/custom properties."""
    print("\n1. Testing openpyxl compatibility:")
    try:
        wb = openpyxl.load_workbook(file_path, read_only=False, keep_vba=True, data_only=False)
        try:
            print(" ✓ Successfully loaded with openpyxl")
            print(f" - Sheets: {wb.sheetnames}")

            # Not every workbook object exposes this attribute, hence the guard.
            if hasattr(wb, 'custom_doc_props'):
                print(f" - Custom properties: {wb.custom_doc_props}")
        finally:
            # Close even when inspecting the workbook raised.
            wb.close()
    except Exception as e:
        # Deliberately broad: any loader failure is a diagnostic result here,
        # not a reason to abort the remaining checks.
        print(f" ✗ Failed to load with openpyxl: {e}")


def _analyze_zip_structure(file_path):
    """Inspect the archive members; return the list of custom-XML-like member names."""
    print("\n2. Analyzing ZIP/XML structure:")
    custom_xml_files = []
    try:
        with zipfile.ZipFile(file_path, 'r') as zf:
            names = zf.namelist()

            # A case-insensitive 'custom' match also covers the 'customXml/' parts.
            custom_xml_files = [name for name in names if 'custom' in name.lower()]
            if custom_xml_files:
                print(f" ! Found custom XML files: {custom_xml_files}")

                for custom_file in custom_xml_files:
                    try:
                        content = zf.read(custom_file)
                        print(f"\n Content of {custom_file}:")
                        # Only a 500-byte preview; undecodable bytes are dropped.
                        print(f" {content[:500].decode('utf-8', errors='ignore')}")
                    except Exception as e:
                        print(f" Error reading {custom_file}: {e}")

            table_files = [name for name in names if 'xl/tables/' in name]
            if table_files:
                print(f" - Found table files: {table_files}")
                for table_file in table_files:
                    content = zf.read(table_file)
                    if not content.startswith(b'<?xml'):
                        print(f" ! WARNING: {table_file} missing XML declaration")

            if 'xl/workbook.xml' in names:
                workbook_content = zf.read('xl/workbook.xml')
                try:
                    root = ET.fromstring(workbook_content)
                    ext_refs = root.findall(f'.//{{{_SPREADSHEETML_NS}}}externalReference')
                    if ext_refs:
                        print(f" ! Found {len(ext_refs)} external references")
                except Exception as e:
                    print(f" ! Error parsing workbook.xml: {e}")

    except Exception as e:
        print(f" ✗ Failed to analyze ZIP structure: {e}")

    return custom_xml_files


def _check_sharepoint_metadata(file_path):
    """Scan docProps/custom.xml for SharePoint markers; return True if ContentTypeId is present."""
    print("\n3. Checking for SharePoint/OneDrive metadata:")
    has_content_type_id = False
    try:
        with zipfile.ZipFile(file_path, 'r') as zf:
            if 'docProps/custom.xml' in zf.namelist():
                content = zf.read('docProps/custom.xml')
                if b'ContentTypeId' in content:
                    has_content_type_id = True
                    print(" ! Found SharePoint ContentTypeId in custom.xml")
                    print(" ! This file contains SharePoint metadata that may cause issues")
                if b'MediaService' in content:
                    print(" ! Found MediaService tags in custom.xml")
    except Exception as e:
        print(f" ✗ Error checking metadata: {e}")

    return has_content_type_id


def _compare_with_template(file_path):
    """Print archive members that differ between the generated file and the template."""
    print("\n4. Comparing with template:")
    template_path = Path(file_path).parent.parent / "template" / _TEMPLATE_NAME
    if not template_path.exists():
        print(f" - Template not found at {template_path}")
        return

    try:
        with zipfile.ZipFile(template_path, 'r') as tf, zipfile.ZipFile(file_path, 'r') as gf:
            template_files = set(tf.namelist())
            generated_files = set(gf.namelist())

        # Sorted so that repeated runs print the differences in the same order.
        extra_files = sorted(generated_files - template_files)
        if extra_files:
            print(f" ! Extra files in generated: {extra_files}")

        missing_files = sorted(template_files - generated_files)
        if missing_files:
            print(f" ! Missing files in generated: {missing_files}")
    except Exception as e:
        print(f" ✗ Error comparing with template: {e}")


def diagnose_excel_file(file_path):
    """Diagnose an Excel (.xlsx) file for corruption issues.

    Runs four independent checks and prints the findings to stdout:
    an openpyxl load test, a scan of the ZIP/XML structure, a search for
    SharePoint/OneDrive markers in ``docProps/custom.xml``, and a comparison
    of the archive members against the template workbook expected at
    ``<file's grandparent dir>/template/``.

    A failure in one check is reported and does not stop the others.
    The closing summary reflects what the checks actually found; it only
    attributes the problem to SharePoint metadata when a ``ContentTypeId``
    property was detected.

    Args:
        file_path: Path of the workbook to inspect (str or path-like).

    Returns:
        None. All results are printed. If the file does not exist, an error
        line is printed and no checks are run.
    """
    print(f"Diagnosing: {file_path}")
    print("=" * 50)

    if not os.path.exists(file_path):
        print(f"ERROR: File not found: {file_path}")
        return

    _check_openpyxl_load(file_path)
    custom_xml_files = _analyze_zip_structure(file_path)
    has_content_type_id = _check_sharepoint_metadata(file_path)
    _compare_with_template(file_path)

    print("\n" + "=" * 50)
    print("DIAGNOSIS SUMMARY:")
    if has_content_type_id:
        print("The error 'This file has custom XML elements that are no longer supported'")
        print("is likely caused by SharePoint/OneDrive metadata in the custom.xml file.")
        print("\nThe ContentTypeId property suggests this file was previously stored in")
        print("SharePoint/OneDrive, which added custom metadata that Excel doesn't support")
        print("in certain contexts.")
    elif custom_xml_files:
        print("Custom XML parts were found, but no SharePoint ContentTypeId property was")
        print("detected in docProps/custom.xml. Review the custom XML files listed above.")
    else:
        print("No custom XML parts or SharePoint/OneDrive metadata were detected.")
|
|
|
|
# Script entry point.
# Usage: pass a workbook path as the first command-line argument to diagnose
# that file. With no argument, the known test workbook in ./output is used,
# falling back to the most recently modified .xlsx in that directory.
if __name__ == "__main__":
    import sys

    output_dir = Path(__file__).parent / "output"
    test_file = output_dir / "Footprints AI for Test14 - Retail Media Business Case Calculations 2025-2028.xlsx"

    if len(sys.argv) > 1:
        diagnose_excel_file(sys.argv[1])
    elif test_file.exists():
        diagnose_excel_file(str(test_file))
    else:
        print(f"Test file not found: {test_file}")
        # Try to find any Excel file in output. Names starting with "~$" are
        # the lock files Excel creates while a workbook is open; they are not
        # real workbooks, so they must not be picked as "the latest file".
        excel_files = [
            candidate
            for candidate in output_dir.glob("*.xlsx")
            if not candidate.name.startswith("~$")
        ]
        if excel_files:
            print(f"\nFound {len(excel_files)} Excel files in output directory.")
            print("Diagnosing the most recent one...")
            latest_file = max(excel_files, key=os.path.getmtime)
            diagnose_excel_file(str(latest_file))
        else:
            print(f"No Excel files found in {output_dir}")