- Created create_excel_xlsxwriter.py and update_excel_xlsxwriter.py
- Uses openpyxl exclusively to preserve Excel formatting and formulas
- Updated server.js to use the new xlsxwriter scripts for form submissions
- Maintains all original functionality while ensuring proper Excel file handling

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
160 lines
4.9 KiB
Python
Executable File
160 lines
4.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Utility to clean Excel files from SharePoint/OneDrive metadata that causes
|
|
cross-platform compatibility issues.
|
|
"""
|
|
import os
|
|
import sys
|
|
import openpyxl
|
|
from pathlib import Path
|
|
import tempfile
|
|
import shutil
|
|
|
|
|
|
def _clear_custom_metadata(workbook, announce=False):
    """Empty custom document properties and custom XML on *workbook*, if present."""
    if hasattr(workbook, 'custom_doc_props') and workbook.custom_doc_props:
        workbook.custom_doc_props.props.clear()
        if announce:
            print(" ✓ Cleared custom document properties")

    if hasattr(workbook, 'custom_xml'):
        workbook.custom_xml = []
        if announce:
            print(" ✓ Cleared custom XML")


def clean_excel_file(input_path, output_path=None):
    """
    Clean an Excel file from SharePoint/OneDrive metadata.

    The workbook is loaded with openpyxl (formulas kept, VBA dropped), its
    custom document properties, custom XML and descriptive core properties
    are reset, and it is then saved twice: once to a temporary file and,
    after re-loading that file, to ``output_path``.

    Args:
        input_path (str): Path to the input Excel file
        output_path (str): Path for the cleaned file (optional). When
            omitted, the file is written next to the input with a
            ``_clean`` suffix added to the stem.

    Returns:
        bool: True if successful, False otherwise (missing input file or
        any exception raised while loading, cleaning or saving).
    """
    if not os.path.exists(input_path):
        print(f"Error: File not found: {input_path}")
        return False

    if output_path is None:
        # Create cleaned version with _clean suffix
        path = Path(input_path)
        output_path = path.parent / f"{path.stem}_clean{path.suffix}"

    try:
        print(f"Loading Excel file: {input_path}")

        # Load workbook without VBA to avoid macro issues
        wb = openpyxl.load_workbook(input_path, data_only=False, keep_vba=False)

        # Clean metadata
        print("Cleaning metadata...")
        _clear_custom_metadata(wb, announce=True)

        # Clean core properties
        if wb.properties:
            # Keep only essential properties
            wb.properties.creator = "Excel Generator"
            wb.properties.lastModifiedBy = "Excel Generator"
            wb.properties.keywords = ""
            wb.properties.category = ""
            wb.properties.contentStatus = ""
            wb.properties.subject = ""
            wb.properties.description = ""
            print(" ✓ Cleaned core properties")

        # Create temporary file for double-save cleaning. delete=False so the
        # handle can be closed before openpyxl writes to the same path.
        with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as tmp:
            tmp_path = tmp.name

        try:
            print("Saving cleaned file...")

            # First save to temp file
            wb.save(tmp_path)
            wb.close()

            # Re-open and save again to ensure clean structure
            print("Re-processing for maximum cleanliness...")
            wb_clean = openpyxl.load_workbook(tmp_path, data_only=False)

            # Additional cleaning on the re-opened file
            _clear_custom_metadata(wb_clean)

            # Save final clean version
            wb_clean.save(output_path)
            wb_clean.close()
        finally:
            # Always remove the temporary file, including when a save or
            # reload above fails; otherwise it is left behind in the temp dir.
            os.unlink(tmp_path)

        print(f"✓ Cleaned Excel file saved to: {output_path}")

        # Compare file sizes
        input_size = os.path.getsize(input_path)
        output_size = os.path.getsize(output_path)

        print(f"File size: {input_size:,} → {output_size:,} bytes")
        if input_size > output_size:
            print(f"Reduced by {input_size - output_size:,} bytes ({((input_size - output_size) / input_size * 100):.1f}%)")

        return True

    except Exception as e:
        # Top-level boundary for the CLI: report the failure and signal it
        # through the return value instead of propagating.
        print(f"Error cleaning Excel file: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
|
|
|
|
def clean_template():
    """
    Clean the first known template workbook found in ``template/``.

    The ``template`` directory is resolved relative to this script. The
    candidate file names are tried in order; the first one that exists is
    cleaned into ``cleaned_template.xlsx`` in the same directory.

    Returns:
        bool: Result of ``clean_excel_file`` for the template, or False
        when none of the candidate files exists.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    template_dir = os.path.join(base_dir, 'template')

    # Known template file names, in lookup order
    candidates = (
        'Footprints AI for {store_name} - Retail Media Business Case Calculations.xlsx',
        'Footprints AI for store_name - Retail Media Business Case Calculations.xlsx',
    )

    found_name = next(
        (
            name
            for name in candidates
            if os.path.exists(os.path.join(template_dir, name))
        ),
        None,
    )

    if found_name is None:
        print(f"Error: No template found in {template_dir}")
        return False

    print(f"Found template: {found_name}")

    # Write the cleaned copy alongside the original template
    source_path = os.path.join(template_dir, found_name)
    target_path = os.path.join(template_dir, "cleaned_template.xlsx")
    return clean_excel_file(source_path, target_path)
|
|
|
|
|
|
if __name__ == "__main__":
    cli_args = sys.argv[1:]

    if cli_args:
        # Clean the file named on the command line; the second argument,
        # when given, is the output path.
        source_file = cli_args[0]
        target_file = cli_args[1] if len(cli_args) > 1 else None
        succeeded = clean_excel_file(source_file, target_file)
        success_message = "✓ File cleaned successfully"
        failure_message = "✗ Failed to clean file"
    else:
        # No arguments: clean the bundled template instead
        succeeded = clean_template()
        success_message = "✓ Template cleaned successfully"
        failure_message = "✗ Failed to clean template"

    if not succeeded:
        print(failure_message)
        sys.exit(1)

    print(success_message)