Add xlsxwriter-based Excel generation scripts with openpyxl implementation
- Created create_excel_xlsxwriter.py and update_excel_xlsxwriter.py
- Uses openpyxl exclusively to preserve Excel formatting and formulas
- Updated server.js to use new xlsxwriter scripts for form submissions
- Maintains all original functionality while ensuring proper Excel file handling

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
160
clean_excel_template.py
Executable file
160
clean_excel_template.py
Executable file
@@ -0,0 +1,160 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Utility to clean Excel files from SharePoint/OneDrive metadata that causes
|
||||
cross-platform compatibility issues.
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import openpyxl
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
import shutil
|
||||
|
||||
|
||||
def _strip_custom_metadata(workbook):
    """Clear custom document properties and custom XML on *workbook*.

    Both attributes are probed with ``hasattr`` because they may not exist
    on the workbook object provided by the installed openpyxl version.

    Returns:
        tuple[bool, bool]: ``(props_cleared, xml_cleared)`` flags telling the
        caller which of the two cleanups was actually applied.
    """
    props_cleared = False
    if hasattr(workbook, 'custom_doc_props') and workbook.custom_doc_props:
        workbook.custom_doc_props.props.clear()
        props_cleared = True

    xml_cleared = hasattr(workbook, 'custom_xml')
    if xml_cleared:
        workbook.custom_xml = []

    return props_cleared, xml_cleared


def clean_excel_file(input_path, output_path=None):
    """
    Clean an Excel file from SharePoint/OneDrive metadata.

    The workbook is loaded, its custom document properties, custom XML and
    descriptive core properties are reset, and it is saved to a temporary
    file. That temporary file is then re-opened, stripped once more and
    saved to ``output_path``. The temporary file is always removed, even
    when one of the load/save steps fails.

    Args:
        input_path (str): Path to the input Excel file
        output_path (str): Path for the cleaned file (optional). When
            omitted, the result is written next to the input file as
            ``<stem>_clean<suffix>``.

    Returns:
        bool: True if successful, False otherwise. Errors are printed
        (with a traceback) rather than raised.
    """
    if not os.path.exists(input_path):
        print(f"Error: File not found: {input_path}")
        return False

    if output_path is None:
        # Create cleaned version with _clean suffix
        path = Path(input_path)
        output_path = path.parent / f"{path.stem}_clean{path.suffix}"

    # Tracked outside the try block so the finally clause can always
    # remove the intermediate file, including on failure.
    tmp_path = None
    try:
        print(f"Loading Excel file: {input_path}")

        # Load workbook without VBA to avoid macro issues
        wb = openpyxl.load_workbook(input_path, data_only=False, keep_vba=False)
        try:
            # Clean metadata
            print("Cleaning metadata...")

            props_cleared, xml_cleared = _strip_custom_metadata(wb)
            if props_cleared:
                print("  ✓ Cleared custom document properties")
            if xml_cleared:
                print("  ✓ Cleared custom XML")

            # Clean core properties
            if wb.properties:
                # Keep only essential properties
                wb.properties.creator = "Excel Generator"
                wb.properties.lastModifiedBy = "Excel Generator"
                wb.properties.keywords = ""
                wb.properties.category = ""
                wb.properties.contentStatus = ""
                wb.properties.subject = ""
                wb.properties.description = ""
                print("  ✓ Cleaned core properties")

            # Create temporary file for double-save cleaning. delete=False
            # because the file is re-opened by path after this handle closes.
            with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as tmp:
                tmp_path = tmp.name

            print("Saving cleaned file...")

            # First save to temp file
            wb.save(tmp_path)
        finally:
            wb.close()

        # Re-open and save again to ensure clean structure
        print("Re-processing for maximum cleanliness...")
        wb_clean = openpyxl.load_workbook(tmp_path, data_only=False)
        try:
            # Additional cleaning on the re-opened file
            _strip_custom_metadata(wb_clean)

            # Save final clean version
            wb_clean.save(output_path)
        finally:
            wb_clean.close()

        print(f"✓ Cleaned Excel file saved to: {output_path}")

        # Compare file sizes
        input_size = os.path.getsize(input_path)
        output_size = os.path.getsize(output_path)

        print(f"File size: {input_size:,} → {output_size:,} bytes")
        if input_size > output_size:
            saved = input_size - output_size
            print(f"Reduced by {saved:,} bytes ({(saved / input_size * 100):.1f}%)")

        return True

    except Exception as e:
        print(f"Error cleaning Excel file: {e}")
        import traceback
        traceback.print_exc()
        return False

    finally:
        # Clean up temporary file on every exit path
        if tmp_path is not None and os.path.exists(tmp_path):
            try:
                os.unlink(tmp_path)
            except OSError as cleanup_error:
                # Best effort: a leftover temp file must not change the result.
                print(f"Warning: could not remove temporary file {tmp_path}: {cleanup_error}")
|
||||
|
||||
|
||||
def clean_template():
    """
    Clean the template workbook found in the ``template`` directory.

    The ``template`` directory is resolved relative to this script. The
    first known template filename that exists there is cleaned and written
    to ``cleaned_template.xlsx`` in the same directory.

    Returns:
        bool: False when no known template exists; otherwise the result of
        ``clean_excel_file`` for the template that was found.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    template_dir = os.path.join(here, 'template')

    # Known template filenames, tried in order
    candidate_names = (
        'Footprints AI for {store_name} - Retail Media Business Case Calculations.xlsx',
        'Footprints AI for store_name - Retail Media Business Case Calculations.xlsx',
    )

    # Lazily pick the first candidate that exists on disk
    found_name = next(
        (
            name
            for name in candidate_names
            if os.path.exists(os.path.join(template_dir, name))
        ),
        None,
    )

    if found_name is None:
        print(f"Error: No template found in {template_dir}")
        return False

    print(f"Found template: {found_name}")

    # Create cleaned template
    source_path = os.path.join(template_dir, found_name)
    destination_path = os.path.join(template_dir, "cleaned_template.xlsx")
    return clean_excel_file(source_path, destination_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Usage: script [input_file [output_file]]
    # With no arguments, the bundled template is cleaned instead.
    cli_args = sys.argv[1:]

    if cli_args:
        # Clean specific file
        source_file = cli_args[0]
        target_file = cli_args[1] if len(cli_args) > 1 else None

        if not clean_excel_file(source_file, target_file):
            print("✗ Failed to clean file")
            sys.exit(1)
        print("✓ File cleaned successfully")
    elif clean_template():
        # Clean template
        print("✓ Template cleaned successfully")
    else:
        print("✗ Failed to clean template")
        sys.exit(1)
|
||||
Reference in New Issue
Block a user