- Created create_excel_xlsxwriter.py and update_excel_xlsxwriter.py - Uses openpyxl exclusively to preserve Excel formatting and formulas - Updated server.js to use new xlsxwriter scripts for form submissions - Maintains all original functionality while ensuring proper Excel file handling 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
207 lines
6.8 KiB
Python
207 lines
6.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Fix Excel corruption issues caused by SharePoint/OneDrive metadata
|
|
"""
|
|
import os
|
|
import shutil
|
|
import zipfile
|
|
import xml.etree.ElementTree as ET
|
|
from pathlib import Path
|
|
import tempfile
|
|
import openpyxl
|
|
|
|
def remove_sharepoint_metadata(excel_path, output_path=None):
    """
    Remove SharePoint/OneDrive metadata from an Excel file.

    The workbook is first re-saved through openpyxl with its custom document
    properties cleared.  The re-saved archive is then copied entry by entry,
    with ``docProps/custom.xml`` (when present) replaced by an empty
    properties part, and the result is moved to ``output_path``.

    Args:
        excel_path: Path to the Excel file to fix
        output_path: Optional path for the fixed file (if None or empty,
            the original file is overwritten)

    Returns:
        bool: True if successful, False otherwise.  Errors are printed, not
        raised.
    """
    if not output_path:
        output_path = excel_path

    print(f"Processing: {excel_path}")

    # Both intermediates are tracked here so the ``finally`` clause can remove
    # them on every exit path; a leftover ``*.tmp.xlsx`` next to the output
    # would otherwise be matched by later ``*.xlsx`` globs.
    resaved_path = None
    staged_path = None

    try:
        # Method 1: Use openpyxl to remove custom properties
        print("Method 1: Using openpyxl to clean custom properties...")
        # NOTE(review): keep_vba=True is applied to every input, including
        # plain .xlsx files - confirm against the openpyxl docs that this is
        # the intended setting.
        wb = openpyxl.load_workbook(excel_path, keep_vba=True)
        try:
            # Older openpyxl releases may lack this attribute, hence the guard.
            if hasattr(wb, 'custom_doc_props'):
                wb.custom_doc_props.props.clear()
                print(" ✓ Cleared custom document properties")

            # Save to temporary file first
            resaved_path = Path(output_path).with_suffix('.tmp.xlsx')
            wb.save(resaved_path)
        finally:
            # Close the workbook even when saving fails.
            wb.close()

        # Method 2: Direct ZIP manipulation to ensure complete removal
        print("Method 2: Direct ZIP manipulation for complete cleanup...")
        # Only the name is needed; the handle is closed right away so the
        # path can be reopened as a ZIP archive.
        with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as tmp:
            staged_path = tmp.name

        with zipfile.ZipFile(resaved_path, 'r') as zin, \
                zipfile.ZipFile(staged_path, 'w',
                                compression=zipfile.ZIP_DEFLATED) as zout:
            for item in zin.infolist():
                if item.filename == 'docProps/custom.xml':
                    # Swap in an empty properties part.
                    zout.writestr(item, create_clean_custom_xml())
                    print(" ✓ Replaced custom.xml with clean version")
                else:
                    # Copy the entry as-is
                    zout.writestr(item, zin.read(item.filename))

        # Replace original file with cleaned version
        shutil.move(staged_path, output_path)

        print(f" ✓ Successfully cleaned: {output_path}")
        return True

    except Exception as e:
        # Deliberate best-effort boundary: callers rely on a bool result.
        print(f" ✗ Error cleaning file: {e}")
        return False

    finally:
        # Best-effort removal of intermediates.  After a successful move the
        # staged file no longer exists, which is why a failed removal is
        # ignored.
        for leftover in (resaved_path, staged_path):
            if leftover is None:
                continue
            try:
                os.remove(leftover)
            except OSError:
                pass
|
|
|
|
def create_clean_custom_xml():
    """
    Build an empty custom-properties part for an Excel package.

    Returns:
        bytes: UTF-8 encoded XML made of the XML declaration followed by an
        empty ``Properties`` root element.
    """
    # One tuple entry per line of the document: declaration, opening tag,
    # closing tag.
    document_lines = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
        '<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/custom-properties">',
        '</Properties>',
    )
    return "\n".join(document_lines).encode('utf-8')
|
|
|
|
def clean_template_file():
    """
    Clean every template workbook to prevent future corruption.

    Looks for ``*.xlsx`` files in the ``template`` directory next to this
    script.  Each one is copied to a sibling ``<name>.backup.xlsx`` and then
    cleaned in place; if cleaning fails the backup is copied back over the
    template.

    Files that already end in ``.backup.xlsx`` are skipped.  They are backups
    written by an earlier run, and treating them as templates would produce
    backups of backups.

    Returns:
        bool: False when no template files are found, True otherwise (even if
        an individual template could not be cleaned).
    """
    template_dir = Path(__file__).parent / "template"
    # Materialise the list before any backup is written so files created in
    # the loop below cannot show up in this run.
    template_files = [
        candidate
        for candidate in template_dir.glob("*.xlsx")
        if not candidate.name.endswith('.backup.xlsx')
    ]

    if not template_files:
        print("No template files found")
        return False

    for template_file in template_files:
        print(f"\nCleaning template: {template_file.name}")

        # Create backup
        backup_path = template_file.with_suffix('.backup.xlsx')
        shutil.copy2(template_file, backup_path)
        print(f" ✓ Created backup: {backup_path.name}")

        # Clean the template
        if remove_sharepoint_metadata(str(template_file)):
            print(" ✓ Template cleaned successfully")
        else:
            print(" ✗ Failed to clean template")
            # Restore from backup
            shutil.copy2(backup_path, template_file)
            print(" ✓ Restored from backup")

    return True
|
|
|
|
def clean_all_output_files():
    """
    Run the metadata cleaner over each workbook in the output directory.

    The directory is ``output`` next to this script; only ``*.xlsx`` entries
    directly inside it are considered.

    Returns:
        bool: False when the directory holds no matching files, True once all
        of them have been attempted (individual failures are only printed).
    """
    workbooks = [*(Path(__file__).parent / "output").glob("*.xlsx")]

    if len(workbooks) == 0:
        print("No Excel files found in output directory")
        return False

    print(f"Found {len(workbooks)} Excel files to clean")

    for workbook in workbooks:
        print(f"\nCleaning: {workbook.name}")
        was_cleaned = remove_sharepoint_metadata(str(workbook))
        # Report the per-file outcome; a failure does not stop the loop.
        print(" ✓ Cleaned successfully" if was_cleaned else " ✗ Failed to clean")

    return True
|
|
|
|
def verify_file_is_clean(excel_path):
    """
    Check an Excel file for leftover SharePoint metadata.

    Opens the file as a ZIP archive and inspects ``docProps/custom.xml``, if
    that entry exists, for the byte strings ``ContentTypeId`` and
    ``MediaService``.

    Args:
        excel_path: Path to the Excel file to inspect

    Returns:
        bool: True when the entry is absent or contains neither marker;
        False when a marker is found or the file cannot be read.
    """
    print(f"\nVerifying: {excel_path}")

    custom_part = 'docProps/custom.xml'
    # Checked in this order; the first marker found decides the message.
    flagged_markers = (
        (b'ContentTypeId', " ✗ Still contains SharePoint ContentTypeId"),
        (b'MediaService', " ✗ Still contains MediaService tags"),
    )

    try:
        with zipfile.ZipFile(excel_path, 'r') as archive:
            # No custom properties part at all means nothing to flag.
            if custom_part not in archive.namelist():
                print(" ✓ File is clean - no custom.xml present")
                return True

            payload = archive.read(custom_part)

            for marker, complaint in flagged_markers:
                if marker in payload:
                    print(complaint)
                    return False

            print(" ✓ File is clean - no SharePoint metadata found")
            return True

    except Exception as e:
        print(f" ✗ Error verifying file: {e}")
        return False
|
|
|
|
def main():
    """Clean the template and output workbooks, then verify the results."""
    banner = "=" * 60
    divider = "-" * 40
    base_dir = Path(__file__).parent

    print(banner)
    print("Excel SharePoint Metadata Cleaner")
    print(banner)

    # Steps 1 and 2 share the same shape: heading, divider, then the cleaner.
    cleaning_steps = (
        ("\nStep 1: Cleaning template file...", clean_template_file),
        ("\n\nStep 2: Cleaning output files...", clean_all_output_files),
    )
    for heading, run_step in cleaning_steps:
        print(heading)
        print(divider)
        run_step()

    # Step 3: Verify cleaning
    print("\n\nStep 3: Verifying cleaned files...")
    print(divider)

    # Templates first; backup copies are not verified.
    for template_file in (base_dir / "template").glob("*.xlsx"):
        if template_file.name.endswith('.backup.xlsx'):
            continue
        verify_file_is_clean(str(template_file))

    # Then every workbook in the output directory.
    for excel_file in (base_dir / "output").glob("*.xlsx"):
        verify_file_is_clean(str(excel_file))

    closing_lines = (
        "\n" + banner,
        "Cleaning complete!",
        "\nNOTE: The Excel files should now open without corruption warnings.",
        "The SharePoint/OneDrive metadata has been removed.",
        "\nFuture files generated from the cleaned template should not have this issue.",
        banner,
    )
    for line in closing_lines:
        print(line)
|
|
|
|
# Run the cleaner only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()