Add xlsxwriter-based Excel generation scripts with openpyxl implementation
- Created create_excel_xlsxwriter.py and update_excel_xlsxwriter.py
- Uses openpyxl exclusively to preserve Excel formatting and formulas
- Updated server.js to use new xlsxwriter scripts for form submissions
- Maintains all original functionality while ensuring proper Excel file handling

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
207
fix_excel_corruption.py
Normal file
207
fix_excel_corruption.py
Normal file
@@ -0,0 +1,207 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fix Excel corruption issues caused by SharePoint/OneDrive metadata
|
||||
"""
|
||||
import os
|
||||
import shutil
|
||||
import zipfile
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
import openpyxl
|
||||
|
||||
def remove_sharepoint_metadata(excel_path, output_path=None):
    """
    Remove SharePoint/OneDrive metadata from Excel file that causes corruption warnings

    The workbook is first re-saved through openpyxl with its custom document
    properties cleared. The saved archive is then rewritten entry by entry so
    that ``docProps/custom.xml`` (when present) is replaced by the output of
    create_clean_custom_xml(). Every other archive member is copied unchanged.

    Args:
        excel_path: Path to the Excel file to fix
        output_path: Optional path for the fixed file (if None, overwrites original)

    Returns:
        bool: True if successful, False otherwise. Errors are printed rather
        than raised.
    """
    if not output_path:
        output_path = excel_path

    print(f"Processing: {excel_path}")

    # Both scratch files are tracked so the ``finally`` clause can remove them
    # on every exit path. Without this, a failure part-way through leaves a
    # ``<name>.tmp.xlsx`` next to the output and an orphan in the temp dir.
    temp_file = None
    tmp_path = None

    try:
        # Method 1: Use openpyxl to remove custom properties
        print("Method 1: Using openpyxl to clean custom properties...")
        # NOTE(review): keep_vba=True is passed for every input, including
        # plain .xlsx workbooks - confirm the saved package is still typed as
        # a regular (non-macro) workbook in that case.
        wb = openpyxl.load_workbook(excel_path, keep_vba=True)
        try:
            # Remove custom document properties
            if hasattr(wb, 'custom_doc_props'):
                wb.custom_doc_props.props.clear()
                print(" ✓ Cleared custom document properties")

            # Save to temporary file first
            temp_file = Path(output_path).with_suffix('.tmp.xlsx')
            wb.save(temp_file)
        finally:
            # Close the workbook even when saving fails
            wb.close()

        # Method 2: Direct ZIP manipulation to ensure complete removal
        print("Method 2: Direct ZIP manipulation for complete cleanup...")
        with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as tmp:
            tmp_path = tmp.name

        with zipfile.ZipFile(temp_file, 'r') as zin, \
                zipfile.ZipFile(tmp_path, 'w', compression=zipfile.ZIP_DEFLATED) as zout:
            for item in zin.infolist():
                if item.filename == 'docProps/custom.xml':
                    # Swap in an empty properties part, reusing the original
                    # entry's ZipInfo so its archive metadata is retained
                    zout.writestr(item, create_clean_custom_xml())
                    print(" ✓ Replaced custom.xml with clean version")
                else:
                    # Copy the file as-is
                    zout.writestr(item, zin.read(item.filename))

        # Replace original file with cleaned version
        shutil.move(tmp_path, output_path)
        tmp_path = None  # moved into place; nothing left to delete

        print(f" ✓ Successfully cleaned: {output_path}")
        return True

    except Exception as e:
        print(f" ✗ Error cleaning file: {e}")
        return False

    finally:
        # Best-effort removal of whatever scratch files still exist
        for leftover in (temp_file, tmp_path):
            if leftover is None:
                continue
            try:
                os.remove(leftover)
            except OSError:
                # Already gone or never created; cleanup must not mask the result
                pass
|
||||
|
||||
def create_clean_custom_xml():
    """
    Build the replacement content for docProps/custom.xml

    Returns:
        bytes: UTF-8 encoded XML made of the XML declaration followed by an
        empty ``Properties`` element in the custom-properties namespace.
    """
    declaration = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    opening_tag = '<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/custom-properties">'
    closing_tag = '</Properties>'

    # One part per line, with no property entries between the tags
    document = "\n".join((declaration, opening_tag, closing_tag))
    return document.encode('utf-8')
|
||||
|
||||
def clean_template_file():
    """
    Clean the template file to prevent future corruption

    Each ``*.xlsx`` workbook in the ``template`` directory next to this script
    is copied to ``<name>.backup.xlsx`` and then cleaned in place with
    remove_sharepoint_metadata(). If cleaning fails, the backup is copied back
    over the template.

    Returns:
        bool: False when there is nothing to clean or when at least one
        template could not be cleaned, True otherwise.
    """
    template_dir = Path(__file__).parent / "template"

    # ``*.xlsx`` also matches the ``.backup.xlsx`` copies written below and any
    # ``.tmp.xlsx`` scratch file left beside a workbook. Skipping them stops
    # every run from backing up (and "cleaning") the previous run's backups.
    skipped_suffixes = ('.backup.xlsx', '.tmp.xlsx')
    template_files = sorted(
        path for path in template_dir.glob("*.xlsx")
        if not path.name.endswith(skipped_suffixes)
    )

    if not template_files:
        print("No template files found")
        return False

    all_cleaned = True
    for template_file in template_files:
        print(f"\nCleaning template: {template_file.name}")

        # Create backup
        backup_path = template_file.with_suffix('.backup.xlsx')
        shutil.copy2(template_file, backup_path)
        print(f" ✓ Created backup: {backup_path.name}")

        # Clean the template
        if remove_sharepoint_metadata(str(template_file)):
            print(f" ✓ Template cleaned successfully")
        else:
            all_cleaned = False
            print(f" ✗ Failed to clean template")
            # Restore from backup
            shutil.copy2(backup_path, template_file)
            print(f" ✓ Restored from backup")

    # Report failures instead of unconditionally claiming success
    return all_cleaned
|
||||
|
||||
def clean_all_output_files():
    """
    Clean all Excel files in the output directory

    Runs remove_sharepoint_metadata() in place on every ``*.xlsx`` file in the
    ``output`` directory next to this script.

    Returns:
        bool: False when there is nothing to clean or when at least one file
        could not be cleaned, True otherwise.
    """
    output_dir = Path(__file__).parent / "output"

    # ``<name>.tmp.xlsx`` is the scratch name remove_sharepoint_metadata()
    # writes beside its output; such leftovers are not real workbooks to clean.
    excel_files = sorted(
        path for path in output_dir.glob("*.xlsx")
        if not path.name.endswith('.tmp.xlsx')
    )

    if not excel_files:
        print("No Excel files found in output directory")
        return False

    print(f"Found {len(excel_files)} Excel files to clean")

    all_cleaned = True
    for excel_file in excel_files:
        print(f"\nCleaning: {excel_file.name}")
        if remove_sharepoint_metadata(str(excel_file)):
            print(f" ✓ Cleaned successfully")
        else:
            all_cleaned = False
            print(f" ✗ Failed to clean")

    # Report failures instead of unconditionally claiming success
    return all_cleaned
|
||||
|
||||
def verify_file_is_clean(excel_path):
    """
    Check an Excel file's docProps/custom.xml for SharePoint markers

    The file is opened as a ZIP archive. It counts as clean when it has no
    ``docProps/custom.xml`` entry, or when that entry contains neither
    ``ContentTypeId`` nor ``MediaService``.

    Args:
        excel_path: Path to the Excel file to inspect

    Returns:
        bool: True when the file is clean, False when a marker is found or
        the file cannot be read (the error is printed).
    """
    print(f"\nVerifying: {excel_path}")

    custom_part = 'docProps/custom.xml'
    # Checked in order; the first marker present decides the message
    markers = (
        (b'ContentTypeId', " ✗ Still contains SharePoint ContentTypeId"),
        (b'MediaService', " ✗ Still contains MediaService tags"),
    )

    try:
        with zipfile.ZipFile(excel_path, 'r') as archive:
            if custom_part not in archive.namelist():
                print(" ✓ File is clean - no custom.xml present")
                return True

            payload = archive.read(custom_part)

            # Check for problematic metadata
            for needle, complaint in markers:
                if needle in payload:
                    print(complaint)
                    return False

            print(" ✓ File is clean - no SharePoint metadata found")
            return True

    except Exception as e:
        print(f" ✗ Error verifying file: {e}")
        return False
|
||||
|
||||
def main():
    """Clean the templates, clean the output files, then verify the results"""
    banner = "=" * 60
    rule = "-" * 40

    print(banner)
    print("Excel SharePoint Metadata Cleaner")
    print(banner)

    # Steps 1 and 2 share the same shape: heading, rule, then the action
    cleaning_steps = (
        ("\nStep 1: Cleaning template file...", clean_template_file),
        ("\n\nStep 2: Cleaning output files...", clean_all_output_files),
    )
    for heading, action in cleaning_steps:
        print(heading)
        print(rule)
        action()

    # Step 3: Verify cleaning
    print("\n\nStep 3: Verifying cleaned files...")
    print(rule)

    # Templates: backups keep the original metadata, so they are not verified
    template_dir = Path(__file__).parent / "template"
    for template_file in template_dir.glob("*.xlsx"):
        if template_file.name.endswith('.backup.xlsx'):
            continue
        verify_file_is_clean(str(template_file))

    # Output files
    output_dir = Path(__file__).parent / "output"
    for excel_file in output_dir.glob("*.xlsx"):
        verify_file_is_clean(str(excel_file))

    closing_lines = (
        "\n" + banner,
        "Cleaning complete!",
        "\nNOTE: The Excel files should now open without corruption warnings.",
        "The SharePoint/OneDrive metadata has been removed.",
        "\nFuture files generated from the cleaned template should not have this issue.",
        banner,
    )
    for line in closing_lines:
        print(line)
|
||||
|
||||
# Run the cleaner only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user