#!/usr/bin/env python3 import os import zipfile import xml.etree.ElementTree as ET import openpyxl from openpyxl.xml.functions import fromstring, tostring from pathlib import Path def diagnose_excel_file(file_path): """Diagnose Excel file for corruption issues""" print(f"Diagnosing: {file_path}") print("=" * 50) # 1. Check if file exists if not os.path.exists(file_path): print(f"ERROR: File not found: {file_path}") return # 2. Try to open with openpyxl print("\n1. Testing openpyxl compatibility:") try: wb = openpyxl.load_workbook(file_path, read_only=False, keep_vba=True, data_only=False) print(f" ✓ Successfully loaded with openpyxl") print(f" - Sheets: {wb.sheetnames}") # Check for custom properties if hasattr(wb, 'custom_doc_props'): print(f" - Custom properties: {wb.custom_doc_props}") wb.close() except Exception as e: print(f" ✗ Failed to load with openpyxl: {e}") # 3. Analyze ZIP structure print("\n2. Analyzing ZIP/XML structure:") try: with zipfile.ZipFile(file_path, 'r') as zf: # Check for custom XML custom_xml_files = [f for f in zf.namelist() if 'customXml' in f or 'custom' in f.lower()] if custom_xml_files: print(f" ! Found custom XML files: {custom_xml_files}") for custom_file in custom_xml_files: try: content = zf.read(custom_file) print(f"\n Content of {custom_file}:") print(f" {content[:500].decode('utf-8', errors='ignore')}") except Exception as e: print(f" Error reading {custom_file}: {e}") # Check for tables table_files = [f for f in zf.namelist() if 'xl/tables/' in f] if table_files: print(f" - Found table files: {table_files}") for table_file in table_files: content = zf.read(table_file) # Check if XML declaration is present if not content.startswith(b'