Implemented direct XML modification approach to replace {store_name} in all formulas including ArrayFormulas
This commit is contained in:
79
direct_xml_update.py
Normal file
79
direct_xml_update.py
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import openpyxl
|
||||||
|
from zipfile import ZipFile, ZIP_DEFLATED
|
||||||
|
|
||||||
|
def update_excel_with_direct_xml(excel_path, store_name):
    """
    Replace every {store_name} placeholder in an Excel file by editing its XML.

    The workbook is handled as a plain ZIP archive: every member whose name
    ends in ``.xml`` is decoded as UTF-8, the placeholder is substituted
    textually, and all members (changed or not) are written to a new archive
    placed next to the original as ``<name>_modified<ext>``. The original
    file is only read, never modified.

    Args:
        excel_path: Path to the Excel file (relative or absolute)
        store_name: The store name to replace {store_name} with. It is
            XML-escaped before insertion so that characters such as ``&``,
            ``<`` or ``"`` cannot produce a malformed workbook.

    Returns:
        str: Absolute path to the modified Excel file, or None if anything
        went wrong (the error and its traceback are printed).
    """
    # Function-scope imports: these names are not part of the module's
    # top-level import block.
    import traceback
    from xml.sax.saxutils import escape

    placeholder = '{store_name}'
    modified_file = None
    output_started = False

    try:
        print(f"Using direct XML modification to replace '{{store_name}}' with '{store_name}'...")

        # Derive the output path from the absolute source path. Joining the
        # source directory with the raw (possibly relative) excel_path would
        # duplicate any directory component, and str.replace('.xlsx', ...)
        # would silently do nothing for other extensions.
        source_file = os.path.abspath(excel_path)
        base, ext = os.path.splitext(source_file)
        modified_file = f"{base}_modified{ext}"

        # The placeholder can sit in element text (formulas, shared strings)
        # as well as in attribute values (e.g. sheet names), so escape the
        # double quote in addition to &, < and >.
        # NOTE(review): an apostrophe in store_name is not doubled for quoted
        # sheet references inside formulas; confirm whether store names can
        # contain one.
        replacement = escape(store_name, {'"': '&quot;'})

        # Count of replacements
        total_replacements = 0

        # Read straight from the source archive; no intermediate temp copy is
        # needed because the output is always a different file.
        with ZipFile(source_file, 'r') as zip_in:
            output_started = True
            with ZipFile(modified_file, 'w', ZIP_DEFLATED) as zip_out:
                for item in zip_in.infolist():
                    content = zip_in.read(item)

                    # Only modify XML files that might contain formulas or text
                    if item.filename.endswith('.xml'):
                        try:
                            text_content = content.decode('utf-8')
                        except UnicodeDecodeError:
                            # Not UTF-8 text: copy the member through untouched
                            text_content = None

                        if text_content is not None:
                            occurrences = text_content.count(placeholder)
                            if occurrences:
                                total_replacements += occurrences
                                content = text_content.replace(
                                    placeholder, replacement
                                ).encode('utf-8')
                                print(f"Replaced {occurrences} instances of '{{store_name}}' in {item.filename}")

                    # Passing the ZipInfo keeps the member's original name,
                    # timestamp and compression settings.
                    zip_out.writestr(item, content)

        print(f"Total replacements: {total_replacements}")
        print(f"Modified Excel file saved as: {modified_file}")

        return modified_file

    except Exception as e:
        # Top-level boundary: callers rely on a None return instead of an
        # exception, so report the failure and fall through.
        print(f"Error updating Excel file: {e}")
        traceback.print_exc()

        # Do not leave a half-written archive behind; only remove the file if
        # this call actually started writing it.
        if output_started and modified_file and os.path.exists(modified_file):
            try:
                os.remove(modified_file)
            except OSError:
                pass

        return None
|
||||||
137
update_excel.py
137
update_excel.py
@@ -4,6 +4,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
import openpyxl
|
import openpyxl
|
||||||
from openpyxl.utils import get_column_letter
|
from openpyxl.utils import get_column_letter
|
||||||
|
from direct_xml_update import update_excel_with_direct_xml
|
||||||
|
|
||||||
def update_excel_variables(excel_path):
|
def update_excel_variables(excel_path):
|
||||||
"""
|
"""
|
||||||
@@ -124,9 +125,20 @@ def update_excel_variables(excel_path):
|
|||||||
sheet_name_mapping[sheet_name] = new_sheet_name
|
sheet_name_mapping[sheet_name] = new_sheet_name
|
||||||
print(f"Renamed sheet '{sheet_name}' to '{new_sheet_name}'")
|
print(f"Renamed sheet '{sheet_name}' to '{new_sheet_name}'")
|
||||||
|
|
||||||
# Update formulas in the Graphics sheet to reference the new sheet names
|
# Use direct XML modification to replace all instances of {store_name}
|
||||||
if sheet_name_mapping and 'Graphics' in wb.sheetnames:
|
print("Using direct XML modification to update all formulas...")
|
||||||
update_formula_references(wb, sheet_name_mapping)
|
modified_file = update_excel_with_direct_xml(excel_path, store_name)
|
||||||
|
|
||||||
|
if modified_file and os.path.exists(modified_file):
|
||||||
|
# Use the modified file instead of the original
|
||||||
|
print(f"Using modified file: {modified_file}")
|
||||||
|
# Copy the modified file back to the original location
|
||||||
|
import shutil
|
||||||
|
shutil.copy2(modified_file, excel_path)
|
||||||
|
# Remove the modified file
|
||||||
|
os.remove(modified_file)
|
||||||
|
# Reload the workbook to get the changes
|
||||||
|
wb = openpyxl.load_workbook(excel_path)
|
||||||
|
|
||||||
# Save the workbook
|
# Save the workbook
|
||||||
wb.save(excel_path)
|
wb.save(excel_path)
|
||||||
@@ -239,14 +251,131 @@ def update_formula_references(workbook, sheet_name_mapping):
|
|||||||
# Print all cells in the Graphics sheet that have formulas
|
# Print all cells in the Graphics sheet that have formulas
|
||||||
print("\nDiagnostic: Checking all cells with formulas in Graphics sheet:")
|
print("\nDiagnostic: Checking all cells with formulas in Graphics sheet:")
|
||||||
formula_cells_diagnostic = []
|
formula_cells_diagnostic = []
|
||||||
|
|
||||||
|
# Special check for rows 25-27
|
||||||
|
print("\nDiagnostic: Checking cells in rows 25-27:")
|
||||||
|
|
||||||
|
# Define columns to check, focusing on G through AG
|
||||||
|
columns_to_check = ['C', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
||||||
|
'AA', 'AB', 'AC', 'AD', 'AE', 'AF', 'AG']
|
||||||
|
|
||||||
|
# Cells with ArrayFormula objects that need special handling
|
||||||
|
array_formula_cells = []
|
||||||
|
|
||||||
|
# Check each cell in rows 25-27 for all specified columns
|
||||||
|
for row_num in range(25, 28): # rows 25, 26, 27
|
||||||
|
for col in columns_to_check:
|
||||||
|
cell_coord = f"{col}{row_num}"
|
||||||
|
try:
|
||||||
|
cell = graphics_sheet[cell_coord]
|
||||||
|
# Print cell information regardless of whether it has a formula
|
||||||
|
if cell.value is not None:
|
||||||
|
value_type = cell.data_type
|
||||||
|
value_preview = str(cell.value)[:50] if cell.value else "None"
|
||||||
|
print(f"Row {row_num}, Cell {cell_coord}: Type={value_type}, Value={value_preview}...")
|
||||||
|
|
||||||
|
# Check if it's a formula that references our sheets
|
||||||
|
if cell.data_type == 'f':
|
||||||
|
if isinstance(cell.value, str) and any(old_name in cell.value for old_name in sheet_name_mapping.keys()):
|
||||||
|
formula_cells_diagnostic.append(f"{cell_coord}: {cell.value[:50]}...")
|
||||||
|
# Add this cell to our special handling
|
||||||
|
if cell_coord not in special_cells:
|
||||||
|
special_cells.append(cell_coord)
|
||||||
|
# Check for ArrayFormula objects
|
||||||
|
elif str(cell.value).startswith("<openpyxl.worksheet.formula.ArrayFormula"):
|
||||||
|
# These are the cells we need to handle specially
|
||||||
|
array_formula_cells.append(cell_coord)
|
||||||
|
if cell_coord not in special_cells:
|
||||||
|
special_cells.append(cell_coord)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error accessing cell {cell_coord}: {e}")
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Special handling for ArrayFormula cells
|
||||||
|
if array_formula_cells:
|
||||||
|
print(f"\nFound {len(array_formula_cells)} cells with ArrayFormula objects that need special handling:")
|
||||||
|
for cell_coord in array_formula_cells:
|
||||||
|
print(f" {cell_coord}")
|
||||||
|
|
||||||
|
# Handle array formula cells by directly modifying the worksheet's _cells dictionary
|
||||||
|
# This is a workaround since openpyxl doesn't provide a direct API for modifying ArrayFormula objects
|
||||||
|
print("\nAttempting to update array formulas by directly accessing the worksheet's XML...")
|
||||||
|
|
||||||
|
# Get the worksheet's underlying XML
|
||||||
|
try:
|
||||||
|
# First, save the workbook to a temporary file
|
||||||
|
temp_dir = os.path.dirname(excel_path) if excel_path else os.getcwd()
|
||||||
|
temp_file = os.path.join(temp_dir, "_temp_for_xml_edit.xlsx")
|
||||||
|
wb.save(temp_file)
|
||||||
|
|
||||||
|
# Then reload it with a different library that can modify the XML directly
|
||||||
|
from zipfile import ZipFile
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
|
# Define XML namespaces
|
||||||
|
namespaces = {
|
||||||
|
'main': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extract and modify the sheet XML
|
||||||
|
with ZipFile(temp_file, 'r') as zip_ref:
|
||||||
|
# Find the sheet XML file for the Graphics sheet
|
||||||
|
sheet_id = None
|
||||||
|
for i, sheet in enumerate(wb.worksheets):
|
||||||
|
if sheet.title == 'Graphics':
|
||||||
|
sheet_id = i + 1
|
||||||
|
break
|
||||||
|
|
||||||
|
if sheet_id:
|
||||||
|
sheet_xml_path = f'xl/worksheets/sheet{sheet_id}.xml'
|
||||||
|
sheet_xml = zip_ref.read(sheet_xml_path).decode('utf-8')
|
||||||
|
|
||||||
|
# Parse the XML
|
||||||
|
root = ET.fromstring(sheet_xml)
|
||||||
|
|
||||||
|
# Find all formula cells and replace text
|
||||||
|
formula_count = 0
|
||||||
|
for formula_elem in root.findall('.//main:f', namespaces):
|
||||||
|
formula_text = formula_elem.text
|
||||||
|
if formula_text and any(old_name in formula_text for old_name in sheet_name_mapping.keys()):
|
||||||
|
# Replace the sheet names in the formula text
|
||||||
|
for old_name, new_name in sheet_name_mapping.items():
|
||||||
|
formula_text = formula_text.replace(old_name, new_name)
|
||||||
|
formula_elem.text = formula_text
|
||||||
|
formula_count += 1
|
||||||
|
|
||||||
|
print(f"Updated {formula_count} formulas in the XML")
|
||||||
|
|
||||||
|
# Write the modified XML back
|
||||||
|
# Note: This would require rebuilding the XLSX file, which is complex
|
||||||
|
# For now, we'll just note that we need to handle these cells
|
||||||
|
print("XML modification would be required to update array formulas")
|
||||||
|
|
||||||
|
# Clean up the temporary file
|
||||||
|
if os.path.exists(temp_file):
|
||||||
|
os.remove(temp_file)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error attempting to modify XML: {e}")
|
||||||
|
|
||||||
|
print("\nWarning: Array formulas in cells {', '.join(array_formula_cells)} may still reference old sheet names.")
|
||||||
|
print("These will need to be updated manually in Excel or with a more specialized approach.")
|
||||||
|
|
||||||
|
# Add a warning message
|
||||||
|
print("\nIMPORTANT: The Excel file may still show recovery errors due to array formulas.")
|
||||||
|
print("To fix this, open the file in Excel, accept the recovery, and save it.")
|
||||||
|
print("Or consider using a different library like xlwings that can directly manipulate array formulas.")
|
||||||
|
|
||||||
|
# Check all cells for formulas referencing the renamed sheets
|
||||||
for row in graphics_sheet.iter_rows():
|
for row in graphics_sheet.iter_rows():
|
||||||
for cell in row:
|
for cell in row:
|
||||||
if cell.data_type == 'f' and cell.value and isinstance(cell.value, str):
|
if cell.data_type == 'f' and cell.value and isinstance(cell.value, str):
|
||||||
formula = cell.value
|
formula = cell.value
|
||||||
if any(old_name in formula for old_name in sheet_name_mapping.keys()):
|
if any(old_name in formula for old_name in sheet_name_mapping.keys()):
|
||||||
|
if not any(cell.coordinate == f_cell.split(':')[0] for f_cell in formula_cells_diagnostic):
|
||||||
formula_cells_diagnostic.append(f"{cell.coordinate}: {formula[:50]}...")
|
formula_cells_diagnostic.append(f"{cell.coordinate}: {formula[:50]}...")
|
||||||
|
|
||||||
print(f"Found {len(formula_cells_diagnostic)} cells with formulas referencing forecast sheets:")
|
print(f"\nFound {len(formula_cells_diagnostic)} cells with formulas referencing forecast sheets:")
|
||||||
for cell_info in formula_cells_diagnostic:
|
for cell_info in formula_cells_diagnostic:
|
||||||
print(f" {cell_info}")
|
print(f" {cell_info}")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user