# Source code for spreadsheet_intelligence.read_data.excel_to_xml

import zipfile
import xml.etree.ElementTree as ET
from typing import Dict, Any, List, Optional
from io import BytesIO


def convert_xlsx_to_xml_in_memory(
    xlsx_path: str,
    target_files: Optional[List[str]] = None,
    out_dir: Optional[str] = None,
) -> Dict[str, ET.Element]:
    """Parse the XML parts of an Excel workbook into memory.

    An ``.xlsx`` file is a ZIP archive. Every archive member whose name
    ends with ``.xml`` (optionally restricted to ``target_files``) is parsed
    and its root element is returned, keyed by the member name.

    Args:
        xlsx_path (str): Path to the target Excel file.
        target_files (Optional[List[str]]): Archive member names to load
            (e.g. ``"xl/workbook.xml"``). Restricting the list keeps memory
            usage down. ``None`` (the default) loads every ``.xml`` member.
        out_dir (Optional[str]): If given, the whole archive is additionally
            extracted into this directory after parsing succeeds.

    Returns:
        Dict[str, ET.Element]: Dictionary with XML file names as keys and
        the corresponding root XML elements as values.

    Raises:
        zipfile.BadZipFile: If ``xlsx_path`` is not a valid ZIP archive.
        xml.etree.ElementTree.ParseError: If a selected member is not
            well-formed XML.
    """
    # Build the lookup set once so the per-member test is O(1) instead of a
    # list scan for every entry in the archive.
    wanted = None if target_files is None else set(target_files)

    xml_contents: Dict[str, ET.Element] = {}
    with zipfile.ZipFile(xlsx_path, "r") as zip_ref:
        for file_info in zip_ref.infolist():
            member_name = file_info.filename
            if not member_name.endswith(".xml"):
                continue
            if wanted is not None and member_name not in wanted:
                continue
            # Parse straight from the decompressed member stream; this avoids
            # holding a second full copy of the member's bytes.
            # NOTE(review): ElementTree is not hardened against maliciously
            # crafted XML -- confirm workbooks come from a trusted source.
            with zip_ref.open(file_info) as member:
                xml_contents[member_name] = ET.parse(member).getroot()

    if out_dir is not None:
        convert_xlsx_to_xml(xlsx_path, out_dir)
    return xml_contents


def convert_xlsx_to_xml(xlsx_path: str, xml_dir: str) -> None:
    """Extract the XML parts of an Excel workbook to a directory.

    An ``.xlsx`` file is a ZIP archive, so the conversion is a plain
    extraction of every archive member (not only ``.xml`` ones) into
    ``xml_dir``, preserving the archive's internal folder layout.

    Args:
        xlsx_path (str): Path to the target Excel file.
        xml_dir (str): Directory to output the extracted files to.

    Raises:
        zipfile.BadZipFile: If ``xlsx_path`` is not a valid ZIP archive.
    """
    with zipfile.ZipFile(xlsx_path, "r") as zip_ref:
        zip_ref.extractall(xml_dir)