# Source code for spreadsheet_intelligence.parsers.drawing.drawing_xml_parser
import logging
from typing import List, Tuple
import xml.etree.ElementTree as ET
from spreadsheet_intelligence.parsers.abstract.base_xml_parser import BaseXMLParser
from spreadsheet_intelligence.utils.helpers import get_required_element
from .drawing_element_parser.connector_parser import ConnectorParser
from .drawing_element_parser.shape_parser import ShapeParser
from spreadsheet_intelligence.models.raw.drawing.drawing_models import (
ConnectorAnchorRaw,
)
from spreadsheet_intelligence.models.raw.drawing.drawing_models import ShapeAnchorRaw
logger = logging.getLogger(__name__)
# [docs]  (Sphinx viewcode link marker left over from HTML extraction; not source code)
class DrawingXMLParser(BaseXMLParser):
    """Parses XML elements related to drawing objects in a spreadsheet.

    This class walks the ``xdr:twoCellAnchor`` children of a drawing XML root
    and delegates each one to ``ConnectorParser`` (when it contains an
    ``xdr:cxnSp`` child) or ``ShapeParser`` (when it contains an ``xdr:sp``
    child).

    Attributes:
        namespaces (dict): XML namespaces used in the drawing XML.
        xml_root (ET.Element): Root element of the drawing XML.
        connector_list (List[ConnectorAnchorRaw]): Connector anchors produced
            by the most recent ``parse()`` call.
        shape_list (List[ShapeAnchorRaw]): Shape anchors produced by the most
            recent ``parse()`` call.
    """

    def __init__(self, drawing_root: ET.Element) -> None:
        """Initializes the DrawingXMLParser with the root XML element.

        Args:
            drawing_root (ET.Element): The root element of the drawing XML.
        """
        # NOTE(review): BaseXMLParser.__init__ is not invoked here; confirm
        # that the base class needs no initialization of its own.
        self.namespaces = {
            "xdr": "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing",
            "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
        }
        self.xml_root = drawing_root
        logger.debug(
            "DrawingXMLParser | __init__ | drawing_root: %s", drawing_root.tag
        )
        self.connector_list: List[ConnectorAnchorRaw] = []
        self.shape_list: List[ShapeAnchorRaw] = []

    def parse(self) -> Tuple[List[ConnectorAnchorRaw], List[ShapeAnchorRaw]]:
        """Parses the drawing XML to extract connectors and shapes.

        Iterates over the ``xdr:twoCellAnchor`` elements that are direct
        children of the root. An anchor with an ``xdr:cxnSp`` child is parsed
        as a connector; otherwise an anchor with an ``xdr:sp`` child is parsed
        as a shape. Anchors with neither child are skipped.

        The result lists are rebuilt on every call, so calling ``parse()``
        more than once on the same instance does not duplicate entries.

        Returns:
            Tuple[List[ConnectorAnchorRaw], List[ShapeAnchorRaw]]: A tuple
            containing lists of parsed connector and shape anchors.
        """
        # Start from fresh lists: appending to the lists created in __init__
        # would return every anchor twice on a second parse() call. Rebinding
        # (rather than clearing in place) leaves previously returned lists
        # untouched.
        self.connector_list = []
        self.shape_list = []

        for twocell_anchor_el in self.xml_root.findall(
            "xdr:twoCellAnchor", self.namespaces
        ):
            logger.debug(
                "DrawingXMLParser | parse | parsing %s", twocell_anchor_el
            )
            # Check if the element is a connector
            if twocell_anchor_el.find("xdr:cxnSp", self.namespaces) is not None:
                logger.info("Parsing connector")
                connector_parser = ConnectorParser(self.namespaces)
                connector_anchor_raw = connector_parser.parse(twocell_anchor_el)
                self.connector_list.append(connector_anchor_raw)
            # Check if the element is a shape
            elif twocell_anchor_el.find("xdr:sp", self.namespaces) is not None:
                logger.info("Parsing shape")
                shape_parser = ShapeParser(self.namespaces)
                shape_anchor_raw = shape_parser.parse(twocell_anchor_el)
                self.shape_list.append(shape_anchor_raw)
            else:
                # Neither a connector nor a shape child: nothing to parse
                # here, but leave a trace so skipped anchors are visible.
                logger.debug(
                    "DrawingXMLParser | parse | skipping anchor without "
                    "xdr:cxnSp or xdr:sp child"
                )

        return self.connector_list, self.shape_list