# Source code for ssapy_toolkit.IO.xml_utils

#!/usr/bin/env python3
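# xml_utils.py
#
# Stand-alone XML loaders and writer:
#   - read_xml(filename, keep_root=False, decode_special=True)
#   - load_xml(filename, keep_root=False, decode_special=True)  # alias of read_xml
#   - save_xml(filename, data, root_tag="root", pretty=True, xml_declaration=True, encoding="utf-8")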
#
# Behavior
# - Parses ANY XML file into Python dict/list/scalars.
# - Attributes -> stored under "@attrs"; element text -> under "#text".
# - Repeated child tags become Python lists.
# - If decode_special=True, restores data encoded with markers:
#     * @attrs.type="ndarray"  with dtype + shape + <item> -> numpy.ndarray
#     * @attrs.type="datetime" with #text ISO string       -> datetime.datetime
#     * @attrs.type="astropy_time" with 'scale' + #text    -> astropy.time.Time (if available)
#     * @attrs.type="set"/"tuple" with <item> children     -> set / tuple
#
# Notes
# - Uses numpy (no math, no typing).
# - No CLI / __main__ block; import and call read_xml/load_xml/save_xml directly.

import numpy as np
from datetime import datetime
try:
    from astropy.time import Time  # optional
except Exception:
    Time = None  # gracefully degrade if astropy isn't installed
import xml.etree.ElementTree as ET


def _element_to_struct(element):
    """
    Convert an Element into a nested, JSON-like structure:
      - attributes under "@attrs"
      - text under "#text"
      - children grouped by tag; repeated tags become lists
      - pure-text nodes become just the text scalar
    """
    node = {}
    if element.attrib:
        node["@attrs"] = dict(element.attrib)

    # Group children by tag to detect repeats
    children_by_tag = {}
    for child in element:
        child_struct = _element_to_struct(child)
        tag = child.tag
        children_by_tag.setdefault(tag, []).append(child_struct)

    # Attach grouped children (singletons vs lists)
    for tag, items in children_by_tag.items():
        node[tag] = items if len(items) > 1 else items[0]

    # Attach text content
    text = (element.text or "").strip()
    if text:
        if node:  # already has attrs or children
            node["#text"] = text
        else:
            return text  # pure text node becomes scalar

    return node


def _decode_special_struct(struct):
    """
    Decode special types when marked via @attrs['type'] on a dict payload.
    Returns either a decoded Python object or the original struct.
    """
    if not isinstance(struct, dict):
        return struct

    attrs = struct.get("@attrs")
    if not isinstance(attrs, dict):
        return struct

    encoded_type = attrs.get("type")
    if not encoded_type:
        return struct

    if encoded_type == "ndarray":
        dtype = attrs.get("dtype") or "float64"
        shape_txt = attrs.get("shape") or ""
        shape = tuple(int(s) for s in shape_txt.split(",")) if shape_txt else None
        items = struct.get("item", [])
        arr = np.array(items, dtype=dtype)
        if shape:
            try:
                arr = arr.reshape(shape)
            except Exception:
                # If reshape fails, return flat array
                pass
        return arr

    if encoded_type == "datetime":
        iso = struct.get("#text", "")
        try:
            return datetime.fromisoformat(iso)
        except Exception:
            return iso  # leave as string if parse fails

    if encoded_type == "astropy_time":
        isot = struct.get("#text", "")
        scale = attrs.get("scale", "utc")
        if Time is not None:
            try:
                return Time(isot, scale=scale)
            except Exception:
                return isot
        return isot  # astropy not available: return text

    if encoded_type == "set":
        return set(struct.get("item", []))

    if encoded_type == "tuple":
        return tuple(struct.get("item", []))

    return struct


def _struct_to_python(obj, decode_special):
    """Recursively convert the XML-structure to plain Python, applying special decoding."""
    if isinstance(obj, list):
        return [_struct_to_python(v, decode_special) for v in obj]

    if isinstance(obj, dict):
        # Recurse into children first
        recursed = {k: _struct_to_python(v, decode_special) for k, v in obj.items()}
        # Then decode marked special payloads, if requested
        return _decode_special_struct(recursed) if decode_special else recursed

    return obj  # scalars pass through unchanged


def read_xml(filename, keep_root=False, decode_special=True):
    """
    Parse an XML file into Python data.

    Args:
        filename: Path-like or string path to an XML file (converted with
            str() before parsing).
        keep_root (bool): If False (default), return the content of the root
            element. If True, return {root_tag: content}.
        decode_special (bool): If True (default), restore payloads marked as
            ndarray/datetime/astropy_time/set/tuple.

    Returns:
        dict | str | numpy.ndarray | datetime | astropy.time.Time | set | tuple
        Leaf text is returned as str; no numeric conversion is applied to
        ordinary element text.

    Raises:
        Whatever xml.etree.ElementTree.parse raises for a missing or
        malformed file (e.g. OSError, ET.ParseError).

    Note:
        xml.etree.ElementTree is not hardened against maliciously constructed
        XML; only use this on files from a trusted source.
    """
    tree = ET.parse(str(filename))
    root = tree.getroot()
    struct = _element_to_struct(root)
    data = _struct_to_python(struct, decode_special=decode_special)
    return {root.tag: data} if keep_root else data
def load_xml(filename, keep_root=False, decode_special=True):
    """
    Alias of read_xml().

    Takes the same arguments and returns exactly what
    read_xml(filename, keep_root=keep_root, decode_special=decode_special)
    returns.
    """
    return read_xml(filename, keep_root=keep_root, decode_special=decode_special)
def save_xml(filename, data, root_tag="root", pretty=True, xml_declaration=True, encoding="utf-8"):
    """
    Serialize Python data to XML and write it to 'filename'.

    Supported input:
      - dict: each key becomes a child element; a list value becomes
        repeated elements named after the key. The keys "@attrs" and "#text"
        are written as XML attributes and element text respectively.
      - list: each entry becomes an <item> child.
      - str / bool / int / float: written as str(value).
      - bytes: decoded as UTF-8 (errors replaced).
      - anything else: written as str(value).

    Special encodings:
      * numpy.ndarray -> attributes type="ndarray", dtype, shape + <item> values
      * numpy scalars -> converted to native Python numbers
      * datetime      -> attribute type="datetime", text = ISO 8601
      * astropy Time  -> attributes type="astropy_time", scale, text = ISOT
      * set/tuple     -> attribute type="set"/"tuple" + <item> children

    Args:
        filename: Destination passed to ElementTree.write.
        data: The Python value to serialize.
        root_tag (str): Tag of the root element. Ignored when the converted
            data is a dict with exactly one key other than "@attrs"/"#text";
            that key is then used as the root tag.
        pretty (bool): If True (default), insert newlines/indentation.
        xml_declaration (bool): Forwarded to ElementTree.write.
        encoding (str): Forwarded to ElementTree.write.
    """

    def _indent_in_place(element, level=0):
        # Add whitespace-only text/tail so the written document is indented.
        # NOTE(review): SOURCE shows a one-space indent unit; the original
        # width may have been lost when the page was extracted - confirm.
        indent_space = " "
        i = "\n" + level * indent_space
        if len(element):
            if not element.text or not element.text.strip():
                element.text = i + indent_space
            for child in element:
                _indent_in_place(child, level + 1)
            # The last child's tail precedes this element's closing tag, so
            # it is pulled back to this element's own indentation level.
            last_child = element[-1]
            if not last_child.tail or not last_child.tail.strip():
                last_child.tail = i
        if level and (not element.tail or not element.tail.strip()):
            element.tail = i

    def _serialize_special(obj):
        # Return the marked representation of a special type, the native
        # number for a numpy scalar, or None when obj is not special.
        if isinstance(obj, np.ndarray):
            flat = obj.ravel().tolist()
            return {
                "@attrs": {
                    "type": "ndarray",
                    "dtype": str(obj.dtype),
                    "shape": ",".join(str(x) for x in obj.shape),
                },
                "item": [x.item() if isinstance(x, np.generic) else x for x in flat],
            }
        if isinstance(obj, (np.integer, np.floating, np.bool_)):
            return obj.item()
        if isinstance(obj, datetime):
            return {"@attrs": {"type": "datetime"}, "#text": obj.isoformat()}
        if Time is not None and isinstance(obj, Time):
            return {"@attrs": {"type": "astropy_time", "scale": obj.scale}, "#text": obj.isot}
        if isinstance(obj, set):
            return {"@attrs": {"type": "set"}, "item": list(obj)}
        if isinstance(obj, tuple):
            return {"@attrs": {"type": "tuple"}, "item": list(obj)}
        return None

    def _python_to_struct(obj):
        # Recursively convert obj into the dict/list/scalar layout that
        # _struct_to_element understands.
        special = _serialize_special(obj)
        if special is not None:
            if isinstance(special, dict):
                out = {}
                for k, v in special.items():
                    if k in ("@attrs", "#text"):
                        # Attribute maps and text are already final.
                        out[k] = v
                    else:
                        out[k] = _python_to_struct(v)
                return out
            return special
        if isinstance(obj, dict):
            return {k: _python_to_struct(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [_python_to_struct(v) for v in obj]
        if isinstance(obj, (str, bool, int, float)):
            return obj
        if isinstance(obj, bytes):
            return obj.decode("utf-8", "replace")
        return str(obj)

    def _struct_to_element(tag, payload):
        # Build an Element named 'tag' from a converted payload.
        elem = ET.Element(tag)
        if not isinstance(payload, (dict, list)):
            elem.text = str(payload)
            return elem
        if isinstance(payload, list):
            for item in payload:
                elem.append(_struct_to_element("item", item))
            return elem

        attrs = payload.get("@attrs")
        if isinstance(attrs, dict):
            for k, v in attrs.items():
                elem.set(k, str(v))

        text = payload.get("#text")
        if isinstance(text, (str, int, float, bool)):
            elem.text = str(text)

        for key, value in payload.items():
            if key in ("@attrs", "#text"):
                continue
            if isinstance(value, list):
                # A list under a key becomes repeated sibling elements.
                for item in value:
                    elem.append(_struct_to_element(key, item))
            else:
                elem.append(_struct_to_element(key, value))
        return elem

    structure = _python_to_struct(data)

    if (
        isinstance(structure, dict)
        and len(structure) == 1
        and next(iter(structure)) not in ("@attrs", "#text")
    ):
        # A single-key mapping supplies its own root tag.
        only_key = next(iter(structure))
        root_element = _struct_to_element(only_key, structure[only_key])
    else:
        root_element = _struct_to_element(root_tag, structure)

    if pretty:
        _indent_in_place(root_element)

    ET.ElementTree(root_element).write(filename, encoding=encoding, xml_declaration=xml_declaration)