# Source code for _delb.typing

# Copyright (C) 2018-'25  Frank Sachsenheim
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.


from __future__ import annotations

import sys
from abc import ABC, abstractmethod
from collections.abc import Callable, Iterator, Sequence
from typing import (
    TYPE_CHECKING,
    overload,
    Any,
    AnyStr,
    BinaryIO,
    Optional,
    Protocol,
    TypeAlias,
    TypeVar,
)

if TYPE_CHECKING:
    from collections.abc import Iterable, Mapping
    from types import SimpleNamespace

    from _delb.nodes import Attribute, Siblings, TagAttributes, _TagDefinition
    from _delb.serializer import FormatOptions
    from _delb.xpath import QueryResults
    from _delb.xpath.ast import EvaluationContext
    from delb import Document


if sys.version_info < (3, 11):  # DROPWITH Python 3.10
    from typing_extensions import Literal, Self
else:
    from typing import Literal, Self


# node types


class XMLNodeType(ABC):
    """
    Defines the interfaces that all node type representations share. All node type
    implementations are a subclass of this one.
    """

    # These attributes are only declared (not assigned) here, so that type checkers
    # know about them on every node type.
    _child_nodes: Siblings
    _parent: None | ParentNodeType

    @abstractmethod
    def __copy__(self): ...

    @abstractmethod
    def __deepcopy__(self, memo): ...

    @abstractmethod
    def __str__(self) -> str: ...

    @abstractmethod
    def add_following_siblings(
        self, *node: NodeSource, clone: bool = False
    ) -> tuple[XMLNodeType, ...]:
        """
        Adds one or more nodes to the right of the node this method is called on.

        :param node: The node(s) to be added.
        :param clone: Clones the concrete nodes before adding if :obj:`True`.
        :return: The concrete nodes that were added.
        :meta category: Methods to add nodes to a tree

        The nodes can be concrete instances of any node type or rather abstract
        descriptions in the form of strings or objects returned from the
        :func:`delb.tag` function that are used to derive :class:`delb.nodes.TextNode`
        respectively :class:`delb.nodes.TagNode` instances from.
        """

    @abstractmethod
    def add_preceding_siblings(
        self, *node: NodeSource, clone: bool = False
    ) -> tuple[XMLNodeType, ...]:
        """
        Adds one or more nodes to the left of the node this method is called on.

        :param node: The node(s) to be added.
        :param clone: Clones the concrete nodes before adding if :obj:`True`.
        :return: The concrete nodes that were added.
        :meta category: Methods to add nodes to a tree

        The nodes can be concrete instances of any node type or rather abstract
        descriptions in the form of strings or objects returned from the
        :func:`delb.tag` function that are used to derive :class:`delb.nodes.TextNode`
        respectively :class:`delb.nodes.TagNode` instances from.
        """

    @abstractmethod
    def clone(self, deep: bool = False) -> XMLNodeType:
        """
        Creates a new node of the same type with duplicated contents.

        :param deep: Clones the whole subtree if :obj:`True`.
        :return: A copy of the node.
        """

    @property
    @abstractmethod
    def depth(self) -> int:
        """
        The depth (or level) of the node in its tree.

        :meta category: Node properties
        """

    @abstractmethod
    def detach(self, retain_child_nodes: bool = False) -> Self:
        """
        Removes the node from its tree.

        :param retain_child_nodes: Keeps the node's descendants in the originating
                                   tree if :obj:`True`.
        :return: The removed node.
        :meta category: Methods to remove a node
        """

    @property
    @abstractmethod
    def document(self) -> Optional[Document]:
        """
        The :class:`delb.Document` instance that the node is associated with or
        :obj:`None`.

        :meta category: Related document and nodes properties
        """

    @abstractmethod
    def fetch_following(self, *filter: Filter) -> Optional[XMLNodeType]:
        """
        Retrieves the next filter matching node on the following axis.

        :param filter: Any number of :class:`Filter` s.
        :return: The next node in document order that matches all filters or
                 :obj:`None`.
        :meta category: Methods to fetch a relative node
        """

    @abstractmethod
    def _fetch_following(self) -> Optional[XMLNodeType]: ...

    @abstractmethod
    def fetch_following_sibling(self, *filter: Filter) -> Optional[XMLNodeType]:
        """
        Retrieves the next filter matching node on the following-sibling axis.

        :param filter: Any number of :class:`Filter` s.
        :return: The next sibling to the right that matches all filters or
                 :obj:`None`.
        :meta category: Methods to fetch a relative node
        """

    @abstractmethod
    def _fetch_following_sibling(self) -> Optional[XMLNodeType]: ...

    @abstractmethod
    def fetch_preceding(self, *filter: Filter) -> Optional[XMLNodeType]:
        """
        Retrieves the next filter matching node on the preceding axis.

        :param filter: Any number of :class:`Filter` s.
        :return: The previous node in document order that matches all filters or
                 :obj:`None`.
        :meta category: Methods to fetch a relative node
        """

    @abstractmethod
    def _fetch_preceding(self) -> Optional[XMLNodeType]: ...

    @abstractmethod
    def fetch_preceding_sibling(self, *filter: Filter) -> Optional[XMLNodeType]:
        """
        Retrieves the next filter matching node on the preceding-sibling axis.

        :param filter: Any number of :class:`Filter` s.
        :return: The next sibling to the left that matches all filters or
                 :obj:`None`.
        :meta category: Methods to fetch a relative node
        """

    # Annotated like the other three private ``_fetch_*`` counterparts.
    @abstractmethod
    def _fetch_preceding_sibling(self) -> Optional[XMLNodeType]: ...

    @property
    @abstractmethod
    def full_text(self) -> str:
        """
        The concatenated contents of all text node descendants in document order.

        :meta category: Node content properties
        """

    @property
    @abstractmethod
    def index(self) -> Optional[int]:
        """
        The node's index within the parent's collection of child nodes. When this
        property is read the currently set default filters are applied.
        The value :obj:`None` is returned for nodes without a parent and for nodes
        that don't match these filters themselves.

        :meta category: Node properties
        """

    @abstractmethod
    def iterate_ancestors(self, *filter: Filter) -> Iterator[ParentNodeType]:
        """
        Iterator over the filter matching nodes on the ancestor axis.

        :param filter: Any number of :class:`Filter` s that a node must match to be
               yielded.
        :return: A :term:`generator iterator` that yields the ancestor nodes from bottom
                 to top.
        :meta category: Methods to iterate over related node
        """

    @abstractmethod
    def _iterate_ancestors(
        self, *, _include_document_node: bool = False
    ) -> Iterator[ParentNodeType]: ...

    @abstractmethod
    def iterate_children(self, *filter: Filter) -> Iterator[XMLNodeType]:
        """
        Iterator over the filter matching nodes on the child axis.

        :param filter: Any number of :class:`Filter` s that a node must match to be
                       yielded.
        :return: A :term:`generator iterator` that yields the child nodes of the node.
        :meta category: Methods to iterate over related node
        """

    @abstractmethod
    def iterate_descendants(self, *filter: Filter) -> Iterator[XMLNodeType]:
        """
        Iterator over the filter matching nodes on the descendant axis.

        :param filter: Any number of :class:`Filter` s that a node must match to be
                       yielded.
        :return: A :term:`generator iterator` that yields the descending nodes of the
                 node.
        :meta category: Methods to iterate over related node
        """

    @abstractmethod
    def _iterate_descendants(self) -> Iterator[XMLNodeType]: ...

    @abstractmethod
    def iterate_following(
        self, *filter: Filter, include_descendants: bool = True
    ) -> Iterator[XMLNodeType]:
        """
        Iterator over the filter matching nodes on the following axis.

        :param filter: Any number of :class:`Filter` s that a node must match to be
               yielded.
        :param include_descendants: Also yields descendants of the starting node.
                                    This deviates from the XPath definition of the
                                    following axis.
        :return: A :term:`generator iterator` that yields the following nodes in
                 document order.
        :meta category: Methods to iterate over related node
        """

    @abstractmethod
    def _iterate_following(
        self, *, include_descendants: bool = True
    ) -> Iterator[XMLNodeType]: ...

    @abstractmethod
    def iterate_following_siblings(self, *filter: Filter) -> Iterator[XMLNodeType]:
        """
        Iterator over the filter matching nodes on the following-sibling axis.

        :param filter: Any number of :class:`Filter` s that a node must match to be
               yielded.
        :return: A :term:`generator iterator` that yields the siblings to the node's
                 right.
        :meta category: Methods to iterate over related node
        """

    @abstractmethod
    def _iterate_following_siblings(self) -> Iterator[XMLNodeType]: ...

    @abstractmethod
    def iterate_preceding(
        self, *filter: Filter, include_ancestors: bool = True
    ) -> Iterator[XMLNodeType]:
        """
        Iterator over the filter matching nodes on the preceding axis.

        :param filter: Any number of :class:`Filter` s that a node must match to be
               yielded.
        :param include_ancestors: Also yields ancestor nodes / tag nodes that were
                                  started earlier in the stream. This deviates from the
                                  XPath definition of the preceding axis.
        :return: A :term:`generator iterator` that yields the previous nodes in document
                 order.
        :meta category: Methods to iterate over related node
        """

    @abstractmethod
    def _iterate_preceding(
        self, *, include_ancestors: bool = True
    ) -> Iterator[XMLNodeType]: ...

    @abstractmethod
    def iterate_preceding_siblings(self, *filter: Filter) -> Iterator[XMLNodeType]:
        """
        Iterator over the filter matching nodes on the preceding-sibling axis.

        :param filter: Any number of :class:`Filter` s that a node must match to be
               yielded.
        :return: A :term:`generator iterator` that yields the siblings to the node's
                 left.
        :meta category: Methods to iterate over related node
        """

    @abstractmethod
    def _iterate_preceding_siblings(self) -> Iterator[XMLNodeType]: ...

    @abstractmethod
    def _iterate_reversed_descendants(self) -> Iterator[XMLNodeType]: ...

    @property
    @abstractmethod
    def parent(self) -> Optional[ParentNodeType]:
        """
        The node's parent or :obj:`None`.

        :meta category: Related document and nodes properties
        """

    @abstractmethod
    def replace_with(self, node: NodeSource, clone: bool = False) -> Self:
        """
        Removes the node and places the given one in its tree location.

        The node can be a concrete instance of any node type or a rather abstract
        description in the form of a string or an object returned from the
        :func:`delb.tag` function that is used to derive a :class:`delb.nodes.TextNode`
        respectively :class:`delb.nodes.TagNode` instance from.

        :param node: The replacing node.
        :param clone: A concrete, replacing node is cloned if :obj:`True`.
        :return: The removed node.
        :meta category: Methods to remove a node
        """

    @abstractmethod
    def serialize(
        self,
        *,
        format_options: Optional[FormatOptions] = None,
        namespaces: Optional[NamespaceDeclarations] = None,
        newline: Optional[str] = None,
    ) -> str:
        """
        Returns a string that contains the serialization of the node. See
        :doc:`/api/serialization` for details.

        :param format_options: An instance of :class:`delb.FormatOptions` can be
                               provided to configure formatting.
        :param namespaces: A mapping of prefixes to namespaces.  If not provided the
                           node's namespace will serve as default namespace.  Prefixes
                           for undeclared namespaces are enumerated with the prefix
                           ``ns``.
        :param newline: See :class:`io.TextIOWrapper` for a detailed explanation of the
                        parameter with the same name.
        """

    @abstractmethod
    def xpath(
        self,
        expression: str,
        namespaces: Optional[NamespaceDeclarations] = None,
    ) -> QueryResults:
        """
        Queries the tree with an XPath expression with this node as initial context
        node.

        :param expression: A supported XPath 1.0 expression that contains one or more
                           location paths.
        :param namespaces: A mapping of prefixes that are used in the expression to
                           namespaces. If not provided the node's namespace will serve
                           as default, mapped to an empty prefix.
        :return: All nodes that match the evaluation of the provided XPath expression.
        :meta category: Methods to query the tree

        See :doc:`/api/querying` for details on the extent of the XPath implementation.
        """


class ParentNodeType(XMLNodeType):
    """Defines the interfaces for nodes that can contain further nodes."""

    @abstractmethod
    def __len__(self) -> int: ...

    @abstractmethod
    def append_children(
        self, *node: NodeSource, clone: bool = False
    ) -> tuple[XMLNodeType, ...]:
        """
        Adds one or more nodes as child nodes after any existing to the child nodes of
        the node this method is called on.

        :param node: The node(s) to be added.
        :param clone: Clones the concrete nodes before adding if :obj:`True`.
        :return: The concrete nodes that were appended.
        :meta category: Methods to add nodes to a tree

        The nodes can be concrete instances of any node type or rather abstract
        descriptions in the form of strings or objects returned from the
        :func:`delb.tag` function that are used to derive :class:`delb.nodes.TextNode`
        respectively :class:`delb.nodes.TagNode` instances from.
        """

    @property
    @abstractmethod
    def first_child(self) -> Optional[XMLNodeType]:
        """
        The node's first child node.

        :meta category: Related document and nodes properties
        """

    @abstractmethod
    def insert_children(
        self, index: int, *node: NodeSource, clone: bool = False
    ) -> tuple[XMLNodeType, ...]:
        """
        Inserts one or more child nodes.

        :param index: The index at which the first of the given nodes will be inserted,
                      the remaining nodes are added afterwards in the given order.
        :param node: The node(s) to be added.
        :param clone: Clones the concrete nodes before adding if :obj:`True`.
        :return: The concrete nodes that were inserted.
        :meta category: Methods to add nodes to a tree

        The nodes can be concrete instances of any node type or rather abstract
        descriptions in the form of strings or objects returned from the
        :func:`delb.tag` function that are used to derive :class:`delb.nodes.TextNode`
        respectively :class:`delb.nodes.TagNode` instances from.
        """

    @property
    @abstractmethod
    def last_child(self) -> Optional[XMLNodeType]:
        """
        The node's last child node.

        :meta category: Related document and nodes properties
        """

    @property
    @abstractmethod
    def last_descendant(self) -> Optional[XMLNodeType]:
        """
        The node's last descendant.

        :meta category: Related document and nodes properties
        """

    @abstractmethod
    def merge_text_nodes(self, deep: bool = False):
        """
        Merges all consecutive text nodes in the subtree into one.
        Text nodes without content are dropped.
        """

    # The parameter is annotated with ``NodeSource`` like in ``append_children`` and
    # ``insert_children``, the docstring states that strings and tag definitions are
    # accepted as well.
    @abstractmethod
    def prepend_children(
        self, *node: NodeSource, clone: bool = False
    ) -> tuple[XMLNodeType, ...]:
        """
        Adds one or more nodes as child nodes before any existing to the child nodes of
        the node this method is called on.

        :param node: The node(s) to be added.
        :param clone: Clones the concrete nodes before adding if :obj:`True`.
        :return: The concrete nodes that were prepended.
        :meta category: Methods to add nodes to a tree

        The nodes can be concrete instances of any node type or rather abstract
        descriptions in the form of strings or objects returned from the
        :func:`delb.tag` function that are used to derive :class:`delb.nodes.TextNode`
        respectively :class:`delb.nodes.TagNode` instances from.
        """


class CommentNodeType(XMLNodeType):
    """Defines the interfaces for :class:`delb.nodes.CommentNode`."""

    # Implementations must define their own equality comparison.
    @abstractmethod
    def __eq__(self, other: Any) -> bool: ...

    @property
    @abstractmethod
    def content(self) -> str:
        """
        The comment's text.

        :meta category: Node content properties
        """

    # The property is writable, the setter takes the new text as string.
    @content.setter
    @abstractmethod
    def content(self, value: str) -> None: ...


# A private subclass of ParentNodeType that adds no members of its own in this
# module.  NOTE(review): presumably the interface of the node that represents a
# document as a whole (cp. the ``_include_document_node`` parameter of
# ``XMLNodeType._iterate_ancestors``) - confirm against the implementation.
class _DocumentNodeType(ParentNodeType): ...  # noqa: E701


class ProcessingInstructionNodeType(XMLNodeType):
    """Defines the interfaces for :class:`delb.nodes.ProcessingInstructionNode`."""

    # Implementations must define their own equality comparison.
    @abstractmethod
    def __eq__(self, other: Any) -> bool: ...

    @property
    @abstractmethod
    def content(self) -> str:
        """
        The processing instruction's text.

        :meta category: Node content properties
        """

    # The property is writable, the setter takes the new text as string.
    @content.setter
    @abstractmethod
    def content(self, value: str) -> None: ...

    @property
    @abstractmethod
    def target(self) -> str:
        """
        The processing instruction's target.

        :meta category: Node content properties
        """

    # The property is writable, the setter takes the new target as string.
    @target.setter
    @abstractmethod
    def target(self, value: str) -> None: ...


class TagNodeType(ParentNodeType):
    """Defines the interfaces for :class:`delb.nodes.TagNode`."""

    @abstractmethod
    def __contains__(self, item: AttributeAccessor | XMLNodeType) -> bool: ...

    # NOTE(review): for ``__getitem__`` and ``__setitem__`` only overload signatures
    # are declared, there are no undecorated definitions that follow them.  Concrete
    # subclasses are presumably expected to provide the implementations - confirm.
    @overload
    @abstractmethod
    def __getitem__(self, item: int) -> XMLNodeType: ...

    @overload
    @abstractmethod
    def __getitem__(self, item: AttributeAccessor) -> Attribute | None: ...

    @overload
    @abstractmethod
    def __setitem__(self, item: int, value: NodeSource): ...

    @overload
    @abstractmethod
    def __setitem__(self, item: AttributeAccessor, value: str | Attribute): ...

    @property
    @abstractmethod
    def attributes(self) -> TagAttributes:
        """
        A :term:`mapping` that can be used to access the node's attributes.

        :meta category: Node content properties

        >>> node = new_tag_node("node", attributes={"foo": "0", "bar": "0"})
        >>> node.attributes
        {'foo': '0', 'bar': '0'}
        >>> node.attributes.pop("bar")  # doctest: +ELLIPSIS
        <Attribute(bar="0") [0x...]>
        >>> node.attributes["foo"] = "1"
        >>> node.attributes["peng"] = "1"
        >>> print(node)
        <node foo="1" peng="1"/>
        >>> node.attributes.update({"foo": "2", "zong": "2"})
        >>> print(node)
        <node foo="2" peng="1" zong="2"/>

        Namespaced names are accessed with two-value tuples or a string. The two-value
        tuple holds the namespace and the local name in that order. A string can either
        be a fully qualified name in `Clark notation`_ or a local name that belongs to
        the containing node's namespace.

        .. _Clark notation: http://www.jclark.com/xml/xmlns.htm

        >>> DefaultStringOptions.namespaces = {"": "http://namespace"}
        >>> node = new_tag_node(
        ...     "node",
        ...     namespace="http://namespace",
        ... )
        >>> node.attributes.update({("http://namespace", "foo"): "0"})
        >>> print(node)
        <node xmlns="http://namespace" foo="0"/>
        >>> attribute = node.attributes[("http://namespace", "foo")]
        >>> node.attributes["foo"] is attribute
        True
        >>> node.attributes["{http://namespace}foo"] is attribute
        True

        Attributes behave like strings, but also expose namespace, local name and
        value for manipulation.

        >>> node = new_tag_node("node")
        >>> node.attributes["foo"] = "0"
        >>> node.attributes["foo"].local_name = "bar"
        >>> node.attributes["bar"].namespace = "http://namespace"
        >>> node.attributes[("http://namespace", "bar")].value = "X"
        >>> print(node)
        <node xmlns:ns0="http://namespace" ns0:bar="X"/>
        >>> "ref-" + node.attributes[("http://namespace", "bar")].lower()
        'ref-x'
        """

    @abstractmethod
    def css_select(
        self, expression: str, namespaces: Optional[NamespaceDeclarations] = None
    ) -> QueryResults:
        """
        Queries the tree with a CSS selector expression with this node as initial
        context node.

        :param expression: A CSS selector expression.
        :param namespaces: A mapping of prefixes that are used in the expression to
                           namespaces.  If not provided the node's namespace will serve
                           as default, mapped to an empty prefix.
        :return: All nodes that match the evaluation of the provided CSS selector
                 expression.
        :meta category: Methods to query the tree

        See :doc:`/api/querying` regarding the extent of the supported grammar.

        Namespace prefixes are delimited with a ``|`` before a name test, for example
        ``div svg|metadata`` selects all descendants of ``div`` named nodes that belong
        to the default namespace or have no namespace and whose name is ``metadata``
        and have a namespace that is mapped to the ``svg`` prefix.
        """

    @abstractmethod
    def fetch_or_create_by_xpath(
        self,
        expression: str,
        namespaces: Optional[NamespaceDeclarations] = None,
    ) -> TagNodeType:
        """
        Fetches a single node that is locatable by the provided XPath expression. If
        the node doesn't exist, the non-existing branch will be created.

        :param expression: An XPath expression that can unambiguously locate a
                           descending node in a tree that has any state.
        :param namespaces: A mapping of prefixes that are used in the expression to
                           namespaces.  If not provided the node's namespace will serve
                           as default, mapped to an empty prefix.
        :return: The existing or freshly created node described with ``expression``.
        :meta category: Methods to query the tree

        These rules are imperative in your endeavour:

        - All location steps must use the child axis.
        - Each step needs to provide a name test.
        - Attribute comparisons against literals are the only allowed predicates.
        - Multiple attribute comparisons must be joined with the `and` operator and / or
          contained in more than one predicate expression.
        - The logical validity of multiple attribute comparisons isn't checked. E.g.
          one could provide ``foo[@p="her"][@p="him"]``, but expect an undefined
          behaviour.

        >>> root = Document("<root/>").root
        >>> grandchild = root.fetch_or_create_by_xpath(
        ...     "child[@a='b']/grandchild"
        ... )
        >>> grandchild is root.fetch_or_create_by_xpath(
        ...     "child[@a='b']/grandchild"
        ... )
        True
        >>> str(root)
        '<root><child a="b"><grandchild/></child></root>'
        """

    @abstractmethod
    def _get_normalize_space_directive(
        self, default: Literal["default", "preserve"] = "default"
    ) -> Literal["default", "preserve"]: ...

    @property
    @abstractmethod
    def id(self) -> Optional[str]:
        """
        This is a shortcut to retrieve and set the ``id``  attribute in the XML
        namespace. The client code is responsible to pass properly formed id names.

        :meta category: Node content properties
        """

    # The setter also accepts ``None``.
    @id.setter
    @abstractmethod
    def id(self, value: str | None) -> None: ...

    @property
    @abstractmethod
    def location_path(self) -> str:
        """
        An unambiguous XPath location path that points to this node from its tree root.

        :meta category: Node properties
        """

    @property
    @abstractmethod
    def local_name(self) -> str:
        """
        The node's name.

        :meta category: Node properties
        """

    @local_name.setter
    @abstractmethod
    def local_name(self, value: str) -> None: ...

    @property
    @abstractmethod
    def namespace(self) -> str:
        """
        The node's namespace. An empty string represents an empty namespace.

        :meta category: Node properties
        """

    @namespace.setter
    @abstractmethod
    def namespace(self, value: str) -> None: ...

    @abstractmethod
    def _reduce_whitespace(self): ...

    @property
    @abstractmethod
    def universal_name(self) -> str:
        """
        The node's qualified name in `Clark notation`_.

        :meta category: Node properties

        .. _Clark notation: http://www.jclark.com/xml/xmlns.htm
        """


class TextNodeType(XMLNodeType):
    """Defines the interfaces for :class:`delb.nodes.TextNode`."""

    # Implementations must define their own equality comparison.
    @abstractmethod
    def __eq__(self, other: Any) -> bool: ...

    @property
    @abstractmethod
    def content(self) -> str:
        """
        The node's text content.

        :meta category: Node content properties
        """

    # The property is writable, the setter takes the new text as string.
    @content.setter
    @abstractmethod
    def content(self, value: str) -> None: ...


# protocols


class BinaryReader(Protocol):
    """
    A structural type for binary input objects: anything that provides a ``close``
    method and a ``read`` method that returns :class:`bytes` conforms to it.
    """

    def close(self): ...

    # NOTE(review): ``n`` presumably is the maximum number of bytes to read, with
    # ``-1`` meaning "everything", like in :meth:`io.RawIOBase.read` - confirm.
    def read(self, n: int = -1) -> bytes: ...
# aliases

# Aliases that are given as strings are forward references, most of them name
# objects that are imported for type checking only and hence must not be evaluated
# at runtime.

QualifiedName: TypeAlias = tuple[str, str]  # (namespace, local name)
AttributeAccessor: TypeAlias = QualifiedName | str
_AttributesData: TypeAlias = dict[QualifiedName, str]
GenericDecorated = TypeVar("GenericDecorated", bound=Callable[..., Any])
SecondOrderDecorator: TypeAlias = "Callable[[GenericDecorated], GenericDecorated]"
Filter: TypeAlias = "Callable[[XMLNodeType], bool]"
NamespaceDeclarations: TypeAlias = "Mapping[str | None, str]"
_NamespaceDeclarations: TypeAlias = "Mapping[str, str]"
NodeSource: TypeAlias = "str | XMLNodeType | _TagDefinition"
InputStream: TypeAlias = AnyStr | BinaryIO
LoaderResult: TypeAlias = "Sequence[XMLNodeType] | str"
Loader: TypeAlias = "Callable[[Any, SimpleNamespace], LoaderResult]"
LoaderConstraint: TypeAlias = "Loader | Iterable[Loader] | None"
XPathFunction: TypeAlias = "Callable[[EvaluationContext, *Any], Any]"


# The names of the classes are derived from the class objects, the aliases' and
# re-exported names are spelled out.
__all__ = (
    "AttributeAccessor",
    "_AttributesData",
    "BinaryReader",
    CommentNodeType.__name__,
    _DocumentNodeType.__name__,
    "Filter",
    "GenericDecorated",
    "InputStream",
    "Literal",
    "Loader",
    "LoaderConstraint",
    "LoaderResult",
    "NamespaceDeclarations",
    "NodeSource",
    ParentNodeType.__name__,
    ProcessingInstructionNodeType.__name__,
    "QualifiedName",
    "SecondOrderDecorator",
    "Self",
    TagNodeType.__name__,
    TextNodeType.__name__,
    XMLNodeType.__name__,
    "XPathFunction",
)