Source code for delb.transform
# Copyright (C) 2018-'22 Frank Sachsenheim
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
This module offers a canonical interface with the aim of making the re-use of
transforming algorithms easier.
Let's look at it with examples::
from delb.transform import Transformation
class ResolveCopyOf(Transformation):
def transform(self):
for node in self.root.css_select("*[copyOf]"):
source_id = node["copyOf"]
source_node = self.origin_document.xpath(
f'//*[@xml:id="{source_id[1:]}"]'
).first
cloned_node = source_node.clone(deep=True)
cloned_node.id = None
node.replace_with(cloned_node)
Instances of transformations defined this way can be called with a (sub-)tree and,
optionally, the document that the tree originates from::
resolve_copy_of = ResolveCopyOf()
tree = resolve_copy_of(tree) # where tree is an instance of TagNode
Subclasses of :class:`typing.NamedTuple` are used to define options for
transformations::
from typing import NamedTuple
class ResolveChoiceOptions(NamedTuple):
corr: bool = True
reg: bool = True
class ResolveChoice(Transformation):
options_class = ResolveChoiceOptions
def __init__(self, options):
super().__init__(options)
self.keep_selector = ",".join(
(
"corr" if self.options.corr else "sic",
"reg" if self.options.reg else "orig"
)
)
self.drop_selector = ",".join(
(
"sic" if self.options.corr else "corr",
"orig" if self.options.reg else "reg"
)
)
def transform(self):
for choice_node in self.root.css_select("choice"):
node_to_drop = choice_node.css_select(self.drop_selector).first
node_to_drop.detach()
node_to_keep = choice_node.css_select(self.keep_selector).first
node_to_keep.detach(retain_child_nodes=True)
choice_node.detach(retain_child_nodes=True)
A transformation class that defines an ``options_class`` attribute can then either be
used with its defaults or with alternate options::
resolve_choice = ResolveChoice()
tree = resolve_choice(tree)
resolve_choice = ResolveChoice(ResolveChoiceOptions(reg=False))
tree = resolve_choice(tree)
Finally, concrete transformations can be chained, either as classes or as instances.
The interface also allows chaining multiple chains::
from delb.transform import TransformationSequence
tidy_up = TransformationSequence(ResolveCopyOf, resolve_choice)
tree = tidy_up(tree)
.. attention::
This is an experimental feature. It might change significantly in the future or be
removed altogether.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import NamedTuple, Optional
from delb import Document, TagNode
#
class TransformationBase(ABC):
    """This base class defines the calling interface of transformations."""

    @abstractmethod
    def __call__(self, root: TagNode, document: Optional[Document] = None) -> TagNode:
        """
        Applies the transformation to a tree.

        This method is abstract and has no behaviour of its own; concrete
        subclasses must override it.

        :param root: The root node of the (sub-)tree that shall be transformed.
        :param document: The document that the tree originates from, if there is
                         one.
        :return: The tree that results from the transformation.
        """
# The public names of this module, read from the exported objects themselves.
__all__ = tuple(
    exported.__name__ for exported in (Transformation, TransformationSequence)
)