Source code for _delb.plugins
# Copyright (C) 2018-'22 Frank Sachsenheim
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
import sys
from collections.abc import Callable, Iterable
from typing import TYPE_CHECKING, Any
if sys.version_info < (3, 10): # DROPWITH Python3.9
from importlib_metadata import entry_points
else:
from importlib.metadata import entry_points
if TYPE_CHECKING:
from types import SimpleNamespace
from _delb.typing import Loader, LoaderConstraint
class DocumentMixinBase:
    """
    By deriving a subclass from this one, a document extension class is registered as
    plugin. These are supposed to add additional attributes to a document, e.g. derived
    data or methods to interact with storage systems. All attributes of an extension
    should share a common prefix that terminates with an underscore, e.g.
    `storage_load`, `storage_save`, etc.

    This base class also acts as termination for methods that can be implemented by
    mixin classes. Any implementation of a method must call a base class' one, e.g.:

    .. code-block::

        from types import SimpleNamespace

        from _delb.plugins import DocumentMixinBase
        from magic_wonderland import play_disco


        class MyExtension(DocumentMixinBase):

            # this method can be implemented by any extension class
            @classmethod
            def _init_config(cls, config, kwargs):
                config.my_extension = SimpleNamespace(conf=kwargs.pop(
                    "my_extension_conf"))
                super()._init_config(config, kwargs)

            # this method is specific to this extension
            def my_extension_makes_magic(self):
                play_disco()
    """

    def __init_subclass__(cls):
        # Only classes that list DocumentMixinBase as their first base are
        # registered; classes that merely inherit from an extension are not.
        if cls.__mro__[1] is DocumentMixinBase:
            plugin_manager.document_mixins.append(cls)

    @classmethod
    def _init_config(cls, config: SimpleNamespace, kwargs: dict[str, Any]):
        """
        The ``kwargs`` argument contains the additional keyword arguments that a
        :class:`Document` instance is called with. Extension classes that expect
        configuration data *must* process their specific arguments by clearing them
        from the ``kwargs`` dictionary, e.g. with :meth:`dict.pop`, and preferably
        storing the final configuration data in a :class:`types.SimpleNamespace` and
        adding it to the :class:`types.SimpleNamespace` passed as ``config`` with the
        extension's name. The initially mentioned keyword arguments *should* be prefixed
        with that name as well. This method is called before the loaders try to read and
        parse the given source for a document.

        :raises RuntimeError: If ``kwargs`` still contains items when this terminating
                              implementation is reached.
        """
        if kwargs:
            # Report the leftover keyword arguments, as these are the ones that
            # nobody consumed and that the message talks about.
            raise RuntimeError(
                "Not all configuration arguments have been processed. You either "
                "passed invalid arguments or an extension doesn't handle them "
                f"properly: {kwargs}"
            )
class PluginManager:
    """
    Keeps the registries that plugins add their extensions to: document mixin
    classes, document subclasses, loaders and custom XPath functions.
    """

    def __init__(self):
        # classes that directly derive from DocumentMixinBase
        self.document_mixins: list[type] = []
        self.document_subclasses: list[type] = []
        # loader callables, ordered as arranged by register_loader
        self.loaders: list[Loader] = []
        # maps the name used in XPath expressions to the implementing callable
        self.xpath_functions: dict[str, Callable] = {}

    @staticmethod
    def load_plugins():
        """
        Loads all modules that are registered as entrypoint in the ``delb`` group.
        """
        for entrypoint in entry_points().select(group="delb"):
            entrypoint.load()

    def register_loader(
        self, before: LoaderConstraint = None, after: LoaderConstraint = None
    ) -> Callable:
        """
        Registers a document loader.

        An example module that is specified as ``delb`` plugin for an IPFS loader might
        look like this:

        .. testcode::

            from os import getenv
            from types import SimpleNamespace
            from typing import Any

            from _delb.plugins import plugin_manager
            from _delb.plugins.https_loader import https_loader
            from _delb.typing import LoaderResult


            IPFS_GATEWAY = getenv("IPFS_GATEWAY_PREFIX", "https://ipfs.io/ipfs/")


            @plugin_manager.register_loader()
            def ipfs_loader(source: Any, config: SimpleNamespace) -> LoaderResult:
                if isinstance(source, str) and source.startswith("ipfs://"):
                    config.source_url = source
                    config.ipfs_gateway_source_url = IPFS_GATEWAY + source[7:]
                    return https_loader(config.ipfs_gateway_source_url, config)

                # return an indication why this loader didn't attempt to load in order
                # to support debugging
                return "The input value is not an URL with the ipfs scheme."

        The ``source`` argument is what a :class:`Document` instance is initialized with
        as input data.

        Note that the ``config`` argument that is passed to a loader function contains
        configuration data, it's the :attr:`delb.Document.config` property after
        :meth:`_init_config <_delb.plugins.DocumentMixinBase._init_config>` has
        been processed.

        Loaders that retrieve a document from an URL should add the origin as string to
        the ``config`` object as ``source_url``.

        You might want to specify a loader to be considered before or after another
        one. Let's assume a loader shall figure out what to load from a remote XML
        resource that contains a reference to the actual document.
        That one would have to be considered before the one that loads XML documents
        from a URL with the `https` scheme:

        .. testcode::

            from _delb.plugins import plugin_manager
            from _delb.plugins.https_loader import https_loader
            from _delb.typing import LoaderResult


            @plugin_manager.register_loader(before=https_loader)
            def mets_loader(source, config) -> LoaderResult:
                # loading logic here
                pass

        :param before: One loader or an iterable of loaders that the new one shall
                       be placed in front of.
        :param after: One loader or an iterable of loaders that the new one shall be
                      placed behind.
        :return: A decorator that inserts the decorated loader into the registry and
                 returns it unchanged.
        :raises NotImplementedError: If both ``before`` and ``after`` are given.
        :raises ValueError: If a loader referenced by a constraint is not registered.
        """
        if before is not None and after is not None:
            raise NotImplementedError(
                "Loaders may only define one constraint atm. Please open an issue with "
                "a use-case description if you need to define both."
            )

        registered_loaders = self.loaders

        # The insertion position is determined right away, i.e. when the decorator
        # is created, not when it is eventually applied to a loader.
        if before is not None:
            if not isinstance(before, Iterable):
                before = (before,)
            # in front of the earliest of the referenced loaders
            index = min(registered_loaders.index(x) for x in before)
        elif after is not None:
            if not isinstance(after, Iterable):
                after = (after,)
            # right behind the latest of the referenced loaders
            index = max(registered_loaders.index(x) for x in after) + 1
        else:
            index = len(registered_loaders)

        def registrar(loader: Loader) -> Loader:
            # An explicit check is used as assertions are skipped with ``-O``.
            if not callable(loader):
                raise TypeError(f"A loader must be callable, got: {loader!r}")
            registered_loaders.insert(index, loader)
            return loader

        return registrar

    def register_xpath_function(self, arg: Callable | str) -> Callable:
        """
        Custom XPath functions can be defined as shown in the following example. The
        first argument to a function is always an instance of
        :class:`_delb.xpath.EvaluationContext` followed by the expression's arguments.

        .. testcode::

            from delb import Document
            from _delb.plugins import plugin_manager
            from _delb.xpath import EvaluationContext


            @plugin_manager.register_xpath_function("is-last")
            def is_last(context: EvaluationContext) -> bool:
                return context.position == context.size

            @plugin_manager.register_xpath_function
            def lowercase(_, string: str) -> str:
                return string.lower()


            document = Document("<root><node/><node foo='BAR'/></root>")
            print(document.xpath("/*[is-last() and lowercase(@foo)='bar']").first)

        .. testoutput::

            <node foo="BAR"/>

        :param arg: Either the name to register the decorated function with, or the
                    function itself which is then registered with its ``__name__``.
        :return: A decorator if a name was passed, otherwise the passed function.
        :raises TypeError: If ``arg`` is neither a string nor a callable.
        """
        if isinstance(arg, str):
            # used as ``@register_xpath_function("name")``

            def wrapper(func):
                self.xpath_functions[arg] = func
                return func

            return wrapper

        if callable(arg):
            # used as bare ``@register_xpath_function``
            self.xpath_functions[arg.__name__] = arg
            return arg

        raise TypeError(
            "Expected a function name or a callable as argument, got: "
            f"{type(arg).__name__}"
        )
# The module-wide registry instance; its ``register_*`` methods are used as
# decorators by plugin modules.
plugin_manager = PluginManager()

# The names that make up this module's public interface.
__all__ = (
    DocumentMixinBase.__name__,
    "plugin_manager",
)