# Copyright (C) 2018-'24 Frank Sachsenheim## This program is free software: you can redistribute it and/or modify# it under the terms of the GNU Affero General Public License as published# by the Free Software Foundation, either version 3 of the License, or# (at your option) any later version.## This program is distributed in the hope that it will be useful,# but WITHOUT ANY WARRANTY; without even the implied warranty of# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the# GNU Affero General Public License for more details.## You should have received a copy of the GNU Affero General Public License# along with this program. If not, see <https://www.gnu.org/licenses/>."""The ``core_loaders`` module provides a set loaders to retrieve documents from variousdata sources."""from__future__importannotationsfromcontextlibimportsuppressfromcopyimportdeepcopyfromioimportIOBase,UnsupportedOperationfrompathlibimportPathfromtypingimportTYPE_CHECKING,cast,Any,IOfromwarningsimportwarnfromlxmlimportetreefrom_delbimportutilsfrom_delb.nodesimportTagNodefrom_delb.pluginsimportplugin_managerifTYPE_CHECKING:fromtypesimportSimpleNamespacefrom_delb.typingimportLoaderResult
def tag_node_loader(data: Any, config: SimpleNamespace) -> LoaderResult:
    """
    Wraps a :class:`delb.TagNode` instance and its descendant nodes in a new
    element tree.

    A node that belongs to a document is deep-cloned first and the siblings of
    the original root are copied over to the clone. A node without a document
    is not cloned; its underlying etree object becomes the root of the new tree
    directly.

    :param data: The candidate input object.
    :param config: The configuration namespace; it is not used by this loader.
    :return: The resulting element tree, or a message string when ``data`` is
             not a :class:`delb.TagNode` instance.
    """
    if not isinstance(data, TagNode):
        return "The input value is not a TagNode instance."

    result = etree.ElementTree()

    if data.document is not None:
        cloned_node = data.clone(deep=True)
        result._setroot(cloned_node._etree_obj)
        # carry over what surrounds the original root element
        utils._copy_root_siblings(data._etree_obj, cloned_node._etree_obj)
        return result

    # a detached node is used as is, without cloning
    result._setroot(data._etree_obj)
    return result
@plugin_manager.register_loader()
def etree_loader(data: Any, config: SimpleNamespace) -> LoaderResult:
    """
    This loader processes :class:`lxml.etree._Element` and
    :class:`lxml.etree._ElementTree` instances. Either kind of input is
    deep-copied and triggers a :class:`DeprecationWarning`.

    :param data: The candidate input object.
    :param config: The configuration namespace; its ``parser_options`` provide
                   the parser that is attached to a tree built from a single
                   element.
    :return: A copied element tree, or a message string when ``data`` is
             neither of the supported types.
    """
    deprecation_note = (
        "lxml's etree models will not be usable inputs with the "
        "contributed core loaders."
    )

    if isinstance(data, etree._ElementTree):
        warn(deprecation_note, category=DeprecationWarning)
        return deepcopy(data)

    if isinstance(data, etree._Element):
        warn(deprecation_note, category=DeprecationWarning)
        copied_root = deepcopy(data)
        parser = config.parser_options._make_parser()
        return etree.ElementTree(element=copied_root, parser=parser)

    return "The input value is neither an etree.Element or …Tree instance."
@plugin_manager.register_loader(after=etree_loader)
def path_loader(data: Any, config: SimpleNamespace) -> LoaderResult:
    """
    This loader loads from a file that is pointed at with a
    :class:`pathlib.Path` instance. The path, joined onto the current working
    directory, is stored as a file URI in ``source_url`` on the document's
    :attr:`Document.config` attribute.

    :param data: The candidate input object.
    :param config: The configuration namespace; ``source_url`` is set on it
                   when ``data`` is a path.
    :return: Whatever :func:`buffer_loader` returns for the opened file, or a
             message string when ``data`` is not a :class:`pathlib.Path`
             instance.
    """
    if isinstance(data, Path):
        config.source_url = (Path.cwd() / data).as_uri()
        # Open in binary mode so that the XML parser sees the raw bytes and
        # resolves the document's encoding itself. In text mode Python would
        # decode with the locale's preferred encoding first, which can differ
        # from the encoding that the document declares.
        with data.open("rb") as file:
            return buffer_loader(file, config)
    return "The input value is not a pathlib.Path instance."
@plugin_manager.register_loader(after=path_loader)
def buffer_loader(data: Any, config: SimpleNamespace) -> LoaderResult:
    """
    This loader loads a document from a :term:`file-like object`.

    If the configuration has no ``source_url`` yet and the object carries a
    textual or bytes ``name`` that points to an existing file (relative names
    are resolved against the current working directory), that file's URI is
    stored as ``source_url``.

    :param data: The candidate input object.
    :param config: The configuration namespace; its ``parser_options`` provide
                   the parser, and ``source_url`` may be set on it.
    :return: The parsed element tree, or a message string when ``data`` is not
             an :class:`io.IOBase` instance.
    """
    if not isinstance(data, IOBase):
        return "The input value is no buffer object."

    if not hasattr(config, "source_url"):
        stream_name = getattr(data, "name", None)
        # other kinds of names, e.g. integer file descriptors, are ignored
        if isinstance(stream_name, (bytes, str)):
            if isinstance(stream_name, bytes):
                stream_name = stream_name.decode()
            candidate = Path.cwd() / Path(stream_name)
            if candidate.is_file():
                config.source_url = candidate.as_uri()

    # rewind where possible; streams that can't seek are read from where they are
    with suppress(UnsupportedOperation):
        data.seek(0)

    parser = config.parser_options._make_parser()
    return etree.parse(cast("IO", data), parser=parser)
@plugin_manager.register_loader()
def text_loader(data: Any, config: SimpleNamespace) -> LoaderResult:
    """
    Parses a string or a byte sequence that contains a full document. A string
    is encoded to bytes with the default codec before it is parsed.

    :param data: The candidate input object.
    :param config: The configuration namespace; its ``parser_options`` provide
                   the parser.
    :return: An element tree around the parsed root, or a message string when
             ``data`` is neither a :class:`str` nor a :class:`bytes` instance.
    """
    if isinstance(data, str):
        data = data.encode()

    if not isinstance(data, bytes):
        return "The input value is not a byte sequence."

    parser = config.parser_options._make_parser()
    parsed_root = etree.fromstring(data, parser)
    return etree.ElementTree(element=parsed_root)