# Copyright (C) 2018-'24 Frank Sachsenheim
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""
If ``delb`` is installed with ``https-loader`` as extra, the required
dependencies for this loader are installed as well. See :doc:`/installation`.
"""

from __future__ import annotations

from io import IOBase
from typing import TYPE_CHECKING, Any, Optional

from _delb.plugins import plugin_manager
from _delb.plugins.core_loaders import buffer_loader

if TYPE_CHECKING:
    from collections.abc import Iterator
    from types import SimpleNamespace
    from typing import Final

    from _delb.typing import LoaderResult


# ``h2`` is imported solely to find out whether it is installed; the result is
# handed to the default client's ``http2`` option and the module name is
# dropped again right away.
try:
    import h2  # type: ignore
except ImportError:
    http2 = False
else:
    http2 = True
    del h2


try:
    import httpx  # noqa: F401
except ImportError:
    # Without httpx nothing is defined or exported by this module.
    __all__: tuple[str, ...] = ()
else:
    DEFAULT_CLIENT: Final = httpx.Client(follow_redirects=True, http2=http2)

    class HttpsStreamWrapper(IOBase):
        """
        A read-only, file-like adapter around a streamed
        :class:`httpx.Response`. It lets consumers that expect an object with
        a ``read`` method pull the response body piece by piece.

        :param response: The streamed response whose body shall be read.
        """

        __slots__ = ("_buffer", "_generator", "_response")

        def __init__(self, response: httpx.Response):
            # bytes that were received from the response but not handed out yet
            self._buffer = bytearray()
            self._generator: Optional[Iterator[bytes]] = None
            self._response = response

        def read(self, size: Optional[int] = 4096) -> bytes:
            """
            Returns up to ``size`` bytes of the response body.

            Less than ``size`` bytes are only returned when the body is
            exhausted; an empty :class:`bytes` object signals its end.

            :param size: The maximum number of bytes to return. :obj:`None` or
                         a negative value returns everything that is left.
            :return: The next portion of the response body.
            """
            if self._generator is None:
                # The iterator is created lazily, on the first read. No chunk
                # size is fixed here because each call may ask for a different
                # amount; the requested size is enforced via the buffer below.
                self._generator = self._response.iter_bytes()

            read_all = size is None or size < 0
            buffer = self._buffer

            while read_all or len(buffer) < size:
                chunk = next(self._generator, None)
                if chunk is None:  # the body is exhausted
                    break
                buffer += chunk

            if read_all:
                size = len(buffer)

            result = bytes(buffer[:size])
            del buffer[:size]
            return result

    @plugin_manager.register_loader()
    def https_loader(
        data: Any, config: SimpleNamespace, client: httpx.Client = DEFAULT_CLIENT
    ) -> LoaderResult:
        """
        This loader loads a document from a URL with the ``http`` and ``https``
        scheme. The default httpx_-client follows redirects and can partially
        be configured with `environment variables`_. The URL will be bound to
        the name ``source_url`` on the document's :attr:`Document.config`
        attribute.

        Loaders with specifically configured httpx-clients can build on this
        loader like so:

        .. testcode::

            import httpx
            from _delb.plugins import plugin_manager
            from _delb.plugins.https_loader import https_loader

            client = httpx.Client(follow_redirects=False, trust_env=False)

            @plugin_manager.register_loader(before=https_loader)
            def custom_https_loader(data, config):
                return https_loader(data, config, client=client)

        :param data: The object to load from. Only strings that start with
                     ``http://`` or ``https://`` (compared case-insensitively)
                     are handled.
        :param config: The configuration namespace that is passed on to the
                       ``buffer_loader`` and that receives the ``source_url``.
        :param client: The httpx-client that is used to send the request.
        :return: Whatever the ``buffer_loader`` returns for the response body,
                 or a message that tells why the input was not handled.
        :raises httpx.HTTPStatusError: When the response has an error status
                                       code.

        .. _environment variables: https://www.python-httpx.org/environment_variables/
        .. _httpx: https://www.python-httpx.org/
        """
        if not (
            isinstance(data, str)
            and data.lower().startswith(("http://", "https://"))
        ):
            return "The input value is not an URL with the http or https scheme."

        # The body must be consumed while the streaming context is still open,
        # hence the buffer_loader is invoked within the ``with`` block.
        with client.stream("get", url=data) as response:
            response.raise_for_status()
            result = buffer_loader(HttpsStreamWrapper(response), config)
            config.source_url = data
            return result