Source code for _delb.parser

# Copyright (C) 2018-'22  Frank Sachsenheim
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

from typing import Optional
from warnings import warn

from lxml import etree


class ParserOptions:
    """
    Bundles the settings that determine how an XML parser behaves.

    :param cleanup_namespaces: Consolidate XML namespace declarations.
    :param collapse_whitespace: :meth:`Collapse the content's whitespace
                                <delb.Document.collapse_whitespace>`. This value is
                                only stored on the instance; it is not forwarded to
                                the lxml parser built by :meth:`_make_parser`.
    :param remove_comments: Ignore comments.
    :param remove_processing_instructions: Don't include processing instructions in
                                           the parsed tree.
    :param resolve_entities: Resolve entities.
    :param unplugged: Don't load referenced resources over network.
    """

    def __init__(
        self,
        cleanup_namespaces: bool = False,
        collapse_whitespace: bool = False,
        remove_comments: bool = False,
        remove_processing_instructions: bool = False,
        resolve_entities: bool = True,
        unplugged: bool = False,
    ):
        # Settings that are handed to the lxml parser by `_make_parser`.
        self.unplugged = unplugged
        self.resolve_entities = resolve_entities
        self.remove_processing_instructions = remove_processing_instructions
        self.remove_comments = remove_comments
        self.cleanup_namespaces = cleanup_namespaces
        # Kept on the instance only; `_make_parser` does not read it.
        self.collapse_whitespace = collapse_whitespace

    def _make_parser(self) -> etree.XMLParser:
        """Build an lxml ``XMLParser`` configured from this instance's options."""
        lxml_arguments = {
            # Options derived from this instance.
            "no_network": self.unplugged,
            "ns_clean": self.cleanup_namespaces,
            "remove_comments": self.remove_comments,
            "remove_pis": self.remove_processing_instructions,
            "resolve_entities": self.resolve_entities,
            # Options that are always switched off.
            "remove_blank_text": False,
            "strip_cdata": False,
        }
        return etree.XMLParser(**lxml_arguments)
def _compat_get_parser(
    parser: Optional[etree.XMLParser],
    parser_options: Optional[ParserOptions],
    collapse_whitesppace: Optional[bool],
) -> tuple[etree.XMLParser, Optional[bool]]:
    """
    Reconcile the deprecated parser arguments with ``parser_options``.

    :param parser: A ready-made lxml parser (deprecated way of configuring).
    :param parser_options: The options object a parser is built from.
    :param collapse_whitesppace: The deprecated, separately passed whitespace
                                 flag. The misspelled name is part of the
                                 signature and therefore kept as is.
    :return: A pair of the parser to use and the whitespace collapsing flag.
    :raises ValueError: If both ``parser`` and ``parser_options`` are given.
    """
    if not (parser is None or parser_options is None):
        raise ValueError(
            "Only either the deprecated `parser` argument or `parser_options` "
            "argument can be provided."
        )

    # Deprecated path: the caller handed in a parser instance, use it unchanged.
    if parser is not None:
        warn(
            "Directly providing a lxml-parser is deprecated, use the "
            "`parser_options` argument instead.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return parser, collapse_whitesppace or False

    # Explicit options win; the separately passed flag is not consulted here.
    if parser_options is not None:
        return parser_options._make_parser(), parser_options.collapse_whitespace

    if collapse_whitesppace is None:
        effective_options = ParserOptions()
    else:
        # Deprecated path: only the stand-alone whitespace flag was given.
        warn(
            "The `collapse_whitespace` argument is deprecated, use the "
            "property with the same name on the `parser_options` instead.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        effective_options = ParserOptions(collapse_whitespace=collapse_whitesppace)

    return effective_options._make_parser(), effective_options.collapse_whitespace


__all__ = (ParserOptions.__name__,)