##
# html5_parser.py: Drop-in compatibility replacement for the
#      html5_parser library.
##
# This module is part of the gardenpath package,
# developed under the rereading Project (https://rereading.space).
#
# Copyright 2025, rereading Project. Licensed under the GNU AGPL v3.
##

"""
This module provides a drop-in compatibility replacement for a
subset of the functionality provided by the html5_parser library.

Notable differences:

- Only html5ever+xot is supported as a tree-builder. The resultant
  tree is API compatible with a subset of lxml functionality.
- Stack sizes may not be manually set.
- It is recommended to decode bytes before calling; transport
  encodings must be fully specified when passing bytes.
"""

from gardenpath import parse_document

from logging import getLogger

from warnings import warn

logger = getLogger(__name__)


def parse(
    html: bytes | str,
    transport_encoding=None,
    namespace_elements=False,
    treebuilder="html5ever+xot",
    fallback_encoding=None,
    keep_doctype=True,
    maybe_xhtml=False,
    return_root=True,
    line_number_attr=None,
    sanitize_names=True,
    stack_size=None,  # set to 16384 in h5p.
    fragment_context=None,
):
    if isinstance(html, bytes):
        if transport_encoding is None:
            raise ValueError(
                "Bytes passed as HTML source, but no encoding was specified."
            )

        html = html.decode(transport_encoding)

    if treebuilder != "html5ever+xot":
        warn(
            "Alternative tree builders may be specified in html5_parser, but not in gardenpath. Using html5ever+xot."
        )

    if stack_size is not None:
        warn(
            "Manually setting stack sizes is supported in html5_parser, but not in gardenpath. Ignoring."
        )

    # TODO: Finish validate and convert keyword arguments as needed.
    # TODO: h5p sometimes parses as a document, other times as a fragment;
    #       dispatch accordingly.
    tree = parse_document(html, format="html5")

    # TODO: wrap the resultant tree as needed to provide
    #       compatability shims.
    return tree
