"""DocBook 5 renderer for a markdown-it token stream.

Each ``*_open``/``*_close`` handler returns a string of DocBook markup; all
content text passes through ``escape`` and all attribute values through
``quoteattr`` so the emitted XML is well-formed regardless of input.

NOTE(review): the runtime string literals in this file had been stripped of
their XML tags by a broken extraction (e.g. ``link_close`` returned ``f""``
and lost its ``self._link_tags.pop()``; ``fence`` computed ``info`` but never
used it). The tags are restored here from the surviving structure — confirm
against the upstream nixos-render-docs sources.
"""
from collections.abc import Mapping, Sequence
from typing import cast, Optional, NamedTuple

from markdown_it.token import Token
from xml.sax.saxutils import escape, quoteattr

from .md import Renderer

# characters that may not appear in xml ids, each mapped to an underscore.
_xml_id_translate_table = {
    ord('*'): ord('_'),
    ord('<'): ord('_'),
    ord(' '): ord('_'),
    ord('>'): ord('_'),
    ord('['): ord('_'),
    ord(']'): ord('_'),
    ord(':'): ord('_'),
    ord('"'): ord('_'),
}

def make_xml_id(s: str) -> str:
    """Return *s* with characters that are invalid in xml ids replaced by ``_``."""
    return s.translate(_xml_id_translate_table)

class Deflist:
    # whether the currently open <varlistentry> already contains a <listitem>.
    # docbook allows only one definition per term here (see dd_open).
    has_dd = False

class Heading(NamedTuple):
    container_tag: str
    level: int
    # special handling for titles: whether partinfo was already closed from elsewhere
    # or still needs closing.
    partintro_closed: bool = False

class DocBookRenderer(Renderer):
    """Renders a markdown token stream to DocBook 5 markup."""
    _link_tags: list[str]      # stack of tags opened by link_open ('link' or 'xref')
    _deflists: list[Deflist]   # stack of open definition lists
    _headings: list[Heading]   # stack of open section containers, innermost last
    _attrspans: list[str]      # stack of closing markup for open attr spans

    def __init__(self, manpage_urls: Mapping[str, str]):
        super().__init__(manpage_urls)
        self._link_tags = []
        self._deflists = []
        self._headings = []
        self._attrspans = []

    def render(self, tokens: Sequence[Token]) -> str:
        result = super().render(tokens)
        # close any sections still open at the end of the document.
        result += self._close_headings(None)
        return result

    def renderInline(self, tokens: Sequence[Token]) -> str:
        # HACK to support docbook links and xrefs. link handling is only necessary because the docbook
        # manpage stylesheet converts - in urls to a mathematical minus, which may be somewhat incorrect.
        for i, token in enumerate(tokens):
            if token.type != 'link_open':
                continue
            token.tag = 'link'
            # turn [](#foo) into xrefs
            if token.attrs['href'][0:1] == '#' and tokens[i + 1].type == 'link_close':  # type: ignore[index]
                token.tag = "xref"
            # turn into links without contents
            if tokens[i + 1].type == 'text' and tokens[i + 1].content == token.attrs['href']:
                tokens[i + 1].content = ''
        return super().renderInline(tokens)

    def text(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return escape(token.content)
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para>"
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</para>"
    def hardbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<literallayout>\n</literallayout>"
    def softbreak(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # should check options.breaks() and emit hard break if so
        return "\n"
    def code_inline(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<literal>{escape(token.content)}</literal>"
    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<programlisting>{escape(token.content)}</programlisting>"
    def link_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._link_tags.append(token.tag)
        href = cast(str, token.attrs['href'])
        # in-document targets become linkends, everything else an xlink:href.
        (attr, start) = ('linkend', 1) if href[0] == '#' else ('xlink:href', 0)
        return f"<{token.tag} {attr}={quoteattr(href[start:])}>"
    def link_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # pop the tag pushed by the matching link_open ('link' or 'xref').
        return f"</{self._link_tags.pop()}>"
    def list_item_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<listitem>"
    def list_item_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</listitem>\n"
    # HACK open and close para for docbook change size. remove soon.
    def bullet_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        spacing = ' spacing="compact"' if token.meta.get('compact', False) else ''
        return f"<para><itemizedlist{spacing}>\n"
    def bullet_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "\n</itemizedlist></para>"
    def em_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<emphasis>"
    def em_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</emphasis>"
    def strong_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return '<emphasis role="strong">'
    def strong_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</emphasis>"
    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        info = f" language={quoteattr(token.info)}" if token.info != "" else ""
        return f"<programlisting{info}>{escape(token.content)}</programlisting>"
    def blockquote_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><blockquote>"
    def blockquote_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</blockquote></para>"
    def note_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><note>"
    def note_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</note></para>"
    def caution_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><caution>"
    def caution_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</caution></para>"
    def important_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><important>"
    def important_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</important></para>"
    def tip_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><tip>"
    def tip_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</tip></para>"
    def warning_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<para><warning>"
    def warning_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</warning></para>"
    # markdown-it emits tokens based on the html syntax tree, but docbook is
    # slightly different. html has <dl>{<dt>term</dt><dd>definition</dd>}</dl>,
    # docbook has <variablelist>{<varlistentry><term>term</term>
    # <listitem>definition</listitem></varlistentry>}</variablelist>.
    # we have to reject multiple definitions for the same term for time being.
    def dl_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists.append(Deflist())
        return "<variablelist>"
    def dl_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists.pop()
        return "</variablelist>"
    def dt_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        self._deflists[-1].has_dd = False
        return "<varlistentry><term>"
    def dt_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</term>"
    def dd_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if self._deflists[-1].has_dd:
            raise Exception("multiple definitions per term not supported")
        self._deflists[-1].has_dd = True
        return "<listitem>"
    def dd_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</listitem></varlistentry>"
    def myst_role(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if token.meta['name'] == 'command':
            return f"<command>{escape(token.content)}</command>"
        if token.meta['name'] == 'file':
            return f"<filename>{escape(token.content)}</filename>"
        if token.meta['name'] == 'var':
            return f"<varname>{escape(token.content)}</varname>"
        if token.meta['name'] == 'env':
            return f"<envar>{escape(token.content)}</envar>"
        if token.meta['name'] == 'option':
            return f"<option>{escape(token.content)}</option>"
        if token.meta['name'] == 'manpage':
            # split "page(section)" into its two parts.
            [page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ]
            section = section[:-1]
            man = f"{page}({section})"
            title = f"<refentrytitle>{escape(page)}</refentrytitle>"
            vol = f"<manvolnum>{escape(section)}</manvolnum>"
            ref = f"<citerefentry>{title}{vol}</citerefentry>"
            # link to the manpage if we know a url for it, otherwise emit a
            # plain citerefentry.
            if man in self._manpage_urls:
                return f"<link xlink:href={quoteattr(self._manpage_urls[man])}>{ref}</link>"
            else:
                return ref
        raise NotImplementedError("md node not supported yet", token)
    def attr_span_begin(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # we currently support *only* inline anchors and the special .keycap class to produce
        # docbook keycap elements.
        (id_part, class_part) = ("", "")
        if s := token.attrs.get('id'):
            id_part = f'<anchor xml:id={quoteattr(cast(str, s))} />'
        if s := token.attrs.get('class'):
            if s == 'keycap':
                class_part = "<keycap>"
                self._attrspans.append("</keycap>")
            else:
                return super().attr_span_begin(token, tokens, i)
        else:
            # nothing to close for a bare anchor.
            self._attrspans.append("")
        return id_part + class_part
    def attr_span_end(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return self._attrspans.pop()
    def ordered_list_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        start = f' startingnumber="{token.attrs["start"]}"' if 'start' in token.attrs else ""
        spacing = ' spacing="compact"' if token.meta.get('compact', False) else ''
        return f"<orderedlist{start}{spacing}>"
    def ordered_list_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</orderedlist>"
    def heading_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        hlevel = int(token.tag[1:])
        # close any open sections at this level or deeper before opening a new one.
        result = self._close_headings(hlevel)
        (tag, attrs) = self._heading_tag(token, tokens, i)
        self._headings.append(Heading(tag, hlevel))
        attrs_str = "".join([ f" {k}={quoteattr(v)}" for k, v in attrs.items() ])
        return result + f'<{tag}{attrs_str}>\n<title>'
    def heading_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        heading = self._headings[-1]
        result = '</title>'
        if heading.container_tag == 'part':
            # generate the same ids as were previously assigned manually. if this collides we
            # rely on outside schema validation to catch it!
            maybe_id = ""
            assert tokens[i - 2].type == 'heading_open'
            if id := cast(str, tokens[i - 2].attrs.get('id', "")):
                maybe_id = " xml:id=" + quoteattr(id + "-intro")
            result += f"<partintro{maybe_id}>"
        return result
    def example_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        if id := cast(str, token.attrs.get('id', '')):
            id = f'xml:id={quoteattr(id)}' if id else ''
        return f'<example {id}>'
    def example_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</example>"
    def example_title_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "<title>"
    def example_title_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "</title>"
    def _close_headings(self, level: Optional[int]) -> str:
        """Close all open containers at *level* or deeper; ``None`` closes everything."""
        # we rely on markdown-it producing h{1..6} tags in token.tag for this to work
        result = []
        while len(self._headings):
            if level is None or self._headings[-1].level >= level:
                heading = self._headings.pop()
                if heading.container_tag == 'part' and not heading.partintro_closed:
                    result.append("</partintro>")
                result.append(f"</{heading.container_tag}>")
            else:
                break
        return "\n".join(result)
    def _heading_tag(self, token: Token, tokens: Sequence[Token],
                     i: int) -> tuple[str, dict[str, str]]:
        """Return the container tag and its attributes for a heading token."""
        attrs = {}
        if id := token.attrs.get('id'):
            attrs['xml:id'] = cast(str, id)
        return ("section", attrs)