xref: /petsc/doc/ext/html5_petsc.py (revision 80425c320915d392b56f48cdadb23b71b5cff860)
1""" Sphinx extension for custom HTML processing for PETSc docs """
2
3from typing import Any, Dict
4import re
5import os
6import subprocess
7import types
8
9from docutils import nodes
10from docutils.nodes import Element, Text
11
12from sphinx import version_info as sphinx_version_info
13from sphinx.writers.html5 import HTML5Translator
14from sphinx.application import Sphinx
15
# When the re module has no Pattern attribute (presumably older Python
# versions), alias the private compiled-pattern type under that name so the
# re.Pattern annotations used further down in this file still resolve.
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type
17
18
def setup(app: Sphinx) -> None:
    """ Sphinx extension entry point

    Checks the Sphinx version first, then registers ``_setup_translators``
    as a handler for the 'builder-inited' event.
    """
    _check_version(app)
    app.connect('builder-inited', _setup_translators)
23
24
def _check_version(app: Sphinx) -> None:
    """ Check the running Sphinx against the version this extension copies code from

    Requires at least that major.minor version (via ``app.require_sphinx``),
    and prints a warning if the running major.minor version is not the same.
    """
    copied_from = (3, 5, 4, 'final', 0)
    app.require_sphinx('%s.%s' % (copied_from[0], copied_from[1]))
    if sphinx_version_info[:2] == copied_from[:2]:
        # Same major.minor as the duplicated code: nothing to report
        return
    print('Warning: A custom extension duplicates code from Sphinx %s ' % (copied_from,),
          'which differs from the current version %s' % (sphinx_version_info,),
          'so unexpected behavior may be observed.')
32
33
34def _setup_translators(app: Sphinx) -> None:
35    """ Use a mixin strategy to add to the HTML translator without overriding
36
37    This allows use of other extensions which modify the translator.
38
39    Duplicates the approach used here in sphinx-hoverref:
40    https://github.com/readthedocs/sphinx-hoverxref/pull/42
41    """
42    if app.builder.format != 'html':
43        return
44
45    for name, klass in app.registry.translators.items():
46        translator = types.new_class(
47            'PETScHTMLTranslator',
48            (
49                PETScHTMLTranslatorMixin,
50                klass,
51            ),
52            {},
53        )
54        app.set_translator(name, translator, override=True)
55
56    translator = types.new_class(
57        'PETScHTMLTranslator',
58        (
59            PETScHTMLTranslatorMixin,
60            app.builder.default_translator_class,
61        ),
62        {},
63    )
64    app.set_translator(app.builder.name, translator, override=True)
65
66
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    It is written as a mixin: ``_setup_translators`` combines it with an
    existing translator class, which supplies everything reached through
    ``self`` and ``super()`` here (``body``, ``encode``, ``builder``, ...).

    The custom processing consists of turning the names of PETSc man pages,
    where they appear in literal text and literal blocks, into HTML links.
    """

    def __init__(self, *args: Any) -> None:
        # Both caches are populated lazily. Use the _get_manpage_*() methods
        # rather than these data members, which may not be populated yet.
        self._manpage_map = None
        self._manpage_pattern = None
        super().__init__(*args)

    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps a man page name to a complete HTML link tag. It is
        built on first use, from the htmlmap file of the classic docs build
        (a path relative to the current working directory), and then cached.

        Raises an Exception if the htmlmap file is not found.
        """
        # Compare with None (not truthiness), so that an empty map is cached
        # as well, instead of being re-read from disk on every call.
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. Run script to build classic docs subset." %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = self._get_manpage_prefix_base()
            # The trailing '' makes the prefix end with a path separator
            manpage_prefix = os.path.join(manpage_prefix_base, 'docs', '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """

        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _get_manpage_prefix_base(self) -> str:
        """ Return the base location for the install. This varies by platform.

        * With GITLAB_CI in the environment: the value of CI_ENVIRONMENT_URL,
          without a trailing '/index.html' and without trailing slashes.
        * With READTHEDOCS in the environment: a fixed URL.
        * Otherwise: the builder's output directory.

        Raises an Exception if GITLAB_CI is set but CI_ENVIRONMENT_URL is
        missing or empty.
        """
        if 'GITLAB_CI' in os.environ:
            ci_environment_url = os.getenv('CI_ENVIRONMENT_URL')
            if not ci_environment_url:
                raise Exception('GitLab CI detected but expected environment variable not found')
            # Remove the page name as a suffix. str.rstrip() is not suitable
            # for this: it strips a *set of characters*, so it would also
            # remove a trailing path component such as '/html'.
            index_suffix = '/index.html'
            if ci_environment_url.endswith(index_suffix):
                ci_environment_url = ci_environment_url[:-len(index_suffix)]
            manpage_prefix_base = ci_environment_url.rstrip('/')
        elif 'READTHEDOCS' in os.environ:  # Temporary - remove once ReadTheDocs is abandoned
            manpage_prefix_base = 'https://www.mcs.anl.gov/petsc/petsc-main'
        else:
            manpage_prefix_base = self.builder.outdir
        return manpage_prefix_base

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string

        Each occurrence of a man page name in ``string`` is replaced by the
        corresponding link tag. The string is returned unchanged if there
        are no man pages to link.
        """
        manpage_map = self._get_manpage_map()
        if not manpage_map:
            # Nothing to link: skip building and applying a pattern
            return string
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        """ Append the HTML for a text node to the body

        When literal text is being protected, each non-blank token first has
        man page links added and is then wrapped in a "pre" span. Other text
        is appended encoded, with email cloaking applied when enabled.
        """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        """ Append the highlighted HTML for a literal block, with man page links

        Blocks whose raw source differs from their text are passed on to the
        next class's visit_literal_block, without custom processing.
        Otherwise the block is highlighted, man page links are added to the
        highlighted HTML, and nodes.SkipNode is raised once the result has
        been appended to the body.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode
177
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Read an htmlmap file into a dict from man page name to URL

    URLs are stored exactly as they appear in the file. Lines which do not
    have the expected form are printed with a warning, and skipped.
    """
    entry = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    name_to_url = {}
    with open(htmlmap_filename, 'r') as htmlmap:
        for raw_line in htmlmap:
            found = entry.match(raw_line)
            if found is None:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(raw_line)
                continue
            # The second group is matched but not used
            name, _, url = found.groups()
            name_to_url[name] = url
    return name_to_url
192
193
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """ Turn a dict of name -> link into a dict of name -> HTML link tag

    Any link which does not start with 'http' has the prefix prepended.
    The name (the dict key) is used as the text of the link tag.
    """
    anchors = {}
    for name, link in string_to_link.items():
        if link.startswith('http'):
            url = link
        else:
            url = prefix + link
        anchors[name] = ''.join(('<a href=\"', url, '\">', name, '</a>'))
    return anchors
204
205
def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    Each key is escaped, so that it is matched literally, and is enclosed
    in word boundaries. Only the keys of source_dict are used.

    If source_dict is empty, the returned pattern never matches.
    """
    if not source_dict:
        # Joining no alternatives would give the empty regex, which matches
        # (the empty string) at every position. An empty negative lookahead
        # always fails, so this pattern matches nowhere.
        return re.compile(r'(?!)')

    def process_word(word):
        """ add escape characters and word boundaries """
        return r'\b' + re.escape(word) + r'\b'
    return re.compile(r'|'.join(map(process_word, source_dict)))
212
213
def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Replace each match of pattern in string with the value stored under the matched text

    The pattern is expected to match only text which is a key in replacements.
    """
    def lookup(found):
        """ Return the replacement for the full text of a match """
        return replacements[found.group(0)]
    return pattern.sub(lookup, string)
217