xref: /petsc/doc/ext/html5_petsc.py (revision fdf8caf3e1e3cd7728cdb7e0a7d210c5935aba88)
1""" Sphinx extension for custom HTML processing for PETSc docs """
2
3from typing import Any, Dict
4import re
5import os
6import subprocess
7import types
8
9from docutils import nodes
10from docutils.nodes import Element, Text
11
12from sphinx import version_info as sphinx_version_info
13from sphinx.writers.html5 import HTML5Translator
14from sphinx.application import Sphinx
15
# Compatibility shim: make sure ``re.Pattern`` exists (it is used in type
# annotations below) by aliasing it to ``re._pattern_type`` when it is missing.
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type
17
18
def setup(app: Sphinx) -> None:
    """ Sphinx extension entry point

    Verifies that the running Sphinx version is supported, then arranges for
    the translator setup to run when the 'builder-inited' event is emitted.
    """
    _check_version(app)
    # The builder must exist before its translator can be inspected/replaced,
    # so the actual work is deferred to an event handler
    app.connect('builder-inited', _setup_translators)
23
24
def _check_version(app: Sphinx) -> None:
    """ Refuse to run with any Sphinx version other than the one copied from

    Requires at least the major.minor version via ``app.require_sphinx`` and
    raises NotImplementedError unless the full version tuple of the running
    Sphinx equals the tuple the duplicated code was taken from.
    """
    expected_version_info = (2, 4, 4, 'final', 0)
    major, minor = expected_version_info[:2]
    app.require_sphinx('%s.%s' % (major, minor))

    if sphinx_version_info == expected_version_info:
        return

    message_parts = [
        'This extension duplicates code from Sphinx %s ' % (expected_version_info,),
        'which is incompatible with the current version %s' % (sphinx_version_info,),
    ]
    raise NotImplementedError(' '.join(message_parts))
34
35
def _setup_translators(app: Sphinx) -> None:
    """ Use a mixin strategy to add to the HTML translator without overriding

    This allows use of other extensions which modify the translator.

    Every translator already registered with the application is replaced by a
    new class deriving from PETScHTMLTranslatorMixin and the registered class.
    If no translator is registered under the active builder's name, the
    builder's default translator class is wrapped the same way and registered
    under that name.

    Nothing is done unless the builder's format is 'html'.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    def with_mixin(base_class):
        """ Create a translator class combining the PETSc mixin with base_class """
        return types.new_class(
            'PETScHTMLTranslator',
            (
                PETScHTMLTranslatorMixin,
                base_class,
            ),
            {},
        )

    # Work on a snapshot: set_translator() is called while looping over
    # the registered translators
    registered = dict(app.registry.translators)
    for name, klass in registered.items():
        app.set_translator(name, with_mixin(klass), override=True)

    # Only fall back to the builder's default translator class when no
    # translator is registered under the active builder's name. Registering
    # the wrapped default unconditionally would replace the wrapped
    # translator that was just installed for this builder, discarding the
    # other extension's customizations.
    if app.builder.name not in registered:
        app.set_translator(
            app.builder.name,
            with_mixin(app.builder.default_translator_class),
            override=True,
        )
67
68
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator mixin which overrides methods to add
    PETSc-specific custom processing to the generated HTML.

    Names listed in the classic docs' ``htmlmap`` file are turned into links
    to the corresponding manual pages, both in protected literal text
    (``visit_Text``) and in highlighted literal blocks
    (``visit_literal_block``).

    The class relies on ``super()`` and on attributes such as ``self.body``,
    ``self.builder`` and ``self.highlighter``, so it must be combined with a
    concrete HTML translator class which provides them.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """ Initialize the lazy caches and forward all arguments to the next class """
        # Both caches are populated on first use; None means "not computed yet"
        self._manpage_map = None
        self._manpage_pattern = None
        super().__init__(*args, **kwargs)

    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        Keys are the strings to look for, values are complete HTML link tags.
        The map is built on first use from the file
        ``_build_classic/docs/manualpages/htmlmap`` (a relative path, hence
        resolved against the current working directory) and cached afterwards.

        Raises an Exception if that file does not exist.
        """
        # Compare against None so that an empty map is also cached,
        # instead of re-reading the file on every call
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. Run script to build classic docs subset." %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = self._get_manpage_prefix_base()
            # The trailing '' makes the prefix end with a separator
            manpage_prefix = os.path.join(manpage_prefix_base, 'docs', '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """
        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _get_manpage_prefix_base(self) -> str:
        """ Return the base location for the install. This varies by platform.

        * GitLab CI (``GITLAB_CI`` set): ``CI_ENVIRONMENT_URL`` without a
          trailing ``index.html`` and without trailing slashes, or, if that
          variable is unset, ``https://petsc.org/`` followed by
          ``CI_COMMIT_REF_NAME`` with any ``release-`` removed.
        * ReadTheDocs (``READTHEDOCS`` set): a fixed URL.
        * Otherwise: the builder's output directory.

        Raises an Exception on GitLab CI if neither variable gives a value.
        """
        if 'GITLAB_CI' in os.environ:
            ci_environment_url = os.getenv('CI_ENVIRONMENT_URL')
            if ci_environment_url is not None:
                # Note that str.rstrip('/index.html') would not do this: it
                # removes any trailing run of the characters "/index.html",
                # which also eats the end of names such as "main"
                manpage_prefix_base = ci_environment_url
                index_page = 'index.html'
                if manpage_prefix_base.endswith(index_page):
                    manpage_prefix_base = manpage_prefix_base[:-len(index_page)]
                manpage_prefix_base = manpage_prefix_base.rstrip('/')
            else:
                # This is a brittle stopgap measure
                ci_commit_ref_name = os.getenv('CI_COMMIT_REF_NAME')
                if not ci_commit_ref_name:
                    raise Exception('Could not determine version name from GitLab CI environment variables')
                version_name = ci_commit_ref_name.replace('release-', '')
                manpage_prefix_base = 'https://petsc.org/' + version_name
        elif 'READTHEDOCS' in os.environ:  # Temporary - remove once ReadTheDocs is abandoned
            manpage_prefix_base = 'https://www.mcs.anl.gov/petsc/petsc-main'
        else:
            manpage_prefix_base = self.builder.outdir
        return manpage_prefix_base

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string """
        manpage_map = self._get_manpage_map()
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: "Text") -> None:
        """ Append the HTML for a text node to the body

        Identical to the duplicated Sphinx code, except that tokens of
        protected literal text get manpage links added before being wrapped
        in a ``<span class="pre">``.
        """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: "Element") -> None:
        """ Append the highlighted HTML for a literal block to the body

        Identical to the duplicated Sphinx code, except that manpage links
        are added to the highlighted output. Blocks whose raw source differs
        from their text are delegated to the next class unchanged.

        Raises nodes.SkipNode once the block has been written, so that the
        children of the node are not visited.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        # Compare by value: two equal language strings are not guaranteed
        # to be the same object, so an identity test is unreliable here
        if lang == self.builder.config.highlight_language:
            # only pass highlighter options for original language
            opts = self.builder.config.highlight_options
        else:
            opts = {}

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=(self.builder.current_docname, node.line), **highlight_args
        )
        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode
186
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Build a dict from the entries of an htmlmap file.

    Each line matching the expected ``man:+NAME++TEXT++++man+LINK`` layout
    contributes the entry NAME -> LINK, with LINK left exactly as found in
    the file. Any other line is reported on standard output and skipped.
    """
    entry_regex = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    links = {}
    with open(htmlmap_filename, 'r') as htmlmap:
        for entry in htmlmap:
            found = entry_regex.match(entry)
            if found is None:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(entry)
                continue
            # group 1 is the name, group 3 the link; group 2 is not used
            links[found.group(1)] = found.group(3)
    return links
201
202
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """ Turn a name -> link dict into a name -> HTML anchor tag dict

    Links which do not start with 'http' get the prefix prepended.
    Each value becomes an ``<a>`` tag pointing to the resulting URL,
    with the key as the link text.
    """
    completed = {}
    for name, link in string_to_link.items():
        if link.startswith('http'):
            target = link
        else:
            target = prefix + link
        completed[name] = ''.join(['<a href="', target, '">', name, '</a>'])
    return completed
213
214
def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    Empty keys are ignored. If there is no (non-empty) key at all, the
    returned pattern never matches, so that substitutions using it leave
    their input unchanged.
    """
    def process_word(word):
        """ add escape characters and word boundaries """
        return r'\b' + re.escape(word) + r'\b'

    # An empty key would become r'\b\b', which matches (with an empty match)
    # at every word boundary
    alternatives = [process_word(word) for word in source_dict if word]
    if not alternatives:
        # An empty regex would match at every position; use an empty negative
        # lookahead instead, which can never succeed
        return re.compile(r'(?!)')
    return re.compile(r'|'.join(alternatives))
221
222
def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Substitute every match of pattern in string with its entry in replacements

    The pattern is expected to match only keys of replacements; the whole
    matched text is used as the lookup key.
    """
    def lookup(found):
        """ Return the replacement for the matched text """
        matched_text = found.group(0)
        return replacements[matched_text]

    return pattern.sub(lookup, string)
226