xref: /petsc/doc/ext/html5_petsc.py (revision ee12ae39415b2e672d944cdca066227dadbf8b14)
1*ee12ae39SPatrick Sanan""" Sphinx extension for custom HTML processing for PETSc docs """
2*ee12ae39SPatrick Sanan
3*ee12ae39SPatrick Sananfrom typing import Any, Dict
4*ee12ae39SPatrick Sananimport re
5*ee12ae39SPatrick Sananimport os
6*ee12ae39SPatrick Sananimport subprocess
7*ee12ae39SPatrick Sananimport types
8*ee12ae39SPatrick Sanan
9*ee12ae39SPatrick Sananfrom docutils import nodes
10*ee12ae39SPatrick Sananfrom docutils.nodes import Element, Text
11*ee12ae39SPatrick Sanan
12*ee12ae39SPatrick Sananfrom sphinx import version_info as sphinx_version_info
13*ee12ae39SPatrick Sananfrom sphinx.writers.html5 import HTML5Translator
14*ee12ae39SPatrick Sananfrom sphinx.application import Sphinx
15*ee12ae39SPatrick Sanan
# Some Python versions do not provide re.Pattern (presumably older ones, where
# the compiled-pattern type is only reachable as re._pattern_type); alias it so
# the re.Pattern annotations used in this module can be evaluated.
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type
17*ee12ae39SPatrick Sanan
18*ee12ae39SPatrick Sanan
def setup(app: Sphinx) -> None:
    """ Extension entry point: check the Sphinx version and hook translator setup

    The version check runs immediately. The translator customization is
    registered as a handler for the 'builder-inited' event.
    """
    _check_version(app)

    event_name = 'builder-inited'
    app.connect(event_name, _setup_translators)
23*ee12ae39SPatrick Sanan
24*ee12ae39SPatrick Sanan
def _check_version(app: Sphinx) -> None:
    """ Insist on the exact Sphinx version this extension copies code from

    First asks the application to require at least the matching major.minor
    release, then raises NotImplementedError unless the running Sphinx
    version_info tuple equals the expected one exactly.
    """
    expected = (2, 4, 4, 'final', 0)
    app.require_sphinx('%s.%s' % expected[:2])
    if sphinx_version_info == expected:
        return
    # The parts are joined with a space, exactly as listed here
    message_parts = [
        'This extension duplicates code from Sphinx %s ' % (expected,),
        'which is incompatible with the current version %s' % (sphinx_version_info,),
    ]
    raise NotImplementedError(' '.join(message_parts))
34*ee12ae39SPatrick Sanan
35*ee12ae39SPatrick Sanan
def _setup_translators(app: Sphinx) -> None:
    """ Use a mixin strategy to add to the HTML translator without overriding

    Every registered translator class, and the builder's default translator
    class, is replaced by a dynamically created subclass which puts
    PETScHTMLTranslatorMixin in front of it. This allows use of other
    extensions which modify the translator.

    Does nothing unless the builder's format is 'html'.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    def install(translator_name, base_class):
        """ Register a mixin-augmented subclass of base_class under translator_name """
        mixed_class = types.new_class(
            'PETScHTMLTranslator',
            (PETScHTMLTranslatorMixin, base_class),
            {},
        )
        app.set_translator(translator_name, mixed_class, override=True)

    # Translators that were registered explicitly (e.g. by other extensions)
    for name, klass in app.registry.translators.items():
        install(name, klass)

    # The translator the current builder would use by default
    install(app.builder.name, app.builder.default_translator_class)
67*ee12ae39SPatrick Sanan
68*ee12ae39SPatrick Sanan
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    It is meant to be combined with an existing HTML translator class (see
    _setup_translators). Names of PETSc manual pages found in literal text and
    in highlighted code blocks are wrapped in plain HTML links.
    """

    def __init__(self, *args: Any) -> None:
        # Both members are populated lazily; always go through
        # _get_manpage_map() / _get_manpage_pattern() to read them.
        self._manpage_map = None
        self._manpage_pattern = None
        super().__init__(*args)

    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps each manual page name to a complete '<a href=...>' tag.
        It is built on first use from the htmlmap file of the classic docs
        build (looked up relative to the current working directory) and then
        cached on the instance.

        Raises Exception if the htmlmap file does not exist.
        """
        # Compare against None, not truthiness: an empty (but already loaded)
        # map must not trigger re-reading the file on every call.
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. Run script to build classic docs subset." %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = self._get_manpage_prefix_base()
            # The trailing '' makes the prefix end with a path separator
            manpage_prefix = os.path.join(manpage_prefix_base, 'docs', '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """

        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _get_manpage_prefix_base(self) -> str:
        """ Return the base location for the install. This varies by platform.

        On GitLab CI this is CI_ENVIRONMENT_URL without a trailing
        '/index.html' and without trailing slashes, on ReadTheDocs a fixed
        URL, and otherwise the builder's output directory.

        Raises Exception if GitLab CI is detected but CI_ENVIRONMENT_URL is
        unset or empty.
        """
        if 'GITLAB_CI' in os.environ:
            ci_environment_url = os.getenv('CI_ENVIRONMENT_URL')
            if not ci_environment_url:
                raise Exception('GitLab CI detected but expected environment variable not found')
            # Remove the suffix explicitly. str.rstrip() takes a *set* of
            # characters, so rstrip('/index.html') would keep stripping into
            # the preceding component (e.g. '.../docs-html' -> '.../docs-').
            index_suffix = '/index.html'
            if ci_environment_url.endswith(index_suffix):
                ci_environment_url = ci_environment_url[:-len(index_suffix)]
            manpage_prefix_base = ci_environment_url.rstrip('/')
        elif 'READTHEDOCS' in os.environ:  # Temporary - remove once ReadTheDocs is abandoned
            manpage_prefix_base = 'https://www.mcs.anl.gov/petsc/petsc-main'
        else:
            manpage_prefix_base = self.builder.outdir
        return manpage_prefix_base

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string

        Every whole-word occurrence of a known manual page name is replaced
        by its link tag; a string is returned unchanged if no names are known.
        """
        manpage_map = self._get_manpage_map()
        if not manpage_map:
            # Nothing to link, so there is no point in building or running a pattern
            return string
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        """ Emit a text node, adding manpage links inside protected literal text """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        """ Emit a highlighted literal block with manpage links added

        Blocks whose raw source differs from their text are handed to the
        next class in the MRO unchanged. Otherwise the highlighted HTML is
        appended to the body and nodes.SkipNode is raised, so that the
        children are not visited again.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        # Compare by value: identity ('is') of two equal strings is an
        # implementation detail and must not decide whether options apply.
        if lang == self.builder.config.highlight_language:
            # only pass highlighter options for original language
            opts = self.builder.config.highlight_options
        else:
            opts = {}

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=(self.builder.current_docname, node.line), **highlight_args
        )
        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode
180*ee12ae39SPatrick Sanan
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Parse an htmlmap file into a dict from names to URLs, leaving URLs as they are.

    Each line is expected to start with 'man:+NAME++TEXT++++man+URL'; NAME
    becomes the key and URL the value. Any other line is reported on stdout
    and skipped.
    """
    entry_pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    name_to_url = {}
    with open(htmlmap_filename, 'r') as htmlmap_file:
        for entry in htmlmap_file:
            found = entry_pattern.match(entry)
            if found is None:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(entry)
                continue
            # The middle group (the displayed text) is not needed
            name, _, url = found.groups()
            name_to_url[name] = url
    return name_to_url
195*ee12ae39SPatrick Sanan
196*ee12ae39SPatrick Sanan
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """ Turn a name -> link mapping into a name -> HTML anchor tag mapping

    Links starting with 'http' are used as they are; all others get the
    prefix prepended. The name itself is used as the link text.
    """
    completed = {}
    for name, link in string_to_link.items():
        href = link if link.startswith('http') else prefix + link
        completed[name] = '<a href="' + href + '">' + name + '</a>'
    return completed
207*ee12ae39SPatrick Sanan
208*ee12ae39SPatrick Sanan
def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    Keys are escaped, so they are matched literally, and each is wrapped in
    word boundaries. Empty keys are ignored. If there is nothing to match,
    the returned pattern never matches, so substituting with it is a no-op.
    """
    def process_word(word):
        """ add escape characters and word boundaries """
        return r'\b' + re.escape(word) + r'\b'

    # An empty alternation (or an empty key) would match the empty string
    # throughout the input instead of matching nothing.
    words = [word for word in source_dict if word]
    if not words:
        return re.compile(r'(?!)')
    return re.compile(r'|'.join(map(process_word, words)))
215*ee12ae39SPatrick Sanan
216*ee12ae39SPatrick Sanan
def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Replace every match of pattern in string with the value replacements holds for the matched text

    The pattern is expected to match only keys of replacements; a match
    which is not a key raises KeyError.
    """
    def substitute(found):
        return replacements[found.group(0)]

    return pattern.sub(substitute, string)
220