xref: /petsc/doc/ext/html5_petsc.py (revision 29912973eddc2d63379123316d6141c93d64bbc2)
1ee12ae39SPatrick Sanan""" Sphinx extension for custom HTML processing for PETSc docs """
2ee12ae39SPatrick Sanan
3ee12ae39SPatrick Sananfrom typing import Any, Dict
4ee12ae39SPatrick Sananimport re
5ee12ae39SPatrick Sananimport os
6ee12ae39SPatrick Sananimport subprocess
7ee12ae39SPatrick Sananimport types
8ee12ae39SPatrick Sanan
9ee12ae39SPatrick Sananfrom docutils import nodes
10ee12ae39SPatrick Sananfrom docutils.nodes import Element, Text
11ee12ae39SPatrick Sanan
12ee12ae39SPatrick Sananfrom sphinx import version_info as sphinx_version_info
13ee12ae39SPatrick Sananfrom sphinx.writers.html5 import HTML5Translator
14ee12ae39SPatrick Sananfrom sphinx.application import Sphinx
15ee12ae39SPatrick Sanan
# Compatibility shim: when the running interpreter's ``re`` module exposes no
# ``Pattern`` attribute, alias it to ``re._pattern_type``.
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type


# Literal placeholder used as the prefix of relative manual page links.
# NOTE(review): presumably replaced with the real documentation output root by
# a later processing step outside this file -- confirm.
PETSC_DOC_OUT_ROOT_PLACEHOLDER = 'PETSC_DOC_OUT_ROOT_PLACEHOLDER'
201540e0edSPatrick Sanan
def setup(app: Sphinx) -> Dict[str, Any]:
    """ Sphinx extension entry point.

    Checks the running Sphinx version against the version this extension's
    duplicated code was taken from, then connects ``_setup_translators`` to
    the ``builder-inited`` event so the custom translators are installed once
    the builder exists.

    Returns the extension metadata dict, which declares the extension safe
    for parallel reading.
    """
    _check_version(app)

    app.connect('builder-inited', _setup_translators)
    return {'parallel_read_safe': True}
26ee12ae39SPatrick Sanan
27ee12ae39SPatrick Sanan
def _check_version(app: Sphinx) -> None:
    """ Require a minimum Sphinx version and warn on a major.minor mismatch.

    The translator methods in this extension duplicate code from one specific
    Sphinx release.  That release's ``major.minor`` is passed to
    ``app.require_sphinx``; if the running Sphinx has a different
    ``major.minor``, a notice is printed (the build is not stopped here).
    """
    copied_from = (4, 2, 0, 'final', 0)
    app.require_sphinx('%s.%s' % (copied_from[0], copied_from[1]))

    # Same major.minor as the duplicated code: nothing to report
    if sphinx_version_info[:2] == copied_from[:2]:
        return

    print('A custom extension duplicates code from Sphinx %s ' % (copied_from,),
          'which differs from the current version %s' % (sphinx_version_info,),
          'so unexpected behavior may be observed.')
35ee12ae39SPatrick Sanan
36ee12ae39SPatrick Sanan
def _setup_translators(app: Sphinx) -> None:
    """ Use a mixin strategy to extend the Sphinx HTML translators without replacing them

    Every translator class already present in ``app.registry.translators``,
    and the builder's default translator class, is subclassed together with
    ``PETScHTMLTranslatorMixin`` and registered again with ``override=True``.
    This allows use of other extensions which modify the translator.

    Nothing is done unless the builder's format is 'html'.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    def with_mixin(base):
        # The mixin is listed first so that its methods come first in the MRO
        return types.new_class('PETScHTMLTranslator', (PETScHTMLTranslatorMixin, base), {})

    for builder_name, registered_class in app.registry.translators.items():
        app.set_translator(builder_name, with_mixin(registered_class), override=True)

    app.set_translator(
        app.builder.name,
        with_mixin(app.builder.default_translator_class),
        override=True,
    )
68ee12ae39SPatrick Sanan
69ee12ae39SPatrick Sanan
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator mixin which overrides methods to add
    PETSc-specific custom processing to the generated HTML.

    Replaces any word XXX that matches a manual page name with
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/manualpages/YY/XXX.html">XXX</a>
    or
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/manualpages/YY/XXX">XXX</a>
    depending on whether the Sphinx build is html or dirhtml (the link target
    itself is taken as-is from the htmlmap file).

    The replacement is applied to protected literal text (see ``visit_Text``)
    and to highlighted literal blocks (see ``visit_literal_block``).
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Lazily populated by _get_manpage_map(); None means "not loaded yet"
        self._manpage_map = None
        # Raw string: '\w' in a normal string literal is an invalid escape
        self._word_pattern = re.compile(r'\w+')
        # Forward everything so this mixin cooperates with any base translator
        super().__init__(*args, **kwargs)


    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        Maps each manual page name to a complete ``<a href=...>`` tag.  The
        map is built from the file ``manualpages/htmlmap`` (relative to the
        current working directory) on first use and cached afterwards.

        Raises Exception if the htmlmap file does not exist.
        """
        # Compare against None so that an empty (but loaded) map is not
        # re-read from disk on every call
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. Run script to build classic docs subset." %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = PETSC_DOC_OUT_ROOT_PLACEHOLDER
            # Joining with '' appends a trailing path separator to the prefix
            manpage_prefix = os.path.join(manpage_prefix_base, '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map


    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string

        Every run of word characters that is exactly a manual page name is
        replaced by the corresponding link tag; all other text is returned
        unchanged.
        """
        manpage_map = self._get_manpage_map()

        def replace(matchobj):
            word = matchobj.group(0)
            # Unknown words are substituted by themselves, i.e. left as they are
            return manpage_map.get(word, word)

        return self._word_pattern.sub(replace, string)


    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        """ Emit a text node, linking manual page names inside protected literal text. """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        """ Emit a highlighted literal block with manual page names turned into links.

        Blocks whose raw source differs from their text are delegated to the
        base class.  Otherwise the block is highlighted, links are added to
        the highlighted HTML, and ``nodes.SkipNode`` is raised once the output
        has been appended to the body.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode
169ee12ae39SPatrick Sanan
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Build a dict of manual page name -> URL from an htmlmap file.

    Only lines starting with 'man:' are considered.  For each such line that
    matches the expected entry layout, the first captured field (the name) is
    mapped to the third captured field (the URL), which is stored exactly as
    it appears in the file.  A 'man:' line that does not match is reported on
    standard output and skipped.
    """
    entry_pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')

    # Collect the candidate lines first so the file is closed before parsing
    with open(htmlmap_filename, 'r') as htmlmap:
        man_lines = [row for row in htmlmap if row.startswith('man:')]

    name_to_url = {}
    for row in man_lines:
        found = entry_pattern.match(row)
        if found is None:
            print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
            print(row)
            continue
        name_to_url[found.group(1)] = found.group(3)
    return name_to_url
184ee12ae39SPatrick Sanan
185ee12ae39SPatrick Sanan
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """
    Turn a name -> link dict into a name -> HTML anchor tag dict.

    Links that start with 'http' are used as they are; every other link gets
    `prefix` prepended.  Each value of the returned dict is the string
    <a href="URL">NAME</a> for the corresponding key.
    """
    completed = {}
    for name, link in string_to_link.items():
        if not link.startswith('http'):
            link = prefix + link
        completed[name] = ''.join(('<a href="', link, '">', name, '</a>'))
    return completed
194