xref: /petsc/doc/ext/html5_petsc.py (revision 66c9fbdd036b1e887ebf0d2bef6dbdcafd086d45)
1ee12ae39SPatrick Sanan""" Sphinx extension for custom HTML processing for PETSc docs """
2ee12ae39SPatrick Sanan
3ee12ae39SPatrick Sananfrom typing import Any, Dict
4ee12ae39SPatrick Sananimport re
5ee12ae39SPatrick Sananimport os
6ee12ae39SPatrick Sananimport subprocess
7ee12ae39SPatrick Sananimport types
8ee12ae39SPatrick Sanan
9ee12ae39SPatrick Sananfrom docutils import nodes
10ee12ae39SPatrick Sananfrom docutils.nodes import Element, Text
11ee12ae39SPatrick Sanan
12ee12ae39SPatrick Sananfrom sphinx import version_info as sphinx_version_info
13ee12ae39SPatrick Sananfrom sphinx.writers.html5 import HTML5Translator
14ee12ae39SPatrick Sananfrom sphinx.application import Sphinx
15ee12ae39SPatrick Sanan
# Compatibility shim: when the re module does not expose Pattern, alias it
# to the private compiled-pattern type so it can be used in annotations below
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type


# Literal token used as the root of every generated manual page link
# (presumably substituted with the real output root in a later step -- confirm)
PETSC_DOC_OUT_ROOT_PLACEHOLDER = 'PETSC_DOC_OUT_ROOT_PLACEHOLDER'
201540e0edSPatrick Sanan
211540e0edSPatrick Sanan
def setup(app: Sphinx) -> Dict[str, Any]:
    """ Sphinx extension entry point

    Checks the Sphinx version in use, then registers a 'builder-inited'
    handler which installs the PETSc-customized HTML translators.

    Returns the extension metadata dict, which declares the extension
    safe for parallel reading.
    """
    _check_version(app)

    app.connect('builder-inited', _setup_translators)
    return {'parallel_read_safe': True}
27ee12ae39SPatrick Sanan
28ee12ae39SPatrick Sanan
def _check_version(app: Sphinx) -> None:
    """ Require the Sphinx release this extension was written against, and warn on a mismatch

    Parts of this extension duplicate code from one specific Sphinx version, so a
    message is printed when the major/minor version in use differs from that one.
    """
    duplicated_from = (4, 2, 0, 'final', 0)
    major, minor = duplicated_from[:2]
    app.require_sphinx('%s.%s' % (major, minor))

    # Nothing to report when major and minor versions agree
    if sphinx_version_info[:2] == (major, minor):
        return

    print('A custom extension duplicates code from Sphinx %s ' % (duplicated_from,),
          'which differs from the current version %s' % (sphinx_version_info,),
          'so unexpected behavior may be observed.')
36ee12ae39SPatrick Sanan
37ee12ae39SPatrick Sanan
def _setup_translators(app: Sphinx) -> None:
    """ Layer the PETSc mixin on top of the HTML translators instead of replacing them

    Using a mixin keeps this extension compatible with other extensions which
    also modify the translator.

    Every translator in the registry, and the builder's default translator,
    is re-registered as a new class deriving from both the mixin and the
    original class. Nothing is done for builders whose format is not 'html'.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    def with_petsc_mixin(base):
        """ Create a translator class combining the PETSc mixin with base """
        return types.new_class('PETScHTMLTranslator', (PETScHTMLTranslatorMixin, base), {})

    for translator_name, registered_class in app.registry.translators.items():
        app.set_translator(translator_name, with_petsc_mixin(registered_class), override=True)

    default_translator = with_petsc_mixin(app.builder.default_translator_class)
    app.set_translator(app.builder.name, default_translator, override=True)
69ee12ae39SPatrick Sanan
70ee12ae39SPatrick Sanan
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    Replaces any string XXX that matches a manual page name with
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/docs/manualpages/YY/XXX.html">XXX</a>
    or
    <a href="PETSC_DOC_OUT_ROOT_PLACEHOLDER/docs/manualpages/YY/XXX">XXX</a>
    depending on if the Sphinx build is html or dirhtml
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Both members are populated lazily; always go through
        # _get_manpage_map() and _get_manpage_pattern() to read them
        self._manpage_map = None
        self._manpage_pattern = None
        # Forward everything, including keyword arguments, to the next class in the MRO
        super().__init__(*args, **kwargs)

    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps each manual page name to a complete HTML anchor. It is
        built from the htmlmap file on first use and then cached on the instance.

        Raises FileNotFoundError if the htmlmap file does not exist.
        """
        # Test against None: an empty map is falsy but still a valid cached
        # result, and must not cause the file to be read again on every call
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap_modified')
            if not os.path.isfile(htmlmap_filename):
                raise FileNotFoundError("Expected file %s not found. Run script to build classic docs subset." %  htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            # The prefix ends up inside URLs, so it is joined with '/' rather
            # than with the path separator of the build machine
            manpage_prefix = '/'.join((PETSC_DOC_OUT_ROOT_PLACEHOLDER, 'docs', ''))
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """
        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string

        Every manual page name found in string is replaced by its anchor.
        """
        manpage_map = self._get_manpage_map()
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        """ Emit the HTML for a text node, linking manual page names in literal text

        Links are only added while literal text is being protected; all other
        text is appended as in the duplicated Sphinx code.
        """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        """ Emit a highlighted literal block with manual page names turned into links

        Blocks whose raw source differs from their text are handed to the
        next class in the MRO unchanged. Otherwise the block is highlighted,
        links are added to the highlighted HTML, and nodes.SkipNode is raised
        once the HTML has been appended.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        # A configured line number style takes the place of the plain flag
        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode
174ee12ae39SPatrick Sanan
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Extract a dict from an htmlmap file, leaving URLs as they are.

    Each line is expected to start with
        man:+NAME++TEXT++++man+URL
    and contributes the entry NAME -> URL; a later line with the same NAME
    overrides an earlier one. Lines which do not match are reported on
    stdout and skipped.
    """
    pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    string_to_link = {}
    # Decode explicitly so the result does not depend on the locale of the
    # build machine; undecodable bytes are replaced rather than aborting the
    # build, and a line damaged that way is reported below if it no longer matches
    with open(htmlmap_filename, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            m = pattern.match(line)
            if m:
                string_to_link[m.group(1)] = m.group(3)
            else:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(line)
    return string_to_link
189ee12ae39SPatrick Sanan
190ee12ae39SPatrick Sanan
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """
    Turn each name -> link entry into name -> HTML anchor displaying name

    The prefix is prepended to any link not starting with 'http'; links
    starting with 'http' are used unchanged.
    """
    def anchor(name: str, link: str) -> str:
        href = link if link.startswith('http') else prefix + link
        return ''.join(('<a href="', href, '">', name, '</a>'))

    return {name: anchor(name, link) for name, link in string_to_link.items()}
199ee12ae39SPatrick Sanan
200ee12ae39SPatrick Sanan
def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    Keys are escaped, so they are matched literally. An empty source_dict
    gives a pattern which never matches.
    """
    # Longest keys first, so that a key which is the beginning of a longer key
    # cannot win the alternation and hide the longer one
    words = sorted(source_dict, key=len, reverse=True)
    if not words:
        # An empty alternation would match the empty string at every position;
        # an empty negative lookahead matches nowhere instead
        return re.compile(r'(?!)')
    # The word boundaries are shared by all alternatives, so state them once
    return re.compile(r'\b(?:' + r'|'.join(map(re.escape, words)) + r')\b')
207ee12ae39SPatrick Sanan
208ee12ae39SPatrick Sanan
def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Substitute each match of pattern in string with the value stored in replacements under the matched text

    The pattern is expected to match only keys of replacements.
    """
    def lookup(match):
        return replacements[match.group(0)]

    return pattern.sub(lookup, string)
212