xref: /petsc/doc/ext/html5_petsc.py (revision a9d4b3f8c067701ef1fba1a2dde15d9177921192)
1ee12ae39SPatrick Sanan""" Sphinx extension for custom HTML processing for PETSc docs """
2ee12ae39SPatrick Sanan
3ee12ae39SPatrick Sananfrom typing import Any, Dict
4ee12ae39SPatrick Sananimport re
5ee12ae39SPatrick Sananimport os
6ee12ae39SPatrick Sananimport subprocess
7ee12ae39SPatrick Sananimport types
8ee12ae39SPatrick Sanan
9ee12ae39SPatrick Sananfrom docutils import nodes
10ee12ae39SPatrick Sananfrom docutils.nodes import Element, Text
11ee12ae39SPatrick Sanan
12ee12ae39SPatrick Sananfrom sphinx import version_info as sphinx_version_info
13ee12ae39SPatrick Sananfrom sphinx.writers.html5 import HTML5Translator
14ee12ae39SPatrick Sananfrom sphinx.application import Sphinx
15ee12ae39SPatrick Sanan
# Compatibility shim: when the ``re`` module does not provide ``re.Pattern``,
# alias it to ``re._pattern_type`` so that ``re.Pattern`` can be used in
# annotations in this file.
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type
17ee12ae39SPatrick Sanan
18ee12ae39SPatrick Sanan
def setup(app: Sphinx) -> None:
    """ Sphinx extension entry point.

    Checks the Sphinx version, then connects _setup_translators to the
    'builder-inited' event.
    """
    _check_version(app)
    app.connect('builder-inited', _setup_translators)
23ee12ae39SPatrick Sanan
24ee12ae39SPatrick Sanan
def _check_version(app: Sphinx) -> None:
    """ Check the running Sphinx against the version this extension copies from.

    Requires at least the major.minor version of the Sphinx release whose
    code is duplicated in this file, and prints a warning if the running
    major.minor version is not exactly that one.
    """
    copied_from = (3, 5, 4, 'final', 0)
    major_minor = copied_from[:2]
    app.require_sphinx('%s.%s' % major_minor)

    if sphinx_version_info[:2] == major_minor:
        return

    print(
        'Warning: A custom extension duplicates code from Sphinx %s ' % (copied_from,),
        'which differs from the current version %s' % (sphinx_version_info,),
        'so unexpected behavior may be observed.'
    )
32ee12ae39SPatrick Sanan
33ee12ae39SPatrick Sanan
def _setup_translators(app: Sphinx) -> None:
    """ Use a mixin strategy to add to the HTML translator without overriding

    This allows use of other extensions which modify the translator.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42

    Does nothing unless the builder's format is 'html'. Every translator in
    the application's registry is replaced by a new class deriving from both
    the PETSc mixin and the registered class; finally the builder's own name
    is registered with a class built on the builder's default translator class.
    """
    if app.builder.format != 'html':
        return

    def with_mixin(base):
        """ Create a translator class placing the PETSc mixin ahead of base """
        return types.new_class(
            'PETScHTMLTranslator',
            (PETScHTMLTranslatorMixin, base),
            {},
        )

    for builder_name, registered_class in app.registry.translators.items():
        app.set_translator(builder_name, with_mixin(registered_class), override=True)

    default_based = with_mixin(app.builder.default_translator_class)
    app.set_translator(app.builder.name, default_based, override=True)
65ee12ae39SPatrick Sanan
66ee12ae39SPatrick Sanan
class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    The custom processing replaces names listed in the classic docs' htmlmap
    file with HTML links to the corresponding manual pages, both in literal
    text and in highlighted literal blocks.
    """

    def __init__(self, *args: Any) -> None:
        # Both members are populated lazily by the _get_* accessors below.
        # NOTE(review): the cache lives on the instance; if a new translator
        # is created for every document, the htmlmap file is re-read and the
        # pattern recompiled each time - confirm, and consider sharing it.
        self._manpage_map = None
        self._manpage_pattern = None
        super().__init__(*args)

    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps each name found in the htmlmap file to a complete
        '<a href=...>' tag. It is built on first use and then reused.

        Raises FileNotFoundError if the htmlmap file is missing.
        """
        # Compare against None so that a legitimately empty map is also cached
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise FileNotFoundError("Expected file %s not found. Run script to build classic docs subset." % htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = self._get_manpage_prefix_base()
            # The trailing '' makes the prefix end with a path separator
            manpage_prefix = os.path.join(manpage_prefix_base, 'docs', '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """
        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _get_manpage_prefix_base(self) -> str:
        """ Return the base location for the install. This varies by platform.

        On GitLab CI the base is taken from the CI_ENVIRONMENT_URL environment
        variable (without a trailing '/index.html' or trailing slashes), on
        ReadTheDocs it is a fixed URL, and otherwise it is the builder's
        output directory.

        Raises RuntimeError if GitLab CI is detected but CI_ENVIRONMENT_URL
        is unset or empty.
        """
        if 'GITLAB_CI' in os.environ:
            ci_environment_url = os.getenv('CI_ENVIRONMENT_URL')
            if not ci_environment_url:
                raise RuntimeError('GitLab CI detected but expected environment variable not found')
            # str.rstrip() takes a *set of characters*, not a suffix, so
            # rstrip('/index.html') would also eat trailing characters of the
            # URL itself (e.g. '.../main' -> '.../ma'). Remove the suffix
            # explicitly, then drop any trailing slashes.
            manpage_prefix_base = ci_environment_url
            suffix = '/index.html'
            if manpage_prefix_base.endswith(suffix):
                manpage_prefix_base = manpage_prefix_base[:-len(suffix)]
            manpage_prefix_base = manpage_prefix_base.rstrip('/')
        elif 'READTHEDOCS' in os.environ:  # Temporary - remove once ReadTheDocs is abandoned
            manpage_prefix_base = 'https://www.mcs.anl.gov/petsc/petsc-main'
        else:
            manpage_prefix_base = self.builder.outdir
        return manpage_prefix_base

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string """
        manpage_map = self._get_manpage_map()
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        """ Append the encoded text of node to the body.

        When literal text is being protected, the text is emitted token by
        token and manpage links are added to each non-whitespace token.
        """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        """ Emit a highlighted literal block with manpage links added.

        Blocks whose raw source differs from their text are delegated to the
        next class in the MRO. Otherwise the block is highlighted, links are
        inserted into the highlighted HTML, and nodes.SkipNode is raised once
        the complete block has been appended to the body.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode
177ee12ae39SPatrick Sanan
def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Extract a dict from an htmlmap file, leaving URLs as they are.

    Lines are expected to start with 'man:+<name>++<text>++++man+<url>';
    the returned dict maps each <name> to its <url>. Any line which does not
    match is reported on standard output and skipped.
    """
    pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    string_to_link = {}
    with open(htmlmap_filename, 'r') as f:
        # Iterate over the file directly instead of loading all lines at once
        for line in f:
            m = pattern.match(line)
            if m:
                string_to_link[m.group(1)] = m.group(3)
            else:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                # The line still carries its newline; avoid printing a second one
                print(line.rstrip('\n'))
    return string_to_link
192ee12ae39SPatrick Sanan
193ee12ae39SPatrick Sanan
def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """ Complete HTML links

    Prepend a prefix to any links not starting with 'http',
    and add HTML tags

    Returns a new dict with the same keys, in which each value is an
    '<a href="...">' tag pointing at the completed link and using the key
    as the link text. The input dict is not modified.
    """
    def link_string(name: str, link: str) -> str:
        url = link if link.startswith('http') else prefix + link
        return '<a href="%s">%s</a>' % (url, name)

    return {name: link_string(name, link) for name, link in string_to_link.items()}
204ee12ae39SPatrick Sanan
205ee12ae39SPatrick Sanan
def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    Keys are escaped, so they are matched literally. Empty keys are ignored.
    If there are no (non-empty) keys, the returned pattern never matches.
    """
    words = [re.escape(word) for word in source_dict if word]
    if not words:
        # An empty alternation would match the empty string at every position;
        # '(?!)' is a negative lookahead which can never succeed.
        return re.compile(r'(?!)')
    # The word boundaries are shared by all alternatives, so state them once
    return re.compile(r'\b(?:' + r'|'.join(words) + r')\b')
212ee12ae39SPatrick Sanan
213ee12ae39SPatrick Sanan
def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Given a pattern which matches keys in replacements, replace keys found in string with their values

    Text matched by the pattern which is not a key of replacements is left
    unchanged.
    """
    def substitute(match):
        matched = match.group(0)
        # Fall back to the matched text itself instead of raising KeyError
        return replacements.get(matched, matched)

    return pattern.sub(substitute, string)
217