""" Sphinx extension for custom HTML processing for PETSc docs """

from typing import Any, Dict
import re
import os
import subprocess
import types

from docutils import nodes
from docutils.nodes import Element, Text

from sphinx import version_info as sphinx_version_info
from sphinx.writers.html5 import HTML5Translator
from sphinx.application import Sphinx

# Older Pythons expose the compiled-pattern type only under a private name;
# alias it so that the annotations below work everywhere.
if not hasattr(re, 'Pattern'):
    re.Pattern = re._pattern_type


def setup(app: Sphinx) -> None:
    """ Sphinx entry point: check the Sphinx version and hook up the translators """
    _check_version(app)

    app.connect('builder-inited', _setup_translators)


def _check_version(app: Sphinx) -> None:
    """ Refuse to run with any Sphinx version other than the one copied from

    The translator mixin below duplicates method bodies from Sphinx, so an
    exact version match is required.

    Raises NotImplementedError if the running Sphinx version differs.
    """
    sphinx_version_info_source = (2, 4, 4, 'final', 0)
    app.require_sphinx('%s.%s' % (sphinx_version_info_source[0], sphinx_version_info_source[1]))
    if sphinx_version_info != sphinx_version_info_source:
        error_message = ' '.join([
            'This extension duplicates code from Sphinx %s ' % (sphinx_version_info_source,),
            'which is incompatible with the current version %s' % (sphinx_version_info,),
        ])
        raise NotImplementedError(error_message)


def _setup_translators(app: Sphinx) -> None:
    """ Use a mixin strategy to add to the HTML translator without overriding

    This allows use of other extensions which modify the translator.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    # Iterate over a snapshot: set_translator() writes back into the registry
    # that is being looped over.
    for name, klass in list(app.registry.translators.items()):
        translator = types.new_class(
            'PETScHTMLTranslator',
            (
                PETScHTMLTranslatorMixin,
                klass,
            ),
            {},
        )
        app.set_translator(name, translator, override=True)

    # Also wrap the builder's default translator under the builder's own name.
    translator = types.new_class(
        'PETScHTMLTranslator',
        (
            PETScHTMLTranslatorMixin,
            app.builder.default_translator_class,
        ),
        {},
    )
    app.set_translator(app.builder.name, translator, override=True)


def _strip_index_html(url: str) -> str:
    """ Return url without a trailing 'index.html' and without trailing slashes

    str.rstrip('/index.html') must not be used for this: its argument is a
    set of characters, so it would also eat a final path component made only
    of those characters (for example '/html').
    """
    suffix = 'index.html'
    if url.endswith(suffix):
        url = url[:-len(suffix)]
    return url.rstrip('/')


class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator which overrides methods to add PETSc-specific
    custom processing to the generated HTML.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Both caches are filled lazily, see the _get_manpage_* methods.
        self._manpage_map = None
        self._manpage_pattern = None
        super().__init__(*args, **kwargs)

    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The map is read once from _build_classic/docs/manualpages/htmlmap
        (relative to the current working directory) and then cached.

        Raises Exception if that file does not exist.
        """
        # Compare against None so that a legitimately empty map is cached too,
        # instead of re-reading the file on every call.
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. Run script to build classic docs subset." % htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = self._get_manpage_prefix_base()
            manpage_prefix = os.path.join(manpage_prefix_base, 'docs', '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """
        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _get_manpage_prefix_base(self) -> str:
        """ Return the base location for the install. This varies by platform.

        Raises Exception if GitLab CI is detected but CI_ENVIRONMENT_URL is
        unset or empty.
        """
        if 'GITLAB_CI' in os.environ:
            ci_environment_url = os.getenv('CI_ENVIRONMENT_URL')
            if not ci_environment_url:
                raise Exception('GitLab CI detected but expected environment variable not found')
            manpage_prefix_base = _strip_index_html(ci_environment_url)
        elif 'READTHEDOCS' in os.environ:  # Temporary - remove once ReadTheDocs is abandoned
            manpage_prefix_base = 'https://www.mcs.anl.gov/petsc/petsc-main'
        else:
            manpage_prefix_base = self.builder.outdir
        return manpage_prefix_base

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string """
        manpage_map = self._get_manpage_map()
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    # (non-breaking spaces, written as an entity as in Sphinx)
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        # NOTE: the identity test ("is") is kept exactly as in the duplicated
        # Sphinx code.
        if lang is self.builder.config.highlight_language:
            # only pass highlighter options for original language
            opts = self.builder.config.highlight_options
        else:
            opts = {}

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=(self.builder.current_docname, node.line), **highlight_args
        )
        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode


def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Extract a dict from an htmlmap file, leaving URLs as they are.

    Each line is expected to look like 'man:+NAME++TEXT++++man+URL'; the
    result maps NAME to URL. Lines which do not match are reported on stdout
    and skipped.
    """
    pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    string_to_link = dict()
    with open(htmlmap_filename, 'r') as f:
        # Stream the file line by line instead of loading it all at once.
        for line in f:
            m = pattern.match(line)
            if m:
                string_to_link[m.group(1)] = m.group(3)
            else:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(line)
    return string_to_link


def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """ Complete HTML links

    Prepend a prefix to any links not starting with 'http',
    and add HTML tags
    """
    def link_string(name: str, link: str, prefix: str) -> str:
        url = link if link.startswith('http') else prefix + link
        return '<a href="' + url + '">' + name + '</a>'
    return {k: link_string(k, v, prefix) for (k, v) in string_to_link.items()}


def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    Empty keys are ignored. If no usable key remains, the returned pattern
    never matches, so that substitution leaves its input untouched.
    """
    def process_word(word):
        """ add escape characters and word boundaries """
        return r'\b' + re.escape(word) + r'\b'

    # An empty key would match at every word boundary and insert empty links.
    words = [word for word in source_dict if word]
    if not words:
        # An empty alternation would match the empty string everywhere and
        # then fail the dict lookup during substitution; '(?!)' cannot match.
        return re.compile(r'(?!)')
    return re.compile(r'|'.join(map(process_word, words)))


def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Given a pattern which matches keys in replacements, replace keys found in string with their values"""
    return pattern.sub(lambda match: replacements[match.group(0)], string)