""" Sphinx extension for custom HTML processing for PETSc docs """

from typing import Any, Dict
import re
import os
import subprocess
import types

from docutils import nodes
from docutils.nodes import Element, Text

from sphinx import version_info as sphinx_version_info
from sphinx.writers.html5 import HTML5Translator
from sphinx.application import Sphinx

# When the running interpreter's `re` module has no public `Pattern` name,
# alias the private `_pattern_type` so `re.Pattern` can be used in annotations.
if not hasattr(re,'Pattern'): re.Pattern = re._pattern_type


# Literal placeholder string used as the root of generated man page links
PETSC_DOC_OUT_ROOT_PLACEHOLDER = 'PETSC_DOC_OUT_ROOT_PLACEHOLDER'


def setup(app: Sphinx) -> Dict[str, Any]:
    """ Sphinx extension entry point

    Checks the Sphinx version, then hooks `_setup_translators` to the
    'builder-inited' event.

    Returns the extension metadata dict, declaring the extension safe for
    parallel reading.
    """
    _check_version(app)

    app.connect('builder-inited', _setup_translators)
    return {'parallel_read_safe': True}


def _check_version(app: Sphinx) -> None:
    """ Require, and warn about deviations from, the Sphinx version copied from

    Parts of this extension duplicate code from a specific Sphinx version.
    That major.minor version is passed to `app.require_sphinx`, and a
    warning is printed if the running major.minor version is different.
    """
    sphinx_version_info_source = (4, 2, 0, 'final', 0)
    app.require_sphinx('%s.%s' % sphinx_version_info_source[:2])
    if sphinx_version_info[:2] != sphinx_version_info_source[:2]:
        print('Warning: A custom extension duplicates code from Sphinx %s ' % (sphinx_version_info_source,),
              'which differs from the current version %s' % (sphinx_version_info,),
              'so unexpected behavior may be observed.')


def _setup_translators(app: Sphinx) -> None:
    """ Use a mixin strategy to add to the HTML translator without overriding

    This allows use of other extensions which modify the translator.

    Duplicates the approach used here in sphinx-hoverxref:
    https://github.com/readthedocs/sphinx-hoverxref/pull/42
    """
    if app.builder.format != 'html':
        return

    # Iterate over a snapshot: set_translator() writes to the registry
    # that is being walked.
    for name, klass in list(app.registry.translators.items()):
        app.set_translator(name, _make_translator(klass), override=True)

    # Also cover the builder's default translator class
    translator = _make_translator(app.builder.default_translator_class)
    app.set_translator(app.builder.name, translator, override=True)


def _make_translator(base: type) -> type:
    """ Create a translator class which layers PETScHTMLTranslatorMixin over base """
    return types.new_class(
        'PETScHTMLTranslator',
        (
            PETScHTMLTranslatorMixin,
            base,
        ),
        {},
    )


class PETScHTMLTranslatorMixin:
    """
    A custom HTML translator which overrides methods to add PETSc-specific
    custom processing to the generated HTML.

    Intended to be placed in front of an HTML translator class in the MRO
    (see _setup_translators); it relies on attributes such as `body`,
    `encode` and `highlighter` being provided by that class.
    """

    def __init__(self, *args: Any) -> None:
        # Both are populated lazily; use the _get_* accessors to read them
        self._manpage_map = None
        self._manpage_pattern = None
        super().__init__(*args)


    def _get_manpage_map(self) -> Dict[str,str]:
        """ Return the manpage strings to link, as a dict.

        The dict maps each man page name to a complete HTML link tag. It is
        built on first use from the htmlmap file, whose path is relative to
        the current working directory, and cached afterwards.

        Raises an Exception if the htmlmap file does not exist.
        """
        # Compare against None so that an empty map is cached as well,
        # instead of re-reading the file on every call
        if self._manpage_map is None:
            htmlmap_filename = os.path.join('_build_classic', 'docs', 'manualpages', 'htmlmap_modified')
            if not os.path.isfile(htmlmap_filename):
                raise Exception("Expected file %s not found. Run script to build classic docs subset." % htmlmap_filename)
            manpage_map_raw = htmlmap_to_dict(htmlmap_filename)
            manpage_prefix_base = PETSC_DOC_OUT_ROOT_PLACEHOLDER
            # The trailing '' makes the prefix end with a path separator
            manpage_prefix = os.path.join(manpage_prefix_base, 'docs', '')
            self._manpage_map = dict_complete_links(manpage_map_raw, manpage_prefix)
        return self._manpage_map

    def _get_manpage_pattern(self) -> re.Pattern:
        """ Return the manpage links pattern.

        This is done lazily, so this function should always be used,
        instead of the direct data member, which may not be populated yet
        """

        if self._manpage_pattern is None:
            self._manpage_pattern = get_multiple_replace_pattern(self._get_manpage_map())
        return self._manpage_pattern

    def _add_manpage_links(self, string: str) -> str:
        """ Add plain HTML link tags to a string """
        manpage_map = self._get_manpage_map()
        manpage_pattern = self._get_manpage_pattern()
        return replace_from_dict_and_pattern(string, manpage_map, manpage_pattern)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_Text(self, node: Text) -> None:
        """ Emit a text node, adding man page links inside literal text """
        text = node.astext()
        encoded = self.encode(text)
        if self.protect_literal_text:
            # moved here from base class's visit_literal to support
            # more formatting in literal nodes
            for token in self.words_and_spaces.findall(encoded):
                if token.strip():
                    # Custom processing to add links to PETSc man pages ########
                    token_processed = self._add_manpage_links(token)

                    # protect literal text from line wrapping
                    self.body.append('<span class="pre">%s</span>' % token_processed)
                    # (end of custom processing) ###############################
                elif token in ' \n':
                    # allow breaks at whitespace
                    self.body.append(token)
                else:
                    # protect runs of multiple spaces; the last one can wrap
                    # (non-breaking spaces are needed here: plain spaces
                    # would be collapsed or wrapped by the browser)
                    self.body.append('&#160;' * (len(token) - 1) + ' ')
        else:
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

    # This method consists mostly of code duplicated from Sphinx:
    # overwritten
    def visit_literal_block(self, node: Element) -> None:
        """ Emit a highlighted literal block, with man page links added

        Blocks whose raw source differs from their text are passed to the
        next class's implementation unchanged. Otherwise the block is
        highlighted, links are added to the highlighted HTML, and
        nodes.SkipNode is raised since the node has been fully rendered here.
        """
        if node.rawsource != node.astext():
            # most probably a parsed-literal block -- don't highlight
            return super().visit_literal_block(node)

        lang = node.get('language', 'default')
        linenos = node.get('linenos', False)
        highlight_args = node.get('highlight_args', {})
        highlight_args['force'] = node.get('force', False)
        opts = self.config.highlight_options.get(lang, {})

        if linenos and self.config.html_codeblock_linenos_style:
            linenos = self.config.html_codeblock_linenos_style

        highlighted = self.highlighter.highlight_block(
            node.rawsource, lang, opts=opts, linenos=linenos,
            location=node, **highlight_args
        )

        # Custom processing to add links to PETSc man pages ####################
        highlighted = self._add_manpage_links(highlighted)
        # (end of custom processing) ###########################################

        starttag = self.starttag(node, 'div', suffix='',
                                 CLASS='highlight-%s notranslate' % lang)
        self.body.append(starttag + highlighted + '</div>\n')
        raise nodes.SkipNode

def htmlmap_to_dict(htmlmap_filename: str) -> Dict[str,str]:
    """ Extract a dict from an htmlmap file, leaving URLs as they are.

    Each matching line contributes one entry mapping its first field (the
    name) to its URL field. Lines which do not match the expected format
    are reported on stdout and skipped.
    """
    pattern = re.compile(r'man:\+([a-zA-Z_0-9]*)\+\+([a-zA-Z_0-9 .:]*)\+\+\+\+man\+([a-zA-Z_0-9#./:-]*)')
    string_to_link = dict()
    with open(htmlmap_filename, 'r') as f:
        for line in f:
            m = pattern.match(line)
            if m:
                string_to_link[m.group(1)] = m.group(3)
            else:
                print("Warning: skipping unexpected line in " + htmlmap_filename + ":")
                print(line.rstrip('\n'))
    return string_to_link


def dict_complete_links(string_to_link: Dict[str,str], prefix: str = '') -> Dict[str,str]:
    """ Complete HTML links

    Prepend a prefix to any links not starting with 'http'.
    Add HTML tags.

    Returns a new dict with the same keys; the input is not modified.
    """
    def link_string(name: str, link: str) -> str:
        url = link if link.startswith('http') else prefix + link
        return '<a href="' + url + '">' + name + '</a>'
    return {name: link_string(name, link) for (name, link) in string_to_link.items()}


def get_multiple_replace_pattern(source_dict: Dict[str,str]) -> re.Pattern:
    """ Generate a regex to match any of the keys in source_dict, as full words

    Empty keys are ignored. If there are no usable keys, the returned
    pattern never matches, so that substitution leaves strings untouched.
    """
    def process_word(word):
        """ add escape characters and word boundaries """
        return r'\b' + re.escape(word) + r'\b'
    words = [word for word in source_dict if word]
    if not words:
        # An empty alternation would match the empty string at every
        # position; use an always-failing lookahead instead.
        return re.compile(r'(?!)')
    return re.compile(r'|'.join(map(process_word, words)))


def replace_from_dict_and_pattern(string: str, replacements: Dict, pattern: re.Pattern) -> str:
    """ Given a pattern which matches keys in replacements, replace keys found in string with their values"""
    return pattern.sub(lambda match: replacements[match.group(0)], string)