diff options
Diffstat (limited to 'man-fixer.py')
-rwxr-xr-x | man-fixer.py | 109 |
1 files changed, 109 insertions, 0 deletions
#!/bin/env python3
# Recovered from: diff --git a/man-fixer.py b/man-fixer.py (new file mode 100755)

"""Renumber manual page section references in the files given on argv.

Each file is scanned for references of the form ``name(SECTION)`` (the name
may be wrapped in ``\\fB...\\fR``).  When the section is a key of CHANGES:

* if the reference names a page listed in ``real-manpages.list`` (SHIPPED),
  the section is rewritten in place;
* otherwise the reference is printed as ``path:line: ref: text`` and left
  untouched for manual cleanup.
"""

# git ls-files -z | xargs -0 rg -l0 '[a-zA-Z0-9_.-]\([1457][a-zA-Z]*\)' | \
#   xargs -0 -P8 $PWD/man-fixer.py

import os
import re
import stat
import sys
import tempfile

# Old section -> new section.  Keys are upper case; sections found in the
# input are upper-cased before lookup (see match_section()).
CHANGES = {
    '1M': '8',
    '3B': '3UCB',
    '3E': '3ELF',
    '3G': '3GEN',
    '3K': '3KSTAT',
    '3N': '3SOCKET',
    '3R': '3RT',
    '3S': '3C',
    '3T': '3THR',
    '3X': '3CURSES',
    '3XC': '3XCURSES',
    '3XN': '3XNET',
    '4': '5',
    '5': '7',
    '7': '4',
    '7B': '4B',
    '7D': '4D',
    '7FS': '4FS',
    '7I': '4I',
    '7IPP': '4IPP',
    '7M': '4M',
    '7P': '4P',
}

# Take a list of manual pages in OpenIndiana and OmniOS which is a convenient
# set to consider a strong enough hint of a manual page reference that we
# should fix automatically rather than leave it for manual cleanup.
#
# Generated by:
# pkg search -o path \
#     -s http://pkg.openindiana.org/hipster \
#     -s https://pkg.omnios.org/bloody/core \
#     -s https://pkg.omnios.org/bloody/extra \
#     'path:*/man1m/* OR path:*/man[457]/* OR path:*/man7*/*'
#
# Populated from 'real-manpages.list' when run as a script (see the bottom of
# the file); entries look like 'name(SECTION)' with SECTION upper-cased.
SHIPPED = set()


def _load_shipped(list_path):
    """Return the set of 'page(SECTION)' references named in *list_path*.

    Each line is a path to a manual page; the basename supplies the page name
    and its extension (without the dot, upper-cased) supplies the section.
    """
    refs = set()
    with open(list_path) as f:
        for line in f:
            (page, section) = os.path.splitext(os.path.basename(line.rstrip()))
            refs.add(f'{page}({section[1:].upper()})')
    return refs


# group 0: The whole shebang
# group 1 or group 2: The page name (sans fonts, if any)
# group 3: The section
REFERENCE = re.compile(r'(?:\\fB([a-zA-Z0-9_.-]+)\\fR|([a-zA-Z0-9_.-]+))\(([0-9][A-Za-z]*)\)')


def match_to_ref(match):
    """Return the reference as 'name(SECTION)' with the section upper-cased."""
    return f'{match.group(1) or match.group(2)}({match_section(match)})'


def match_section(match):
    """Return the upper-cased section of a REFERENCE match."""
    return match.group(3).upper()


def match_section_start(match):
    """Return the index of the first character of the section."""
    return match.start(3)


def match_section_end(match):
    """Return the index just past the last character of the section."""
    return match.end(3)


def _fix_line(path, lineno, line):
    """Return *line* with every known, renumbered reference rewritten.

    References whose section is in CHANGES but whose page is not in SHIPPED
    are reported on stdout (using *path* and the 1-based *lineno*) and left
    as they are.
    """
    pos = 0

    # We must loop rather than .finditer() because we change
    # the position of subsequent matches should we match
    while x := REFERENCE.search(line, pos):
        pos = x.end(0)

        sec = match_section(x)
        if sec not in CHANGES:
            continue

        if match_to_ref(x) in SHIPPED:
            start = match_section_start(x)
            end = match_section_end(x)
            replacement = CHANGES[sec]
            line = line[:start] + replacement + line[end:]
            # The match offsets refer to the line before the edit; shift the
            # resume point by the length delta so the next search starts
            # exactly after this reference's closing parenthesis.
            pos += len(replacement) - (end - start)
        else:
            print(f'{path}:{lineno}: '
                  f'{match_to_ref(x)}: {line.rstrip()}')

    return line


def main(path):
    """Rewrite the file at *path*, fixing manual page references.

    The new content is written to a temporary file in the same directory,
    given the original file's permission bits, and then renamed over *path*
    so the original is never missing, even if the process is interrupted.
    """
    st = os.stat(path)
    tmp = tempfile.NamedTemporaryFile(dir=os.path.dirname(path) or '.',
                                      mode='w+', errors='surrogateescape',
                                      newline='', delete=False)
    try:
        # newline='' on both sides preserves the file's line endings.
        with tmp, open(path, errors='surrogateescape', newline='') as f:
            for (lineno, line) in enumerate(f, start=1):
                tmp.write(_fix_line(path, lineno, line))

        os.chmod(tmp.name, stat.S_IMODE(st.st_mode))
        os.replace(tmp.name, path)
    except BaseException:
        # Do not leave the temporary file behind on failure.
        try:
            os.unlink(tmp.name)
        except FileNotFoundError:
            pass
        raise


if __name__ == '__main__':
    SHIPPED.update(_load_shipped(
        os.path.join(os.path.dirname(__file__), 'real-manpages.list')))

    for path in sys.argv[1:]:
        try:
            main(path)
        # NOTE(review): files are opened with errors='surrogateescape', so a
        # decode error is not expected here; handler kept from the original.
        except UnicodeDecodeError:
            print(f'{path}: not UTF-8')