#!/bin/env python3

# Rewrite manual page section references according to CHANGES.
#
# Typical invocation:
#
# git ls-files -z | xargs -0 rg -l0 '[a-zA-Z0-9_.-]\([1457][a-zA-Z]*\)' | \
#   xargs -0 -P8 $PWD/man-fixer.py

import os
import re
import stat
import sys
import tempfile

# Old manual section -> new manual section.
CHANGES = {
    '1M': '8',
    '3B': '3UCB',
    '3E': '3ELF',
    '3G': '3GEN',
    '3K': '3KSTAT',
    '3N': '3SOCKET',
    '3R': '3RT',
    '3S': '3C',
    '3T': '3THR',
    '3X': '3CURSES',
    '3XC': '3XCURSES',
    '3XN': '3XNET',  # was '33XNET', a typo: no other mapping doubles the '3'
    '4': '5',
    '5': '7',
    '7': '4',
    '7B': '4B',
    '7D': '4D',
    '7FS': '4FS',
    '7I': '4I',
    '7IPP': '4IPP',
    '7M': '4M',
    '7P': '4P',
}

# Take a list of manual pages in OpenIndiana and OmniOS which is a convenient
# set to consider a strong enough hint of a manual page reference that we
# should fix automatically rather than leave it for manual cleanup.
#
# Generated by:
# pkg search -o path \
#    -s http://pkg.openindiana.org/hipster \
#    -s https://pkg.omnios.org/bloody/core \
#    -s https://pkg.omnios.org/bloody/extra \
#    'path:*/man1m/* OR path:*/man[457]/* OR path:*/man7*/*'
#
# Entries have the form 'page(SECTION)'.  The set is populated from
# 'real-manpages.list' (next to this script) when run as a program, so that
# merely importing this file performs no I/O.
SHIPPED = set()

# group 0: The whole shebang
# group 1 or group 2: The page name (sans fonts, if any)
# group 3: The section
REFERENCE = re.compile(
    r'(?:\\fB([a-zA-Z0-9_.-]+)\\fR|([a-zA-Z0-9_.-]+))\(([0-9][A-Za-z]*)\)')


def match_to_ref(match):
    """Return the reference in canonical 'page(SECTION)' form."""
    return f'{match.group(1) or match.group(2)}({match_section(match)})'


def match_section(match):
    """Return the matched section, upper-cased."""
    return match.group(3).upper()


def match_section_start(match):
    """Return the offset at which the section text starts."""
    return match.start(3)


def match_section_end(match):
    """Return the offset just past the end of the section text."""
    return match.end(3)


def _load_shipped(list_path):
    """Return the set of 'page(SECTION)' references named in list_path.

    Each line of the file is a path to a manual page; the basename gives the
    page name and the (upper-cased) extension gives the section.
    """
    shipped = set()
    with open(list_path) as f:
        for line in f:
            page, section = os.path.splitext(os.path.basename(line.rstrip()))
            shipped.add(f'{page}({section[1:].upper()})')
    return shipped


def _fix_line(line):
    """Rewrite the renumbered sections of known manual pages in one line.

    Returns (fixed_line, unresolved).  unresolved is a list of
    (reference, context) pairs for references whose section is in CHANGES
    but whose page is not in SHIPPED; context is the right-stripped line as
    it stood when the reference was found.
    """
    unresolved = []
    pos = 0
    # We must loop rather than .finditer() because we change the position of
    # subsequent matches should we match.
    while found := REFERENCE.search(line, pos):
        pos = found.end(0)
        replacement = CHANGES.get(match_section(found))
        if replacement is None:
            continue

        ref = match_to_ref(found)
        if ref not in SHIPPED:
            unresolved.append((ref, line.rstrip()))
            continue

        start = match_section_start(found)
        end = match_section_end(found)
        line = line[:start] + replacement + line[end:]
        # found.end(0) indexes the old line; shift by the change in length so
        # the scan resumes immediately after the rewritten reference rather
        # than skipping (or re-reading) characters.
        pos += len(replacement) - (end - start)

    return line, unresolved


def main(path):
    """Rewrite the manual page references in the file at path, in place.

    References that look like they need renumbering but do not name a page
    in SHIPPED are reported on stdout as 'path:lineno: ref: line' and left
    untouched.  The file's permission bits are preserved.
    """
    # delete=False because the temporary file is renamed over the original
    # below; we take care of removing it ourselves on failure.
    tmp = tempfile.NamedTemporaryFile(dir=os.path.dirname(path), mode='w+',
                                      errors='surrogateescape', newline='',
                                      delete=False)
    try:
        with tmp, open(path, errors='surrogateescape', newline='') as f:
            for lineno, line in enumerate(f, start=1):
                fixed, unresolved = _fix_line(line)
                for ref, context in unresolved:
                    print(f'{path}:{lineno}: {ref}: {context}')
                tmp.write(fixed)

        # Fix the mode before the rename so the file is never visible with
        # the temporary file's restrictive permissions, then replace
        # atomically so the original is never missing.
        os.chmod(tmp.name, stat.S_IMODE(os.stat(path).st_mode))
        os.replace(tmp.name, path)
    except BaseException:
        # Best-effort cleanup; the original failure is what matters.
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
        raise


if __name__ == '__main__':
    SHIPPED.update(_load_shipped(
        os.path.join(os.path.dirname(__file__), 'real-manpages.list')))

    for path in sys.argv[1:]:
        try:
            main(path)
        except UnicodeDecodeError:
            print(f'{path}: not UTF-8')