summaryrefslogtreecommitdiff
path: root/man-fixer.py
diff options
context:
space:
mode:
Diffstat (limited to 'man-fixer.py')
-rwxr-xr-xman-fixer.py109
1 files changed, 109 insertions, 0 deletions
diff --git a/man-fixer.py b/man-fixer.py
new file mode 100755
index 0000000000..b848da01c5
--- /dev/null
+++ b/man-fixer.py
@@ -0,0 +1,109 @@
+#!/bin/env python3
+
+# git ls-files -z | xargs -0 rg -l0 '[a-zA-Z0-9_.-]\([1457][a-zA-Z]*\)' | \
+# xargs -0 -P8 $PWD/man-fixer.py
+
+import os
+import re
+import stat
+import sys
+import tempfile
+
+# Maps an (upper-cased) manual section, as it appears in an existing
+# reference such as foo(1M), to the section that reference should be
+# rewritten to.  Sections that are not keys here are left untouched.
+CHANGES = {
+    '1M': '8',
+    '3B': '3UCB',
+    '3E': '3ELF',
+    '3G': '3GEN',
+    '3K': '3KSTAT',
+    '3N': '3SOCKET',
+    '3R': '3RT',
+    '3S': '3C',
+    '3T': '3THR',
+    '3X': '3CURSES',
+    '3XC': '3XCURSES',
+    '3XN': '3XNET',
+    '4': '5',
+    '5': '7',
+    '7': '4',
+    '7B': '4B',
+    '7D': '4D',
+    '7FS': '4FS',
+    '7I': '4I',
+    '7IPP': '4IPP',
+    '7M': '4M',
+    '7P': '4P',
+}
+
+# SHIPPED holds 'page(SECTION)' strings for manual pages known to be
+# delivered by OpenIndiana and OmniOS.  A reference found in this set is
+# considered a strong enough hint of a real manual page reference that we
+# fix it automatically rather than leave it for manual cleanup.
+#
+# The set is read from 'real-manpages.list', which lives next to this
+# script and holds one path per line.
+#
+# Generated by:
+# pkg search -o path \
+# -s http://pkg.openindiana.org/hipster \
+# -s https://pkg.omnios.org/bloody/core \
+# -s https://pkg.omnios.org/bloody/extra \
+# 'path:*/man1m/* OR path:*/man[457]/* OR path:*/man7*/*'
+SHIPPED = set()
+_manpage_list = os.path.join(os.path.dirname(__file__), 'real-manpages.list')
+with open(_manpage_list) as listing:
+    for entry in listing:
+        # 'usr/share/man/man1m/foo.1m' -> ('foo', '.1m') -> 'foo(1M)'
+        filename = os.path.basename(entry.rstrip())
+        name, extension = os.path.splitext(filename)
+        SHIPPED.add('{}({})'.format(name, extension[1:].upper()))
+
+# Matches a manual page reference of the form name(section).  The name may
+# be wrapped in \fB...\fR font escapes; the section is a single digit
+# followed by any number of letters, in either case.
+#
+# group 0: The whole shebang
+# group 1 or group 2: The page name (sans fonts, if any); group 1 is set
+#                     when the name was wrapped in \fB...\fR, group 2
+#                     otherwise
+# group 3: The section
+REFERENCE = re.compile(r'(?:\\fB([a-zA-Z0-9_.-]+)\\fR|([a-zA-Z0-9_.-]+))\(([0-9][A-Za-z]*)\)')
+
+def match_to_ref(match):
+    """Return the 'page(SECTION)' form of a REFERENCE match.
+
+    The page name is taken from whichever of groups 1 and 2 matched, and
+    the section is upper-cased by match_section().
+    """
+    page = match.group(1) or match.group(2)
+    section = match_section(match)
+    return '{}({})'.format(page, section)
+
+def match_section(match):
+    """Return the section (group 3) of a REFERENCE match, upper-cased."""
+    section = match[3]
+    return section.upper()
+
+def match_section_start(match):
+    """Return the index at which the section (group 3) begins."""
+    begin, _ = match.span(3)
+    return begin
+
+def match_section_end(match):
+    """Return the index just past the end of the section (group 3)."""
+    _, finish = match.span(3)
+    return finish
+
+
+def main(path):
+    """Renumber manual page references in the file at `path`.
+
+    Every match of REFERENCE whose section is a key of CHANGES is handled
+    in one of two ways:
+
+    - if the reference names a page in SHIPPED, its section is rewritten
+      in place to CHANGES[section];
+    - otherwise the line is left alone and a 'path:lineno: ref: line'
+      message is printed to stdout so it can be cleaned up by hand.
+
+    The file is only replaced when at least one reference was rewritten.
+    The new contents are written to a temporary file in the same directory
+    which is then renamed over `path`, so the original is never missing or
+    half-written.  The permission bits of the original are carried over.
+    """
+    changed = False
+
+    # delete=False because on success the temporary file is renamed away;
+    # the 'finally' below removes it in every other case.
+    tmp = tempfile.NamedTemporaryFile(dir=os.path.dirname(path),
+                                      mode='w+', errors='surrogateescape',
+                                      newline='', delete=False)
+    try:
+        # newline='' on both sides keeps the original line endings intact.
+        with tmp, open(path, errors='surrogateescape', newline='') as f:
+            for (lineno, line) in enumerate(f, start=1):
+                pos = 0
+
+                # We must loop rather than .finditer() because we change
+                # the position of subsequent matches should we match
+                while x := REFERENCE.search(line, pos):
+                    pos = x.end(0)
+
+                    sec = match_section(x)
+                    if sec not in CHANGES:
+                        continue
+
+                    ref = match_to_ref(x)
+                    if ref not in SHIPPED:
+                        print(f'{path}:{lineno}: '
+                              f'{ref}: {line.rstrip()}')
+                        continue
+
+                    start = match_section_start(x)
+                    end = match_section_end(x)
+                    new = CHANGES[sec]
+                    line = line[:start] + new + line[end:]
+
+                    # The match offsets refer to the line before the
+                    # substitution; shift the resume point by however much
+                    # the line grew or shrank so the next search begins
+                    # exactly after this reference.
+                    pos += len(new) - (end - start)
+                    changed = True
+
+                tmp.write(line)
+
+        if changed:
+            # Set the mode before the rename so `path` never appears with
+            # the temporary file's restrictive permissions.
+            os.chmod(tmp.name, stat.S_IMODE(os.stat(path).st_mode))
+            os.replace(tmp.name, path)
+    finally:
+        # Already gone if it was renamed over `path`.
+        try:
+            os.unlink(tmp.name)
+        except FileNotFoundError:
+            pass
+
+
+# Process every file named on the command line.  Guarded so that the files
+# are only touched when this is run as a script, not when it is imported.
+if __name__ == '__main__':
+    for path in sys.argv[1:]:
+        try:
+            main(path)
+        except UnicodeDecodeError:
+            # Report the file and carry on with the remaining arguments.
+            print(f'{path}: not UTF-8')