summaryrefslogtreecommitdiff
path: root/man-fixer.py
blob: b848da01c5eac9fabe6ba9e5ae14bf5cf6ecef29 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/bin/env python3

# git ls-files -z | xargs -0 rg -l0 '[a-zA-Z0-9_.-]\([1457][a-zA-Z]*\)' | \
#     xargs -0 -P8 $PWD/man-fixer.py

import os
import re
import stat
import sys
import tempfile

# Maps a manual section, as it appears (upper-cased) inside the parentheses
# of a reference, to the section that should replace it.  Sections that are
# not listed here are left untouched.
CHANGES = {
    '1M': '8',
    '3B': '3UCB',
    '3E': '3ELF',
    '3G': '3GEN',
    '3K': '3KSTAT',
    '3N': '3SOCKET',
    '3R': '3RT',
    '3S': '3C',
    '3T': '3THR',
    '3X': '3CURSES',
    '3XC': '3XCURSES',
    # Was '33XNET': the leading section digit had been doubled by mistake.
    '3XN': '3XNET',
    '4': '5',
    '5': '7',
    '7': '4',
    '7B': '4B',
    '7D': '4D',
    '7FS': '4FS',
    '7I': '4I',
    '7IPP': '4IPP',
    '7M': '4M',
    '7P': '4P',
}

# Use the list of manual pages shipped by OpenIndiana and OmniOS as a
# convenient reference set: a match against one of these pages is a strong
# enough hint of a genuine manual page reference that we fix it automatically
# rather than leave it for manual cleanup.
#
# Generated by:
#     pkg search -o path \
#         -s http://pkg.openindiana.org/hipster \
#         -s https://pkg.omnios.org/bloody/core \
#         -s https://pkg.omnios.org/bloody/extra \
#         'path:*/man1m/* OR path:*/man[457]/* OR path:*/man7*/*'
# Every entry of 'real-manpages.list' (kept next to this script) is a path to
# a shipped manual page.  Reduce each one to "page(SECTION)": the base name
# without its extension, followed by the upper-cased extension (minus the
# leading dot) in parentheses.
with open(os.path.join(os.path.dirname(__file__), 'real-manpages.list')) as f:
    SHIPPED = {
        f'{stem}({suffix[1:].upper()})'
        for (stem, suffix) in (
            os.path.splitext(os.path.basename(entry.rstrip()))
            for entry in f
        )
    }

# Matches a manual page reference of the form name(section), where the name
# may optionally be wrapped in \fB...\fR font escapes.
#
# group 0: The whole shebang
# group 1 or group 2: The page name (sans fonts, if any); group 1 is set when
#     the name is wrapped in \fB...\fR, group 2 when the name is bare
# group 3: The section: a single digit followed by zero or more letters
REFERENCE = re.compile(r'(?:\\fB([a-zA-Z0-9_.-]+)\\fR|([a-zA-Z0-9_.-]+))\(([0-9][A-Za-z]*)\)')

def match_to_ref(match):
    """Return the "page(SECTION)" form of a REFERENCE match.

    The page name comes from whichever of groups 1 and 2 matched; the
    section is upper-cased so the result can be compared against SHIPPED.
    """
    page = match.group(1) or match.group(2)
    section = match_section(match)
    return f'{page}({section})'

def match_section(match):
    """Return the section (group 3) of a REFERENCE match, upper-cased."""
    section = match[3]
    return section.upper()

def match_section_start(match):
    """Return the index in the searched string where the section begins."""
    (begin, _) = match.span(3)
    return begin

def match_section_end(match):
    """Return the index in the searched string just past the section."""
    (_, finish) = match.span(3)
    return finish


def _fix_line(path, lineno, line):
    """Return `line` with the sections of known manual references updated.

    Every REFERENCE match whose section is a key of CHANGES is examined.  If
    the reference names a page in SHIPPED its section is replaced in place;
    otherwise a "path:lineno: ref: line" diagnostic is printed so that it
    can be cleaned up by hand.  `lineno` is the 1-based line number used in
    that diagnostic.
    """
    pos = 0

    # We must loop rather than .finditer() because a substitution changes
    # the position of subsequent matches.
    while x := REFERENCE.search(line, pos):
        pos = x.end(0)

        sec = match_section(x)
        if sec not in CHANGES:
            continue

        if match_to_ref(x) in SHIPPED:
            start = match_section_start(x)
            end = match_section_end(x)
            replacement = CHANGES[sec]
            line = line[:start] + replacement + line[end:]
            # Keep `pos` just past this reference in the *new* line.  Without
            # this a shorter replacement (1M -> 8) would make the next search
            # start one character late and could truncate the next page name.
            pos += len(replacement) - (end - start)
        else:
            print(f'{path}:{lineno}: '
                  f'{match_to_ref(x)}: {line.rstrip()}')

    return line


def main(path):
    """Rewrite the file at `path`, renumbering its manual page references.

    The result is written to a temporary file in the same directory, given
    the permission bits of the original, and then moved over `path`.  The
    file is rewritten even when no line changed.  Bytes that cannot be
    decoded are carried through unchanged (errors='surrogateescape') and
    line endings are preserved (newline='').

    Raises OSError (e.g. FileNotFoundError) if `path` cannot be read or
    replaced; in that case the original file is left as it was.
    """
    st = os.stat(path)

    # delete=False: the temporary file is renamed over `path` below, so it
    # must not be unlinked automatically when it is closed.
    tmp = tempfile.NamedTemporaryFile(dir=os.path.dirname(path),
                                      mode='w+', errors='surrogateescape',
                                      newline='', delete=False)
    try:
        with tmp:
            with open(path, errors='surrogateescape', newline='') as f:
                for (lineno, line) in enumerate(f):
                    tmp.write(_fix_line(path, lineno + 1, line))

        os.chmod(tmp.name, stat.S_IMODE(st.st_mode))
        # Replace in a single step rather than remove-then-link, so a failure
        # can never leave us without the original file.
        os.replace(tmp.name, path)
    except BaseException:
        # Best-effort cleanup of the temporary file; the original error is
        # what the caller needs to see.
        try:
            os.unlink(tmp.name)
        except FileNotFoundError:
            pass
        raise


# Process each file named on the command line in turn.  A file that cannot
# be decoded is reported and skipped so the remaining files still get done.
for target in sys.argv[1:]:
    try:
        main(target)
    except UnicodeDecodeError:
        print(f'{target}: not UTF-8')