summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cody Peter Mello <cody.mello@joyent.com> 2016-04-09 23:16:01 +0000
committer Robert Mustacchi <rm@joyent.com> 2016-06-20 21:42:21 -0700
commit 71af3be340c57171837478555e2eb0d496318cfc (patch)
tree 97cc3de2fcfbbd6a004ca6ccb1c54ba57e67690f
parent b13c8383ad6f847486face2fe8df9f0f50f068ed (diff)
download illumos-joyent-71af3be340c57171837478555e2eb0d496318cfc.tar.gz
7040 Detect common spelling errors in manual pages
Reviewed by: Marcel Telka <marcel@telka.sk>
Approved by: Dan McDonald <danmcd@omniti.com>
-rw-r--r-- usr/src/pkg/manifests/developer-build-onbld.mf 2
-rw-r--r-- usr/src/tools/onbld/Checks/Makefile 2
-rw-r--r-- usr/src/tools/onbld/Checks/SpellCheck.py 284
-rw-r--r-- usr/src/tools/onbld/Checks/__init__.py 3
-rw-r--r-- usr/src/tools/scripts/git-pbchk.py 5
5 files changed, 293 insertions, 3 deletions
diff --git a/usr/src/pkg/manifests/developer-build-onbld.mf b/usr/src/pkg/manifests/developer-build-onbld.mf
index a02522c4ed..85d11480c2 100644
--- a/usr/src/pkg/manifests/developer-build-onbld.mf
+++ b/usr/src/pkg/manifests/developer-build-onbld.mf
@@ -168,6 +168,8 @@ file path=opt/onbld/lib/python2.6/onbld/Checks/Mapfile.py mode=0444
file path=opt/onbld/lib/python2.6/onbld/Checks/Mapfile.pyc mode=0444
file path=opt/onbld/lib/python2.6/onbld/Checks/ProcessCheck.py mode=0444
file path=opt/onbld/lib/python2.6/onbld/Checks/ProcessCheck.pyc mode=0444
+file path=opt/onbld/lib/python2.6/onbld/Checks/SpellCheck.py mode=0444
+file path=opt/onbld/lib/python2.6/onbld/Checks/SpellCheck.pyc mode=0444
file path=opt/onbld/lib/python2.6/onbld/Checks/__init__.py mode=0444
file path=opt/onbld/lib/python2.6/onbld/Checks/__init__.pyc mode=0444
file path=opt/onbld/lib/python2.6/onbld/Scm/Backup.py mode=0444
diff --git a/usr/src/tools/onbld/Checks/Makefile b/usr/src/tools/onbld/Checks/Makefile
index 413a580981..02f13b8351 100644
--- a/usr/src/tools/onbld/Checks/Makefile
+++ b/usr/src/tools/onbld/Checks/Makefile
@@ -25,6 +25,7 @@
# Copyright 2010, Richard Lowe
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
+# Copyright 2016, Joyent, Inc.
include $(SRC)/Makefile.master
include ../../Makefile.tools
@@ -42,6 +43,7 @@ PYSRCS = \
ManLint.py \
Mapfile.py \
ProcessCheck.py \
+ SpellCheck.py \
__init__.py
PYOBJS = $(PYSRCS:%.py=%.pyc)
diff --git a/usr/src/tools/onbld/Checks/SpellCheck.py b/usr/src/tools/onbld/Checks/SpellCheck.py
new file mode 100644
index 0000000000..e8320243f1
--- /dev/null
+++ b/usr/src/tools/onbld/Checks/SpellCheck.py
@@ -0,0 +1,284 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+import re, sys
+# Templates for check(): (filename, line number, matched term, suggestion).
+spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'
+altMsg = '%s: Line %d contains "%s"; please use "%s" instead for consistency with other documentation\n'
+
+misspellings = {
+ 'absense': 'absence',
+ 'accessable': 'accessible',
+ 'accomodate': 'accommodate',
+ 'accomodation': 'accommodation',
+ 'accross': 'across',
+ 'acheive': 'achieve',
+ 'addional': 'additional',
+ 'addres': 'address',
+ 'admininistrative': 'administrative',
+ 'adminstered': 'administered',
+ 'adminstrate': 'administrate',
+ 'adminstration': 'administration',
+ 'adminstrative': 'administrative',
+ 'adminstrator': 'administrator',
+ 'admissability': 'admissibility',
+ 'adress': 'address',
+ 'adressable': 'addressable',
+ 'adressed': 'addressed',
+ 'adressing': 'addressing, dressing',
+ 'aginst': 'against',
+ 'agression': 'aggression',
+ 'agressive': 'aggressive',
+ 'alot': 'a lot, allot',
+ 'and and': 'and',
+ 'apparantly': 'apparently',
+ 'appearence': 'appearance',
+ 'arguement': 'argument',
+ 'assasination': 'assassination',
+ 'auxilliary': 'auxiliary',
+ 'basicly': 'basically',
+ 'begining': 'beginning',
+ 'belive': 'believe',
+ 'beteen': 'between',
+ 'betwen': 'between',
+ 'beween': 'between',
+ 'bewteen': 'between',
+ 'bizzare': 'bizarre',
+ 'buisness': 'business',
+ 'calender': 'calendar',
+ 'cemetary': 'cemetery',
+ 'chauffer': 'chauffeur',
+ 'collegue': 'colleague',
+ 'comming': 'coming',
+ 'commited': 'committed',
+ 'commitee': 'committee',
+ 'commiting': 'committing',
+ 'comparision': 'comparison',
+ 'comparisions': 'comparisons',
+ 'compatability': 'compatibility',
+ 'compatable': 'compatible',
+ 'compatablity': 'compatibility',
+ 'compatiable': 'compatible',
+ 'compatiblity': 'compatibility',
+ 'completly': 'completely',
+ 'concious': 'conscious',
+ 'condidtion': 'condition',
+ 'conected': 'connected',
+ 'conjuction': 'conjunction',
+ 'continous': 'continuous',
+ 'curiousity': 'curiosity',
+ 'deamon': 'daemon',
+ 'definately': 'definitely',
+ 'desireable': 'desirable',
+ 'diffrent': 'different',
+ 'dilemna': 'dilemma',
+ 'dissapear': 'disappear',
+ 'dissapoint': 'disappoint',
+ 'ecstacy': 'ecstasy',
+ 'embarass': 'embarrass',
+ 'enviroment': 'environment',
+ 'exept': 'except',
+ 'existance': 'existence',
+ 'familar': 'familiar',
+ 'finaly': 'finally',
+ 'folowing': 'following',
+ 'foriegn': 'foreign',
+ 'forseeable': 'foreseeable',
+ 'fourty': 'forty',
+ 'foward': 'forward',
+ 'freind': 'friend',
+ 'futher': 'further',
+ 'gaurd': 'guard',
+ 'glamourous': 'glamorous',
+ 'goverment': 'government',
+ 'happend': 'happened',
+ 'harrassment': 'harassment',
+ 'hierachical': 'hierarchical',
+ 'hierachies': 'hierarchies',
+ 'hierachy': 'hierarchy',
+ 'hierarcical': 'hierarchical',
+ 'hierarcy': 'hierarchy',
+ 'honourary': 'honorary',
+ 'humourous': 'humorous',
+ 'idiosyncracy': 'idiosyncrasy',
+ 'immediatly': 'immediately',
+ 'inaccessable': 'inaccessible',
+ 'inbetween': 'between',
+ 'incidently': 'incidentally',
+ 'independant': 'independent',
+ 'infomation': 'information',
+ 'interupt': 'interrupt',
+ 'intial': 'initial',
+ 'intially': 'initially',
+ 'irresistable': 'irresistible',
+ 'jist': 'gist',
+ 'knowlege': 'knowledge',
+ 'lenght': 'length',
+ 'liase': 'liaise',
+ 'liason': 'liaison',
+ 'libary': 'library',
+ 'maching': 'machine, marching, matching',
+ 'millenia': 'millennia',
+ 'millenium': 'millennium',
+ 'neccessary': 'necessary',
+ 'negotation': 'negotiation',
+ 'nontheless': 'nonetheless',
+ 'noticable': 'noticeable',
+ 'occassion': 'occasion',
+ 'occassional': 'occasional',
+ 'occassionally': 'occasionally',
+ 'occurance': 'occurrence',
+ 'occured': 'occurred',
+ 'occurence': 'occurrence',
+ 'occuring': 'occurring',
+ 'ommision': 'omission',
+ 'orginal': 'original',
+ 'orginally': 'originally',
+ 'pavillion': 'pavilion',
+ 'peice': 'piece',
+ 'persistant': 'persistent',
+ 'politican': 'politician',
+ 'posession': 'possession',
+ 'possiblity': 'possibility',
+ 'preceed': 'precede',
+ 'preceeded': 'preceded',
+ 'preceeding': 'preceding',
+ 'preceeds': 'precedes',
+ 'prefered': 'preferred',
+ 'prefering': 'preferring',
+ 'presense': 'presence',
+ 'proces': 'process',
+ 'propoganda': 'propaganda',
+ 'psuedo': 'pseudo',
+ 'publically': 'publicly',
+ 'realy': 'really',
+ 'reciept': 'receipt',
+ 'recieve': 'receive',
+ 'recieved': 'received',
+ 'reciever': 'receiver',
+ 'recievers': 'receivers',
+ 'recieves': 'receives',
+ 'recieving': 'receiving',
+ 'recomend': 'recommend',
+ 'recomended': 'recommended',
+ 'recomending': 'recommending',
+ 'recomends': 'recommends',
+ 'recurse': 'recur',
+ 'recurses': 'recurs',
+ 'recursing': 'recurring',
+ 'refered': 'referred',
+ 'refering': 'referring',
+ 'religous': 'religious',
+ 'rember': 'remember',
+ 'remeber': 'remember',
+ 'repetion': 'repetition',
+ 'reponsible': 'responsible',
+ 'resistence': 'resistance',
+ 'retreive': 'retrieve',
+ 'seige': 'siege',
+ 'sence': 'since',
+ 'seperate': 'separate',
+ 'seperated': 'separated',
+ 'seperately': 'separately',
+ 'seperates': 'separates',
+ 'similiar': 'similar',
+ 'somwhere': 'somewhere',
+ 'sould': 'could, should, sold, soul',
+ 'sturcture': 'structure',
+ 'succesful': 'successful',
+ 'succesfully': 'successfully',
+ 'successfull': 'successful',
+ 'sucessful': 'successful',
+ 'supercede': 'supersede',
+ 'supress': 'suppress',
+ 'supressed': 'suppressed',
+ 'suprise': 'surprise',
+ 'suprisingly': 'surprisingly',
+ 'sytem': 'system',
+ 'tendancy': 'tendency',
+ 'the the': 'the',
+ 'the these': 'these',
+ 'therefor': 'therefore',
+ 'threshhold': 'threshold',
+ 'tolerence': 'tolerance',
+ 'tommorow': 'tomorrow',
+ 'tommorrow': 'tomorrow',
+ 'tounge': 'tongue',
+ 'tranformed': 'transformed',
+ 'transfered': 'transferred',
+ 'truely': 'truly',
+ 'trustworthyness': 'trustworthiness',
+ 'unforseen': 'unforeseen',
+ 'unfortunatly': 'unfortunately',
+ 'unsuccessfull': 'unsuccessful',
+ 'untill': 'until',
+ 'upto': 'up to',
+ 'whereever': 'wherever',
+ 'wich': 'which',
+ 'wierd': 'weird',
+ 'wtih': 'with',
+}
+
+alternates = {
+}
+
+def _compile_entries(table):
+    """Return a (regex, term, replacement) tuple for each item of table."""
+    return [(re.compile(r'\b%s\b' % (term), re.IGNORECASE), term, fix)
+            for term, fix in table.iteritems()]
+
+#
+# Every term is compiled once, at import time.  The \b anchors restrict
+# a hit to whole words, and IGNORECASE makes matching case-insensitive.
+# check() consumes the resulting tuples.
+#
+misspellingREs = _compile_entries(misspellings)
+alternateREs = _compile_entries(alternates)
+
+def check(errmsg, output, filename, line, lineno, entry):
+    """Report entry if its regex matches line; return 1 on a hit, else 0."""
+    if not entry[0].search(line):
+        return 0
+    output.write(errmsg % (filename, lineno, entry[1], entry[2]))
+    return 1
+
+def spellcheck(fh, filename=None, output=sys.stderr, **opts):
+    """Scan fh for known misspellings and discouraged alternates.
+
+    fh is rewound with seek(0) before it is read line by line.  filename
+    defaults to fh.name and only labels the messages written to output;
+    opts is accepted but unused here.  Returns 1 if any line matched an
+    entry of misspellingREs or alternateREs, otherwise 0.
+    """
+    if not filename:
+        filename = fh.name
+    passes = ((spellMsg, misspellingREs), (altMsg, alternateREs))
+    found = 0
+    fh.seek(0)
+    for lineno, line in enumerate(fh, 1):
+        for msg, entries in passes:
+            for entry in entries:
+                found |= check(msg, output, filename, line, lineno, entry)
+    return found
diff --git a/usr/src/tools/onbld/Checks/__init__.py b/usr/src/tools/onbld/Checks/__init__.py
index 7051b0c565..9fdd25d43c 100644
--- a/usr/src/tools/onbld/Checks/__init__.py
+++ b/usr/src/tools/onbld/Checks/__init__.py
@@ -41,4 +41,5 @@ __all__ = [
'JStyle',
'Keywords',
'ManLint',
- 'Mapfile']
+ 'Mapfile',
+ 'SpellCheck']
diff --git a/usr/src/tools/scripts/git-pbchk.py b/usr/src/tools/scripts/git-pbchk.py
index 92303f967f..8dea5a8785 100644
--- a/usr/src/tools/scripts/git-pbchk.py
+++ b/usr/src/tools/scripts/git-pbchk.py
@@ -48,7 +48,7 @@ sys.path.insert(2, os.path.join(os.path.dirname(__file__), ".."))
from onbld.Scm import Ignore
from onbld.Checks import Comments, Copyright, CStyle, HdrChk
-from onbld.Checks import JStyle, Keywords, ManLint, Mapfile
+from onbld.Checks import JStyle, Keywords, ManLint, Mapfile, SpellCheck
class GitError(Exception):
@@ -291,11 +291,12 @@ def jstyle(root, parent, flist, output):
def manlint(root, parent, flist, output):
ret = 0
- output.write("Man page format:\n")
+ output.write("Man page format/spelling:\n")
ManfileRE = re.compile(r'.*\.[0-9][a-z]*$', re.IGNORECASE)
for f in flist(lambda x: ManfileRE.match(x)):
fh = open(f, 'r')
ret |= ManLint.manlint(fh, output=output, picky=True)
+ ret |= SpellCheck.spellcheck(fh, output=output)
fh.close()
return ret