Diffstat (limited to 'pkgtools/pkglint/files/mklexer.go')
-rw-r--r--  pkgtools/pkglint/files/mklexer.go  |  414
1 file changed, 414 insertions(+), 0 deletions(-)
diff --git a/pkgtools/pkglint/files/mklexer.go b/pkgtools/pkglint/files/mklexer.go
new file mode 100644
index 00000000000..92c89a2e3b1
--- /dev/null
+++ b/pkgtools/pkglint/files/mklexer.go
@@ -0,0 +1,414 @@
+package pkglint
+
+import (
+ "netbsd.org/pkglint/regex"
+ "netbsd.org/pkglint/textproc"
+ "strings"
+)
+
+// MkLexer splits a text into a sequence of variable uses
+// and plain text.
+type MkLexer struct {
+ lexer *textproc.Lexer
+ line *Line
+}
+
+func NewMkLexer(text string, line *Line) *MkLexer {
+ return &MkLexer{textproc.NewLexer(text), line}
+}
+
+// MkTokens splits a text like in the following example:
+//  Text${VAR:Mmodifier}${VAR2}more text${VAR3}
+// into tokens like these:
+//  Text
+//  ${VAR:Mmodifier}
+//  ${VAR2}
+//  more text
+//  ${VAR3}
+func (p *MkLexer) MkTokens() ([]*MkToken, string) {
+ lexer := p.lexer
+
+ var tokens []*MkToken
+ for !lexer.EOF() {
+ mark := lexer.Mark()
+ if varuse := p.VarUse(); varuse != nil {
+ tokens = append(tokens, &MkToken{Text: lexer.Since(mark), Varuse: varuse})
+ continue
+ }
+
+ for lexer.NextBytesFunc(func(b byte) bool { return b != '$' }) != "" || lexer.SkipString("$$") {
+ }
+ text := lexer.Since(mark)
+ if text != "" {
+ tokens = append(tokens, &MkToken{Text: text})
+ continue
+ }
+
+ break
+ }
+ return tokens, lexer.Rest()
+}
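As a rough usage sketch (not part of this change, and with a made-up helper name), the example from the comment above could be tokenized from a test in the same package; passing a nil line keeps the lexer from logging diagnostics, since the code above always checks p.line != nil first:

func sketchMkTokens() {
	lexer := NewMkLexer("Text${VAR:Mmodifier}${VAR2}more text${VAR3}", nil)
	tokens, rest := lexer.MkTokens()
	// Expected: 5 tokens ("Text", "${VAR:Mmodifier}", "${VAR2}",
	// "more text", "${VAR3}") and an empty rest.
	_, _ = tokens, rest
}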
+
+func (p *MkLexer) VarUse() *MkVarUse {
+ rest := p.lexer.Rest()
+ if len(rest) < 2 || rest[0] != '$' {
+ return nil
+ }
+
+ switch rest[1] {
+ case '{', '(':
+ return p.varUseBrace(rest[1] == '(')
+
+ case '$':
+ // This is an escaped dollar character and not a variable use.
+ return nil
+
+ case '@', '<', ' ':
+ // These variable names are known to exist.
+ //
+ // Many others are also possible but not used in practice.
+ // In particular, when parsing the :C or :S modifier,
+ // the $ must not be interpreted as a variable name,
+ // even when it looks like $/ could refer to the "/" variable.
+ //
+ // TODO: Find out whether $" is a variable use when it appears in the :M modifier.
+ p.lexer.Skip(2)
+ return &MkVarUse{rest[1:2], nil}
+
+ default:
+ return p.varUseAlnum()
+ }
+}
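A small sketch of the dispatch above, again with a made-up name and a nil line: an escaped "$$" is not a variable use, while "$@" is one of the known one-letter names:

func sketchVarUseDispatch() {
	escaped := NewMkLexer("$$HOME", nil).VarUse() // nil: escaped dollar, not a variable use
	target := NewMkLexer("$@", nil).VarUse()      // variable named "@"
	_, _ = escaped, target
}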
+
+// varUseBrace parses:
+// ${VAR}
+// ${arbitrary text:L}
+// ${variable with invalid chars}
+// $(PARENTHESES)
+// ${VAR:Mpattern:C,:,colon,g:Q:Q:Q}
+func (p *MkLexer) varUseBrace(usingRoundParen bool) *MkVarUse {
+ lexer := p.lexer
+
+ beforeDollar := lexer.Mark()
+ lexer.Skip(2)
+
+ closing := byte('}')
+ if usingRoundParen {
+ closing = ')'
+ }
+
+ beforeVarname := lexer.Mark()
+ varname := p.Varname()
+ p.varUseText(closing)
+ varExpr := lexer.Since(beforeVarname)
+
+ modifiers := p.VarUseModifiers(varExpr, closing)
+
+ closed := lexer.SkipByte(closing)
+
+ if p.line != nil {
+ if !closed {
+ p.line.Warnf("Missing closing %q for %q.", string(rune(closing)), varExpr)
+ }
+
+ if usingRoundParen && closed {
+ parenVaruse := lexer.Since(beforeDollar)
+ edit := []byte(parenVaruse)
+ edit[1] = '{'
+ edit[len(edit)-1] = '}'
+ bracesVaruse := string(edit)
+
+ fix := p.line.Autofix()
+ fix.Warnf("Please use curly braces {} instead of round parentheses () for %s.", varExpr)
+ fix.Replace(parenVaruse, bracesVaruse)
+ fix.Apply()
+ }
+
+ if len(varExpr) > len(varname) && !(&MkVarUse{varExpr, modifiers}).IsExpression() {
+ p.line.Warnf("Invalid part %q after variable name %q.", varExpr[len(varname):], varname)
+ }
+ }
+
+ return &MkVarUse{varExpr, modifiers}
+}
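The parentheses-to-braces autofix above rewrites only two bytes. A standalone sketch of that edit, with a hypothetical helper name:

func sketchParenToBrace(parenVaruse string) string {
	edit := []byte(parenVaruse)
	edit[1] = '{'           // "$(" becomes "${"
	edit[len(edit)-1] = '}' // ")" becomes "}"
	return string(edit)     // sketchParenToBrace("$(PREFIX)") == "${PREFIX}"
}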
+
+func (p *MkLexer) Varname() string {
+ lexer := p.lexer
+
+ // TODO: duplicated code in MatchVarassign
+ mark := lexer.Mark()
+ lexer.SkipByte('.')
+ for lexer.NextBytesSet(VarbaseBytes) != "" || p.VarUse() != nil {
+ }
+ if lexer.SkipByte('.') || hasPrefix(lexer.Since(mark), "SITES_") {
+ for lexer.NextBytesSet(VarparamBytes) != "" || p.VarUse() != nil {
+ }
+ }
+ return lexer.Since(mark)
+}
+
+// varUseText parses any text up to the next colon or closing mark.
+// Nested variable uses are parsed as well.
+//
+// This is used for the :L and :? modifiers since they accept arbitrary
+// text as the "variable name" and effectively interpret it as the variable
+// value instead.
+func (p *MkLexer) varUseText(closing byte) string {
+ lexer := p.lexer
+ start := lexer.Mark()
+ re := regcomp(regex.Pattern(condStr(closing == '}', `^([^$:}]|\$\$)+`, `^([^$:)]|\$\$)+`)))
+ for p.VarUse() != nil || lexer.SkipRegexp(re) {
+ }
+ return lexer.Since(start)
+}
+
+// VarUseModifiers parses the modifiers of a variable being used, such as :Q, :Mpattern.
+//
+// See the bmake manual page.
+func (p *MkLexer) VarUseModifiers(varname string, closing byte) []MkVarUseModifier {
+ lexer := p.lexer
+
+ var modifiers []MkVarUseModifier
+ // The :S and :C modifiers may be chained without using the : as separator.
+ mayOmitColon := false
+
+ for lexer.SkipByte(':') || mayOmitColon {
+ modifier := p.varUseModifier(varname, closing)
+ if modifier != "" {
+ modifiers = append(modifiers, MkVarUseModifier{modifier})
+ }
+ mayOmitColon = modifier != "" && (modifier[0] == 'S' || modifier[0] == 'C')
+ }
+ return modifiers
+}
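A sketch of the chaining rule, assuming it runs as a test in the same package: the lexer starts at the first colon, and the second :S modifier follows the first one without a separating colon:

func sketchChainedModifiers() {
	lexer := NewMkLexer(":S,a,b,S,c,d,}", nil)
	modifiers := lexer.VarUseModifiers("VAR", '}')
	// Expected: two modifiers, "S,a,b," and "S,c,d,"; the closing brace
	// is left unconsumed.
	_ = modifiers
}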
+
+// varUseModifier parses a single variable modifier such as :Q or :S,from,to,.
+// The actual parsing starts after the leading colon.
+func (p *MkLexer) varUseModifier(varname string, closing byte) string {
+ lexer := p.lexer
+ mark := lexer.Mark()
+
+ switch lexer.PeekByte() {
+ case 'E', 'H', 'L', 'O', 'Q', 'R', 'T', 's', 't', 'u':
+ mod := lexer.NextBytesSet(textproc.Alnum)
+
+ switch mod {
+ case
+ "E", // Extension, e.g. path/file.suffix => suffix
+ "H", // Head, e.g. dir/subdir/file.suffix => dir/subdir
+ "L", // XXX: Shouldn't this be handled specially?
+ "O", // Order alphabetically
+ "Ox", // Shuffle
+ "Q", // Quote shell meta-characters
+ "R", // Strip the file suffix, e.g. path/file.suffix => file
+ "T", // Basename, e.g. path/file.suffix => file.suffix
+ "sh", // Evaluate the variable value as shell command
+ "tA", // Try to convert to absolute path
+ "tW", // Causes the value to be treated as a single word
+ "tl", // To lowercase
+ "tu", // To uppercase
+ "tw", // Causes the value to be treated as list of words
+ "u": // Remove adjacent duplicate words (like uniq(1))
+ return mod
+ }
+
+ if hasPrefix(mod, "ts") {
+ // See devel/bmake/files/var.c:/case 't'
+ sep := mod[2:] + p.varUseText(closing)
+ switch {
+ case sep == "":
+ lexer.SkipString(":")
+ case len(sep) == 1:
+ break
+ case matches(sep, `^\\\d+`):
+ break
+ default:
+ if p.line != nil {
+ p.line.Warnf("Invalid separator %q for :ts modifier of %q.", sep, varname)
+ p.line.Explain(
+ "The separator for the :ts modifier must be either a single character",
+ "or an escape sequence like \\t or \\n or an octal or decimal escape",
+ "sequence; see the bmake man page for further details.")
+ }
+ }
+ return lexer.Since(mark)
+ }
+
+ case '=', 'D', 'M', 'N', 'U':
+ lexer.Skip(1)
+ re := regcomp(regex.Pattern(condStr(closing == '}', `^([^$:\\}]|\$\$|\\.)+`, `^([^$:\\)]|\$\$|\\.)+`)))
+ for p.VarUse() != nil || lexer.SkipRegexp(re) {
+ }
+ arg := lexer.Since(mark)
+ return strings.Replace(arg, "\\:", ":", -1)
+
+ case 'C', 'S':
+ if ok, _, _, _, _ := p.varUseModifierSubst(closing); ok {
+ return lexer.Since(mark)
+ }
+
+ case '@':
+ if p.varUseModifierAt(lexer, varname) {
+ return lexer.Since(mark)
+ }
+
+ case '[':
+ if lexer.SkipRegexp(regcomp(`^\[(?:[-.\d]+|#)\]`)) {
+ return lexer.Since(mark)
+ }
+
+ case '?':
+ lexer.Skip(1)
+ p.varUseText(closing)
+ if lexer.SkipByte(':') {
+ p.varUseText(closing)
+ return lexer.Since(mark)
+ }
+ }
+
+ lexer.Reset(mark)
+
+ re := regcomp(regex.Pattern(condStr(closing == '}', `^([^:$}]|\$\$)+`, `^([^:$)]|\$\$)+`)))
+ for p.VarUse() != nil || lexer.SkipRegexp(re) {
+ }
+ modifier := lexer.Since(mark)
+
+ // ${SOURCES:%.c=%.o} or ${:!uname -a!:[2]}
+ if contains(modifier, "=") || (hasPrefix(modifier, "!") && hasSuffix(modifier, "!")) {
+ return modifier
+ }
+
+ if p.line != nil && modifier != "" {
+ p.line.Warnf("Invalid variable modifier %q for %q.", modifier, varname)
+ }
+
+ return ""
+}
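A sketch of the fallback at the end of varUseModifier (made-up helper name): the old-style suffix replacement reaches the generic branch and is accepted because it contains an "=":

func sketchSysVModifier() {
	lexer := NewMkLexer(":%.c=%.o}", nil)
	modifiers := lexer.VarUseModifiers("SOURCES", '}')
	// Expected: a single modifier "%.c=%.o", without any warning.
	_ = modifiers
}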
+
+// varUseModifierSubst parses a :S,from,to, or a :C,from,to, modifier.
+func (p *MkLexer) varUseModifierSubst(closing byte) (ok bool, regex bool, from string, to string, options string) {
+ lexer := p.lexer
+ regex = lexer.PeekByte() == 'C'
+ lexer.Skip(1 /* the initial S or C */)
+
+ sep := lexer.PeekByte() // bmake allows _any_ separator, even letters.
+ if sep == -1 || byte(sep) == closing {
+ return
+ }
+
+ lexer.Skip(1)
+ separator := byte(sep)
+
+ unescape := func(s string) string {
+ return strings.Replace(s, "\\"+string(separator), string(separator), -1)
+ }
+
+ isOther := func(b byte) bool {
+ return b != separator && b != '$' && b != '\\'
+ }
+
+ skipOther := func() {
+ for {
+ switch {
+
+ case p.VarUse() != nil:
+ break
+
+ case lexer.SkipString("$$"):
+ break
+
+ case len(lexer.Rest()) >= 2 && lexer.PeekByte() == '\\' && separator != '\\':
+ _ = lexer.Skip(2)
+
+ case lexer.NextBytesFunc(isOther) != "":
+ break
+
+ default:
+ return
+ }
+ }
+ }
+
+ fromStart := lexer.Mark()
+ lexer.SkipByte('^')
+ skipOther()
+ lexer.SkipByte('$')
+ from = unescape(lexer.Since(fromStart))
+
+ if !lexer.SkipByte(separator) {
+ return
+ }
+
+ toStart := lexer.Mark()
+ skipOther()
+ to = unescape(lexer.Since(toStart))
+
+ if !lexer.SkipByte(separator) {
+ return
+ }
+
+ optionsStart := lexer.Mark()
+ lexer.NextBytesFunc(func(b byte) bool { return b == '1' || b == 'g' || b == 'W' })
+ options = lexer.Since(optionsStart)
+
+ ok = true
+ return
+}
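A sketch of the substitution parser, positioned directly at the S, just as when it is called from varUseModifier (the helper name is made up):

func sketchSubstModifier() {
	lexer := NewMkLexer("S,from,to,g}", nil)
	ok, isRegex, from, to, options := lexer.varUseModifierSubst('}')
	// Expected: ok == true, isRegex == false, from == "from",
	// to == "to", options == "g".
	_, _, _, _, _ = ok, isRegex, from, to, options
}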
+
+// varUseModifierAt parses a variable modifier like ":@v@echo ${v};@",
+// which expands the variable value in a loop.
+func (p *MkLexer) varUseModifierAt(lexer *textproc.Lexer, varname string) bool {
+ lexer.Skip(1 /* the initial @ */)
+
+ loopVar := lexer.NextBytesSet(AlnumDot)
+ if loopVar == "" || !lexer.SkipByte('@') {
+ return false
+ }
+
+ re := regcomp(`^([^$@\\]|\\.)+`)
+ for p.VarUse() != nil || lexer.SkipString("$$") || lexer.SkipRegexp(re) {
+ }
+
+ if !lexer.SkipByte('@') && p.line != nil {
+ p.line.Warnf("Modifier ${%s:@%s@...@} is missing the final \"@\".", varname, loopVar)
+ }
+
+ return true
+}
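A sketch of the loop modifier parser; the lexer starts at the first "@" since varUseModifier has already consumed the colon (the helper name is made up):

func sketchAtModifier() {
	mklex := NewMkLexer("@f@-I${f}@}", nil)
	ok := mklex.varUseModifierAt(mklex.lexer, "CPPFLAGS")
	// Expected: ok == true, with the closing brace still unconsumed.
	_ = ok
}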
+
+func (p *MkLexer) varUseAlnum() *MkVarUse {
+ lexer := p.lexer
+
+ apparentVarname := textproc.NewLexer(lexer.Rest()[1:]).NextBytesSet(textproc.AlnumU)
+ if apparentVarname == "" {
+ return nil
+ }
+
+ lexer.Skip(2)
+
+ if p.line != nil {
+ if len(apparentVarname) > 1 {
+ p.line.Errorf("$%[1]s is ambiguous. Use ${%[1]s} if you mean a Make variable or $$%[1]s if you mean a shell variable.",
+ apparentVarname)
+ p.line.Explain(
+ "Only the first letter after the dollar is the variable name.",
+ "Everything following it is normal text, even if it looks like a variable name to human readers.")
+ } else {
+ p.line.Warnf("$%[1]s is ambiguous. Use ${%[1]s} if you mean a Make variable or $$%[1]s if you mean a shell variable.", apparentVarname)
+ p.line.Explain(
+ "In its current form, this variable is parsed as a Make variable.",
+ "For human readers though, $x looks more like a shell variable than a Make variable,",
+ "since Make variables are usually written using braces (BSD-style) or parentheses (GNU-style).")
+ }
+ }
+
+ return &MkVarUse{apparentVarname[:1], nil}
+}
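A sketch of the ambiguity described in the diagnostics above; with a nil line nothing is logged, but the split after the first letter still shows up in the tokens:

func sketchAmbiguousDollar() {
	lexer := NewMkLexer("$VARNAME", nil)
	tokens, rest := lexer.MkTokens()
	// Expected: a token "$V" referring to the variable "V", followed by
	// the plain-text token "ARNAME"; rest is empty.
	_, _ = tokens, rest
}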
+
+func (p *MkLexer) EOF() bool {
+ return p.lexer.EOF()
+}
+
+func (p *MkLexer) Rest() string {
+ return p.lexer.Rest()
+}