diff options
| author | Ondřej Surý <ondrej@sury.org> | 2012-04-06 15:14:11 +0200 | 
|---|---|---|
| committer | Ondřej Surý <ondrej@sury.org> | 2012-04-06 15:14:11 +0200 | 
| commit | 505c19580e0f43fe5224431459cacb7c21edd93d (patch) | |
| tree | 79e2634c253d60afc0cc0b2f510dc7dcbb48497b /src/pkg/exp/regexp/syntax/compile.go | |
| parent | 1336a7c91e596c423a49d1194ea42d98bca0d958 (diff) | |
| download | golang-505c19580e0f43fe5224431459cacb7c21edd93d.tar.gz | |
Imported Upstream version 1upstream/1
Diffstat (limited to 'src/pkg/exp/regexp/syntax/compile.go')
| -rw-r--r-- | src/pkg/exp/regexp/syntax/compile.go | 269 | 
1 files changed, 0 insertions, 269 deletions
| diff --git a/src/pkg/exp/regexp/syntax/compile.go b/src/pkg/exp/regexp/syntax/compile.go deleted file mode 100644 index 5ea2425c3..000000000 --- a/src/pkg/exp/regexp/syntax/compile.go +++ /dev/null @@ -1,269 +0,0 @@ -package syntax - -import ( -	"os" -	"unicode" -) - -// A patchList is a list of instruction pointers that need to be filled in (patched). -// Because the pointers haven't been filled in yet, we can reuse their storage -// to hold the list.  It's kind of sleazy, but works well in practice. -// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration. -//  -// These aren't really pointers: they're integers, so we can reinterpret them -// this way without using package unsafe.  A value l denotes -// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1).  -// l == 0 denotes the empty list, okay because we start every program -// with a fail instruction, so we'll never want to point at its output link. -type patchList uint32 - -func (l patchList) next(p *Prog) patchList { -	i := &p.Inst[l>>1] -	if l&1 == 0 { -		return patchList(i.Out) -	} -	return patchList(i.Arg) -} - -func (l patchList) patch(p *Prog, val uint32) { -	for l != 0 { -		i := &p.Inst[l>>1] -		if l&1 == 0 { -			l = patchList(i.Out) -			i.Out = val -		} else { -			l = patchList(i.Arg) -			i.Arg = val -		} -	} -} - -func (l1 patchList) append(p *Prog, l2 patchList) patchList { -	if l1 == 0 { -		return l2 -	} -	if l2 == 0 { -		return l1 -	} - -	last := l1 -	for { -		next := last.next(p) -		if next == 0 { -			break -		} -		last = next -	} - -	i := &p.Inst[last>>1] -	if last&1 == 0 { -		i.Out = uint32(l2) -	} else { -		i.Arg = uint32(l2) -	} -	return l1 -} - -// A frag represents a compiled program fragment. -type frag struct { -	i   uint32    // index of first instruction -	out patchList // where to record end instruction -} - -type compiler struct { -	p *Prog -} - -// Compile compiles the regexp into a program to be executed. -func Compile(re *Regexp) (*Prog, os.Error) { -	var c compiler -	c.init() -	f := c.compile(re) -	f.out.patch(c.p, c.inst(InstMatch).i) -	c.p.Start = int(f.i) -	return c.p, nil -} - -func (c *compiler) init() { -	c.p = new(Prog) -	c.p.NumCap = 2 // implicit ( and ) for whole match $0 -	c.inst(InstFail) -} - -var anyRuneNotNL = []int{0, '\n' - 1, '\n' - 1, unicode.MaxRune} -var anyRune = []int{0, unicode.MaxRune} - -func (c *compiler) compile(re *Regexp) frag { -	switch re.Op { -	case OpNoMatch: -		return c.fail() -	case OpEmptyMatch: -		return c.nop() -	case OpLiteral: -		if len(re.Rune) == 0 { -			return c.nop() -		} -		var f frag -		for j := range re.Rune { -			f1 := c.rune(re.Rune[j : j+1]) -			if j == 0 { -				f = f1 -			} else { -				f = c.cat(f, f1) -			} -		} -		return f -	case OpCharClass: -		return c.rune(re.Rune) -	case OpAnyCharNotNL: -		return c.rune(anyRuneNotNL) -	case OpAnyChar: -		return c.rune(anyRune) -	case OpBeginLine: -		return c.empty(EmptyBeginLine) -	case OpEndLine: -		return c.empty(EmptyEndLine) -	case OpBeginText: -		return c.empty(EmptyBeginText) -	case OpEndText: -		return c.empty(EmptyEndText) -	case OpWordBoundary: -		return c.empty(EmptyWordBoundary) -	case OpNoWordBoundary: -		return c.empty(EmptyNoWordBoundary) -	case OpCapture: -		bra := c.cap(uint32(re.Cap << 1)) -		sub := c.compile(re.Sub[0]) -		ket := c.cap(uint32(re.Cap<<1 | 1)) -		return c.cat(c.cat(bra, sub), ket) -	case OpStar: -		return c.star(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0) -	case OpPlus: -		return c.plus(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0) -	case OpQuest: -		return c.quest(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0) -	case OpConcat: -		if len(re.Sub) == 0 { -			return c.nop() -		} -		var f frag -		for i, sub := range re.Sub { -			if i == 0 { -				f = c.compile(sub) -			} else { -				f = c.cat(f, c.compile(sub)) -			} -		} -		return f -	case OpAlternate: -		var f frag -		for _, sub := range re.Sub { -			f = c.alt(f, c.compile(sub)) -		} -		return f -	} -	panic("regexp: unhandled case in compile") -} - -func (c *compiler) inst(op InstOp) frag { -	// TODO: impose length limit -	f := frag{i: uint32(len(c.p.Inst))} -	c.p.Inst = append(c.p.Inst, Inst{Op: op}) -	return f -} - -func (c *compiler) nop() frag { -	f := c.inst(InstNop) -	f.out = patchList(f.i << 1) -	return f -} - -func (c *compiler) fail() frag { -	return frag{} -} - -func (c *compiler) cap(arg uint32) frag { -	f := c.inst(InstCapture) -	f.out = patchList(f.i << 1) -	c.p.Inst[f.i].Arg = arg - -	if c.p.NumCap < int(arg)+1 { -		c.p.NumCap = int(arg) + 1 -	} -	return f -} - -func (c *compiler) cat(f1, f2 frag) frag { -	// concat of failure is failure -	if f1.i == 0 || f2.i == 0 { -		return frag{} -	} - -	// TODO: elide nop - -	f1.out.patch(c.p, f2.i) -	return frag{f1.i, f2.out} -} - -func (c *compiler) alt(f1, f2 frag) frag { -	// alt of failure is other -	if f1.i == 0 { -		return f2 -	} -	if f2.i == 0 { -		return f1 -	} - -	f := c.inst(InstAlt) -	i := &c.p.Inst[f.i] -	i.Out = f1.i -	i.Arg = f2.i -	f.out = f1.out.append(c.p, f2.out) -	return f -} - -func (c *compiler) quest(f1 frag, nongreedy bool) frag { -	f := c.inst(InstAlt) -	i := &c.p.Inst[f.i] -	if nongreedy { -		i.Arg = f1.i -		f.out = patchList(f.i << 1) -	} else { -		i.Out = f1.i -		f.out = patchList(f.i<<1 | 1) -	} -	f.out = f.out.append(c.p, f1.out) -	return f -} - -func (c *compiler) star(f1 frag, nongreedy bool) frag { -	f := c.inst(InstAlt) -	i := &c.p.Inst[f.i] -	if nongreedy { -		i.Arg = f1.i -		f.out = patchList(f.i << 1) -	} else { -		i.Out = f1.i -		f.out = patchList(f.i<<1 | 1) -	} -	f1.out.patch(c.p, f.i) -	return f -} - -func (c *compiler) plus(f1 frag, nongreedy bool) frag { -	return frag{f1.i, c.star(f1, nongreedy).out} -} - -func (c *compiler) empty(op EmptyOp) frag { -	f := c.inst(InstEmptyWidth) -	c.p.Inst[f.i].Arg = uint32(op) -	f.out = patchList(f.i << 1) -	return f -} - -func (c *compiler) rune(rune []int) frag { -	f := c.inst(InstRune) -	c.p.Inst[f.i].Rune = rune -	f.out = patchList(f.i << 1) -	return f -} | 
