diff options
author | Rob Pike <r@golang.org> | 2009-08-05 13:03:46 -0700 |
---|---|---|
committer | Rob Pike <r@golang.org> | 2009-08-05 13:03:46 -0700 |
commit | 0d573960f4c0c12f8103a86922927f7f386b639f (patch) | |
tree | 585c1e62c5d4d50530ffa6b724c0f90dc033f4d1 /test/bench/regex-dna.go | |
parent | c8b49a4532508beea09fe32b8ff2ce8670e88652 (diff) | |
download | golang-0d573960f4c0c12f8103a86922927f7f386b639f.tar.gz |
regex-dna
R=rsc
DELTA=243 (242 added, 0 deleted, 1 changed)
OCL=32786
CL=32791
Diffstat (limited to 'test/bench/regex-dna.go')
-rw-r--r-- | test/bench/regex-dna.go | 117 |
1 files changed, 117 insertions, 0 deletions
// test/bench/regex-dna.go

/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

    * Neither the name of "The Computer Language Benchmarks Game" nor the
    name of "The Computer Language Shootout Benchmarks" nor the names of
    its contributors may be used to endorse or promote products derived
    from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/

/* The Computer Language Benchmarks Game
 * http://shootout.alioth.debian.org/
 *
 * contributed by The Go Authors.
 */

package main

import (
	"fmt"
	"io"
	"os"
	"regexp"
)

// compile parses s as a regular expression. If s is not a valid pattern it
// reports the problem on standard error and terminates the program with
// exit status 2, so callers never see a nil *regexp.Regexp.
func compile(s string) *regexp.Regexp {
	r, err := regexp.Compile(s)
	if err != nil {
		fmt.Fprintf(os.Stderr, "can't compile pattern %q: %s\n", s, err)
		os.Exit(2)
	}
	return r
}

// variants lists the patterns whose match counts main reports, in output
// order. Each pattern is an alternation of two 8-letter sequences.
var variants = []string{
	"agggtaaa|tttaccct",
	"[cgt]gggtaaa|tttaccc[acg]",
	"a[act]ggtaaa|tttacc[agt]t",
	"ag[act]gtaaa|tttac[agt]ct",
	"agg[act]taaa|ttta[agt]cct",
	"aggg[acg]aaa|ttt[cgt]ccct",
	"agggt[cgt]aa|tt[acg]accct",
	"agggta[cgt]a|t[acg]taccct",
	"agggtaa[cgt]|[acg]ttaccct",
}

// Subst is a single rewrite rule: every match of the regular expression pat
// is replaced by the literal text repl.
type Subst struct {
	pat, repl string
}

// substs holds the rewrite rules applied, in order, by expandCodes. Each rule
// maps one upper-case letter to a parenthesized alternation of lower-case
// letters (presumably the IUPAC nucleotide ambiguity codes; confirm against
// the benchmark specification).
var substs = []Subst{
	{"B", "(c|g|t)"},
	{"D", "(a|g|t)"},
	{"H", "(a|c|t)"},
	{"K", "(g|t)"},
	{"M", "(a|c)"},
	{"N", "(a|c|g|t)"},
	{"R", "(a|g)"},
	{"S", "(c|g)"},
	{"V", "(a|c|g)"},
	{"W", "(a|t)"},
	{"Y", "(c|t)"},
}

// countMatches returns the number of successive non-overlapping matches of
// the regular expression pat in str. An invalid pat terminates the program
// (see compile).
//
// FindAllStringIndex always advances past an empty match, so the count is
// finite even for patterns that can match the empty string.
func countMatches(pat, str string) int {
	return len(compile(pat).FindAllStringIndex(str, -1))
}

// stripHeaders returns str with every newline removed, together with the
// '>'-introduced text that precedes it on the same line, if any.
func stripHeaders(str string) string {
	return compile("(>[^\n]+)?\n").ReplaceAllLiteralString(str, "")
}

// expandCodes applies each rule in substs to str, in order, and returns the
// result. The replacement text is inserted literally (no $-expansion).
func expandCodes(str string) string {
	for _, sub := range substs {
		str = compile(sub.pat).ReplaceAllLiteralString(str, sub.repl)
	}
	return str
}

// main reads all of standard input, strips header lines and newlines, prints
// the match count for each entry of variants, applies the substs rewrites,
// and finally prints three lengths: the raw input, the stripped input, and
// the rewritten sequence. A read failure is reported on standard error and
// exits with status 2.
func main() {
	// Read from os.Stdin directly rather than opening "/dev/stdin" by path,
	// which is not available on every platform.
	input, err := io.ReadAll(os.Stdin)
	if err != nil {
		fmt.Fprintf(os.Stderr, "can't read input: %s\n", err)
		os.Exit(2)
	}
	str := string(input)
	ilen := len(str)

	// Delete the comment lines and newlines.
	str = stripHeaders(str)
	clen := len(str)

	for _, s := range variants {
		fmt.Printf("%s %d\n", s, countMatches(s, str))
	}

	str = expandCodes(str)
	fmt.Printf("\n%d\n%d\n%d\n", ilen, clen, len(str))
}