diff options
| author | Ondřej Surý <ondrej@sury.org> | 2011-09-13 13:13:40 +0200 | 
|---|---|---|
| committer | Ondřej Surý <ondrej@sury.org> | 2011-09-13 13:13:40 +0200 | 
| commit | 5ff4c17907d5b19510a62e08fd8d3b11e62b431d (patch) | |
| tree | c0650497e988f47be9c6f2324fa692a52dea82e1 /test/bench/regex-dna.go | |
| parent | 80f18fc933cf3f3e829c5455a1023d69f7b86e52 (diff) | |
| download | golang-5ff4c17907d5b19510a62e08fd8d3b11e62b431d.tar.gz | |
Imported Upstream version 60 (tag: upstream/60)
Diffstat (limited to 'test/bench/regex-dna.go')
| -rw-r--r-- | test/bench/regex-dna.go | 106 | 
1 files changed, 106 insertions, 0 deletions
// Source: test/bench/regex-dna.go (new file, mode 100644, blob 042d7f283).

/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

    * Neither the name of "The Computer Language Benchmarks Game" nor the
    name of "The Computer Language Shootout Benchmarks" nor the names of
    its contributors may be used to endorse or promote products derived
    from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/

/* The Computer Language Benchmarks Game
 * http://shootout.alioth.debian.org/
 *
 * contributed by The Go Authors.
 */

package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"regexp"
)

// variants lists the regular expressions whose occurrences are counted in
// the cleaned input. Each one is reported on its own output line, in this
// order.
var variants = []string{
	"agggtaaa|tttaccct",
	"[cgt]gggtaaa|tttaccc[acg]",
	"a[act]ggtaaa|tttacc[agt]t",
	"ag[act]gtaaa|tttac[agt]ct",
	"agg[act]taaa|ttta[agt]cct",
	"aggg[acg]aaa|ttt[cgt]ccct",
	"agggt[cgt]aa|tt[acg]accct",
	"agggta[cgt]a|t[acg]taccct",
	"agggtaa[cgt]|[acg]ttaccct",
}

// Subst describes one rewrite step: every match of the regular expression
// pat is replaced by the text repl.
type Subst struct {
	pat, repl string
}

// substs is the list of rewrites that main applies, in order, after the
// variant counts have been printed.
var substs = []Subst{
	{"B", "(c|g|t)"},
	{"D", "(a|g|t)"},
	{"H", "(a|c|t)"},
	{"K", "(g|t)"},
	{"M", "(a|c)"},
	{"N", "(a|c|g|t)"},
	{"R", "(a|g)"},
	{"S", "(c|g)"},
	{"V", "(a|c|g)"},
	{"W", "(a|t)"},
	{"Y", "(c|t)"},
}

// countMatches returns the number of successive non-overlapping matches of
// the regular expression pat in bytes. It panics if pat does not compile.
//
// FindAllIndex is used instead of a manual FindIndex/re-slice loop so that a
// pattern able to match the empty string cannot stall the scan: the manual
// loop would re-slice by zero bytes forever in that case.
func countMatches(pat string, bytes []byte) int {
	re := regexp.MustCompile(pat)
	return len(re.FindAllIndex(bytes, -1))
}

// main reads all of standard input, removes the ">" header lines and every
// newline, prints the match count for each entry of variants, applies the
// rewrites in substs, and finally prints three lengths: the raw input, the
// cleaned input, and the input after all rewrites.
func main() {
	bytes, err := ioutil.ReadAll(os.Stdin)
	if err != nil {
		fmt.Fprintf(os.Stderr, "can't read input: %s\n", err)
		os.Exit(2)
	}
	ilen := len(bytes)
	// Delete the comment lines and newlines
	bytes = regexp.MustCompile("(>[^\n]+)?\n").ReplaceAll(bytes, []byte{})
	clen := len(bytes)
	for _, s := range variants {
		fmt.Printf("%s %d\n", s, countMatches(s, bytes))
	}
	// Each rewrite operates on the output of the previous one.
	for _, sub := range substs {
		bytes = regexp.MustCompile(sub.pat).ReplaceAll(bytes, []byte(sub.repl))
	}
	fmt.Printf("\n%d\n%d\n%d\n", ilen, clen, len(bytes))
}
