summary | refs | log | tree | commit | diff
path: root/test/bench/regex-dna.go
diff options
context:
space:
mode:
author: Rob Pike <r@golang.org> 2009-08-05 13:03:46 -0700
committer: Rob Pike <r@golang.org> 2009-08-05 13:03:46 -0700
commit: 0d573960f4c0c12f8103a86922927f7f386b639f (patch)
tree: 585c1e62c5d4d50530ffa6b724c0f90dc033f4d1 /test/bench/regex-dna.go
parent: c8b49a4532508beea09fe32b8ff2ce8670e88652 (diff)
download: golang-0d573960f4c0c12f8103a86922927f7f386b639f.tar.gz
regex-dna
R=rsc DELTA=243 (242 added, 0 deleted, 1 changed) OCL=32786 CL=32791
Diffstat (limited to 'test/bench/regex-dna.go')
-rw-r--r-- test/bench/regex-dna.go 117
1 files changed, 117 insertions, 0 deletions
diff --git a/test/bench/regex-dna.go b/test/bench/regex-dna.go
new file mode 100644
index 000000000..c0ade94e7
--- /dev/null
+++ b/test/bench/regex-dna.go
@@ -0,0 +1,117 @@
+/*
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of "The Computer Language Benchmarks Game" nor the
+ name of "The Computer Language Shootout Benchmarks" nor the names of
+ its contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* The Computer Language Benchmarks Game
+ * http://shootout.alioth.debian.org/
+ *
+ * contributed by The Go Authors.
+ */
+
+package main
+
+import (
+ "fmt";
+ "io";
+ "os";
+ "regexp";
+)
+
+// compile turns the pattern s into a *regexp.Regexp.
+//
+// It never returns an error to the caller: if s is not a valid regular
+// expression, a diagnostic naming the pattern is written to standard error
+// and the process exits with status 2.
+func compile(s string) *regexp.Regexp {
+	re, err := regexp.Compile(s);
+	if err == nil {
+		return re;
+	}
+	// A bad pattern is a programming error in this benchmark; report and stop.
+	fmt.Fprintf(os.Stderr, "can't compile pattern %q: %s\n", s, err);
+	os.Exit(2);
+	return nil;
+}
+
+// variants lists the regular expressions whose match counts main reports,
+// one output line per entry, in this order. Each entry is an alternation of
+// an 8-character pattern and its reverse complement (for example "agggtaaa"
+// and "tttaccct"), so either form counts as a match. The strings are also
+// printed verbatim as the label of each output line.
+var variants = []string {
+ "agggtaaa|tttaccct",
+ "[cgt]gggtaaa|tttaccc[acg]",
+ "a[act]ggtaaa|tttacc[agt]t",
+ "ag[act]gtaaa|tttac[agt]ct",
+ "agg[act]taaa|ttta[agt]cct",
+ "aggg[acg]aaa|ttt[cgt]ccct",
+ "agggt[cgt]aa|tt[acg]accct",
+ "agggta[cgt]a|t[acg]taccct",
+ "agggtaa[cgt]|[acg]ttaccct",
+}
+
+// Subst is one substitution rule used by main: pat is compiled as a regular
+// expression and repl is passed to ReplaceAll as the replacement text.
+type Subst struct {
+ pat, repl string
+}
+
+// substs is the ordered list of rewrites main applies to the cleaned input,
+// each one operating on the result of the previous one. Every rule maps a
+// single upper-case code letter to a parenthesized alternation of lower-case
+// letters.
+// NOTE(review): the codes look like the IUPAC nucleotide ambiguity codes
+// (e.g. N = any of a, c, g, t) -- confirm against the benchmark description.
+var substs = [] Subst {
+ Subst {"B", "(c|g|t)"},
+ Subst {"D", "(a|g|t)"},
+ Subst {"H", "(a|c|t)"},
+ Subst {"K", "(g|t)"},
+ Subst {"M", "(a|c)"},
+ Subst {"N", "(a|c|g|t)"},
+ Subst {"R", "(a|g)"},
+ Subst {"S", "(c|g)"},
+ Subst {"V", "(a|c|g)"},
+ Subst {"W", "(a|t)"},
+ Subst {"Y", "(c|t)"},
+}
+
+// countMatches compiles pat and returns how many successive, non-overlapping
+// matches of it occur in str.
+//
+// After each match the search continues on the part of str that follows the
+// match. pat must not be able to match the empty string at offset 0: such a
+// match would leave str unchanged and the loop would never terminate.
+// An invalid pat terminates the program (see compile).
+func countMatches(pat, str string) int {
+	re := compile(pat);
+	n := 0;
+	for {
+		// An empty result from Execute is treated as "no further match".
+		e := re.Execute(str);
+		if len(e) == 0 {
+			break;
+		}
+		n++;
+		// e[1] is used as the end offset of the match; drop everything up to
+		// it so the next search starts right after this match.
+		str = str[e[1]:len(str)];
+	}
+	return n;
+}
+
+// main runs the regex-dna benchmark on the data read from /dev/stdin.
+//
+// It prints one "<pattern> <count>" line for every entry of variants, then a
+// blank line followed by three lengths in bytes, one per line: the raw
+// input, the input with comment lines and newlines removed, and the text
+// after all substs rules have been applied. If the input cannot be read, a
+// message is written to standard error and the process exits with status 2.
+func main() {
+	bytes, err := io.ReadFile("/dev/stdin");
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "can't read input: %s\n", err);
+		os.Exit(2);
+	}
+	str := string(bytes);
+	ilen := len(str);
+	// Delete the comment lines and newlines: the pattern matches an optional
+	// run from '>' to the end of the line, plus the newline itself.
+	str = compile("(>[^\n]+)?\n").ReplaceAll(str, "");
+	clen := len(str);
+	// Only the elements are needed in the loops below, so the range index is
+	// discarded instead of being bound to an unused variable.
+	for _, s := range variants {
+		fmt.Printf("%s %d\n", s, countMatches(s, str));
+	}
+	// Each substitution rewrites the output of the previous one.
+	for _, sub := range substs {
+		str = compile(sub.pat).ReplaceAll(str, sub.repl);
+	}
+	fmt.Printf("\n%d\n%d\n%d\n", ilen, clen, len(str));
+}