summary | refs | log | tree | commit | diff
path: root/test/bench/regex-dna.c
diff options
context:
space:
mode:
Diffstat (limited to 'test/bench/regex-dna.c')
-rw-r--r-- test/bench/regex-dna.c 154
1 files changed, 0 insertions, 154 deletions
diff --git a/test/bench/regex-dna.c b/test/bench/regex-dna.c
deleted file mode 100644
index 134f8215c..000000000
--- a/test/bench/regex-dna.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of "The Computer Language Benchmarks Game" nor the
- name of "The Computer Language Shootout Benchmarks" nor the names of
- its contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/*
-** The Computer Language Shootout
-** http://shootout.alioth.debian.org/
-** contributed by Mike Pall
-**
-** regex-dna benchmark using PCRE
-**
-** compile with:
-** gcc -O3 -fomit-frame-pointer -o regexdna regexdna.c -lpcre
-*/
-
-#define __USE_STRING_INLINES
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <pcre.h>
-
/* Growable byte buffer: `len` bytes of `buf` are in use out of `size` allocated. */
typedef struct fbuf {
    char *buf;      /* start of the storage, or NULL when nothing is allocated */
    size_t size;    /* allocated capacity in bytes */
    size_t len;     /* number of bytes currently in use */
} fbuf_t;
-
-static void fb_init(fbuf_t *b)
-{
- b->buf = NULL;
- b->len = b->size = 0;
-}
-
-static char *fb_need(fbuf_t *b, size_t need)
-{
- need += b->len;
- if (need > b->size) {
- if (b->size == 0) b->size = need;
- else while (need > b->size) b->size += b->size;
- if (!(b->buf = realloc(b->buf, b->size))) exit(1);
- }
- return b->buf+b->len;
-}
-
-#define FB_MINREAD (3<<16)
-
-/* Read all of a stdio stream into dst buffer. */
-static size_t fb_readall(fbuf_t *dst, FILE *fp)
-{
- char *dp;
- int n;
- for (dp = fb_need(dst, FB_MINREAD);
- (n = fread(dp, 1, dst->size-dst->len, fp)) > 0;
- dp = fb_need(dst, FB_MINREAD)) dst->len += n;
- if (ferror(fp)) exit(1);
- return dst->len;
-}
-
-/* Substitute pattern p with replacement r, copying from src to dst buffer. */
-static size_t fb_subst(fbuf_t *dst, fbuf_t *src, const char *p, const char *r)
-{
- pcre *re;
- pcre_extra *re_ex;
- const char *re_e;
- char *dp;
- int re_eo, m[3], pos, rlen, clen;
- if (!(re = pcre_compile(p, PCRE_CASELESS, &re_e, &re_eo, NULL))) exit(1);
- re_ex = pcre_study(re, 0, &re_e);
- for (dst->len = 0, rlen = strlen(r), pos = 0;
- pcre_exec(re, re_ex, src->buf, src->len, pos, 0, m, 3) >= 0;
- pos = m[1]) {
- clen = m[0]-pos;
- dp = fb_need(dst, clen+rlen);
- dst->len += clen+rlen;
- memcpy(dp, src->buf+pos, clen);
- memcpy(dp+clen, r, rlen);
- }
- clen = src->len-pos;
- dp = fb_need(dst, clen);
- dst->len += clen;
- memcpy(dp, src->buf+pos, clen);
- return dst->len;
-}
-
-/* Count all matches with pattern p in src buffer. */
-static int fb_countmatches(fbuf_t *src, const char *p)
-{
- pcre *re;
- pcre_extra *re_ex;
- const char *re_e;
- int re_eo, m[3], pos, count;
- if (!(re = pcre_compile(p, PCRE_CASELESS, &re_e, &re_eo, NULL))) exit(1);
- re_ex = pcre_study(re, 0, &re_e);
- for (count = 0, pos = 0;
- pcre_exec(re, re_ex, src->buf, src->len, pos, 0, m, 3) >= 0;
- pos = m[1]) count++;
- return count;
-}
-
/* NULL-terminated list of patterns whose match counts are printed, in this order. */
static const char *variants[] = {
    "agggtaaa|tttaccct",
    "[cgt]gggtaaa|tttaccc[acg]",
    "a[act]ggtaaa|tttacc[agt]t",
    "ag[act]gtaaa|tttac[agt]ct",
    "agg[act]taaa|ttta[agt]cct",
    "aggg[acg]aaa|ttt[cgt]ccct",
    "agggt[cgt]aa|tt[acg]accct",
    "agggta[cgt]a|t[acg]taccct",
    "agggtaa[cgt]|[acg]ttaccct",
    NULL
};
-
/*
** NULL-terminated list of (pattern, replacement) pairs, stored flat:
** even indices hold the pattern, the following odd index its replacement.
*/
static const char *subst[] = {
    "B", "(c|g|t)",
    "D", "(a|g|t)",
    "H", "(a|c|t)",
    "K", "(g|t)",
    "M", "(a|c)",
    "N", "(a|c|g|t)",
    "R", "(a|g)",
    "S", "(c|g)",
    "V", "(a|c|g)",
    "W", "(a|t)",
    "Y", "(c|t)",
    NULL
};
-
-int main(int argc, char **argv)
-{
- fbuf_t seq[2];
- const char **pp;
- size_t ilen, clen, slen;
- int flip;
- fb_init(&seq[0]);
- fb_init(&seq[1]);
- ilen = fb_readall(&seq[0], stdin);
- clen = fb_subst(&seq[1], &seq[0], ">.*|\n", "");
- for (pp = variants; *pp; pp++)
- printf("%s %d\n", *pp, fb_countmatches(&seq[1], *pp));
- for (slen = 0, flip = 1, pp = subst; *pp; pp += 2, flip = 1-flip)
- slen = fb_subst(&seq[1-flip], &seq[flip], *pp, pp[1]);
- printf("\n%zu\n%zu\n%zu\n", ilen, clen, slen);
- return 0;
-}