diff options
Diffstat (limited to 'test/bench/regex-dna.c')
-rw-r--r-- | test/bench/regex-dna.c | 154 |
1 files changed, 0 insertions, 154 deletions
diff --git a/test/bench/regex-dna.c b/test/bench/regex-dna.c deleted file mode 100644 index 134f8215c..000000000 --- a/test/bench/regex-dna.c +++ /dev/null @@ -1,154 +0,0 @@ -/* -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of "The Computer Language Benchmarks Game" nor the - name of "The Computer Language Shootout Benchmarks" nor the names of - its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -*/ - -/* -** The Computer Language Shootout -** http://shootout.alioth.debian.org/ -** contributed by Mike Pall -** -** regex-dna benchmark using PCRE -** -** compile with: -** gcc -O3 -fomit-frame-pointer -o regexdna regexdna.c -lpcre -*/ - -#define __USE_STRING_INLINES -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <pcre.h> - -typedef struct fbuf { - char *buf; - size_t size, len; -} fbuf_t; - -static void fb_init(fbuf_t *b) -{ - b->buf = NULL; - b->len = b->size = 0; -} - -static char *fb_need(fbuf_t *b, size_t need) -{ - need += b->len; - if (need > b->size) { - if (b->size == 0) b->size = need; - else while (need > b->size) b->size += b->size; - if (!(b->buf = realloc(b->buf, b->size))) exit(1); - } - return b->buf+b->len; -} - -#define FB_MINREAD (3<<16) - -/* Read all of a stdio stream into dst buffer. */ -static size_t fb_readall(fbuf_t *dst, FILE *fp) -{ - char *dp; - int n; - for (dp = fb_need(dst, FB_MINREAD); - (n = fread(dp, 1, dst->size-dst->len, fp)) > 0; - dp = fb_need(dst, FB_MINREAD)) dst->len += n; - if (ferror(fp)) exit(1); - return dst->len; -} - -/* Substitute pattern p with replacement r, copying from src to dst buffer. */ -static size_t fb_subst(fbuf_t *dst, fbuf_t *src, const char *p, const char *r) -{ - pcre *re; - pcre_extra *re_ex; - const char *re_e; - char *dp; - int re_eo, m[3], pos, rlen, clen; - if (!(re = pcre_compile(p, PCRE_CASELESS, &re_e, &re_eo, NULL))) exit(1); - re_ex = pcre_study(re, 0, &re_e); - for (dst->len = 0, rlen = strlen(r), pos = 0; - pcre_exec(re, re_ex, src->buf, src->len, pos, 0, m, 3) >= 0; - pos = m[1]) { - clen = m[0]-pos; - dp = fb_need(dst, clen+rlen); - dst->len += clen+rlen; - memcpy(dp, src->buf+pos, clen); - memcpy(dp+clen, r, rlen); - } - clen = src->len-pos; - dp = fb_need(dst, clen); - dst->len += clen; - memcpy(dp, src->buf+pos, clen); - return dst->len; -} - -/* Count all matches with pattern p in src buffer. */ -static int fb_countmatches(fbuf_t *src, const char *p) -{ - pcre *re; - pcre_extra *re_ex; - const char *re_e; - int re_eo, m[3], pos, count; - if (!(re = pcre_compile(p, PCRE_CASELESS, &re_e, &re_eo, NULL))) exit(1); - re_ex = pcre_study(re, 0, &re_e); - for (count = 0, pos = 0; - pcre_exec(re, re_ex, src->buf, src->len, pos, 0, m, 3) >= 0; - pos = m[1]) count++; - return count; -} - -static const char *variants[] = { - "agggtaaa|tttaccct", "[cgt]gggtaaa|tttaccc[acg]", - "a[act]ggtaaa|tttacc[agt]t", "ag[act]gtaaa|tttac[agt]ct", - "agg[act]taaa|ttta[agt]cct", "aggg[acg]aaa|ttt[cgt]ccct", - "agggt[cgt]aa|tt[acg]accct", "agggta[cgt]a|t[acg]taccct", - "agggtaa[cgt]|[acg]ttaccct", NULL -}; - -static const char *subst[] = { - "B", "(c|g|t)", "D", "(a|g|t)", "H", "(a|c|t)", "K", "(g|t)", - "M", "(a|c)", "N", "(a|c|g|t)", "R", "(a|g)", "S", "(c|g)", - "V", "(a|c|g)", "W", "(a|t)", "Y", "(c|t)", NULL -}; - -int main(int argc, char **argv) -{ - fbuf_t seq[2]; - const char **pp; - size_t ilen, clen, slen; - int flip; - fb_init(&seq[0]); - fb_init(&seq[1]); - ilen = fb_readall(&seq[0], stdin); - clen = fb_subst(&seq[1], &seq[0], ">.*|\n", ""); - for (pp = variants; *pp; pp++) - printf("%s %d\n", *pp, fb_countmatches(&seq[1], *pp)); - for (slen = 0, flip = 1, pp = subst; *pp; pp += 2, flip = 1-flip) - slen = fb_subst(&seq[1-flip], &seq[flip], *pp, pp[1]); - printf("\n%zu\n%zu\n%zu\n", ilen, clen, slen); - return 0; -} |