diff options
| author | Russ Cox <rsc@golang.org> | 2010-05-03 17:47:59 -0700 |
|---|---|---|
| committer | Russ Cox <rsc@golang.org> | 2010-05-03 17:47:59 -0700 |
| commit | d011a57d844b5441a43801dbee13131821175b33 (patch) | |
| tree | 6564357001d4ce61effd975aed6bc97728ee9a0e /test/bench/fasta.go | |
| parent | 7a01eb5345dec0273ae3ed2410d8034cde9cdb1d (diff) | |
| download | golang-d011a57d844b5441a43801dbee13131821175b33.tar.gz | |
test/bench: import new fasta C reference, update Go, optimizations
OLD fasta -n 25000000
gcc -O2 fasta.c 7.59u 0.06s 7.74r
gc fasta 9.54u 0.15s 9.84r
gc_B fasta 9.48u 0.10s 9.62r
NEW fasta -n 25000000
gcc -O2 fasta.c 2.59u 0.02s 2.66r
gc fasta 3.00u 0.03s 3.09r
gc_B fasta 2.72u 0.03s 2.81r
R=r
CC=golang-dev
http://codereview.appspot.com/1054041
Diffstat (limited to 'test/bench/fasta.go')
| -rw-r--r-- | test/bench/fasta.go | 219 |
1 files changed, 125 insertions, 94 deletions
diff --git a/test/bench/fasta.go b/test/bench/fasta.go index f79ff680f..470bdb328 100644 --- a/test/bench/fasta.go +++ b/test/bench/fasta.go @@ -31,135 +31,137 @@ POSSIBILITY OF SUCH DAMAGE. * http://shootout.alioth.debian.org/ * * contributed by The Go Authors. - * Based on C program by Joern Inge Vestgaarden - * and Jorge Peixoto de Morais Neto. + * Based on C program by by Petr Prokhorenkov. */ package main import ( - "bufio" + "bytes" "flag" "os" ) -var out *bufio.Writer +var out = make(buffer, 0, 32768) var n = flag.Int("n", 1000, "length of result") -const WIDTH = 60 // Fold lines after WIDTH bytes +const Line = 60 -func min(a, b int) int { - if a < b { - return a +func Repeat(alu []byte, n int) { + buf := bytes.Add(alu, alu) + off := 0 + for n > 0 { + m := n + if m > Line { + m = Line + } + buf1 := out.NextWrite(m + 1) + copy(buf1, buf[off:]) + buf1[m] = '\n' + if off += m; off >= len(alu) { + off -= len(alu) + } + n -= m } - return b } -type AminoAcid struct { - p float - c byte -} +const ( + IM = 139968 + IA = 3877 + IC = 29573 -func AccumulateProbabilities(genelist []AminoAcid) { - for i := 1; i < len(genelist); i++ { - genelist[i].p += genelist[i-1].p - } + LookupSize = 4096 + LookupScale float64 = LookupSize - 1 +) + +var rand uint32 = 42 + +type Acid struct { + sym byte + prob float64 + cprob float64 + next *Acid } -// RepeatFasta prints the characters of the byte slice s. When it -// reaches the end of the slice, it goes back to the beginning. -// It stops after generating count characters. -// After each WIDTH characters it prints a newline. -// It assumes that WIDTH <= len(s) + 1. -func RepeatFasta(s []byte, count int) { - pos := 0 - s2 := make([]byte, len(s)+WIDTH) - copy(s2, s) - copy(s2[len(s):], s) - for count > 0 { - line := min(WIDTH, count) - out.Write(s2[pos : pos+line]) - out.WriteByte('\n') - pos += line - if pos >= len(s) { - pos -= len(s) +func computeLookup(acid []Acid) *[LookupSize]*Acid { + var lookup [LookupSize]*Acid + var p float64 + for i := range acid { + p += acid[i].prob + acid[i].cprob = p * LookupScale + if i > 0 { + acid[i-1].next = &acid[i] } - count -= line } -} + acid[len(acid)-1].cprob = 1.0 * LookupScale -var lastrandom uint32 = 42 + j := 0 + for i := range lookup { + for acid[j].cprob < float64(i) { + j++ + } + lookup[i] = &acid[j] + } -const ( - IM = 139968 - IA = 3877 - IC = 29573 -) + return &lookup +} -// Each element of genelist is a struct with a character and -// a floating point number p between 0 and 1. -// RandomFasta generates a random float r and -// finds the first element such that p >= r. -// This is a weighted random selection. -// RandomFasta then prints the character of the array element. -// This sequence is repeated count times. -// Between each WIDTH consecutive characters, the function prints a newline. -func RandomFasta(genelist []AminoAcid, count int) { - buf := make([]byte, WIDTH+1) - for count > 0 { - line := min(WIDTH, count) - for pos := 0; pos < line; pos++ { - lastrandom = (lastrandom*IA + IC) % IM - // Integer to float conversions are faster if the integer is signed. - r := float(int32(lastrandom)) / IM - for _, v := range genelist { - if v.p >= r { - buf[pos] = v.c - break - } +func Random(acid []Acid, n int) { + lookup := computeLookup(acid) + for n > 0 { + m := n + if m > Line { + m = Line + } + buf := out.NextWrite(m + 1) + f := LookupScale / IM + myrand := rand + for i := 0; i < m; i++ { + myrand = (myrand*IA + IC) % IM + r := float64(int(myrand)) * f + a := lookup[int(r)] + for a.cprob < r { + a = a.next } + buf[i] = a.sym } - buf[line] = '\n' - out.Write(buf[0 : line+1]) - count -= line + rand = myrand + buf[m] = '\n' + n -= m } } func main() { - out = bufio.NewWriter(os.Stdout) defer out.Flush() flag.Parse() - iub := []AminoAcid{ - AminoAcid{0.27, 'a'}, - AminoAcid{0.12, 'c'}, - AminoAcid{0.12, 'g'}, - AminoAcid{0.27, 't'}, - AminoAcid{0.02, 'B'}, - AminoAcid{0.02, 'D'}, - AminoAcid{0.02, 'H'}, - AminoAcid{0.02, 'K'}, - AminoAcid{0.02, 'M'}, - AminoAcid{0.02, 'N'}, - AminoAcid{0.02, 'R'}, - AminoAcid{0.02, 'S'}, - AminoAcid{0.02, 'V'}, - AminoAcid{0.02, 'W'}, - AminoAcid{0.02, 'Y'}, + iub := []Acid{ + Acid{prob: 0.27, sym: 'a'}, + Acid{prob: 0.12, sym: 'c'}, + Acid{prob: 0.12, sym: 'g'}, + Acid{prob: 0.27, sym: 't'}, + Acid{prob: 0.02, sym: 'B'}, + Acid{prob: 0.02, sym: 'D'}, + Acid{prob: 0.02, sym: 'H'}, + Acid{prob: 0.02, sym: 'K'}, + Acid{prob: 0.02, sym: 'M'}, + Acid{prob: 0.02, sym: 'N'}, + Acid{prob: 0.02, sym: 'R'}, + Acid{prob: 0.02, sym: 'S'}, + Acid{prob: 0.02, sym: 'V'}, + Acid{prob: 0.02, sym: 'W'}, + Acid{prob: 0.02, sym: 'Y'}, } - homosapiens := []AminoAcid{ - AminoAcid{0.3029549426680, 'a'}, - AminoAcid{0.1979883004921, 'c'}, - AminoAcid{0.1975473066391, 'g'}, - AminoAcid{0.3015094502008, 't'}, + homosapiens := []Acid{ + Acid{prob: 0.3029549426680, sym: 'a'}, + Acid{prob: 0.1979883004921, sym: 'c'}, + Acid{prob: 0.1975473066391, sym: 'g'}, + Acid{prob: 0.3015094502008, sym: 't'}, } - AccumulateProbabilities(iub) - AccumulateProbabilities(homosapiens) - alu := []byte( "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" + "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" + @@ -170,9 +172,38 @@ func main() { "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA") out.WriteString(">ONE Homo sapiens alu\n") - RepeatFasta(alu, 2**n) + Repeat(alu, 2**n) out.WriteString(">TWO IUB ambiguity codes\n") - RandomFasta(iub, 3**n) + Random(iub, 3**n) out.WriteString(">THREE Homo sapiens frequency\n") - RandomFasta(homosapiens, 5**n) + Random(homosapiens, 5**n) +} + + +type buffer []byte + +func (b *buffer) Flush() { + p := *b + if len(p) > 0 { + os.Stdout.Write(p) + } + *b = p[0:0] +} + +func (b *buffer) WriteString(s string) { + p := b.NextWrite(len(s)) + for i := 0; i < len(s); i++ { + p[i] = s[i] + } +} + +func (b *buffer) NextWrite(n int) []byte { + p := *b + if len(p)+n > cap(p) { + b.Flush() + p = *b + } + out := p[len(p) : len(p)+n] + *b = p[0 : len(p)+n] + return out } |
