summaryrefslogtreecommitdiff
path: root/test/bench/fasta.go
diff options
context:
space:
mode:
Diffstat (limited to 'test/bench/fasta.go')
-rw-r--r--test/bench/fasta.go219
1 files changed, 125 insertions, 94 deletions
diff --git a/test/bench/fasta.go b/test/bench/fasta.go
index f79ff680f..470bdb328 100644
--- a/test/bench/fasta.go
+++ b/test/bench/fasta.go
@@ -31,135 +31,137 @@ POSSIBILITY OF SUCH DAMAGE.
* http://shootout.alioth.debian.org/
*
* contributed by The Go Authors.
- * Based on C program by Joern Inge Vestgaarden
- * and Jorge Peixoto de Morais Neto.
+ * Based on C program by by Petr Prokhorenkov.
*/
package main
import (
- "bufio"
+ "bytes"
"flag"
"os"
)
-var out *bufio.Writer
+var out = make(buffer, 0, 32768)
var n = flag.Int("n", 1000, "length of result")
-const WIDTH = 60 // Fold lines after WIDTH bytes
+const Line = 60
-func min(a, b int) int {
- if a < b {
- return a
+func Repeat(alu []byte, n int) {
+ buf := bytes.Add(alu, alu)
+ off := 0
+ for n > 0 {
+ m := n
+ if m > Line {
+ m = Line
+ }
+ buf1 := out.NextWrite(m + 1)
+ copy(buf1, buf[off:])
+ buf1[m] = '\n'
+ if off += m; off >= len(alu) {
+ off -= len(alu)
+ }
+ n -= m
}
- return b
}
-type AminoAcid struct {
- p float
- c byte
-}
+const (
+ IM = 139968
+ IA = 3877
+ IC = 29573
-func AccumulateProbabilities(genelist []AminoAcid) {
- for i := 1; i < len(genelist); i++ {
- genelist[i].p += genelist[i-1].p
- }
+ LookupSize = 4096
+ LookupScale float64 = LookupSize - 1
+)
+
+var rand uint32 = 42
+
+type Acid struct {
+ sym byte
+ prob float64
+ cprob float64
+ next *Acid
}
-// RepeatFasta prints the characters of the byte slice s. When it
-// reaches the end of the slice, it goes back to the beginning.
-// It stops after generating count characters.
-// After each WIDTH characters it prints a newline.
-// It assumes that WIDTH <= len(s) + 1.
-func RepeatFasta(s []byte, count int) {
- pos := 0
- s2 := make([]byte, len(s)+WIDTH)
- copy(s2, s)
- copy(s2[len(s):], s)
- for count > 0 {
- line := min(WIDTH, count)
- out.Write(s2[pos : pos+line])
- out.WriteByte('\n')
- pos += line
- if pos >= len(s) {
- pos -= len(s)
+func computeLookup(acid []Acid) *[LookupSize]*Acid {
+ var lookup [LookupSize]*Acid
+ var p float64
+ for i := range acid {
+ p += acid[i].prob
+ acid[i].cprob = p * LookupScale
+ if i > 0 {
+ acid[i-1].next = &acid[i]
}
- count -= line
}
-}
+ acid[len(acid)-1].cprob = 1.0 * LookupScale
-var lastrandom uint32 = 42
+ j := 0
+ for i := range lookup {
+ for acid[j].cprob < float64(i) {
+ j++
+ }
+ lookup[i] = &acid[j]
+ }
-const (
- IM = 139968
- IA = 3877
- IC = 29573
-)
+ return &lookup
+}
-// Each element of genelist is a struct with a character and
-// a floating point number p between 0 and 1.
-// RandomFasta generates a random float r and
-// finds the first element such that p >= r.
-// This is a weighted random selection.
-// RandomFasta then prints the character of the array element.
-// This sequence is repeated count times.
-// Between each WIDTH consecutive characters, the function prints a newline.
-func RandomFasta(genelist []AminoAcid, count int) {
- buf := make([]byte, WIDTH+1)
- for count > 0 {
- line := min(WIDTH, count)
- for pos := 0; pos < line; pos++ {
- lastrandom = (lastrandom*IA + IC) % IM
- // Integer to float conversions are faster if the integer is signed.
- r := float(int32(lastrandom)) / IM
- for _, v := range genelist {
- if v.p >= r {
- buf[pos] = v.c
- break
- }
+func Random(acid []Acid, n int) {
+ lookup := computeLookup(acid)
+ for n > 0 {
+ m := n
+ if m > Line {
+ m = Line
+ }
+ buf := out.NextWrite(m + 1)
+ f := LookupScale / IM
+ myrand := rand
+ for i := 0; i < m; i++ {
+ myrand = (myrand*IA + IC) % IM
+ r := float64(int(myrand)) * f
+ a := lookup[int(r)]
+ for a.cprob < r {
+ a = a.next
}
+ buf[i] = a.sym
}
- buf[line] = '\n'
- out.Write(buf[0 : line+1])
- count -= line
+ rand = myrand
+ buf[m] = '\n'
+ n -= m
}
}
func main() {
- out = bufio.NewWriter(os.Stdout)
defer out.Flush()
flag.Parse()
- iub := []AminoAcid{
- AminoAcid{0.27, 'a'},
- AminoAcid{0.12, 'c'},
- AminoAcid{0.12, 'g'},
- AminoAcid{0.27, 't'},
- AminoAcid{0.02, 'B'},
- AminoAcid{0.02, 'D'},
- AminoAcid{0.02, 'H'},
- AminoAcid{0.02, 'K'},
- AminoAcid{0.02, 'M'},
- AminoAcid{0.02, 'N'},
- AminoAcid{0.02, 'R'},
- AminoAcid{0.02, 'S'},
- AminoAcid{0.02, 'V'},
- AminoAcid{0.02, 'W'},
- AminoAcid{0.02, 'Y'},
+ iub := []Acid{
+ Acid{prob: 0.27, sym: 'a'},
+ Acid{prob: 0.12, sym: 'c'},
+ Acid{prob: 0.12, sym: 'g'},
+ Acid{prob: 0.27, sym: 't'},
+ Acid{prob: 0.02, sym: 'B'},
+ Acid{prob: 0.02, sym: 'D'},
+ Acid{prob: 0.02, sym: 'H'},
+ Acid{prob: 0.02, sym: 'K'},
+ Acid{prob: 0.02, sym: 'M'},
+ Acid{prob: 0.02, sym: 'N'},
+ Acid{prob: 0.02, sym: 'R'},
+ Acid{prob: 0.02, sym: 'S'},
+ Acid{prob: 0.02, sym: 'V'},
+ Acid{prob: 0.02, sym: 'W'},
+ Acid{prob: 0.02, sym: 'Y'},
}
- homosapiens := []AminoAcid{
- AminoAcid{0.3029549426680, 'a'},
- AminoAcid{0.1979883004921, 'c'},
- AminoAcid{0.1975473066391, 'g'},
- AminoAcid{0.3015094502008, 't'},
+ homosapiens := []Acid{
+ Acid{prob: 0.3029549426680, sym: 'a'},
+ Acid{prob: 0.1979883004921, sym: 'c'},
+ Acid{prob: 0.1975473066391, sym: 'g'},
+ Acid{prob: 0.3015094502008, sym: 't'},
}
- AccumulateProbabilities(iub)
- AccumulateProbabilities(homosapiens)
-
alu := []byte(
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
@@ -170,9 +172,38 @@ func main() {
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")
out.WriteString(">ONE Homo sapiens alu\n")
- RepeatFasta(alu, 2**n)
+ Repeat(alu, 2**n)
out.WriteString(">TWO IUB ambiguity codes\n")
- RandomFasta(iub, 3**n)
+ Random(iub, 3**n)
out.WriteString(">THREE Homo sapiens frequency\n")
- RandomFasta(homosapiens, 5**n)
+ Random(homosapiens, 5**n)
+}
+
+
+type buffer []byte
+
+func (b *buffer) Flush() {
+ p := *b
+ if len(p) > 0 {
+ os.Stdout.Write(p)
+ }
+ *b = p[0:0]
+}
+
+func (b *buffer) WriteString(s string) {
+ p := b.NextWrite(len(s))
+ for i := 0; i < len(s); i++ {
+ p[i] = s[i]
+ }
+}
+
+func (b *buffer) NextWrite(n int) []byte {
+ p := *b
+ if len(p)+n > cap(p) {
+ b.Flush()
+ p = *b
+ }
+ out := p[len(p) : len(p)+n]
+ *b = p[0 : len(p)+n]
+ return out
}