blob: bff056fa90c9602a45024d4a82ae9dec3110325e [file] [log] [blame]
Russ Cox6e887552011-12-15 12:32:59 -05001// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package go1
6
Dave Cheney166dab62012-06-06 07:49:58 +10007import "runtime"
8
Russ Cox6e887552011-12-15 12:32:59 -05009// Not a benchmark; input for revcomp.
10
Dave Cheney166dab62012-06-06 07:49:58 +100011var fastabytes = makefasta()
12
13func makefasta() []byte {
14 var n int = 25e6
15 if runtime.GOARCH == "arm" {
16 // TODO(dfc) remove this limitation after precise gc.
17 // A value of 25e6 consumes 465mb of heap on 32bit
18 // platforms, which is too much for most ARM systems.
19 // A value of 25e5 produces a memory layout that
20 // confuses the gc on 32bit platforms. So 25e4 it is.
21 n = 25e4
22 }
23 return fasta(n)
24}
Russ Cox6e887552011-12-15 12:32:59 -050025
26func fasta(n int) []byte {
27 out := make(fastaBuffer, 0, 11*n)
28
29 iub := []fastaAcid{
30 {prob: 0.27, sym: 'a'},
31 {prob: 0.12, sym: 'c'},
32 {prob: 0.12, sym: 'g'},
33 {prob: 0.27, sym: 't'},
34 {prob: 0.02, sym: 'B'},
35 {prob: 0.02, sym: 'D'},
36 {prob: 0.02, sym: 'H'},
37 {prob: 0.02, sym: 'K'},
38 {prob: 0.02, sym: 'M'},
39 {prob: 0.02, sym: 'N'},
40 {prob: 0.02, sym: 'R'},
41 {prob: 0.02, sym: 'S'},
42 {prob: 0.02, sym: 'V'},
43 {prob: 0.02, sym: 'W'},
44 {prob: 0.02, sym: 'Y'},
45 }
46
47 homosapiens := []fastaAcid{
48 {prob: 0.3029549426680, sym: 'a'},
49 {prob: 0.1979883004921, sym: 'c'},
50 {prob: 0.1975473066391, sym: 'g'},
51 {prob: 0.3015094502008, sym: 't'},
52 }
53
54 alu := []byte(
55 "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
56 "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
57 "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" +
58 "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" +
59 "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
60 "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
61 "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")
62
63 out.WriteString(">ONE Homo sapiens alu\n")
64 fastaRepeat(&out, alu, 2*n)
65 out.WriteString(">TWO IUB ambiguity codes\n")
66 fastaRandom(&out, iub, 3*n)
67 out.WriteString(">THREE Homo sapiens frequency\n")
68 fastaRandom(&out, homosapiens, 5*n)
69 return out
70}
71
72type fastaBuffer []byte
73
74func (b *fastaBuffer) Flush() {
75 panic("flush")
76}
77
78func (b *fastaBuffer) WriteString(s string) {
79 p := b.NextWrite(len(s))
80 copy(p, s)
81}
82
83func (b *fastaBuffer) NextWrite(n int) []byte {
84 p := *b
85 if len(p)+n > cap(p) {
86 b.Flush()
87 p = *b
88 }
89 out := p[len(p) : len(p)+n]
90 *b = p[:len(p)+n]
91 return out
92}
93
94const fastaLine = 60
95
96func fastaRepeat(out *fastaBuffer, alu []byte, n int) {
97 buf := append(alu, alu...)
98 off := 0
99 for n > 0 {
100 m := n
101 if m > fastaLine {
102 m = fastaLine
103 }
104 buf1 := out.NextWrite(m + 1)
105 copy(buf1, buf[off:])
106 buf1[m] = '\n'
107 if off += m; off >= len(alu) {
108 off -= len(alu)
109 }
110 n -= m
111 }
112}
113
114const (
115 fastaLookupSize = 4096
116 fastaLookupScale float64 = fastaLookupSize - 1
117)
118
119var fastaRand uint32 = 42
120
121type fastaAcid struct {
122 sym byte
123 prob float64
124 cprob float64
125 next *fastaAcid
126}
127
128func fastaComputeLookup(acid []fastaAcid) *[fastaLookupSize]*fastaAcid {
129 var lookup [fastaLookupSize]*fastaAcid
130 var p float64
131 for i := range acid {
132 p += acid[i].prob
133 acid[i].cprob = p * fastaLookupScale
134 if i > 0 {
135 acid[i-1].next = &acid[i]
136 }
137 }
138 acid[len(acid)-1].cprob = 1.0 * fastaLookupScale
139
140 j := 0
141 for i := range lookup {
142 for acid[j].cprob < float64(i) {
143 j++
144 }
145 lookup[i] = &acid[j]
146 }
147
148 return &lookup
149}
150
151func fastaRandom(out *fastaBuffer, acid []fastaAcid, n int) {
152 const (
153 IM = 139968
154 IA = 3877
155 IC = 29573
156 )
157 lookup := fastaComputeLookup(acid)
158 for n > 0 {
159 m := n
160 if m > fastaLine {
161 m = fastaLine
162 }
163 buf := out.NextWrite(m + 1)
164 f := fastaLookupScale / IM
165 myrand := fastaRand
166 for i := 0; i < m; i++ {
167 myrand = (myrand*IA + IC) % IM
168 r := float64(int(myrand)) * f
169 a := lookup[int(r)]
170 for a.cprob < r {
171 a = a.next
172 }
173 buf[i] = a.sym
174 }
175 fastaRand = myrand
176 buf[m] = '\n'
177 n -= m
178 }
179}