blob: af4fbac274c125756988ff517b8d47e041159fe1 [file] [log] [blame]
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package go1
import "runtime"
// Not a benchmark; input for revcomp.
var fastabytes = makefasta()
func makefasta() []byte {
var n int = 25e6
if runtime.GOARCH == "arm" || runtime.GOARCH == "mips" || runtime.GOARCH == "mips64" {
// TODO(dfc) remove this limitation after precise gc.
// A value of 25e6 consumes 465mb of heap on 32bit
// platforms, which is too much for some systems.
// A value of 25e5 produces a memory layout that
// confuses the gc on 32bit platforms. So 25e4 it is.
n = 25e4
}
return fasta(n)
}
func fasta(n int) []byte {
out := make(fastaBuffer, 0, 11*n)
iub := []fastaAcid{
{prob: 0.27, sym: 'a'},
{prob: 0.12, sym: 'c'},
{prob: 0.12, sym: 'g'},
{prob: 0.27, sym: 't'},
{prob: 0.02, sym: 'B'},
{prob: 0.02, sym: 'D'},
{prob: 0.02, sym: 'H'},
{prob: 0.02, sym: 'K'},
{prob: 0.02, sym: 'M'},
{prob: 0.02, sym: 'N'},
{prob: 0.02, sym: 'R'},
{prob: 0.02, sym: 'S'},
{prob: 0.02, sym: 'V'},
{prob: 0.02, sym: 'W'},
{prob: 0.02, sym: 'Y'},
}
homosapiens := []fastaAcid{
{prob: 0.3029549426680, sym: 'a'},
{prob: 0.1979883004921, sym: 'c'},
{prob: 0.1975473066391, sym: 'g'},
{prob: 0.3015094502008, sym: 't'},
}
alu := []byte(
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" +
"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" +
"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")
out.WriteString(">ONE Homo sapiens alu\n")
fastaRepeat(&out, alu, 2*n)
out.WriteString(">TWO IUB ambiguity codes\n")
fastaRandom(&out, iub, 3*n)
out.WriteString(">THREE Homo sapiens frequency\n")
fastaRandom(&out, homosapiens, 5*n)
return out
}
type fastaBuffer []byte
func (b *fastaBuffer) Flush() {
panic("flush")
}
func (b *fastaBuffer) WriteString(s string) {
p := b.NextWrite(len(s))
copy(p, s)
}
func (b *fastaBuffer) NextWrite(n int) []byte {
p := *b
if len(p)+n > cap(p) {
b.Flush()
p = *b
}
out := p[len(p) : len(p)+n]
*b = p[:len(p)+n]
return out
}
const fastaLine = 60
func fastaRepeat(out *fastaBuffer, alu []byte, n int) {
buf := append(alu, alu...)
off := 0
for n > 0 {
m := n
if m > fastaLine {
m = fastaLine
}
buf1 := out.NextWrite(m + 1)
copy(buf1, buf[off:])
buf1[m] = '\n'
if off += m; off >= len(alu) {
off -= len(alu)
}
n -= m
}
}
const (
fastaLookupSize = 4096
fastaLookupScale float64 = fastaLookupSize - 1
)
var fastaRand uint32 = 42
type fastaAcid struct {
sym byte
prob float64
cprob float64
next *fastaAcid
}
func fastaComputeLookup(acid []fastaAcid) *[fastaLookupSize]*fastaAcid {
var lookup [fastaLookupSize]*fastaAcid
var p float64
for i := range acid {
p += acid[i].prob
acid[i].cprob = p * fastaLookupScale
if i > 0 {
acid[i-1].next = &acid[i]
}
}
acid[len(acid)-1].cprob = 1.0 * fastaLookupScale
j := 0
for i := range lookup {
for acid[j].cprob < float64(i) {
j++
}
lookup[i] = &acid[j]
}
return &lookup
}
func fastaRandom(out *fastaBuffer, acid []fastaAcid, n int) {
const (
IM = 139968
IA = 3877
IC = 29573
)
lookup := fastaComputeLookup(acid)
for n > 0 {
m := n
if m > fastaLine {
m = fastaLine
}
buf := out.NextWrite(m + 1)
f := fastaLookupScale / IM
myrand := fastaRand
for i := 0; i < m; i++ {
myrand = (myrand*IA + IC) % IM
r := float64(int(myrand)) * f
a := lookup[int(r)]
for a.cprob < r {
a = a.next
}
buf[i] = a.sym
}
fastaRand = myrand
buf[m] = '\n'
n -= m
}
}