|  | // Copyright 2011 The Go Authors.  All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | package go1 | 
|  |  | 
|  | import "runtime" | 
|  |  | 
|  | // Not a benchmark; input for revcomp. | 
|  |  | 
|  | var fastabytes = makefasta() | 
|  |  | 
|  | func makefasta() []byte { | 
|  | var n int = 25e6 | 
|  | if runtime.GOARCH == "arm" { | 
|  | // TODO(dfc) remove this limitation after precise gc. | 
|  | // A value of 25e6 consumes 465mb of heap on 32bit | 
|  | // platforms, which is too much for most ARM systems. | 
|  | // A value of 25e5 produces a memory layout that | 
|  | // confuses the gc on 32bit platforms. So 25e4 it is. | 
|  | n = 25e4 | 
|  | } | 
|  | return fasta(n) | 
|  | } | 
|  |  | 
|  | func fasta(n int) []byte { | 
|  | out := make(fastaBuffer, 0, 11*n) | 
|  |  | 
|  | iub := []fastaAcid{ | 
|  | {prob: 0.27, sym: 'a'}, | 
|  | {prob: 0.12, sym: 'c'}, | 
|  | {prob: 0.12, sym: 'g'}, | 
|  | {prob: 0.27, sym: 't'}, | 
|  | {prob: 0.02, sym: 'B'}, | 
|  | {prob: 0.02, sym: 'D'}, | 
|  | {prob: 0.02, sym: 'H'}, | 
|  | {prob: 0.02, sym: 'K'}, | 
|  | {prob: 0.02, sym: 'M'}, | 
|  | {prob: 0.02, sym: 'N'}, | 
|  | {prob: 0.02, sym: 'R'}, | 
|  | {prob: 0.02, sym: 'S'}, | 
|  | {prob: 0.02, sym: 'V'}, | 
|  | {prob: 0.02, sym: 'W'}, | 
|  | {prob: 0.02, sym: 'Y'}, | 
|  | } | 
|  |  | 
|  | homosapiens := []fastaAcid{ | 
|  | {prob: 0.3029549426680, sym: 'a'}, | 
|  | {prob: 0.1979883004921, sym: 'c'}, | 
|  | {prob: 0.1975473066391, sym: 'g'}, | 
|  | {prob: 0.3015094502008, sym: 't'}, | 
|  | } | 
|  |  | 
|  | alu := []byte( | 
|  | "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" + | 
|  | "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" + | 
|  | "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" + | 
|  | "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" + | 
|  | "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" + | 
|  | "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" + | 
|  | "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA") | 
|  |  | 
|  | out.WriteString(">ONE Homo sapiens alu\n") | 
|  | fastaRepeat(&out, alu, 2*n) | 
|  | out.WriteString(">TWO IUB ambiguity codes\n") | 
|  | fastaRandom(&out, iub, 3*n) | 
|  | out.WriteString(">THREE Homo sapiens frequency\n") | 
|  | fastaRandom(&out, homosapiens, 5*n) | 
|  | return out | 
|  | } | 
|  |  | 
// fastaBuffer is a fixed-capacity output buffer. Its length is the number
// of bytes written so far; its capacity is the most it can ever hold.
type fastaBuffer []byte

// Flush always panics with "flush". NextWrite calls it when a write would
// not fit in the remaining capacity.
func (b *fastaBuffer) Flush() { panic("flush") }

// WriteString appends s to the buffer.
func (b *fastaBuffer) WriteString(s string) {
	copy(b.NextWrite(len(s)), s)
}

// NextWrite extends the buffer by n bytes and returns the newly added
// region for the caller to fill in. If fewer than n bytes of capacity
// remain, Flush is called first (which panics).
func (b *fastaBuffer) NextWrite(n int) []byte {
	if len(*b)+n > cap(*b) {
		b.Flush()
	}
	start := len(*b)
	end := start + n
	*b = (*b)[:end]
	return (*b)[start:end]
}
|  |  | 
|  | const fastaLine = 60 | 
|  |  | 
|  | func fastaRepeat(out *fastaBuffer, alu []byte, n int) { | 
|  | buf := append(alu, alu...) | 
|  | off := 0 | 
|  | for n > 0 { | 
|  | m := n | 
|  | if m > fastaLine { | 
|  | m = fastaLine | 
|  | } | 
|  | buf1 := out.NextWrite(m + 1) | 
|  | copy(buf1, buf[off:]) | 
|  | buf1[m] = '\n' | 
|  | if off += m; off >= len(alu) { | 
|  | off -= len(alu) | 
|  | } | 
|  | n -= m | 
|  | } | 
|  | } | 
|  |  | 
// fastaLookupSize is the number of entries in the table built by
// fastaComputeLookup.
const fastaLookupSize = 4096

// fastaLookupScale is the factor applied to cumulative probabilities so
// that they span the index range of the lookup table (0 through
// fastaLookupSize-1).
const fastaLookupScale float64 = fastaLookupSize - 1
|  |  | 
// fastaRand is the current state of the pseudo-random generator used by
// fastaRandom, which reads it and stores the advanced state back. It starts
// at 42.
var fastaRand = uint32(42)
|  |  | 
// fastaAcid is one entry of a symbol probability table.
type fastaAcid struct {
	// sym is the byte emitted when this entry is selected.
	sym byte
	// prob is the probability of this symbol; cprob is the cumulative
	// probability up to and including this entry, scaled by
	// fastaLookupScale (filled in by fastaComputeLookup).
	prob, cprob float64
	// next points to the following entry of the table, or is nil for the
	// last one (filled in by fastaComputeLookup).
	next *fastaAcid
}
|  |  | 
|  | func fastaComputeLookup(acid []fastaAcid) *[fastaLookupSize]*fastaAcid { | 
|  | var lookup [fastaLookupSize]*fastaAcid | 
|  | var p float64 | 
|  | for i := range acid { | 
|  | p += acid[i].prob | 
|  | acid[i].cprob = p * fastaLookupScale | 
|  | if i > 0 { | 
|  | acid[i-1].next = &acid[i] | 
|  | } | 
|  | } | 
|  | acid[len(acid)-1].cprob = 1.0 * fastaLookupScale | 
|  |  | 
|  | j := 0 | 
|  | for i := range lookup { | 
|  | for acid[j].cprob < float64(i) { | 
|  | j++ | 
|  | } | 
|  | lookup[i] = &acid[j] | 
|  | } | 
|  |  | 
|  | return &lookup | 
|  | } | 
|  |  | 
|  | func fastaRandom(out *fastaBuffer, acid []fastaAcid, n int) { | 
|  | const ( | 
|  | IM = 139968 | 
|  | IA = 3877 | 
|  | IC = 29573 | 
|  | ) | 
|  | lookup := fastaComputeLookup(acid) | 
|  | for n > 0 { | 
|  | m := n | 
|  | if m > fastaLine { | 
|  | m = fastaLine | 
|  | } | 
|  | buf := out.NextWrite(m + 1) | 
|  | f := fastaLookupScale / IM | 
|  | myrand := fastaRand | 
|  | for i := 0; i < m; i++ { | 
|  | myrand = (myrand*IA + IC) % IM | 
|  | r := float64(int(myrand)) * f | 
|  | a := lookup[int(r)] | 
|  | for a.cprob < r { | 
|  | a = a.next | 
|  | } | 
|  | buf[i] = a.sym | 
|  | } | 
|  | fastaRand = myrand | 
|  | buf[m] = '\n' | 
|  | n -= m | 
|  | } | 
|  | } |