blob: 64c1c5205817725cfbf4b5acd1c28224458bb4de [file] [log] [blame]
Rob Pike9155bb32009-08-03 21:03:58 -07001/*
2Redistribution and use in source and binary forms, with or without
3modification, are permitted provided that the following conditions are met:
4
5 * Redistributions of source code must retain the above copyright
6 notice, this list of conditions and the following disclaimer.
7
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11
12 * Neither the name of "The Computer Language Benchmarks Game" nor the
13 name of "The Computer Language Shootout Benchmarks" nor the names of
14 its contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27POSSIBILITY OF SUCH DAMAGE.
28*/
29
30/*
Russ Coxf8f83e82010-05-03 17:47:59 -070031 * http://shootout.alioth.debian.org/u32/program.php?test=fasta&lang=gcc&id=3
32 */
33
34/* The Computer Language Benchmarks Game
35 * http://shootout.alioth.debian.org/
36 *
37 * contributed by Petr Prokhorenkov
Rob Pike9155bb32009-08-03 21:03:58 -070038 */
39
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
Rob Pike9155bb32009-08-03 21:03:58 -070043
Rob Pikef57f8b62010-10-20 17:43:57 -070044#ifndef fwrite_unlocked
Russ Coxf8f83e82010-05-03 17:47:59 -070045// not available on OS X
46#define fwrite_unlocked fwrite
47#define fputc_unlocked fputc
48#define fputs_unlocked fputs
Rob Pikef57f8b62010-10-20 17:43:57 -070049#endif
Rob Pike9155bb32009-08-03 21:03:58 -070050
Russ Coxf8f83e82010-05-03 17:47:59 -070051#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
52#define unlikely(x) __builtin_expect((x), 0)
Rob Pike9155bb32009-08-03 21:03:58 -070053
Russ Coxf8f83e82010-05-03 17:47:59 -070054#define IM 139968
55#define IA 3877
56#define IC 29573
57
58#define LINE_LEN 60
59#define LOOKUP_SIZE 4096
60#define LOOKUP_SCALE ((float)(LOOKUP_SIZE - 1))
61
62typedef unsigned random_t;
63
64void
65random_init(random_t *random) {
66 *random = 42;
Rob Pike9155bb32009-08-03 21:03:58 -070067}
68
Russ Coxf8f83e82010-05-03 17:47:59 -070069// Special version with result rescaled to LOOKUP_SCALE.
70static inline
71float
72random_next_lookup(random_t *random) {
73 *random = (*random*IA + IC)%IM;
74
75 return (*random)*(LOOKUP_SCALE/IM);
76}
77
78struct amino_acid {
79 char sym;
80 float prob;
81 float cprob_lookup;
82};
83
84void
85repeat(const char *alu, const char *title, int n) {
86 int len = strlen(alu);
87 char buffer[len + LINE_LEN];
88 int pos = 0;
89
90 memcpy(buffer, alu, len);
91 memcpy(buffer + len, alu, LINE_LEN);
92
93 fputs_unlocked(title, stdout);
94 while (n > 0) {
95 int bytes = n > LINE_LEN ? LINE_LEN : n;
96
97 fwrite_unlocked(buffer + pos, bytes, 1, stdout);
98 pos += bytes;
99 if (pos > len) {
100 pos -= len;
101 }
102 fputc_unlocked('\n', stdout);
103 n -= bytes;
Rob Pike9155bb32009-08-03 21:03:58 -0700104 }
105}
106
Russ Coxf8f83e82010-05-03 17:47:59 -0700107/*
108 * Lookup table contains mapping from real values to cumulative
109 * probabilities. Careful selection of table size allows lookup
110 * virtually in constant time.
111 *
112 * All cumulative probabilities are rescaled to LOOKUP_SCALE,
113 * this allows to save one multiplication operation on each iteration
114 * in randomize().
115 */
Rob Pike9155bb32009-08-03 21:03:58 -0700116
Russ Coxf8f83e82010-05-03 17:47:59 -0700117void *
118fill_lookup(struct amino_acid **lookup, struct amino_acid *amino_acid, int amino_acid_size) {
119 float p = 0;
120 int i, j;
Rob Pike9155bb32009-08-03 21:03:58 -0700121
Russ Coxf8f83e82010-05-03 17:47:59 -0700122 for (i = 0; i < amino_acid_size; i++) {
123 p += amino_acid[i].prob;
124 amino_acid[i].cprob_lookup = p*LOOKUP_SCALE;
125 }
Rob Pike9155bb32009-08-03 21:03:58 -0700126
Russ Coxf8f83e82010-05-03 17:47:59 -0700127 // Prevent rounding error.
128 amino_acid[amino_acid_size - 1].cprob_lookup = LOOKUP_SIZE - 1;
Rob Pike9155bb32009-08-03 21:03:58 -0700129
Russ Coxf8f83e82010-05-03 17:47:59 -0700130 for (i = 0, j = 0; i < LOOKUP_SIZE; i++) {
131 while (amino_acid[j].cprob_lookup < i) {
132 j++;
133 }
134 lookup[i] = &amino_acid[j];
135 }
Rob Pike9155bb32009-08-03 21:03:58 -0700136
Rob Pike9155bb32009-08-03 21:03:58 -0700137 return 0;
138}
Russ Coxf8f83e82010-05-03 17:47:59 -0700139
140void
141randomize(struct amino_acid *amino_acid, int amino_acid_size,
142 const char *title, int n, random_t *rand) {
143 struct amino_acid *lookup[LOOKUP_SIZE];
144 char line_buffer[LINE_LEN + 1];
145 int i, j;
146
147 line_buffer[LINE_LEN] = '\n';
148
149 fill_lookup(lookup, amino_acid, amino_acid_size);
150
151 fputs_unlocked(title, stdout);
152
153 for (i = 0, j = 0; i < n; i++, j++) {
154 if (j == LINE_LEN) {
155 fwrite_unlocked(line_buffer, LINE_LEN + 1, 1, stdout);
156 j = 0;
157 }
158
159 float r = random_next_lookup(rand);
160 struct amino_acid *u = lookup[(short)r];
161 while (unlikely(u->cprob_lookup < r)) {
162 ++u;
163 }
164 line_buffer[j] = u->sym;
165 }
166 line_buffer[j] = '\n';
167 fwrite_unlocked(line_buffer, j + 1, 1, stdout);
168}
169
170struct amino_acid amino_acid[] = {
171 { 'a', 0.27 },
172 { 'c', 0.12 },
173 { 'g', 0.12 },
174 { 't', 0.27 },
175
176 { 'B', 0.02 },
177 { 'D', 0.02 },
178 { 'H', 0.02 },
179 { 'K', 0.02 },
180 { 'M', 0.02 },
181 { 'N', 0.02 },
182 { 'R', 0.02 },
183 { 'S', 0.02 },
184 { 'V', 0.02 },
185 { 'W', 0.02 },
186 { 'Y', 0.02 },
187};
188
189struct amino_acid homo_sapiens[] = {
190 { 'a', 0.3029549426680 },
191 { 'c', 0.1979883004921 },
192 { 'g', 0.1975473066391 },
193 { 't', 0.3015094502008 },
194};
195
196static const char alu[] =
197 "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG"
198 "GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA"
199 "GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA"
200 "AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT"
201 "CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC"
202 "CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG"
203 "CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
204
205int
206main(int argc, const char **argv) {
207 int n = argc > 1 ? atoi( argv[1] ) : 512;
208 random_t rand;
209
210 random_init(&rand);
211
212 repeat(alu, ">ONE Homo sapiens alu\n", n*2);
213 randomize(amino_acid, ARRAY_SIZE(amino_acid),
214 ">TWO IUB ambiguity codes\n", n*3, &rand);
215 randomize(homo_sapiens, ARRAY_SIZE(homo_sapiens),
216 ">THREE Homo sapiens frequency\n", n*5, &rand);
217
218 return 0;
Rob Pikef57f8b62010-10-20 17:43:57 -0700219}