libStatGen Software 1
ReferenceSequenceTest.cpp
1/*
2 * Copyright (C) 2011 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <getopt.h>
19#include "Generic.h"
20#include <stdio.h>
21#include "ReferenceSequence.h"
22#include "UnitTest.h"
23
24#include <assert.h>
25#include <sstream>
26
27
29{
30public:
31 ReferenceSequenceTest(const char *title) : UnitTest(title) {;}
32 void test1();
33 void test2();
34 void test3();
35 void humanGenomeTest1();
36
37 void test() {
38 test1();
39 test2();
40 test3();
41 // This test is very slow:
42 // humanGenomeTest1();
43 }
44};
45
46void ReferenceSequenceTest::test1(void)
47{
48 std::string sequence("ACTGACTGACTGACTGACTGACTGACTGACTGACTGACTG");
49 std::string word;
50
51 word="ACTG";
52 check(m_failures, ++m_testNum, "Test wordMatch with std::string", true,
53 Sequence::wordMatch(sequence, 4, word));
54
55 std::stringstream output;
56
57 Sequence::printNearbyWords(output, sequence, 8, word, 4);
58
59 std::string expect("\
60word 'ACTG' found -4 away from position 8.\n\
61word 'ACTG' found 0 away from position 8.\n\
62");
63
64 check(m_failures, ++m_testNum, "Test printNearbyWords with std::string", expect, output.str());
65
66
67 Sequence::getString(sequence, 4, 4, word);
68
69 check(m_failures, ++m_testNum, "Test getString with std::string", "ACTG", word);
70
71 Sequence::getHighLightedString(sequence, 0, 12, word, 4, 8);
72 check(m_failures, ++m_testNum, "Test getHighLightedStribng with std::string", "ACTGactgACTG",word);
73
74#if 0
75 // busted test - don't know why
76 output.clear();
77 output.str(std::string());
78// Sequence::printBaseContext(std::cout, sequence, 8, 4);
79 Sequence::printBaseContext(output, sequence, 8, 4);
80 expect="\
81index: 8\n\
82ACTGACTGA\n\
83 ^\n\
84";
85 check(m_failures, ++m_testNum, "Test printBaseContext with std::string", expect, output.str());
86#endif
87 std::string result;
88 std::string read("ACTGZZZZACTG");
89 expect = " ^^^^ ";
90 Sequence::getMismatchHatString(sequence, 4, result, read);
91 check(m_failures, ++m_testNum, "Test getMismatchHatString with std::string", expect, result);
92
93
94 read="ACTG";
95 std::string quality("");
96 size_t location = Sequence::simpleLocalAligner(sequence, 0, read, quality, 12);
97 check(m_failures, ++m_testNum, "Test simpleLocalAligner with std::string", (size_t) 0, location);
98
99 read="ACNG";
100 int misMatches = Sequence::getMismatchCount(sequence, 0, read);
101 check(m_failures, ++m_testNum, "Test getMismatchCount with std::string", 1, misMatches);
102
103 read="ACNG";
104 quality="$$$$";
105 int sumQ = Sequence::getSumQ(sequence, 0, read, quality);
106 check(m_failures, ++m_testNum, "Test getSumQ with std::string", 3, sumQ);
107}
108
109void ReferenceSequenceTest::test2(void)
110{
111 PackedSequenceData sequence;
112 std::string word;
113
114 sequence.push_back('A');
115 sequence.push_back('C');
116 sequence.push_back('T');
117 sequence.push_back('G');
118
119 sequence.push_back('A');
120 sequence.push_back('C');
121 sequence.push_back('T');
122 sequence.push_back('G');
123
124 sequence.push_back('A');
125 sequence.push_back('C');
126 sequence.push_back('T');
127 sequence.push_back('G');
128
129 sequence.push_back('A');
130 sequence.push_back('C');
131 sequence.push_back('T');
132 sequence.push_back('G');
133
134 Sequence::getString(sequence, 4, 4, word);
135
136 check(m_failures, ++m_testNum, "Test getString with PackedSequenceData", "ACTG", word);
137
138 std::cout << "test2 sequence utilization is " << sequence.getUtilization() * 100 << "% - expect around 6.25%" << std::endl;
139
140}
141
142void ReferenceSequenceTest::test3(void)
143{
144 std::vector<PackedSequenceData> chromosomeSequence;
145 std::vector<std::string> chromosomeNames;
146
147 bool result = loadFastaFile("../phiX.fa", chromosomeSequence, chromosomeNames);
148
149 if(result) {
150 std::cout << "../phiX.fa not found - skipping these tests." << std::endl;
151 return;
152 }
153
154 std::cout << "phiX reference utilization is " << chromosomeSequence[0].getUtilization() * 100 << "% - expect around 96.8%" << std::endl;
155
156
157
158 check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", (size_t) 1, chromosomeNames.size());
159 check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", (size_t) 1, chromosomeSequence.size());
160 check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", "1", chromosomeNames[0]);
161
162 std::string word;
163
164 Sequence::getString(chromosomeSequence[0], 60, 10, word);
165
166 check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", "AAATTATCTT", word);
167
168}
169
170void ReferenceSequenceTest::humanGenomeTest1(void)
171{
172 std::vector<PackedSequenceData> chromosomeSequence;
173 std::vector<std::string> chromosomeNames;
174
175#define HUMAN_GENOME "/data/local/ref/karma.ref/human.g1k.v37.fa"
176 bool result = loadFastaFile(HUMAN_GENOME, chromosomeSequence, chromosomeNames);
177
178 if(result) {
179 std::cout << HUMAN_GENOME << " not found - skipping these tests." << std::endl;
180 return;
181 }
182
183}
184
185int main(int argc, char **argv)
186{
187 ReferenceSequenceTest test("ReferenceSequenceTest");
188
189 test.test();
190
191 std::cout << test;
192
193 exit(test.getFailureCount());
194}