libStatGen Software 1
SamRecord.cpp
1/*
2 * Copyright (C) 2010-2012 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <stdlib.h>
19#include <limits>
20#include <stdexcept>
21
22#include "bam.h"
23
24#include "SamRecord.h"
25#include "SamValidation.h"
26
27#include "BaseUtilities.h"
28#include "SamQuerySeqWithRefHelper.h"
29
30const char* SamRecord::DEFAULT_READ_NAME = "UNKNOWN";
31const char* SamRecord::FIELD_ABSENT_STRING = "=";
32int SamRecord::myNumWarns = 0;
33
35 : myStatus(),
36 myRefPtr(NULL),
37 mySequenceTranslation(NONE)
38{
39 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
40
41 myRecordPtr =
42 (bamRecordStruct *) malloc(defaultAllocSize);
43
44 myCigarTempBuffer = NULL;
45 myCigarTempBufferAllocatedSize = 0;
46
47 allocatedSize = defaultAllocSize;
48
50}
51
52
54 : myStatus(errorHandlingType),
55 myRefPtr(NULL),
56 mySequenceTranslation(NONE)
57{
58 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
59
60 myRecordPtr =
61 (bamRecordStruct *) malloc(defaultAllocSize);
62
63 myCigarTempBuffer = NULL;
64 myCigarTempBufferAllocatedSize = 0;
65
66 allocatedSize = defaultAllocSize;
67
69}
70
71
73{
75
76 if(myRecordPtr != NULL)
77 {
78 free(myRecordPtr);
79 myRecordPtr = NULL;
80 }
81 if(myCigarTempBuffer != NULL)
82 {
83 free(myCigarTempBuffer);
84 myCigarTempBuffer = NULL;
85 myCigarTempBufferAllocatedSize = 0;
86 }
87}
88
89
90// Resets the fields of the record to a default value.
92{
93 myIsBufferSynced = true;
94
95 myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
96 myRecordPtr->myReferenceID = -1;
97 myRecordPtr->myPosition = -1;
98 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
99 myRecordPtr->myMapQuality = 0;
100 myRecordPtr->myBin = DEFAULT_BIN;
101 myRecordPtr->myCigarLength = 0;
102 myRecordPtr->myFlag = 0;
103 myRecordPtr->myReadLength = 0;
104 myRecordPtr->myMateReferenceID = -1;
105 myRecordPtr->myMatePosition = -1;
106 myRecordPtr->myInsertSize = 0;
107
108 // Set the sam values for the variable length fields.
109 // TODO - one way to speed this up might be to not set to "*" and just
110 // clear them, and write out a '*' for SAM if it is empty.
111 myReadName = DEFAULT_READ_NAME;
112 myReferenceName = "*";
113 myMateReferenceName = "*";
114 myCigar = "*";
115 mySequence = "*";
116 mySeqWithEq.clear();
117 mySeqWithoutEq.clear();
118 myQuality = "*";
119 myNeedToSetTagsFromBuffer = false;
120 myNeedToSetTagsInBuffer = false;
121
122 // Initialize the calculated alignment info to the uncalculated value.
123 myAlignmentLength = -1;
124 myUnclippedStartOffset = -1;
125 myUnclippedEndOffset = -1;
126
127 clearTags();
128
129 // Set the bam values for the variable length fields.
130 // Only the read name needs to be set, the others are a length of 0.
131 // Set the read name. The min size of myRecordPtr includes the size for
132 // the default read name.
133 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
134 myRecordPtr->myReadNameLength);
135
136 // Set that the variable length buffer fields are valid.
137 myIsReadNameBufferValid = true;
138 myIsCigarBufferValid = true;
139 myPackedSequence =
140 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength +
141 myRecordPtr->myCigarLength * sizeof(int);
142 myIsSequenceBufferValid = true;
143 myBufferSequenceTranslation = NONE;
144
145 myPackedQuality = myPackedSequence;
146 myIsQualityBufferValid = true;
147 myIsTagsBufferValid = true;
148 myIsBinValid = true;
149
150 myCigarTempBufferLength = -1;
151
152 myStatus = SamStatus::SUCCESS;
153
154 NOT_FOUND_TAG_STRING = "";
155 NOT_FOUND_TAG_INT = -1; // TODO - deprecate
156}
157
158
159// Returns whether or not the record is valid.
160// Header is needed to perform some validation against it.
162{
163 myStatus = SamStatus::SUCCESS;
164 SamValidationErrors invalidSamErrors;
165 if(!SamValidator::isValid(header, *this, invalidSamErrors))
166 {
167 // The record is not valid.
168 std::string errorMessage = "";
169 invalidSamErrors.getErrorString(errorMessage);
170 myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
171 return(false);
172 }
173 // The record is valid.
174 return(true);
175}
176
177
179{
180 myRefPtr = reference;
181}
182
183
184// Set the type of sequence translation to use when getting
185// the sequence. The default type (if this method is never called) is
186// NONE (the sequence is left as-is). This is used
188{
189 mySequenceTranslation = translation;
190}
191
192
193bool SamRecord::setReadName(const char* readName)
194{
195 myReadName = readName;
196 myIsBufferSynced = false;
197 myIsReadNameBufferValid = false;
198 myStatus = SamStatus::SUCCESS;
199
200 // The read name must at least have some length, otherwise this is a parsing
201 // error.
202 if(myReadName.Length() == 0)
203 {
204 // Invalid - reset ReadName return false.
205 myReadName = DEFAULT_READ_NAME;
206 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
207 myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
208 return(false);
209 }
210
211 return true;
212}
213
214
215bool SamRecord::setFlag(uint16_t flag)
216{
217 myStatus = SamStatus::SUCCESS;
218 myRecordPtr->myFlag = flag;
219 return true;
220}
221
222
224 const char* referenceName)
225{
226 myStatus = SamStatus::SUCCESS;
227
228 myReferenceName = referenceName;
229 // If the reference ID does not already exist, add it (pass true)
230 myRecordPtr->myReferenceID = header.getReferenceID(referenceName, true);
231
232 return true;
233}
234
235
236bool SamRecord::set1BasedPosition(int32_t position)
237{
238 return(set0BasedPosition(position - 1));
239}
240
241
242bool SamRecord::set0BasedPosition(int32_t position)
243{
244 myStatus = SamStatus::SUCCESS;
245 myRecordPtr->myPosition = position;
246 myIsBinValid = false;
247 return true;
248}
249
250
251bool SamRecord::setMapQuality(uint8_t mapQuality)
252{
253 myStatus = SamStatus::SUCCESS;
254 myRecordPtr->myMapQuality = mapQuality;
255 return true;
256}
257
258
259bool SamRecord::setCigar(const char* cigar)
260{
261 myStatus = SamStatus::SUCCESS;
262 myCigar = cigar;
263
264 myIsBufferSynced = false;
265 myIsCigarBufferValid = false;
266 myCigarTempBufferLength = -1;
267 myIsBinValid = false;
268
269 // Initialize the calculated alignment info to the uncalculated value.
270 myAlignmentLength = -1;
271 myUnclippedStartOffset = -1;
272 myUnclippedEndOffset = -1;
273
274 return true;
275}
276
277
278bool SamRecord::setCigar(const Cigar& cigar)
279{
280 myStatus = SamStatus::SUCCESS;
281 cigar.getCigarString(myCigar);
282
283 myIsBufferSynced = false;
284 myIsCigarBufferValid = false;
285 myCigarTempBufferLength = -1;
286 myIsBinValid = false;
287
288 // Initialize the calculated alignment info to the uncalculated value.
289 myAlignmentLength = -1;
290 myUnclippedStartOffset = -1;
291 myUnclippedEndOffset = -1;
292
293 return true;
294}
295
296
298 const char* mateReferenceName)
299{
300 myStatus = SamStatus::SUCCESS;
301 // Set the mate reference, if it is "=", set it to be equal
302 // to myReferenceName. This assumes that myReferenceName has already
303 // been called.
304 if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
305 {
306 myMateReferenceName = myReferenceName;
307 }
308 else
309 {
310 myMateReferenceName = mateReferenceName;
311 }
312
313 // Set the Mate Reference ID.
314 // If the reference ID does not already exist, add it (pass true)
315 myRecordPtr->myMateReferenceID =
316 header.getReferenceID(myMateReferenceName, true);
317
318 return true;
319}
320
321
322bool SamRecord::set1BasedMatePosition(int32_t matePosition)
323{
324 return(set0BasedMatePosition(matePosition - 1));
325}
326
327
328bool SamRecord::set0BasedMatePosition(int32_t matePosition)
329{
330 myStatus = SamStatus::SUCCESS;
331 myRecordPtr->myMatePosition = matePosition;
332 return true;
333}
334
335
336bool SamRecord::setInsertSize(int32_t insertSize)
337{
338 myStatus = SamStatus::SUCCESS;
339 myRecordPtr->myInsertSize = insertSize;
340 return true;
341}
342
343
344bool SamRecord::setSequence(const char* seq)
345{
346 myStatus = SamStatus::SUCCESS;
347 mySequence = seq;
348 mySeqWithEq.clear();
349 mySeqWithoutEq.clear();
350
351 myIsBufferSynced = false;
352 myIsSequenceBufferValid = false;
353 return true;
354}
355
356
357bool SamRecord::setQuality(const char* quality)
358{
359 myStatus = SamStatus::SUCCESS;
360 myQuality = quality;
361 myIsBufferSynced = false;
362 myIsQualityBufferValid = false;
363 return true;
364}
365
366
367//Shift indels to the left
369{
370 // Check to see whether or not the Cigar has already been
371 // set - this is determined by checking if alignment length
372 // is set since alignment length and the cigar are set
373 // at the same time.
374 if(myAlignmentLength == -1)
375 {
376 // Not been set, so calculate it.
377 parseCigar();
378 }
379
380 // Track whether or not there was a shift.
381 bool shifted = false;
382
383 // Cigar is set, so now myCigarRoller can be used.
384 // Track where in the read we are.
385 uint32_t currentPos = 0;
386
387 // Since the loop starts at 1 because the first operation can't be shifted,
388 // increment the currentPos past the first operation.
389 if(Cigar::foundInQuery(myCigarRoller[0]))
390 {
391 // This op was found in the read, increment the current position.
392 currentPos += myCigarRoller[0].count;
393 }
394
395 int numOps = myCigarRoller.size();
396
397 // Loop through the cigar operations from the 2nd operation since
398 // the first operation is already on the end and can't shift.
399 for(int currentOp = 1; currentOp < numOps; currentOp++)
400 {
401 if(myCigarRoller[currentOp].operation == Cigar::insert)
402 {
403 // For now, only shift a max of 1 operation.
404 int prevOpIndex = currentOp-1;
405 // Track the next op for seeing if it is the same as the
406 // previous for merging reasons.
407 int nextOpIndex = currentOp+1;
408 if(nextOpIndex == numOps)
409 {
410 // There is no next op, so set it equal to the current one.
411 nextOpIndex = currentOp;
412 }
413 // The start of the previous operation, so we know when we hit it
414 // so we don't shift past it.
415 uint32_t prevOpStart =
416 currentPos - myCigarRoller[prevOpIndex].count;
417
418 // We can only shift if the previous operation
419 if(!Cigar::isMatchOrMismatch(myCigarRoller[prevOpIndex]))
420 {
421 // TODO - shift past pads
422 // An insert is in the read, so increment the position.
423 currentPos += myCigarRoller[currentOp].count;
424 // Not a match/mismatch, so can't shift into it.
425 continue;
426 }
427
428 // It is a match or mismatch, so check to see if we can
429 // shift into it.
430
431 // The end of the insert is calculated by adding the size
432 // of this insert minus 1 to the start of the insert.
433 uint32_t insertEndPos =
434 currentPos + myCigarRoller[currentOp].count - 1;
435
436 // The insert starts at the current position.
437 uint32_t insertStartPos = currentPos;
438
439 // Loop as long as the position before the insert start
440 // matches the last character in the insert. If they match,
441 // the insert can be shifted one index left because the
442 // implied reference will not change. If they do not match,
443 // we can't shift because the implied reference would change.
444 // Stop loop when insertStartPos = prevOpStart, because we
445 // don't want to move past that.
446 while((insertStartPos > prevOpStart) &&
447 (getSequence(insertEndPos,BASES) ==
448 getSequence(insertStartPos - 1, BASES)))
449 {
450 // We can shift, so move the insert start & end one left.
451 --insertEndPos;
452 --insertStartPos;
453 }
454
455 // Determine if a shift has occurred.
456 int shiftLen = currentPos - insertStartPos;
457 if(shiftLen > 0)
458 {
459 // Shift occured, so adjust the cigar if the cigar will
460 // not become more operations.
461 // If the next operation is the same as the previous or
462 // if the insert and the previous operation switch positions
463 // then the cigar has the same number of operations.
464 // If the next operation is different, and the shift splits
465 // the previous operation in 2, then the cigar would
466 // become longer, so we do not want to shift.
467 if(myCigarRoller[nextOpIndex].operation ==
468 myCigarRoller[prevOpIndex].operation)
469 {
470 // The operations are the same, so merge them by adding
471 // the length of the shift to the next operation.
472 myCigarRoller.IncrementCount(nextOpIndex, shiftLen);
473 myCigarRoller.IncrementCount(prevOpIndex, -shiftLen);
474
475 // If the previous op length is 0, just remove that
476 // operation.
477 if(myCigarRoller[prevOpIndex].count == 0)
478 {
479 myCigarRoller.Remove(prevOpIndex);
480 }
481 shifted = true;
482 }
483 else
484 {
485 // Can only shift if the insert shifts past the
486 // entire previous operation, otherwise an operation
487 // would need to be added.
488 if(insertStartPos == prevOpStart)
489 {
490 // Swap the positions of the insert and the
491 // previous operation.
492 myCigarRoller.Update(currentOp,
493 myCigarRoller[prevOpIndex].operation,
494 myCigarRoller[prevOpIndex].count);
495 // Size of the previous op is the entire
496 // shift length.
497 myCigarRoller.Update(prevOpIndex,
499 shiftLen);
500 shifted = true;
501 }
502 }
503 }
504 // An insert is in the read, so increment the position.
505 currentPos += myCigarRoller[currentOp].count;
506 }
507 else if(Cigar::foundInQuery(myCigarRoller[currentOp]))
508 {
509 // This op was found in the read, increment the current position.
510 currentPos += myCigarRoller[currentOp].count;
511 }
512 }
513 if(shifted)
514 {
515 // TODO - setCigar is currently inefficient because later the cigar
516 // roller will be recalculated, but for now it will work.
517 setCigar(myCigarRoller);
518 }
519 return(shifted);
520}
521
522
523// Set the BAM record from the passeed in buffer of the specified size.
524// Note: The size includes the block size.
526 uint32_t fromBufferSize,
527 SamFileHeader& header)
528{
529 myStatus = SamStatus::SUCCESS;
530 if((fromBuffer == NULL) || (fromBufferSize == 0))
531 {
532 // Buffer is empty.
534 "Cannot parse an empty file.");
535 return(SamStatus::FAIL_PARSE);
536 }
537
538 // Clear the record.
539 resetRecord();
540
541 // allocate space for the record size.
542 if(!allocateRecordStructure(fromBufferSize))
543 {
544 // Failed to allocate space.
545 return(SamStatus::FAIL_MEM);
546 }
547
548 memcpy(myRecordPtr, fromBuffer, fromBufferSize);
549
550 setVariablesForNewBuffer(header);
551
552 // Return the status of the record.
553 return(SamStatus::SUCCESS);
554}
555
556
557// Read the BAM record from a file.
559 SamFileHeader& header)
560{
561 myStatus = SamStatus::SUCCESS;
562 if((filePtr == NULL) || (filePtr->isOpen() == false))
563 {
564 // File is not open, return failure.
566 "Can't read from an unopened file.");
567 return(SamStatus::FAIL_ORDER);
568 }
569
570 // Clear the record.
571 resetRecord();
572
573 // read the record size.
574 int numBytes =
575 ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));
576
577 // Check to see if the end of the file was hit and no bytes were read.
578 if(ifeof(filePtr) && (numBytes == 0))
579 {
580 // End of file, nothing was read, no more records.
581 std::string statusMsg = "No more records left to read, ";
582 statusMsg += filePtr->getFileName();
583 statusMsg += ".";
585 statusMsg.c_str());
587 }
588
589 if(numBytes != sizeof(int32_t))
590 {
591 // Failed to read the entire block size. Either the end of the file
592 // was reached early or there was an error.
593 if(ifeof(filePtr))
594 {
595 // Error: end of the file reached prior to reading the rest of the
596 // record.
597 std::string statusMsg = "EOF reached in the middle of a record, ";
598 statusMsg += filePtr->getFileName();
599 statusMsg += ".";
601 statusMsg.c_str());
602 return(SamStatus::FAIL_PARSE);
603 }
604 else
605 {
606 // Error reading.
607 std::string statusMsg = "Failed to read the record size, ";
608 statusMsg += filePtr->getFileName();
609 statusMsg += ".";
611 statusMsg.c_str());
612 return(SamStatus::FAIL_IO);
613 }
614 }
615
616 // allocate space for the record size.
617 if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
618 {
619 // Failed to allocate space.
620 // Status is set by allocateRecordStructure.
621 return(SamStatus::FAIL_MEM);
622 }
623
624 // Read the rest of the alignment block, starting at the reference id.
625 if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
626 != (unsigned int)myRecordPtr->myBlockSize)
627 {
628 // Error reading the record. Reset it and return failure.
629 resetRecord();
630 std::string statusMsg = "Failed to read the record, ";
631 statusMsg += filePtr->getFileName();
632 statusMsg += ".";
634 statusMsg.c_str());
635 return(SamStatus::FAIL_IO);
636 }
637
638 setVariablesForNewBuffer(header);
639
640 // Return the status of the record.
641 return(SamStatus::SUCCESS);
642}
643
644
645// Add the specified tag to the record.
646// Returns true if the tag was successfully added, false otherwise.
647bool SamRecord::addIntTag(const char* tag, int32_t value)
648{
649 myStatus = SamStatus::SUCCESS;
650 int key = 0;
651 int index = 0;
652 char bamvtype;
653
654 int tagBufferSize = 0;
655
656 // First check to see if the tags need to be synced to the buffer.
657 if(myNeedToSetTagsFromBuffer)
658 {
659 if(!setTagsFromBuffer())
660 {
661 // Failed to read tags from the buffer, so cannot add new ones.
662 return(false);
663 }
664 }
665
666 // Ints come in as int. But it can be represented in fewer bits.
667 // So determine a more specific type that is in line with the
668 // types for BAM files.
669 // First check to see if it is a negative.
670 if(value < 0)
671 {
672 // The int is negative, so it will need to use a signed type.
673 // See if it is greater than the min value for a char.
674 if(value > ((std::numeric_limits<char>::min)()))
675 {
676 // It can be stored in a signed char.
677 bamvtype = 'c';
678 tagBufferSize += 4;
679 }
680 else if(value > ((std::numeric_limits<short>::min)()))
681 {
682 // It fits in a signed short.
683 bamvtype = 's';
684 tagBufferSize += 5;
685 }
686 else
687 {
688 // Just store it as a signed int.
689 bamvtype = 'i';
690 tagBufferSize += 7;
691 }
692 }
693 else
694 {
695 // It is positive, so an unsigned type can be used.
696 if(value < ((std::numeric_limits<unsigned char>::max)()))
697 {
698 // It is under the max of an unsigned char.
699 bamvtype = 'C';
700 tagBufferSize += 4;
701 }
702 else if(value < ((std::numeric_limits<unsigned short>::max)()))
703 {
704 // It is under the max of an unsigned short.
705 bamvtype = 'S';
706 tagBufferSize += 5;
707 }
708 else
709 {
710 // Just store it as an unsigned int.
711 bamvtype = 'I';
712 tagBufferSize += 7;
713 }
714 }
715
716 // Check to see if the tag is already there.
717 key = MAKEKEY(tag[0], tag[1], bamvtype);
718 unsigned int hashIndex = extras.Find(key);
719 if(hashIndex != LH_NOTFOUND)
720 {
721 // Tag was already found.
722 index = extras[hashIndex];
723
724 // Since the tagBufferSize was already updated with the new value,
725 // subtract the size for the previous tag (even if they are the same).
726 switch(intType[index])
727 {
728 case 'c':
729 case 'C':
730 case 'A':
731 tagBufferSize -= 4;
732 break;
733 case 's':
734 case 'S':
735 tagBufferSize -= 5;
736 break;
737 case 'i':
738 case 'I':
739 tagBufferSize -= 7;
740 break;
741 default:
743 "unknown tag inttype type found.\n");
744 return(false);
745 }
746
747 // Tag already existed, print message about overwriting.
748 // WARN about dropping duplicate tags.
749 if(myNumWarns++ < myMaxWarns)
750 {
751 String newVal;
752 String origVal;
753 appendIntArrayValue(index, origVal);
754 appendIntArrayValue(bamvtype, value, newVal);
755 fprintf(stderr, "WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
756 tag[0], tag[1], intType[index], origVal.c_str(), tag[0], tag[1], bamvtype, newVal.c_str());
757 if(myNumWarns == myMaxWarns)
758 {
759 fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
760 }
761 }
762
763 // Update the integer value and type.
764 integers[index] = value;
765 intType[index] = bamvtype;
766 }
767 else
768 {
769 // Tag is not already there, so add it.
770 index = integers.Length();
771
772 integers.Push(value);
773 intType.push_back(bamvtype);
774
775 extras.Add(key, index);
776 }
777
778 // The buffer tags are now out of sync.
779 myNeedToSetTagsInBuffer = true;
780 myIsTagsBufferValid = false;
781 myIsBufferSynced = false;
782 myTagBufferSize += tagBufferSize;
783
784 return(true);
785}
786
787
788// Add the specified tag to the record, replacing it if it is already there and
789// is different from the previous value.
790// Returns true if the tag was successfully added (or was already there), false otherwise.
791bool SamRecord::addTag(const char* tag, char vtype, const char* valuePtr)
792{
793 if(vtype == 'i')
794 {
795 // integer type. Call addIntTag to handle it.
796 int intVal = atoi(valuePtr);
797 return(addIntTag(tag, intVal));
798 }
799
800 // Non-int type.
801 myStatus = SamStatus::SUCCESS;
802 bool status = true; // default to successful.
803 int key = 0;
804 int index = 0;
805
806 int tagBufferSize = 0;
807
808 // First check to see if the tags need to be synced to the buffer.
809 if(myNeedToSetTagsFromBuffer)
810 {
811 if(!setTagsFromBuffer())
812 {
813 // Failed to read tags from the buffer, so cannot add new ones.
814 return(false);
815 }
816 }
817
818 // First check to see if the tag is already there.
819 key = MAKEKEY(tag[0], tag[1], vtype);
820 unsigned int hashIndex = extras.Find(key);
821 if(hashIndex != LH_NOTFOUND)
822 {
823 // The key was found in the hash, so get the lookup index.
824 index = extras[hashIndex];
825
826 String origTag;
827 char origType = vtype;
828
829 // Adjust the currently pointed to value to the new setting.
830 switch (vtype)
831 {
832 case 'A' :
833 // First check to see if the value changed.
834 if((integers[index] == (const int)*(valuePtr)) &&
835 (intType[index] == vtype))
836 {
837 // The value & type has not changed, so do nothing.
838 return(true);
839 }
840 else
841 {
842 // Tag buffer size changes if type changes, so subtract & add.
843 origType = intType[index];
844 appendIntArrayValue(index, origTag);
845 tagBufferSize -= getNumericTagTypeSize(intType[index]);
846 tagBufferSize += getNumericTagTypeSize(vtype);
847 integers[index] = (const int)*(valuePtr);
848 intType[index] = vtype;
849 }
850 break;
851 case 'Z' :
852 // First check to see if the value changed.
853 if(strings[index] == valuePtr)
854 {
855 // The value has not changed, so do nothing.
856 return(true);
857 }
858 else
859 {
860 // Adjust the tagBufferSize by removing the size of the old string.
861 origTag = strings[index];
862 tagBufferSize -= strings[index].Length();
863 strings[index] = valuePtr;
864 // Adjust the tagBufferSize by adding the size of the new string.
865 tagBufferSize += strings[index].Length();
866 }
867 break;
868 case 'B' :
869 // First check to see if the value changed.
870 if(strings[index] == valuePtr)
871 {
872 // The value has not changed, so do nothing.
873 return(true);
874 }
875 else
876 {
877 // Adjust the tagBufferSize by removing the size of the old field.
878 origTag = strings[index];
879 tagBufferSize -= getBtagBufferSize(strings[index]);
880 strings[index] = valuePtr;
881 // Adjust the tagBufferSize by adding the size of the new field.
882 tagBufferSize += getBtagBufferSize(strings[index]);
883 }
884 break;
885 case 'f' :
886 // First check to see if the value changed.
887 if(floats[index] == (float)atof(valuePtr))
888 {
889 // The value has not changed, so do nothing.
890 return(true);
891 }
892 else
893 {
894 // Tag buffer size doesn't change between different 'f' entries.
895 origTag.appendFullFloat(floats[index]);
896 floats[index] = (float)atof(valuePtr);
897 }
898 break;
899 default :
900 fprintf(stderr,
901 "samRecord::addTag() - Unknown custom field of type %c\n",
902 vtype);
904 "Unknown custom field in a tag");
905 status = false;
906 break;
907 }
908
909 // Duplicate tag in this record.
910 // Tag already existed, print message about overwriting.
911 // WARN about dropping duplicate tags.
912 if(myNumWarns++ < myMaxWarns)
913 {
914 fprintf(stderr, "WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
915 tag[0], tag[1], origType, origTag.c_str(), tag[0], tag[1], vtype, valuePtr);
916 if(myNumWarns == myMaxWarns)
917 {
918 fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
919 }
920 }
921 }
922 else
923 {
924 // The key was not found in the hash, so add it.
925 switch (vtype)
926 {
927 case 'A' :
928 index = integers.Length();
929 integers.Push((const int)*(valuePtr));
930 intType.push_back(vtype);
931 tagBufferSize += 4;
932 break;
933 case 'Z' :
934 index = strings.Length();
935 strings.Push(valuePtr);
936 tagBufferSize += 4 + strings.Last().Length();
937 break;
938 case 'B' :
939 index = strings.Length();
940 strings.Push(valuePtr);
941 tagBufferSize += 3 + getBtagBufferSize(strings[index]);
942 break;
943 case 'f' :
944 index = floats.size();
945 floats.push_back((float)atof(valuePtr));
946 tagBufferSize += 7;
947 break;
948 default :
949 fprintf(stderr,
950 "samRecord::addTag() - Unknown custom field of type %c\n",
951 vtype);
953 "Unknown custom field in a tag");
954 status = false;
955 break;
956 }
957 if(status)
958 {
959 // If successful, add the key to extras.
960 extras.Add(key, index);
961 }
962 }
963
964 // Only add the tag if it has so far been successfully processed.
965 if(status)
966 {
967 // The buffer tags are now out of sync.
968 myNeedToSetTagsInBuffer = true;
969 myIsTagsBufferValid = false;
970 myIsBufferSynced = false;
971 myTagBufferSize += tagBufferSize;
972 }
973 return(status);
974}
975
976
978{
979 if(extras.Entries() != 0)
980 {
981 extras.Clear();
982 }
983 strings.Clear();
984 integers.Clear();
985 intType.clear();
986 floats.clear();
987 myTagBufferSize = 0;
988 resetTagIter();
989}
990
991
992bool SamRecord::rmTag(const char* tag, char type)
993{
994 // Check the length of tag.
995 if(strlen(tag) != 2)
996 {
997 // Tag is the wrong length.
999 "rmTag called with tag that is not 2 characters\n");
1000 return(false);
1001 }
1002
1003 myStatus = SamStatus::SUCCESS;
1004 if(myNeedToSetTagsFromBuffer)
1005 {
1006 if(!setTagsFromBuffer())
1007 {
1008 // Failed to read the tags from the buffer, so cannot
1009 // get tags.
1010 return(false);
1011 }
1012 }
1013
1014 // Construct the key.
1015 int key = MAKEKEY(tag[0], tag[1], type);
1016 // Look to see if the key exsists in the hash.
1017 int offset = extras.Find(key);
1018
1019 if(offset < 0)
1020 {
1021 // Not found, so return true, successfully removed since
1022 // it is not in tag.
1023 return(true);
1024 }
1025
1026 // Offset is set, so the key was found.
1027 // First if it is an integer, determine the actual type of the int.
1028 char vtype;
1029 getTypeFromKey(key, vtype);
1030 if(vtype == 'i')
1031 {
1032 vtype = getIntegerType(offset);
1033 }
1034
1035 // Offset is set, so recalculate the buffer size without this entry.
1036 // Do NOT remove from strings, integers, or floats because then
1037 // extras would need to be updated for all entries with the new indexes
1038 // into those variables.
1039 int rmBuffSize = 0;
1040 switch(vtype)
1041 {
1042 case 'A':
1043 case 'c':
1044 case 'C':
1045 rmBuffSize = 4;
1046 break;
1047 case 's':
1048 case 'S':
1049 rmBuffSize = 5;
1050 break;
1051 case 'i':
1052 case 'I':
1053 rmBuffSize = 7;
1054 break;
1055 case 'f':
1056 rmBuffSize = 7;
1057 break;
1058 case 'Z':
1059 rmBuffSize = 4 + getString(offset).Length();
1060 break;
1061 case 'B':
1062 rmBuffSize = 3 + getBtagBufferSize(getString(offset));
1063 break;
1064 default:
1065 myStatus.setStatus(SamStatus::INVALID,
1066 "rmTag called with unknown type.\n");
1067 return(false);
1068 break;
1069 };
1070
1071 // The buffer tags are now out of sync.
1072 myNeedToSetTagsInBuffer = true;
1073 myIsTagsBufferValid = false;
1074 myIsBufferSynced = false;
1075 myTagBufferSize -= rmBuffSize;
1076
1077 // Remove from the hash.
1078 extras.Delete(offset);
1079 return(true);
1080}
1081
1082
1083bool SamRecord::rmTags(const char* tags)
1084{
1085 const char* currentTagPtr = tags;
1086
1087 myStatus = SamStatus::SUCCESS;
1088 if(myNeedToSetTagsFromBuffer)
1089 {
1090 if(!setTagsFromBuffer())
1091 {
1092 // Failed to read the tags from the buffer, so cannot
1093 // get tags.
1094 return(false);
1095 }
1096 }
1097
1098 bool returnStatus = true;
1099
1100 int rmBuffSize = 0;
1101 while(*currentTagPtr != '\0')
1102 {
1103
1104 // Tags are formatted as: XY:Z
1105 // Where X is [A-Za-z], Y is [A-Za-z], and
1106 // Z is A,i,f,Z,H (cCsSI are also excepted)
1107 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
1108 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
1109 {
1110 myStatus.setStatus(SamStatus::INVALID,
1111 "rmTags called with improperly formatted tags.\n");
1112 returnStatus = false;
1113 break;
1114 }
1115
1116 // Construct the key.
1117 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
1118 currentTagPtr[3]);
1119 // Look to see if the key exsists in the hash.
1120 int offset = extras.Find(key);
1121
1122 if(offset >= 0)
1123 {
1124 // Offset is set, so the key was found.
1125 // First if it is an integer, determine the actual type of the int.
1126 char vtype;
1127 getTypeFromKey(key, vtype);
1128 if(vtype == 'i')
1129 {
1130 vtype = getIntegerType(offset);
1131 }
1132
1133 // Offset is set, so recalculate the buffer size without this entry.
1134 // Do NOT remove from strings, integers, or floats because then
1135 // extras would need to be updated for all entries with the new indexes
1136 // into those variables.
1137 switch(vtype)
1138 {
1139 case 'A':
1140 case 'c':
1141 case 'C':
1142 rmBuffSize += 4;
1143 break;
1144 case 's':
1145 case 'S':
1146 rmBuffSize += 5;
1147 break;
1148 case 'i':
1149 case 'I':
1150 rmBuffSize += 7;
1151 break;
1152 case 'f':
1153 rmBuffSize += 7;
1154 break;
1155 case 'Z':
1156 rmBuffSize += 4 + getString(offset).Length();
1157 break;
1158 case 'B':
1159 rmBuffSize += 3 + getBtagBufferSize(getString(offset));
1160 break;
1161 default:
1162 myStatus.setStatus(SamStatus::INVALID,
1163 "rmTag called with unknown type.\n");
1164 returnStatus = false;
1165 break;
1166 };
1167
1168 // Remove from the hash.
1169 extras.Delete(offset);
1170 }
1171 // Increment to the next tag.
1172 if((currentTagPtr[4] == ';') || (currentTagPtr[4] == ','))
1173 {
1174 // Increment once more.
1175 currentTagPtr += 5;
1176 }
1177 else if(currentTagPtr[4] != '\0')
1178 {
1179 // Invalid tag format.
1180 myStatus.setStatus(SamStatus::INVALID,
1181 "rmTags called with improperly formatted tags.\n");
1182 returnStatus = false;
1183 break;
1184 }
1185 else
1186 {
1187 // Last Tag.
1188 currentTagPtr += 4;
1189 }
1190 }
1191
1192 // The buffer tags are now out of sync.
1193 myNeedToSetTagsInBuffer = true;
1194 myIsTagsBufferValid = false;
1195 myIsBufferSynced = false;
1196 myTagBufferSize -= rmBuffSize;
1197
1198
1199 return(returnStatus);
1200}
1201
1202
1203// Get methods for record fields.
1205{
1206 return(getRecordBuffer(mySequenceTranslation));
1207}
1208
1209
1210// Get methods for record fields.
1212{
1213 myStatus = SamStatus::SUCCESS;
1214 bool status = true;
1215 // If the buffer is not synced or the sequence in the buffer is not
1216 // properly translated, fix the buffer.
1217 if((myIsBufferSynced == false) ||
1218 (myBufferSequenceTranslation != translation))
1219 {
1220 status &= fixBuffer(translation);
1221 }
1222 // If the buffer is synced, check to see if the tags need to be synced.
1223 if(myNeedToSetTagsInBuffer)
1224 {
1225 status &= setTagsInBuffer();
1226 }
1227 if(!status)
1228 {
1229 return(NULL);
1230 }
1231 return (const void *)myRecordPtr;
1232}
1233
1234
1235// Write the record as a buffer into the file using the class's
1236// sequence translation setting.
1238{
1239 return(writeRecordBuffer(filePtr, mySequenceTranslation));
1240}
1241
1242
1243// Write the record as a buffer into the file using the specified translation.
1245 SequenceTranslation translation)
1246{
1247 myStatus = SamStatus::SUCCESS;
1248 if((filePtr == NULL) || (filePtr->isOpen() == false))
1249 {
1250 // File is not open, return failure.
1252 "Can't write to an unopened file.");
1253 return(SamStatus::FAIL_ORDER);
1254 }
1255
1256 if((myIsBufferSynced == false) ||
1257 (myBufferSequenceTranslation != translation))
1258 {
1259 if(!fixBuffer(translation))
1260 {
1261 return(myStatus.getStatus());
1262 }
1263 }
1264
1265 // Write the record.
1266 unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
1267 unsigned int numBytesWritten =
1268 ifwrite(filePtr, myRecordPtr, numBytesToWrite);
1269
1270 // Return status based on if the correct number of bytes were written.
1271 if(numBytesToWrite == numBytesWritten)
1272 {
1273 return(SamStatus::SUCCESS);
1274 }
1275 // The correct number of bytes were not written.
1276 myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
1277 return(SamStatus::FAIL_IO);
1278}
1279
1280
1282{
1283 myStatus = SamStatus::SUCCESS;
1284 // If the buffer isn't synced, sync the buffer to determine the
1285 // block size.
1286 if(myIsBufferSynced == false)
1287 {
1288 // Since this just returns the block size, the translation of
1289 // the sequence does not matter, so just use the currently set
1290 // value.
1291 fixBuffer(myBufferSequenceTranslation);
1292 }
1293 return myRecordPtr->myBlockSize;
1294}
1295
1296
1297// This method returns the reference name.
1299{
1300 myStatus = SamStatus::SUCCESS;
1301 return myReferenceName.c_str();
1302}
1303
1304
1306{
1307 myStatus = SamStatus::SUCCESS;
1308 return myRecordPtr->myReferenceID;
1309}
1310
1311
1313{
1314 myStatus = SamStatus::SUCCESS;
1315 return (myRecordPtr->myPosition + 1);
1316}
1317
1318
1320{
1321 myStatus = SamStatus::SUCCESS;
1322 return myRecordPtr->myPosition;
1323}
1324
1325
1327{
1328 myStatus = SamStatus::SUCCESS;
1329 // If the buffer is valid, return the size from there, otherwise get the
1330 // size from the string length + 1 (ending null).
1331 if(myIsReadNameBufferValid)
1332 {
1333 return(myRecordPtr->myReadNameLength);
1334 }
1335
1336 return(myReadName.Length() + 1);
1337}
1338
1339
1341{
1342 myStatus = SamStatus::SUCCESS;
1343 return myRecordPtr->myMapQuality;
1344}
1345
1346
1348{
1349 myStatus = SamStatus::SUCCESS;
1350 if(!myIsBinValid)
1351 {
1352 // The bin that is set in the record is not valid, so
1353 // reset it.
1354 myRecordPtr->myBin =
1355 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
1356 myIsBinValid = true;
1357 }
1358 return(myRecordPtr->myBin);
1359}
1360
1361
1363{
1364 myStatus = SamStatus::SUCCESS;
1365 // If the cigar buffer is valid
1366 // then get the length from there.
1367 if(myIsCigarBufferValid)
1368 {
1369 return myRecordPtr->myCigarLength;
1370 }
1371
1372 if(myCigarTempBufferLength == -1)
1373 {
1374 // The cigar buffer is not valid and the cigar temp buffer is not set,
1375 // so parse the string.
1376 parseCigarString();
1377 }
1378
1379 // The temp buffer is now set, so return the size.
1380 return(myCigarTempBufferLength);
1381}
1382
1383
1385{
1386 myStatus = SamStatus::SUCCESS;
1387 return myRecordPtr->myFlag;
1388}
1389
1390
1392{
1393 myStatus = SamStatus::SUCCESS;
1394 if(myIsSequenceBufferValid == false)
1395 {
1396 // If the sequence is "*", then return 0.
1397 if((mySequence.Length() == 1) && (mySequence[0] == '*'))
1398 {
1399 return(0);
1400 }
1401 // Do not add 1 since it is not null terminated.
1402 return(mySequence.Length());
1403 }
1404 return(myRecordPtr->myReadLength);
1405}
1406
1407
1408// This method returns the mate reference name. If it is equal to the
1409// reference name, it still returns the reference name.
1411{
1412 myStatus = SamStatus::SUCCESS;
1413 return myMateReferenceName.c_str();
1414}
1415
1416
1417// This method returns the mate reference name. If it is equal to the
1418// reference name, it returns "=", unless they are both "*" in which case
1419// "*" is returned.
1421{
1422 myStatus = SamStatus::SUCCESS;
1423 if(myMateReferenceName == "*")
1424 {
1425 return(myMateReferenceName);
1426 }
1427 if(myMateReferenceName == getReferenceName())
1428 {
1429 return(FIELD_ABSENT_STRING);
1430 }
1431 else
1432 {
1433 return(myMateReferenceName);
1434 }
1435}
1436
1437
1439{
1440 myStatus = SamStatus::SUCCESS;
1441 return myRecordPtr->myMateReferenceID;
1442}
1443
1444
1446{
1447 myStatus = SamStatus::SUCCESS;
1448 return (myRecordPtr->myMatePosition + 1);
1449}
1450
1451
1453{
1454 myStatus = SamStatus::SUCCESS;
1455 return myRecordPtr->myMatePosition;
1456}
1457
1458
1460{
1461 myStatus = SamStatus::SUCCESS;
1462 return myRecordPtr->myInsertSize;
1463}
1464
1465
1466// Returns the inclusive rightmost position of the clipped sequence.
1468{
1469 myStatus = SamStatus::SUCCESS;
1470 if(myAlignmentLength == -1)
1471 {
1472 // Alignment end has not been set, so calculate it.
1473 parseCigar();
1474 }
1475 // If alignment length > 0, subtract 1 from it to get the end.
1476 if(myAlignmentLength == 0)
1477 {
1478 // Length is 0, just return the start position.
1479 return(myRecordPtr->myPosition);
1480 }
1481 return(myRecordPtr->myPosition + myAlignmentLength - 1);
1482}
1483
1484
1485// Returns the inclusive rightmost position of the clipped sequence.
1487{
1488 return(get0BasedAlignmentEnd() + 1);
1489}
1490
1491
1492// Return the length of the alignment.
1494{
1495 myStatus = SamStatus::SUCCESS;
1496 if(myAlignmentLength == -1)
1497 {
1498 // Alignment end has not been set, so calculate it.
1499 parseCigar();
1500 }
1501 // Return the alignment length.
1502 return(myAlignmentLength);
1503}
1504
1505// Returns the inclusive left-most position adjust for clipped bases.
1507{
1508 myStatus = SamStatus::SUCCESS;
1509 if(myUnclippedStartOffset == -1)
1510 {
1511 // Unclipped has not yet been calculated, so parse the cigar to get it
1512 parseCigar();
1513 }
1514 return(myRecordPtr->myPosition - myUnclippedStartOffset);
1515}
1516
1517
1518// Returns the inclusive left-most position adjust for clipped bases.
1520{
1521 return(get0BasedUnclippedStart() + 1);
1522}
1523
1524
1525// Returns the inclusive right-most position adjust for clipped bases.
1527{
1528 // myUnclippedEndOffset will be set by get0BasedAlignmentEnd if the
1529 // cigar has not yet been parsed, so no need to check it here.
1530 return(get0BasedAlignmentEnd() + myUnclippedEndOffset);
1531}
1532
1533
1534// Returns the inclusive right-most position adjust for clipped bases.
1536{
1537 return(get0BasedUnclippedEnd() + 1);
1538}
1539
1540
1541// Get the read name.
1543{
1544 myStatus = SamStatus::SUCCESS;
1545 if(myReadName.Length() == 0)
1546 {
1547 // 0 Length, means that it is in the buffer, but has not yet
1548 // been synced to the string, so do the sync.
1549 myReadName = (char*)&(myRecordPtr->myData);
1550 }
1551 return myReadName.c_str();
1552}
1553
1554
1556{
1557 myStatus = SamStatus::SUCCESS;
1558 if(myCigar.Length() == 0)
1559 {
1560 // 0 Length, means that it is in the buffer, but has not yet
1561 // been synced to the string, so do the sync.
1562 parseCigarBinary();
1563 }
1564 return myCigar.c_str();
1565}
1566
1567
1569{
1570 return(getSequence(mySequenceTranslation));
1571}
1572
1573
1575{
1576 myStatus = SamStatus::SUCCESS;
1577 if(mySequence.Length() == 0)
1578 {
1579 // 0 Length, means that it is in the buffer, but has not yet
1580 // been synced to the string, so do the sync.
1581 setSequenceAndQualityFromBuffer();
1582 }
1583
1584 // Determine if translation needs to be done.
1585 if((translation == NONE) || (myRefPtr == NULL))
1586 {
1587 return mySequence.c_str();
1588 }
1589 else if(translation == EQUAL)
1590 {
1591 if(mySeqWithEq.length() == 0)
1592 {
1593 // Check to see if the sequence is defined.
1594 if(mySequence == "*")
1595 {
1596 // Sequence is undefined, so no translation necessary.
1597 mySeqWithEq = '*';
1598 }
1599 else
1600 {
1601 // Sequence defined, so translate it.
1602 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
1603 myRecordPtr->myPosition,
1604 *(getCigarInfo()),
1606 *myRefPtr,
1607 mySeqWithEq);
1608 }
1609 }
1610 return(mySeqWithEq.c_str());
1611 }
1612 else
1613 {
1614 // translation == BASES
1615 if(mySeqWithoutEq.length() == 0)
1616 {
1617 if(mySequence == "*")
1618 {
1619 // Sequence is undefined, so no translation necessary.
1620 mySeqWithoutEq = '*';
1621 }
1622 else
1623 {
1624 // Sequence defined, so translate it.
1625 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
1626 myRecordPtr->myPosition,
1627 *(getCigarInfo()),
1629 *myRefPtr,
1630 mySeqWithoutEq);
1631 }
1632 }
1633 return(mySeqWithoutEq.c_str());
1634 }
1635}
1636
1637
1639{
1640 myStatus = SamStatus::SUCCESS;
1641 if(myQuality.Length() == 0)
1642 {
1643 // 0 Length, means that it is in the buffer, but has not yet
1644 // been synced to the string, so do the sync.
1645 setSequenceAndQualityFromBuffer();
1646 }
1647 return myQuality.c_str();
1648}
1649
1650
1652{
1653 return(getSequence(index, mySequenceTranslation));
1654}
1655
1656
1658{
1659 static const char * asciiBases = "=AC.G...T......N";
1660
1661 // Determine the read length.
1662 int32_t readLen = getReadLength();
1663
1664 // If the read length is 0, this method should not be called.
1665 if(readLen == 0)
1666 {
1667 String exceptionString = "SamRecord::getSequence(";
1668 exceptionString += index;
1669 exceptionString += ") is not allowed since sequence = '*'";
1670 throw std::runtime_error(exceptionString.c_str());
1671 }
1672 else if((index < 0) || (index >= readLen))
1673 {
1674 // Only get here if the index was out of range, so thow an exception.
1675 String exceptionString = "SamRecord::getSequence(";
1676 exceptionString += index;
1677 exceptionString += ") is out of range. Index must be between 0 and ";
1678 exceptionString += (readLen - 1);
1679 throw std::runtime_error(exceptionString.c_str());
1680 }
1681
1682 // Determine if translation needs to be done.
1683 if((translation == NONE) || (myRefPtr == NULL))
1684 {
1685 // No translation needs to be done.
1686 if(mySequence.Length() == 0)
1687 {
1688 // Parse BAM sequence.
1689 if(myIsSequenceBufferValid)
1690 {
1691 return(index & 1 ?
1692 asciiBases[myPackedSequence[index / 2] & 0xF] :
1693 asciiBases[myPackedSequence[index / 2] >> 4]);
1694 }
1695 else
1696 {
1697 String exceptionString = "SamRecord::getSequence(";
1698 exceptionString += index;
1699 exceptionString += ") called with no sequence set";
1700 throw std::runtime_error(exceptionString.c_str());
1701 }
1702 }
1703 // Already have string.
1704 return(mySequence[index]);
1705 }
1706 else
1707 {
1708 // Need to translate the sequence either to have '=' or to not
1709 // have it.
1710 // First check to see if the sequence has been set.
1711 if(mySequence.Length() == 0)
1712 {
1713 // 0 Length, means that it is in the buffer, but has not yet
1714 // been synced to the string, so do the sync.
1715 setSequenceAndQualityFromBuffer();
1716 }
1717
1718 // Check the type of translation.
1719 if(translation == EQUAL)
1720 {
1721 // Check whether or not the string has already been
1722 // retrieved that has the '=' in it.
1723 if(mySeqWithEq.length() == 0)
1724 {
1725 // The string with '=' has not yet been determined,
1726 // so get the string.
1727 // Check to see if the sequence is defined.
1728 if(mySequence == "*")
1729 {
1730 // Sequence is undefined, so no translation necessary.
1731 mySeqWithEq = '*';
1732 }
1733 else
1734 {
1735 // Sequence defined, so translate it.
1736 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
1737 myRecordPtr->myPosition,
1738 *(getCigarInfo()),
1740 *myRefPtr,
1741 mySeqWithEq);
1742 }
1743 }
1744 // Sequence is set, so return it.
1745 return(mySeqWithEq[index]);
1746 }
1747 else
1748 {
1749 // translation == BASES
1750 // Check whether or not the string has already been
1751 // retrieved that does not have the '=' in it.
1752 if(mySeqWithoutEq.length() == 0)
1753 {
1754 // The string with '=' has not yet been determined,
1755 // so get the string.
1756 // Check to see if the sequence is defined.
1757 if(mySequence == "*")
1758 {
1759 // Sequence is undefined, so no translation necessary.
1760 mySeqWithoutEq = '*';
1761 }
1762 else
1763 {
1764 // Sequence defined, so translate it.
1765 // The string without '=' has not yet been determined,
1766 // so get the string.
1767 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
1768 myRecordPtr->myPosition,
1769 *(getCigarInfo()),
1771 *myRefPtr,
1772 mySeqWithoutEq);
1773 }
1774 }
1775 // Sequence is set, so return it.
1776 return(mySeqWithoutEq[index]);
1777 }
1778 }
1779}
1780
1781
1783{
1784 // Determine the read length.
1785 int32_t readLen = getReadLength();
1786
1787 // If the read length is 0, return ' ' whose ascii code is below
1788 // the minimum ascii code for qualities.
1789 if(readLen == 0)
1790 {
1792 }
1793 else if((index < 0) || (index >= readLen))
1794 {
1795 // Only get here if the index was out of range, so thow an exception.
1796 String exceptionString = "SamRecord::getQuality(";
1797 exceptionString += index;
1798 exceptionString += ") is out of range. Index must be between 0 and ";
1799 exceptionString += (readLen - 1);
1800 throw std::runtime_error(exceptionString.c_str());
1801 }
1802
1803 if(myQuality.Length() == 0)
1804 {
1805 // Parse BAM Quality.
1806 // Know that myPackedQuality is correct since readLen != 0.
1807 return(myPackedQuality[index] + 33);
1808 }
1809 else
1810 {
1811 // Already have string.
1812 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
1813 {
1814 // Return the unknown quality character.
1816 }
1817 else if(index >= myQuality.Length())
1818 {
1819 // Only get here if the index was out of range, so thow an exception.
1820 // Technically the myQuality string is not guaranteed to be the same length
1821 // as the sequence, so this catches that error.
1822 String exceptionString = "SamRecord::getQuality(";
1823 exceptionString += index;
1824 exceptionString += ") is out of range. Index must be between 0 and ";
1825 exceptionString += (myQuality.Length() - 1);
1826 throw std::runtime_error(exceptionString.c_str());
1827 }
1828 else
1829 {
1830 return(myQuality[index]);
1831 }
1832 }
1833}
1834
1835
1837{
1838 // Check to see whether or not the Cigar has already been
1839 // set - this is determined by checking if alignment length
1840 // is set since alignment length and the cigar are set
1841 // at the same time.
1842 if(myAlignmentLength == -1)
1843 {
1844 // Not been set, so calculate it.
1845 parseCigar();
1846 }
1847 return(&myCigarRoller);
1848}
1849
1850
1851// Return the number of bases in this read that overlap the passed in
1852// region. (start & end are 0-based)
1853uint32_t SamRecord::getNumOverlaps(int32_t start, int32_t end)
1854{
1855 // Determine whether or not the cigar has been parsed, which sets up
1856 // the cigar roller. This is determined by checking the alignment length.
1857 if(myAlignmentLength == -1)
1858 {
1859 parseCigar();
1860 }
1861 return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
1862}
1863
1864
1865// Returns the values of all fields except the tags.
1866bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
1867 String& cigar, String& sequence, String& quality)
1868{
1869 return(getFields(recStruct, readName, cigar, sequence, quality,
1870 mySequenceTranslation));
1871}
1872
1873
1874// Returns the values of all fields except the tags.
1875bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
1876 String& cigar, String& sequence, String& quality,
1877 SequenceTranslation translation)
1878{
1879 myStatus = SamStatus::SUCCESS;
1880 if(myIsBufferSynced == false)
1881 {
1882 if(!fixBuffer(translation))
1883 {
1884 // failed to set the buffer, return false.
1885 return(false);
1886 }
1887 }
1888 memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));
1889
1890 readName = getReadName();
1891 // Check the status.
1892 if(myStatus != SamStatus::SUCCESS)
1893 {
1894 // Failed to set the fields, return false.
1895 return(false);
1896 }
1897 cigar = getCigar();
1898 // Check the status.
1899 if(myStatus != SamStatus::SUCCESS)
1900 {
1901 // Failed to set the fields, return false.
1902 return(false);
1903 }
1904 sequence = getSequence(translation);
1905 // Check the status.
1906 if(myStatus != SamStatus::SUCCESS)
1907 {
1908 // Failed to set the fields, return false.
1909 return(false);
1910 }
1911 quality = getQuality();
1912 // Check the status.
1913 if(myStatus != SamStatus::SUCCESS)
1914 {
1915 // Failed to set the fields, return false.
1916 return(false);
1917 }
1918 return(true);
1919}
1920
1921
1922// Returns the reference pointer.
1924{
1925 return(myRefPtr);
1926}
1927
1928
1930{
1931 myStatus = SamStatus::SUCCESS;
1932 if(myNeedToSetTagsFromBuffer)
1933 {
1934 // Tags are only set in the buffer, so the size of the tags is
1935 // the length of the record minus the starting location of the tags.
1936 unsigned char * tagStart =
1937 (unsigned char *)myRecordPtr->myData
1938 + myRecordPtr->myReadNameLength
1939 + myRecordPtr->myCigarLength * sizeof(int)
1940 + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
1941
1942 // The non-tags take up from the start of the record to the tag start.
1943 // Do not include the block size part of the record since it is not
1944 // included in the size.
1945 uint32_t nonTagSize =
1946 tagStart - (unsigned char*)&(myRecordPtr->myReferenceID);
1947 // Tags take up the size of the block minus the non-tag section.
1948 uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
1949 return(tagSize);
1950 }
1951
1952 // Tags are stored outside the buffer, so myTagBufferSize is set.
1953 return(myTagBufferSize);
1954}
1955
1956
1957// Returns true if there is another tag and sets tag and vtype to the
1958// appropriate values, and returns a pointer to the value.
1959// Sets the Status to SUCCESS when a tag is successfully returned or
1960// when there are no more tags. Otherwise the status is set to describe
1961// why it failed (parsing, etc).
1962bool SamRecord::getNextSamTag(char* tag, char& vtype, void** value)
1963{
1964 myStatus = SamStatus::SUCCESS;
1965 if(myNeedToSetTagsFromBuffer)
1966 {
1967 if(!setTagsFromBuffer())
1968 {
1969 // Failed to read the tags from the buffer, so cannot
1970 // get tags.
1971 return(false);
1972 }
1973 }
1974
1975 // Increment the tag index to start looking at the next tag.
1976 // At the beginning, it is set to -1.
1977 myLastTagIndex++;
1978 int maxTagIndex = extras.Capacity();
1979 if(myLastTagIndex >= maxTagIndex)
1980 {
1981 // Hit the end of the tags, return false, no more tags.
1982 // Status is still success since this is not an error,
1983 // it is just the end of the list.
1984 return(false);
1985 }
1986
1987 bool tagFound = false;
1988 // Loop until a tag is found or the end of extras is hit.
1989 while((tagFound == false) && (myLastTagIndex < maxTagIndex))
1990 {
1991 if(extras.SlotInUse(myLastTagIndex))
1992 {
1993 // Found a slot to use.
1994 int key = extras.GetKey(myLastTagIndex);
1995 getTag(key, tag);
1996 getTypeFromKey(key, vtype);
1997 tagFound = true;
1998 // Get the value associated with the key based on the vtype.
1999 switch (vtype)
2000 {
2001 case 'f' :
2002 *value = getFloatPtr(myLastTagIndex);
2003 break;
2004 case 'i' :
2005 *value = getIntegerPtr(myLastTagIndex, vtype);
2006 if(vtype != 'A')
2007 {
2008 // Convert all int types to 'i'
2009 vtype = 'i';
2010 }
2011 break;
2012 case 'Z' :
2013 case 'B' :
2014 *value = getStringPtr(myLastTagIndex);
2015 break;
2016 default:
2018 "Unknown tag type");
2019 tagFound = false;
2020 break;
2021 }
2022 }
2023 if(!tagFound)
2024 {
2025 // Increment the index since a tag was not found.
2026 myLastTagIndex++;
2027 }
2028 }
2029 return(tagFound);
2030}
2031
2032
2033// Reset the tag iterator to the beginning of the tags.
2035{
2036 myLastTagIndex = -1;
2037}
2038
2039
2041{
2042 if((vtype == 'c') || (vtype == 'C') ||
2043 (vtype == 's') || (vtype == 'S') ||
2044 (vtype == 'i') || (vtype == 'I'))
2045 {
2046 return(true);
2047 }
2048 return(false);
2049}
2050
2051
2053{
2054 if(vtype == 'f')
2055 {
2056 return(true);
2057 }
2058 return(false);
2059}
2060
2061
2063{
2064 if(vtype == 'A')
2065 {
2066 return(true);
2067 }
2068 return(false);
2069}
2070
2071
2073{
2074 if((vtype == 'Z') || (vtype == 'B'))
2075 {
2076 return(true);
2077 }
2078 return(false);
2079}
2080
2081
2082bool SamRecord::getTagsString(const char* tags, String& returnString, char delim)
2083{
2084 const char* currentTagPtr = tags;
2085
2086 returnString.Clear();
2087 myStatus = SamStatus::SUCCESS;
2088 if(myNeedToSetTagsFromBuffer)
2089 {
2090 if(!setTagsFromBuffer())
2091 {
2092 // Failed to read the tags from the buffer, so cannot
2093 // get tags.
2094 return(false);
2095 }
2096 }
2097
2098 bool returnStatus = true;
2099
2100 while(*currentTagPtr != '\0')
2101 {
2102 // Tags are formatted as: XY:Z
2103 // Where X is [A-Za-z], Y is [A-Za-z], and
2104 // Z is A,i,f,Z,H (cCsSI are also excepted)
2105 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
2106 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
2107 {
2108 myStatus.setStatus(SamStatus::INVALID,
2109 "getTagsString called with improperly formatted tags.\n");
2110 returnStatus = false;
2111 break;
2112 }
2113
2114 // Construct the key.
2115 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
2116 currentTagPtr[3]);
2117 // Look to see if the key exsists in the hash.
2118 int offset = extras.Find(key);
2119
2120 if(offset >= 0)
2121 {
2122 // Offset is set, so the key was found.
2123 if(!returnString.IsEmpty())
2124 {
2125 returnString += delim;
2126 }
2127 returnString += currentTagPtr[0];
2128 returnString += currentTagPtr[1];
2129 returnString += ':';
2130 returnString += currentTagPtr[3];
2131 returnString += ':';
2132
2133 // First if it is an integer, determine the actual type of the int.
2134 char vtype;
2135 getTypeFromKey(key, vtype);
2136
2137 switch(vtype)
2138 {
2139 case 'i':
2140 returnString += *(int*)getIntegerPtr(offset, vtype);
2141 break;
2142 case 'f':
2143 returnString += *(float*)getFloatPtr(offset);
2144 break;
2145 case 'Z':
2146 case 'B':
2147 returnString += *(String*)getStringPtr(offset);
2148 break;
2149 default:
2150 myStatus.setStatus(SamStatus::INVALID,
2151 "rmTag called with unknown type.\n");
2152 returnStatus = false;
2153 break;
2154 };
2155 }
2156 // Increment to the next tag.
2157 if((currentTagPtr[4] == ';') || (currentTagPtr[4] == ','))
2158 {
2159 // Increment once more.
2160 currentTagPtr += 5;
2161 }
2162 else if(currentTagPtr[4] != '\0')
2163 {
2164 // Invalid tag format.
2165 myStatus.setStatus(SamStatus::INVALID,
2166 "rmTags called with improperly formatted tags.\n");
2167 returnStatus = false;
2168 break;
2169 }
2170 else
2171 {
2172 // Last Tag.
2173 currentTagPtr += 4;
2174 }
2175 }
2176 return(returnStatus);
2177}
2178
2179
2180const String* SamRecord::getStringTag(const char * tag)
2181{
2182 // Parse the buffer if necessary.
2183 if(myNeedToSetTagsFromBuffer)
2184 {
2185 if(!setTagsFromBuffer())
2186 {
2187 // Failed to read the tags from the buffer, so cannot
2188 // get tags. setTagsFromBuffer set the errors,
2189 // so just return null.
2190 return(NULL);
2191 }
2192 }
2193
2194 int key = MAKEKEY(tag[0], tag[1], 'Z');
2195 int offset = extras.Find(key);
2196
2197 int value;
2198 if (offset < 0)
2199 {
2200 // Check for 'B' tag.
2201 key = MAKEKEY(tag[0], tag[1], 'B');
2202 offset = extras.Find(key);
2203 if(offset < 0)
2204 {
2205 // Tag not found.
2206 return(NULL);
2207 }
2208 }
2209
2210 // Offset is valid, so return the tag.
2211 value = extras[offset];
2212 return(&(strings[value]));
2213}
2214
2215
2216int* SamRecord::getIntegerTag(const char * tag)
2217{
2218 // Init to success.
2219 myStatus = SamStatus::SUCCESS;
2220 // Parse the buffer if necessary.
2221 if(myNeedToSetTagsFromBuffer)
2222 {
2223 if(!setTagsFromBuffer())
2224 {
2225 // Failed to read the tags from the buffer, so cannot
2226 // get tags. setTagsFromBuffer set the errors,
2227 // so just return NULL.
2228 return(NULL);
2229 }
2230 }
2231
2232 int key = MAKEKEY(tag[0], tag[1], 'i');
2233 int offset = extras.Find(key);
2234
2235 int value;
2236 if (offset < 0)
2237 {
2238 // Failed to find the tag.
2239 return(NULL);
2240 }
2241 else
2242 value = extras[offset];
2243
2244 return(&(integers[value]));
2245}
2246
2247
2248bool SamRecord::getIntegerTag(const char * tag, int& tagVal)
2249{
2250 // Init to success.
2251 myStatus = SamStatus::SUCCESS;
2252 // Parse the buffer if necessary.
2253 if(myNeedToSetTagsFromBuffer)
2254 {
2255 if(!setTagsFromBuffer())
2256 {
2257 // Failed to read the tags from the buffer, so cannot
2258 // get tags. setTagsFromBuffer set the errors,
2259 // so just return false.
2260 return(false);
2261 }
2262 }
2263
2264 int key = MAKEKEY(tag[0], tag[1], 'i');
2265 int offset = extras.Find(key);
2266
2267 int value;
2268 if (offset < 0)
2269 {
2270 // Failed to find the tag.
2271 return(false);
2272 }
2273 else
2274 value = extras[offset];
2275
2276 tagVal = integers[value];
2277 return(true);
2278}
2279
2280
2281bool SamRecord::getFloatTag(const char * tag, float& tagVal)
2282{
2283 // Init to success.
2284 myStatus = SamStatus::SUCCESS;
2285 // Parse the buffer if necessary.
2286 if(myNeedToSetTagsFromBuffer)
2287 {
2288 if(!setTagsFromBuffer())
2289 {
2290 // Failed to read the tags from the buffer, so cannot
2291 // get tags. setTagsFromBuffer set the errors,
2292 // so just return false.
2293 return(false);
2294 }
2295 }
2296
2297 int key = MAKEKEY(tag[0], tag[1], 'f');
2298 int offset = extras.Find(key);
2299
2300 int value;
2301 if (offset < 0)
2302 {
2303 // Failed to find the tag.
2304 return(false);
2305 }
2306 else
2307 value = extras[offset];
2308
2309 tagVal = floats[value];
2310 return(true);
2311}
2312
2313
2314const String & SamRecord::getString(const char * tag)
2315{
2316 // Init to success.
2317 myStatus = SamStatus::SUCCESS;
2318 // Parse the buffer if necessary.
2319 if(myNeedToSetTagsFromBuffer)
2320 {
2321 if(!setTagsFromBuffer())
2322 {
2323 // Failed to read the tags from the buffer, so cannot
2324 // get tags.
2325 // TODO - what do we want to do on failure?
2326 }
2327 }
2328
2329 int key = MAKEKEY(tag[0], tag[1], 'Z');
2330 int offset = extras.Find(key);
2331
2332 int value;
2333 if (offset < 0)
2334 {
2335
2336 key = MAKEKEY(tag[0], tag[1], 'B');
2337 offset = extras.Find(key);
2338 if (offset < 0)
2339 {
2340 // TODO - what do we want to do on failure?
2341 return(NOT_FOUND_TAG_STRING);
2342 }
2343 }
2344 value = extras[offset];
2345
2346 return strings[value];
2347}
2348
2349
2350int & SamRecord::getInteger(const char * tag)
2351{
2352 // Init to success.
2353 myStatus = SamStatus::SUCCESS;
2354 // Parse the buffer if necessary.
2355 if(myNeedToSetTagsFromBuffer)
2356 {
2357 if(!setTagsFromBuffer())
2358 {
2359 // Failed to read the tags from the buffer, so cannot
2360 // get tags. setTagsFromBuffer set the error.
2361 // TODO - what do we want to do on failure?
2362 }
2363 }
2364
2365 int key = MAKEKEY(tag[0], tag[1], 'i');
2366 int offset = extras.Find(key);
2367
2368 int value;
2369 if (offset < 0)
2370 {
2371 // TODO - what do we want to do on failure?
2372 return NOT_FOUND_TAG_INT;
2373 }
2374 else
2375 value = extras[offset];
2376
2377 return integers[value];
2378}
2379
2380
2381bool SamRecord::checkTag(const char * tag, char type)
2382{
2383 // Init to success.
2384 myStatus = SamStatus::SUCCESS;
2385 // Parse the buffer if necessary.
2386 if(myNeedToSetTagsFromBuffer)
2387 {
2388 if(!setTagsFromBuffer())
2389 {
2390 // Failed to read the tags from the buffer, so cannot
2391 // get tags. setTagsFromBuffer set the error.
2392 return("");
2393 }
2394 }
2395
2396 int key = MAKEKEY(tag[0], tag[1], type);
2397
2398 return (extras.Find(key) != LH_NOTFOUND);
2399}
2400
2401
2402// Return the error after a failed SamRecord call.
2404{
2405 return(myStatus);
2406}
2407
2408
2409// Allocate space for the record - does a realloc.
2410// The passed in size is the size of the entire record including the
2411// block size field.
2412bool SamRecord::allocateRecordStructure(int size)
2413{
2414 if (allocatedSize < size)
2415 {
2416 bamRecordStruct* tmpRecordPtr =
2417 (bamRecordStruct *)realloc(myRecordPtr, size);
2418 if(tmpRecordPtr == NULL)
2419 {
2420 // FAILED to allocate memory
2421 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
2422 myStatus.addError(SamStatus::FAIL_MEM, "Failed Memory Allocation.");
2423 return(false);
2424 }
2425 // Successfully allocated memory, so set myRecordPtr.
2426 myRecordPtr = tmpRecordPtr;
2427
2428 // Reset the pointers into the record.
2429 if(myIsSequenceBufferValid)
2430 {
2431 myPackedSequence = (unsigned char *)myRecordPtr->myData +
2432 myRecordPtr->myReadNameLength +
2433 myRecordPtr->myCigarLength * sizeof(int);
2434 }
2435 if(myIsQualityBufferValid)
2436 {
2437 myPackedQuality = (unsigned char *)myRecordPtr->myData +
2438 myRecordPtr->myReadNameLength +
2439 myRecordPtr->myCigarLength * sizeof(int) +
2440 (myRecordPtr->myReadLength + 1) / 2;
2441 }
2442
2443 allocatedSize = size;
2444 }
2445 return(true);
2446}
2447
2448
2449// Index is the index into the strings array.
2450void* SamRecord::getStringPtr(int index)
2451{
2452 int value = extras[index];
2453
2454 return &(strings[value]);
2455}
2456
2457void* SamRecord::getIntegerPtr(int offset, char& type)
2458{
2459 int value = extras[offset];
2460
2461 type = intType[value];
2462
2463 return &(integers[value]);
2464}
2465
2466void* SamRecord::getFloatPtr(int offset)
2467{
2468 int value = extras[offset];
2469
2470 return &(floats[value]);
2471}
2472
2473
2474// Fixes the buffer to match the variable length fields if they are set.
2475bool SamRecord::fixBuffer(SequenceTranslation translation)
2476{
2477 // Check to see if the buffer is already synced.
2478 if(myIsBufferSynced &&
2479 (myBufferSequenceTranslation == translation))
2480 {
2481 // Already synced, nothing to do.
2482 return(true);
2483 }
2484
2485 // Set the bin if necessary.
2486 if(!myIsBinValid)
2487 {
2488 // The bin that is set in the record is not valid, so
2489 // reset it.
2490 myRecordPtr->myBin =
2491 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
2492 myIsBinValid = true;
2493 }
2494
2495 // Not synced.
2496 bool status = true;
2497
2498 // First determine the size the buffer needs to be.
2499 uint8_t newReadNameLen = getReadNameLength();
2500 uint16_t newCigarLen = getCigarLength();
2501 int32_t newReadLen = getReadLength();
2502 uint32_t newTagLen = getTagLength();
2503 uint32_t bamSequenceLen = (newReadLen+1)/2;
2504
2505 // The buffer size extends from the start of the record to data
2506 // plus the length of the variable fields,
2507 // Multiply the cigar length by 4 since it is the number of uint32_t fields.
2508 int newBufferSize =
2509 ((unsigned char*)(&(myRecordPtr->myData)) -
2510 (unsigned char*)myRecordPtr) +
2511 newReadNameLen + ((newCigarLen)*4) +
2512 newReadLen + bamSequenceLen + newTagLen;
2513
2514 if(!allocateRecordStructure(newBufferSize))
2515 {
2516 // Failed to allocate space.
2517 return(false);
2518 }
2519
2520 // Now that space has been added to the buffer, check to see what if
2521 // any fields need to be extracted from the buffer prior to starting to
2522 // overwrite it. Fields need to be extracted from the buffer if the
2523 // buffer is valid for the field and a previous variable length field has
2524 // changed length.
2525 bool readNameLenChange = (newReadNameLen != myRecordPtr->myReadNameLength);
2526 bool cigarLenChange = (newCigarLen != myRecordPtr->myCigarLength);
2527 bool readLenChange = (newReadLen != myRecordPtr->myReadLength);
2528
2529 // If the tags are still stored in the buffer and any other fields changed
2530 // lengths, they need to be extracted.
2531 if(myIsTagsBufferValid &&
2532 (readNameLenChange | cigarLenChange | readLenChange))
2533 {
2534 status &= setTagsFromBuffer();
2535 // The tag buffer will not be valid once the other fields
2536 // are written, so set it to not valid.
2537 myIsTagsBufferValid = false;
2538 }
2539
2540 // If the sequence or quality strings are still stored in the buffer, and
2541 // any of the previous fields have changed length, extract it from the
2542 // current buffer.
2543 if((myIsQualityBufferValid | myIsSequenceBufferValid) &&
2544 (readNameLenChange | cigarLenChange | readLenChange))
2545 {
2546 setSequenceAndQualityFromBuffer();
2547 // The quality and sequence buffers will not be valid once the other
2548 // fields are written, so set them to not valid.
2549 myIsQualityBufferValid = false;
2550 myIsSequenceBufferValid = false;
2551 }
2552
2553 // If the cigar is still stored in the buffer, and any of the
2554 // previous fields have changed length, extract it from the current buffer.
2555 if((myIsCigarBufferValid) &&
2556 (readNameLenChange))
2557 {
2558 status &= parseCigarBinary();
2559 myIsCigarBufferValid = false;
2560 }
2561
2562 // Set each value in the buffer if it is not already valid.
2563 if(!myIsReadNameBufferValid)
2564 {
2565 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
2566 newReadNameLen);
2567
2568 // Set the new ReadNameLength.
2569 myRecordPtr->myReadNameLength = newReadNameLen;
2570 myIsReadNameBufferValid = true;
2571 }
2572
2573 unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
2574 myRecordPtr->myReadNameLength;
2575
2576 // Set the Cigar. Need to reformat from the string to
2577 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
2578
2579 if(!myIsCigarBufferValid)
2580 {
2581 // The cigar was already parsed when it was set, so just copy
2582 // data from the temporary buffer.
2583 myRecordPtr->myCigarLength = newCigarLen;
2584 memcpy(packedCigar, myCigarTempBuffer,
2585 myRecordPtr->myCigarLength * sizeof(uint32_t));
2586
2587 myIsCigarBufferValid = true;
2588 }
2589
2590 unsigned char * packedSequence = readNameEnds +
2591 myRecordPtr->myCigarLength * sizeof(int);
2592 unsigned char * packedQuality = packedSequence + bamSequenceLen;
2593
2594 if(!myIsSequenceBufferValid || !myIsQualityBufferValid ||
2595 (myBufferSequenceTranslation != translation))
2596 {
2597 myRecordPtr->myReadLength = newReadLen;
2598 // Determine if the quality needs to be set and is just a * and needs to
2599 // be set to 0xFF.
2600 bool noQuality = false;
2601 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
2602 {
2603 noQuality = true;
2604 }
2605
2606 const char* translatedSeq = NULL;
2607 // If the sequence is not valid in the buffer or it is not
2608 // properly translated, get the properly translated sequence
2609 // that needs to be put into the buffer.
2610 if((!myIsSequenceBufferValid) ||
2611 (translation != myBufferSequenceTranslation))
2612 {
2613 translatedSeq = getSequence(translation);
2614 }
2615
2616 for (int i = 0; i < myRecordPtr->myReadLength; i++)
2617 {
2618 if((!myIsSequenceBufferValid) ||
2619 (translation != myBufferSequenceTranslation))
2620 {
2621 // Sequence buffer is not valid, so set the sequence.
2622 int seqVal = 0;
2623 switch(translatedSeq[i])
2624 {
2625 case '=':
2626 seqVal = 0;
2627 break;
2628 case 'A':
2629 case 'a':
2630 seqVal = 1;
2631 break;
2632 case 'C':
2633 case 'c':
2634 seqVal = 2;
2635 break;
2636 case 'G':
2637 case 'g':
2638 seqVal = 4;
2639 break;
2640 case 'T':
2641 case 't':
2642 seqVal = 8;
2643 break;
2644 case 'N':
2645 case 'n':
2646 case '.':
2647 seqVal = 15;
2648 break;
2649 default:
2651 "Unknown Sequence character found.");
2652 status = false;
2653 break;
2654 };
2655
2656 if(i & 1)
2657 {
2658 // Odd number i's go in the lower 4 bits, so OR in the
2659 // lower bits
2660 packedSequence[i/2] |= seqVal;
2661 }
2662 else
2663 {
2664 // Even i's go in the upper 4 bits and are always set first.
2665 packedSequence[i/2] = seqVal << 4;
2666 }
2667 }
2668
2669 if(!myIsQualityBufferValid)
2670 {
2671 // Set the quality.
2672 if((noQuality) || (myQuality.Length() <= i))
2673 {
2674 // No quality or the quality is smaller than the sequence,
2675 // so set it to 0xFF
2676 packedQuality[i] = 0xFF;
2677 }
2678 else
2679 {
2680 // Copy the quality string.
2681 packedQuality[i] = myQuality[i] - 33;
2682 }
2683 }
2684 }
2685 myPackedSequence = (unsigned char *)myRecordPtr->myData +
2686 myRecordPtr->myReadNameLength +
2687 myRecordPtr->myCigarLength * sizeof(int);
2688 myPackedQuality = myPackedSequence +
2689 (myRecordPtr->myReadLength + 1) / 2;
2690 myIsSequenceBufferValid = true;
2691 myIsQualityBufferValid = true;
2692 myBufferSequenceTranslation = translation;
2693 }
2694
2695 if(!myIsTagsBufferValid)
2696 {
2697 status &= setTagsInBuffer();
2698 }
2699
2700 // Set the lengths in the buffer.
2701 myRecordPtr->myReadNameLength = newReadNameLen;
2702 myRecordPtr->myCigarLength = newCigarLen;
2703 myRecordPtr->myReadLength = newReadLen;
2704
2705 // Set the buffer block size to the size of the buffer minus the
2706 // first field.
2707 myRecordPtr->myBlockSize = newBufferSize - sizeof(int32_t);
2708
2709 if(status)
2710 {
2711 myIsBufferSynced = true;
2712 }
2713
2714 return(status);
2715}
2716
2717
2718// Sets the Sequence and Quality strings from the buffer.
2719// They are done together in one method because they require the same
2720// loop, so might as well be done at the same time.
2721void SamRecord::setSequenceAndQualityFromBuffer()
2722{
2723 // NOTE: If the sequence buffer is not valid, do not set the sequence
2724 // string from the buffer.
2725 // NOTE: If the quality buffer is not valid, do not set the quality string
2726 // from the buffer.
2727
2728 // Extract the sequence if the buffer is valid and the string's length is 0.
2729 bool extractSequence = false;
2730 if(myIsSequenceBufferValid && (mySequence.Length() == 0))
2731 {
2732 extractSequence = true;
2733 }
2734
2735 // Extract the quality if the buffer is valid and the string's length is 0.
2736 bool extractQuality = false;
2737 if(myIsQualityBufferValid && (myQuality.Length() == 0))
2738 {
2739 extractQuality = true;
2740 }
2741
2742 // If neither the quality nor the sequence need to be extracted,
2743 // just return.
2744 if(!extractSequence && !extractQuality)
2745 {
2746 return;
2747 }
2748
2749 // Set the sequence and quality strings..
2750 if(extractSequence)
2751 {
2752 mySequence.SetLength(myRecordPtr->myReadLength);
2753 }
2754 if(extractQuality)
2755 {
2756 myQuality.SetLength(myRecordPtr->myReadLength);
2757 }
2758
2759 const char * asciiBases = "=AC.G...T......N";
2760
2761 // Flag to see if the quality is specified - the quality contains a value
2762 // other than 0xFF. If all values are 0xFF, then there is no quality.
2763 bool qualitySpecified = false;
2764
2765 for (int i = 0; i < myRecordPtr->myReadLength; i++)
2766 {
2767 if(extractSequence)
2768 {
2769 mySequence[i] = i & 1 ?
2770 asciiBases[myPackedSequence[i / 2] & 0xF] :
2771 asciiBases[myPackedSequence[i / 2] >> 4];
2772 }
2773
2774 if(extractQuality)
2775 {
2776 if(myPackedQuality[i] != 0xFF)
2777 {
2778 // Quality is specified, so mark the flag.
2779 qualitySpecified = true;
2780 }
2781
2782 myQuality[i] = myPackedQuality[i] + 33;
2783 }
2784 }
2785
2786 // If the read length is 0, then set the sequence and quality to '*'
2787 if(myRecordPtr->myReadLength == 0)
2788 {
2789 if(extractSequence)
2790 {
2791 mySequence = "*";
2792 }
2793 if(extractQuality)
2794 {
2795 myQuality = "*";
2796 }
2797 }
2798 else if(extractQuality && !qualitySpecified)
2799 {
2800 // No quality was specified, so set it to "*"
2801 myQuality = "*";
2802 }
2803}
2804
2805
2806// Parse the cigar to calculate the alignment/unclipped end.
2807bool SamRecord::parseCigar()
2808{
2809 // Determine if the cigar string or cigar binary needs to be parsed.
2810 if(myCigar.Length() == 0)
2811 {
2812 // The cigar string is not yet set, so parse the binary.
2813 return(parseCigarBinary());
2814 }
2815 return(parseCigarString());
2816}
2817
2818// Parse the cigar to calculate the alignment/unclipped end.
2819bool SamRecord::parseCigarBinary()
2820{
2821 // Only need to parse if the string is not already set.
2822 // The length of the cigar string is set to zero when the
2823 // record is read from a file into the buffer.
2824 if(myCigar.Length() != 0)
2825 {
2826 // Already parsed.
2827 return(true);
2828 }
2829
2830 unsigned char * readNameEnds =
2831 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
2832
2833 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
2834
2835 myCigarRoller.Set(packedCigar, myRecordPtr->myCigarLength);
2836
2837 myCigarRoller.getCigarString(myCigar);
2838
2839 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
2840
2841 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
2842 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
2843
2844 // if the cigar length is 0, then set the cigar string to "*"
2845 if(myRecordPtr->myCigarLength == 0)
2846 {
2847 myCigar = "*";
2848 return(true);
2849 }
2850
2851 // Copy the cigar into a temporary buffer.
2852 int newBufferSize = myRecordPtr->myCigarLength * sizeof(uint32_t);
2853 if(newBufferSize > myCigarTempBufferAllocatedSize)
2854 {
2855 uint32_t* tempBufferPtr =
2856 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
2857 if(tempBufferPtr == NULL)
2858 {
2859 // Failed to allocate memory.
2860 // Do not parse, just return.
2861 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
2863 "Failed to Allocate Memory.");
2864 return(false);
2865 }
2866 myCigarTempBuffer = tempBufferPtr;
2867 myCigarTempBufferAllocatedSize = newBufferSize;
2868 }
2869
2870 memcpy(myCigarTempBuffer, packedCigar,
2871 myRecordPtr->myCigarLength * sizeof(uint32_t));
2872
2873 // Set the length of the temp buffer.
2874 myCigarTempBufferLength = myRecordPtr->myCigarLength;
2875
2876 return(true);
2877}
2878
2879// Parse the cigar string to calculate the cigar length and alignment end.
2880bool SamRecord::parseCigarString()
2881{
2882 myCigarTempBufferLength = 0;
2883 if(myCigar == "*")
2884 {
2885 // Cigar is empty, so initialize the variables.
2886 myAlignmentLength = 0;
2887 myUnclippedStartOffset = 0;
2888 myUnclippedEndOffset = 0;
2889 myCigarRoller.clear();
2890 return(true);
2891 }
2892
2893 myCigarRoller.Set(myCigar);
2894
2895 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
2896
2897 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
2898 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
2899
2900 // Check to see if the Temporary Cigar Buffer is large enough to contain
2901 // this cigar. If we make it the size of the length of the cigar string,
2902 // it will be more than large enough.
2903 int newBufferSize = myCigar.Length() * sizeof(uint32_t);
2904 if(newBufferSize > myCigarTempBufferAllocatedSize)
2905 {
2906 uint32_t* tempBufferPtr =
2907 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
2908 if(tempBufferPtr == NULL)
2909 {
2910 // Failed to allocate memory.
2911 // Do not parse, just return.
2912 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
2914 "Failed to Allocate Memory.");
2915 return(false);
2916 }
2917 myCigarTempBuffer = tempBufferPtr;
2918 myCigarTempBufferAllocatedSize = newBufferSize;
2919 }
2920
2921 // Track if there were any errors.
2922 bool status = true;
2923
2924 // Track the index into the cigar string that is being parsed.
2925 char *cigarOp;
2926 const char* cigarEntryStart = myCigar.c_str();
2927 int opLen = 0;
2928 int op = 0;
2929
2930 unsigned int * packedCigar = myCigarTempBuffer;
2931 // TODO - maybe one day make a cigar list... or maybe make a
2932 // reference cigar string for ease of lookup....
2933 const char* endCigarString = cigarEntryStart + myCigar.Length();
2934 while(cigarEntryStart < endCigarString)
2935 {
2936 bool validCigarEntry = true;
2937 // Get the opLen from the string. cigarOp will then point to
2938 // the operation.
2939 opLen = strtol(cigarEntryStart, &cigarOp, 10);
2940 // Switch on the type of operation.
2941 switch(*cigarOp)
2942 {
2943 case('M'):
2944 op = 0;
2945 break;
2946 case('I'):
2947 // Insert into the reference position, so do not increment the
2948 // reference end position.
2949 op = 1;
2950 break;
2951 case('D'):
2952 op = 2;
2953 break;
2954 case('N'):
2955 op = 3;
2956 break;
2957 case('S'):
2958 op = 4;
2959 break;
2960 case('H'):
2961 op = 5;
2962 break;
2963 case('P'):
2964 op = 6;
2965 break;
2966 default:
2967 fprintf(stderr, "ERROR parsing cigar\n");
2968 validCigarEntry = false;
2969 status = false;
2971 "Unknown operation found when parsing the Cigar.");
2972 break;
2973 };
2974 if(validCigarEntry)
2975 {
2976 // Increment the cigar length.
2977 ++myCigarTempBufferLength;
2978 *packedCigar = (opLen << 4) | op;
2979 packedCigar++;
2980 }
2981 // The next Entry starts at one past the cigar op, so set the start.
2982 cigarEntryStart = ++cigarOp;
2983 }
2984
2985 // Check clipLength to adjust the end position.
2986 return(status);
2987}
2988
2989
2990bool SamRecord::setTagsFromBuffer()
2991{
2992 // If the tags do not need to be set from the buffer, return true.
2993 if(myNeedToSetTagsFromBuffer == false)
2994 {
2995 // Already been set from the buffer.
2996 return(true);
2997 }
2998
2999 // Mark false, as they are being set now.
3000 myNeedToSetTagsFromBuffer = false;
3001
3002 unsigned char * extraPtr = myPackedQuality + myRecordPtr->myReadLength;
3003
3004 // Default to success, will be changed to false on failure.
3005 bool status = true;
3006
3007 // Clear any previously set tags.
3008 clearTags();
3009 while (myRecordPtr->myBlockSize + 4 -
3010 (extraPtr - (unsigned char *)myRecordPtr) > 0)
3011 {
3012 int key = 0;
3013 int value = 0;
3014 void * content = extraPtr + 3;
3015 int tagBufferSize = 0;
3016
3017 key = MAKEKEY(extraPtr[0], extraPtr[1], extraPtr[2]);
3018
3019 // First check if the tag already exists.
3020 unsigned int location = extras.Find(key);
3021 int origIndex = 0;
3022 String* duplicate = NULL;
3023 String* origTag = NULL;
3024 if(location != LH_NOTFOUND)
3025 {
3026 duplicate = new String;
3027 origTag = new String;
3028 origIndex = extras[location];
3029
3030 *duplicate = (char)(extraPtr[0]);
3031 *duplicate += (char)(extraPtr[1]);
3032 *duplicate += ':';
3033
3034 *origTag = *duplicate;
3035 *duplicate += (char)(extraPtr[2]);
3036 *duplicate += ':';
3037 }
3038
3039 switch (extraPtr[2])
3040 {
3041 case 'A' :
3042 if(duplicate != NULL)
3043 {
3044 *duplicate += (* (char *) content);
3045 *origTag += intType[origIndex];
3046 *origTag += ':';
3047 appendIntArrayValue(origIndex, *origTag);
3048 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3049 integers[origIndex] = *(char *)content;
3050 intType[origIndex] = extraPtr[2];
3051 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3052 }
3053 else
3054 {
3055 value = integers.Length();
3056 integers.Push(* (char *) content);
3057 intType.push_back(extraPtr[2]);
3058 tagBufferSize += 4;
3059 }
3060 extraPtr += 4;
3061 break;
3062 case 'c' :
3063 if(duplicate != NULL)
3064 {
3065 *duplicate += (* (char *) content);
3066 *origTag += intType[origIndex];
3067 *origTag += ':';
3068 appendIntArrayValue(origIndex, *origTag);
3069 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3070 integers[origIndex] = *(char *)content;
3071 intType[origIndex] = extraPtr[2];
3072 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3073 }
3074 else
3075 {
3076 value = integers.Length();
3077 integers.Push(* (char *) content);
3078 intType.push_back(extraPtr[2]);
3079 tagBufferSize += 4;
3080 }
3081 extraPtr += 4;
3082 break;
3083 case 'C' :
3084 if(duplicate != NULL)
3085 {
3086 *duplicate += (* (unsigned char *) content);
3087 *origTag += intType[origIndex];
3088 *origTag += ':';
3089 appendIntArrayValue(origIndex, *origTag);
3090 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3091 integers[origIndex] = *(unsigned char *)content;
3092 intType[origIndex] = extraPtr[2];
3093 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3094 }
3095 else
3096 {
3097 value = integers.Length();
3098 integers.Push(* (unsigned char *) content);
3099 intType.push_back(extraPtr[2]);
3100 tagBufferSize += 4;
3101 }
3102 extraPtr += 4;
3103 break;
3104 case 's' :
3105 if(duplicate != NULL)
3106 {
3107 *duplicate += (* (short *) content);
3108 *origTag += intType[origIndex];
3109 *origTag += ':';
3110 appendIntArrayValue(origIndex, *origTag);
3111 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3112 integers[origIndex] = *(short *)content;
3113 intType[origIndex] = extraPtr[2];
3114 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3115 }
3116 else
3117 {
3118 value = integers.Length();
3119 integers.Push(* (short *) content);
3120 intType.push_back(extraPtr[2]);
3121 tagBufferSize += 5;
3122 }
3123 extraPtr += 5;
3124 break;
3125 case 'S' :
3126 if(duplicate != NULL)
3127 {
3128 *duplicate += (* (unsigned short *) content);
3129 *origTag += intType[origIndex];
3130 *origTag += ':';
3131 appendIntArrayValue(origIndex, *origTag);
3132 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3133 integers[origIndex] = *(unsigned short *)content;
3134 intType[origIndex] = extraPtr[2];
3135 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3136 }
3137 else
3138 {
3139 value = integers.Length();
3140 integers.Push(* (unsigned short *) content);
3141 intType.push_back(extraPtr[2]);
3142 tagBufferSize += 5;
3143 }
3144 extraPtr += 5;
3145 break;
3146 case 'i' :
3147 if(duplicate != NULL)
3148 {
3149 *duplicate += (* (int *) content);
3150 *origTag += intType[origIndex];
3151 *origTag += ':';
3152 appendIntArrayValue(origIndex, *origTag);
3153 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3154 integers[origIndex] = *(int *)content;
3155 intType[origIndex] = extraPtr[2];
3156 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3157 }
3158 else
3159 {
3160 value = integers.Length();
3161 integers.Push(* (int *) content);
3162 intType.push_back(extraPtr[2]);
3163 tagBufferSize += 7;
3164 }
3165 extraPtr += 7;
3166 break;
3167 case 'I' :
3168 if(duplicate != NULL)
3169 {
3170 *duplicate += (* (unsigned int *) content);
3171 *origTag += intType[origIndex];
3172 *origTag += ':';
3173 appendIntArrayValue(origIndex, *origTag);
3174 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3175 integers[origIndex] = *(unsigned int *)content;
3176 intType[origIndex] = extraPtr[2];
3177 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3178 }
3179 else
3180 {
3181 value = integers.Length();
3182 integers.Push((int) * (unsigned int *) content);
3183 intType.push_back(extraPtr[2]);
3184 tagBufferSize += 7;
3185 }
3186 extraPtr += 7;
3187 break;
3188 case 'Z' :
3189 if(duplicate != NULL)
3190 {
3191 *duplicate += ((const char *) content);
3192 *origTag += 'Z';
3193 *origTag += ':';
3194 *origTag += (char*)(strings[origIndex]);
3195 tagBufferSize -= strings[origIndex].Length();
3196 strings[origIndex] = (const char *) content;
3197 extraPtr += 4 + strings[origIndex].Length();
3198 tagBufferSize += strings[origIndex].Length();
3199 }
3200 else
3201 {
3202 value = strings.Length();
3203 strings.Push((const char *) content);
3204 tagBufferSize += 4 + strings.Last().Length();
3205 extraPtr += 4 + strings.Last().Length();
3206 }
3207 break;
3208 case 'B' :
3209 if(duplicate != NULL)
3210 {
3211 *origTag += 'B';
3212 *origTag += ':';
3213 *origTag += (char*)(strings[origIndex]);
3214 tagBufferSize -=
3215 getBtagBufferSize(strings[origIndex]);
3216 int bufferSize =
3217 getStringFromBtagBuffer((unsigned char*)content,
3218 strings[origIndex]);
3219 *duplicate += (char *)(strings[origIndex]);
3220 tagBufferSize += bufferSize;
3221 extraPtr += 3 + bufferSize;
3222 }
3223 else
3224 {
3225 value = strings.Length();
3226 String tempBTag;
3227 int bufferSize =
3228 getStringFromBtagBuffer((unsigned char*)content,
3229 tempBTag);
3230 strings.Push(tempBTag);
3231 tagBufferSize += 3 + bufferSize;
3232 extraPtr += 3 + bufferSize;
3233 }
3234 break;
3235 case 'f' :
3236 if(duplicate != NULL)
3237 {
3238 duplicate->appendFullFloat(* (float *) content);
3239 *origTag += 'f';
3240 *origTag += ':';
3241 origTag->appendFullFloat(floats[origIndex]);
3242 floats[origIndex] = *(float *)content;
3243 }
3244 else
3245 {
3246 value = floats.size();
3247 floats.push_back(* (float *) content);
3248 tagBufferSize += 7;
3249 }
3250 extraPtr += 7;
3251 break;
3252 default :
3253 fprintf(stderr,
3254 "parsing BAM - Unknown custom field of type %c%c:%c\n",
3255 extraPtr[0], extraPtr[1], extraPtr[2]);
3256 fprintf(stderr, "BAM Tags: \n");
3257
3258 unsigned char* tagInfo = myPackedQuality + myRecordPtr->myReadLength;
3259
3260 fprintf(stderr, "\n\n");
3261 tagInfo = myPackedQuality + myRecordPtr->myReadLength;
3262 while(myRecordPtr->myBlockSize + 4 -
3263 (tagInfo - (unsigned char *)myRecordPtr) > 0)
3264 {
3265 fprintf(stderr, "%02x",tagInfo[0]);
3266 ++tagInfo;
3267 }
3268 fprintf(stderr, "\n");
3269
3270 // Failed on read.
3271 // Increment extraPtr just by the size of the 3 known fields
3272 extraPtr += 3;
3274 "Unknown tag type.");
3275 status = false;
3276 }
3277
3278 if(duplicate != NULL)
3279 {
3280 // Duplicate tag in this record.
3281 // Tag already existed, print message about overwriting.
3282 // WARN about dropping duplicate tags.
3283 if(myNumWarns++ < myMaxWarns)
3284 {
3285 fprintf(stderr, "WARNING: Duplicate Tags, overwritting %s with %s\n",
3286 origTag->c_str(), duplicate->c_str());
3287 if(myNumWarns == myMaxWarns)
3288 {
3289 fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
3290 }
3291 }
3292
3293 continue;
3294 }
3295
3296 // Only add the tag if it has so far been successfully processed.
3297 if(status)
3298 {
3299 // Add the tag.
3300 extras.Add(key, value);
3301 myTagBufferSize += tagBufferSize;
3302 }
3303 }
3304 return(status);
3305}
3306
3307
3308bool SamRecord::setTagsInBuffer()
3309{
3310 // The buffer size extends from the start of the record to data
3311 // plus the length of the variable fields,
3312 // Multiply the cigar length by 4 since it is the number of uint32_t fields.
3313 int bamSequenceLength = (myRecordPtr->myReadLength+1)/2;
3314 int newBufferSize = ((unsigned char*)(&(myRecordPtr->myData)) -
3315 (unsigned char*)myRecordPtr) +
3316 myRecordPtr->myReadNameLength + ((myRecordPtr->myCigarLength)*4) +
3317 myRecordPtr->myReadLength + bamSequenceLength + myTagBufferSize;
3318
3319 // Make sure the buffer is big enough.
3320 if(!allocateRecordStructure(newBufferSize))
3321 {
3322 // Failed to allocate space.
3323 return(false);
3324 }
3325
3326 char * extraPtr = (char*)myPackedQuality + myRecordPtr->myReadLength;
3327
3328 bool status = true;
3329
3330 // Set the tags in the buffer.
3331 if (extras.Entries())
3332 {
3333 for (int i = 0; i < extras.Capacity(); i++)
3334 {
3335 if (extras.SlotInUse(i))
3336 {
3337 int key = extras.GetKey(i);
3338 getTag(key, extraPtr);
3339 extraPtr += 2;
3340 char vtype;
3341 getTypeFromKey(key, vtype);
3342
3343 if(vtype == 'i')
3344 {
3345 vtype = getIntegerType(i);
3346 }
3347
3348 extraPtr[0] = vtype;
3349
3350 // increment the pointer to where the value is.
3351 extraPtr += 1;
3352
3353 switch (vtype)
3354 {
3355 case 'A' :
3356 *(char*)extraPtr = (char)getInteger(i);
3357 // sprintf(extraPtr, "%d", getInteger(i));
3358 extraPtr += 1;
3359 break;
3360 case 'c' :
3361 *(int8_t*)extraPtr = (int8_t)getInteger(i);
3362 // sprintf(extraPtr, "%.4d", getInteger(i));
3363 extraPtr += 1;
3364 break;
3365 case 'C' :
3366 *(uint8_t*)extraPtr = (uint8_t)getInteger(i);
3367 // sprintf(extraPtr, "%.4d", getInteger(i));
3368 extraPtr += 1;
3369 break;
3370 case 's' :
3371 *(int16_t*)extraPtr = (int16_t)getInteger(i);
3372 // sprintf(extraPtr, "%.4d", getInteger(i));
3373 extraPtr += 2;
3374 break;
3375 case 'S' :
3376 *(uint16_t*)extraPtr = (uint16_t)getInteger(i);
3377 // sprintf(extraPtr, "%.4d", getInteger(i));
3378 extraPtr += 2;
3379 break;
3380 case 'i' :
3381 *(int32_t*)extraPtr = (int32_t)getInteger(i);
3382 // sprintf(extraPtr, "%.4d", getInteger(i));
3383 extraPtr += 4;
3384 break;
3385 case 'I' :
3386 *(uint32_t*)extraPtr = (uint32_t)getInteger(i);
3387 // sprintf(extraPtr, "%.4d", getInteger(i));
3388 extraPtr += 4;
3389 break;
3390 case 'Z' :
3391 sprintf(extraPtr, "%s", getString(i).c_str());
3392 extraPtr += getString(i).Length() + 1;
3393 break;
3394 case 'B' :
3395 extraPtr += setBtagBuffer(getString(i), extraPtr);
3396 //--TODO-- Set buffer with correct B tag
3397 //sprintf(extraPtr, "%s", getString(i).c_str());
3398 // extraPtr += getBtagBufferSize(getString(i));
3399 break;
3400 case 'f' :
3401 *(float*)extraPtr = getFloat(i);
3402 extraPtr += 4;
3403 break;
3404 default :
3406 "Unknown tag type.");
3407 status = false;
3408 break;
3409 }
3410 }
3411 }
3412 }
3413
3414 // Validate that the extra pointer is at the end of the allocated buffer.
3415 // If not then there was a problem.
3416 if(extraPtr != (char*)myRecordPtr + newBufferSize)
3417 {
3418 fprintf(stderr, "ERROR updating the buffer. Incorrect size.");
3420 "ERROR updating the buffer. Incorrect size.");
3421 status = false;
3422 }
3423
3424
3425 // The buffer tags are now in sync.
3426 myNeedToSetTagsInBuffer = false;
3427 myIsTagsBufferValid = true;
3428 return(status);
3429}
3430
3431
3432// Reset the variables for a newly set buffer. The buffer must be set first
3433// since this looks up the reference ids in the buffer to set the reference
3434// names.
3435void SamRecord::setVariablesForNewBuffer(SamFileHeader& header)
3436{
3437 // Lookup the reference name & mate reference name associated with this
3438 // record.
3439 myReferenceName =
3440 header.getReferenceLabel(myRecordPtr->myReferenceID);
3441 myMateReferenceName =
3442 header.getReferenceLabel(myRecordPtr->myMateReferenceID);
3443
3444 // Clear the SAM Strings that are now not in-sync with the buffer.
3445 myReadName.SetLength(0);
3446 myCigar.SetLength(0);
3447 mySequence.SetLength(0);
3448 mySeqWithEq.clear();
3449 mySeqWithoutEq.clear();
3450 myQuality.SetLength(0);
3451 myNeedToSetTagsFromBuffer = true;
3452 myNeedToSetTagsInBuffer = false;
3453
3454 //Set that the buffer is valid.
3455 myIsBufferSynced = true;
3456 // Set that the variable length buffer fields are valid.
3457 myIsReadNameBufferValid = true;
3458 myIsCigarBufferValid = true;
3459 myPackedSequence = (unsigned char *)myRecordPtr->myData +
3460 myRecordPtr->myReadNameLength + myRecordPtr->myCigarLength * sizeof(int);
3461 myIsSequenceBufferValid = true;
3462 myBufferSequenceTranslation = NONE;
3463 myPackedQuality = myPackedSequence +
3464 (myRecordPtr->myReadLength + 1) / 2;
3465 myIsQualityBufferValid = true;
3466 myIsTagsBufferValid = true;
3467 myIsBinValid = true;
3468}
3469
3470
3471// Extract the vtype from the key.
3472void SamRecord::getTypeFromKey(int key, char& type) const
3473{
3474 // Extract the type from the key.
3475 type = (key >> 16) & 0xFF;
3476}
3477
3478
3479// Extract the tag from the key.
3480void SamRecord::getTag(int key, char* tag) const
3481{
3482 // Extract the tag from the key.
3483 tag[0] = key & 0xFF;
3484 tag[1] = (key >> 8) & 0xFF;
3485 tag[2] = 0;
3486}
3487
3488
3489// Index is the index into the strings array.
3490String & SamRecord::getString(int index)
3491{
3492 int value = extras[index];
3493
3494 return strings[value];
3495}
3496
3497int & SamRecord::getInteger(int offset)
3498{
3499 int value = extras[offset];
3500
3501 return integers[value];
3502}
3503
3504const char & SamRecord::getIntegerType(int offset) const
3505{
3506 int value = extras[offset];
3507
3508 return intType[value];
3509}
3510
3511float & SamRecord::getFloat(int offset)
3512{
3513 int value = extras[offset];
3514
3515 return floats[value];
3516 }
3517
3518
3519void SamRecord::appendIntArrayValue(char type, int value, String& strVal) const
3520{
3521 switch(type)
3522 {
3523 case 'A':
3524 strVal += (char)value;
3525 break;
3526 case 'c':
3527 case 's':
3528 case 'i':
3529 case 'C':
3530 case 'S':
3531 case 'I':
3532 strVal += value;
3533 break;
3534 default:
3535 // Do nothing.
3536 ;
3537 }
3538}
3539
3540
3541int SamRecord::getBtagBufferSize(String& tagStr)
3542{
3543 if(tagStr.Length() < 1)
3544 {
3545 // ERROR, needs at least the type.
3547 "SamRecord::getBtagBufferSize no tag subtype specified");
3548 return(0);
3549 }
3550 char type = tagStr[0];
3551 int elementSize = getNumericTagTypeSize(type);
3552 if(elementSize <= 0)
3553 {
3554 // ERROR, 'B' tag subtype must be numeric, so should be non-zero
3555 String errorMsg = "SamRecord::getBtagBufferSize invalid tag subtype, ";
3556 errorMsg += type;
3557 myStatus.addError(SamStatus::FAIL_PARSE, errorMsg.c_str());
3558 return(0);
3559 }
3560
3561 // Separated by ',', so count the number of commas.
3562 int numElements = 0;
3563 int index = tagStr.FastFindChar(',', 0);
3564 while(index > 0)
3565 {
3566 ++numElements;
3567 index = tagStr.FastFindChar(',', index+1);
3568 }
3569 // Add 5 bytes: type & numElements
3570 return(numElements * elementSize + 5);
3571}
3572
3573
3574int SamRecord::setBtagBuffer(String& tagStr, char* extraPtr)
3575{
3576 if(tagStr.Length() < 1)
3577 {
3578 // ERROR, needs at least the type.
3580 "SamRecord::getBtagBufferSize no tag subtype specified");
3581 return(0);
3582 }
3583 char type = tagStr[0];
3584 int elementSize = getNumericTagTypeSize(type);
3585 if(elementSize <= 0)
3586 {
3587 // ERROR, 'B' tag subtype must be numeric, so should be non-zero
3588 String errorMsg = "SamRecord::getBtagBufferSize invalid tag subtype, ";
3589 errorMsg += type;
3590 myStatus.addError(SamStatus::FAIL_PARSE, errorMsg.c_str());
3591 return(0);
3592 }
3593
3594 int totalInc = 0;
3595 // Write the type.
3596 *(char*)extraPtr = type;
3597 ++extraPtr;
3598 ++totalInc;
3599
3600 // Get the number of elements by counting ','s
3601 uint32_t numElements = 0;
3602 int index = tagStr.FastFindChar(',', 0);
3603 while(index > 0)
3604 {
3605 ++numElements;
3606 index = tagStr.FastFindChar(',', index+1);
3607 }
3608 *(uint32_t*)extraPtr = numElements;
3609 extraPtr += 4;
3610 totalInc += 4;
3611
3612 const char* stringPtr = tagStr.c_str();
3613 const char* endPtr = stringPtr + tagStr.Length();
3614 // increment past the subtype and ','.
3615 stringPtr += 2;
3616
3617 char* newPtr = NULL;
3618 while(stringPtr < endPtr)
3619 {
3620 switch(type)
3621 {
3622 case 'f':
3623 *(float*)extraPtr = (float)(strtod(stringPtr, &newPtr));
3624 break;
3625 case 'c':
3626 *(int8_t*)extraPtr = (int8_t)strtol(stringPtr, &newPtr, 0);
3627 break;
3628 case 's':
3629 *(int16_t*)extraPtr = (int16_t)strtol(stringPtr, &newPtr, 0);
3630 break;
3631 case 'i':
3632 *(int32_t*)extraPtr = (int32_t)strtol(stringPtr, &newPtr, 0);
3633 break;
3634 case 'C':
3635 *(uint8_t*)extraPtr = (uint8_t)strtoul(stringPtr, &newPtr, 0);
3636 break;
3637 case 'S':
3638 *(uint16_t*)extraPtr = (uint16_t)strtoul(stringPtr, &newPtr, 0);
3639 break;
3640 case 'I':
3641 *(uint32_t*)extraPtr = (uint32_t)strtoul(stringPtr, &newPtr, 0);
3642 break;
3643 default :
3645 "Unknown 'B' tag subtype.");
3646 break;
3647 }
3648 extraPtr += elementSize;
3649 totalInc += elementSize;
3650 stringPtr = newPtr + 1; // skip the ','
3651 }
3652 return(totalInc);
3653}
3654
3655
3656int SamRecord::getStringFromBtagBuffer(unsigned char* buffer,
3657 String& tagStr)
3658{
3659 tagStr.Clear();
3660
3661 int bufferSize = 0;
3662
3663 // 1st byte is the type.
3664 char type = *buffer;
3665 ++buffer;
3666 ++bufferSize;
3667 tagStr = type;
3668
3669 // 2nd-5th bytes are the size
3670 unsigned int numEntries = *(unsigned int *)buffer;
3671 buffer += 4;
3672 bufferSize += 4;
3673 // Num Entries is not included in the string.
3674
3675 int subtypeSize = getNumericTagTypeSize(type);
3676
3677 for(unsigned int i = 0; i < numEntries; i++)
3678 {
3679 tagStr += ',';
3680 switch(type)
3681 {
3682 case 'f':
3683 tagStr.appendFullFloat(*(float *)buffer);
3684 break;
3685 case 'c':
3686 tagStr += *(int8_t *)buffer;
3687 break;
3688 case 's':
3689 tagStr += *(int16_t *)buffer;
3690 break;
3691 case 'i':
3692 tagStr += *(int32_t *)buffer;
3693 break;
3694 case 'C':
3695 tagStr += *(uint8_t *)buffer;
3696 break;
3697 case 'S':
3698 tagStr += *(uint16_t *)buffer;
3699 break;
3700 case 'I':
3701 tagStr += *(uint32_t *)buffer;
3702 break;
3703 default :
3705 "Unknown 'B' tag subtype.");
3706 break;
3707 }
3708 buffer += subtypeSize;
3709 bufferSize += subtypeSize;
3710 }
3711 return(bufferSize);
3712}
int ifeof(IFILE file)
Check to see if we have reached the EOF (returns 0 if not EOF).
Definition: InputFile.h:654
unsigned int ifread(IFILE file, void *buffer, unsigned int size)
Read up to size bytes from the file into the buffer.
Definition: InputFile.h:600
unsigned int ifwrite(IFILE file, const void *buffer, unsigned int size)
Write the specified number of bytes from the specified buffer into the file.
Definition: InputFile.h:669
static const char UNKNOWN_QUALITY_CHAR
Character used when the quality is unknown.
Definition: BaseUtilities.h:49
bool Remove(int index)
Remove the operation at the specified index.
bool IncrementCount(int index, int increment)
Increments the count for the operation at the specified index by the specified value,...
bool Update(int index, Operation op, int count)
Updates the operation at the specified index to be the specified operation and have the specified cou...
void clear()
Clear this object so that it has no Cigar Operations.
void Set(const char *cigarString)
Sets this object to the specified cigarString.
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
Definition: Cigar.h:84
int size() const
Return the number of cigar operations.
Definition: Cigar.h:364
static bool isMatchOrMismatch(Operation op)
Return true if the specified operation is a match/mismatch operation, false if not.
Definition: Cigar.h:298
static bool foundInQuery(Operation op)
Return true if the specified operation is found in the query sequence, false if not.
Definition: Cigar.h:219
int getNumBeginClips() const
Return the number of clips that are at the beginning of the cigar.
Definition: Cigar.cpp:144
int getNumEndClips() const
Return the number of clips that are at the end of the cigar.
Definition: Cigar.cpp:166
int getExpectedReferenceBaseCount() const
Return the number of bases in the reference that this CIGAR "spans".
Definition: Cigar.cpp:120
@ insert
insertion to the reference (the query sequence contains bases that have no corresponding base in the ...
Definition: Cigar.h:91
uint32_t getNumOverlaps(int32_t start, int32_t end, int32_t queryStartPos)
Return the number of bases that overlap the reference and the read associated with this cigar that fa...
Definition: Cigar.cpp:334
void getCigarString(String &cigarString) const
Set the passed in String to the string reprentation of the Cigar operations in this object.
Definition: Cigar.cpp:52
HandlingType
This specifies how this class should respond to errors.
Definition: ErrorHandler.h:29
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition: InputFile.h:37
bool isOpen() const
Returns whether or not the file was successfully opened.
Definition: InputFile.h:423
const char * getFileName() const
Get the filename that is currently opened.
Definition: InputFile.h:473
This class allows a user to get/set the fields in a SAM/BAM Header.
Definition: SamFileHeader.h:35
int getReferenceID(const String &referenceName, bool addID=false)
Get the reference ID for the specified reference name (chromosome).
const String & getReferenceLabel(int id) const
Return the reference name (chromosome) for the specified reference id.
static void seqWithoutEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence converting '=' to the appropriate base using the reference.
static void seqWithEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence with '=' in any position where the sequence matches the reference.
int32_t getBlockSize()
Get the block size of the record (BAM format).
Definition: SamRecord.cpp:1281
uint16_t getCigarLength()
Get the length of the BAM formatted CIGAR.
Definition: SamRecord.cpp:1362
const char * getReferenceName()
Get the reference sequence name (RNAME) of the record.
Definition: SamRecord.cpp:1298
SequenceTranslation
Enum containing the settings on how to translate the sequence if a reference is available.
Definition: SamRecord.h:57
@ NONE
Leave the sequence as is.
Definition: SamRecord.h:58
@ BASES
Translate '=' to the actual base.
Definition: SamRecord.h:60
@ EQUAL
Translate bases that match the reference to '='.
Definition: SamRecord.h:59
bool setReadName(const char *readName)
Set QNAME to the passed in name.
Definition: SamRecord.cpp:193
int32_t getInsertSize()
Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN).
Definition: SamRecord.cpp:1459
int32_t get0BasedMatePosition()
Get the 0-based(BAM) leftmost mate/next fragment's position.
Definition: SamRecord.cpp:1452
int32_t get1BasedPosition()
Get the 1-based(SAM) leftmost position (POS) of the record.
Definition: SamRecord.cpp:1312
void clearTags()
Clear the tags in this record.
Definition: SamRecord.cpp:977
bool addIntTag(const char *tag, int32_t value)
Add the specified integer tag to the record.
Definition: SamRecord.cpp:647
int32_t getReferenceID()
Get the reference sequence id of the record (BAM format rid).
Definition: SamRecord.cpp:1305
bool getTagsString(const char *tags, String &returnString, char delim='\t')
Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE...
Definition: SamRecord.cpp:2082
GenomeSequence * getReference()
Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it...
Definition: SamRecord.cpp:1923
int32_t getAlignmentLength()
Returns the length of the clipped sequence, returning 0 if the cigar is '*'.
Definition: SamRecord.cpp:1493
int & getInteger(const char *tag)
Get the integer value for the specified tag, DEPRECATED, use getIntegerTag that returns a bool.
Definition: SamRecord.cpp:2350
bool setInsertSize(int32_t insertSize)
Sets the inferred insert size (ISIZE)/observed template length (TLEN).
Definition: SamRecord.cpp:336
int32_t get1BasedAlignmentEnd()
Returns the 1-based inclusive rightmost position of the clipped sequence.
Definition: SamRecord.cpp:1486
uint32_t getTagLength()
Returns the length of the BAM formatted tags.
Definition: SamRecord.cpp:1929
SamRecord()
Default Constructor.
Definition: SamRecord.cpp:34
static bool isIntegerType(char vtype)
Returns whether or not the specified vtype is an integer type.
Definition: SamRecord.cpp:2040
bool rmTag(const char *tag, char type)
Remove a tag.
Definition: SamRecord.cpp:992
bool setMateReferenceName(SamFileHeader &header, const char *mateReferenceName)
Set the mate/next fragment's reference sequence name (RNEXT) to the specified name,...
Definition: SamRecord.cpp:297
uint8_t getReadNameLength()
Get the length of the readname (QNAME) including the null.
Definition: SamRecord.cpp:1326
Cigar * getCigarInfo()
Returns a pointer to the Cigar object associated with this record.
Definition: SamRecord.cpp:1836
bool getFloatTag(const char *tag, float &tagVal)
Get the float value for the specified tag.
Definition: SamRecord.cpp:2281
SamStatus::Status writeRecordBuffer(IFILE filePtr)
Write the record as a BAM into the specified already opened file.
Definition: SamRecord.cpp:1237
const char * getMateReferenceNameOrEqual()
Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the ...
Definition: SamRecord.cpp:1420
bool setMapQuality(uint8_t mapQuality)
Set the mapping quality (MAPQ).
Definition: SamRecord.cpp:251
static bool isFloatType(char vtype)
Returns whether or not the specified vtype is a float type.
Definition: SamRecord.cpp:2052
SamStatus::Status setBuffer(const char *fromBuffer, uint32_t fromBufferSize, SamFileHeader &header)
Sets the SamRecord to contain the information in the BAM formatted fromBuffer.
Definition: SamRecord.cpp:525
int32_t get1BasedUnclippedStart()
Returns the 1-based inclusive left-most position adjusted for clipped bases.
Definition: SamRecord.cpp:1519
bool addTag(const char *tag, char vtype, const char *value)
Add the specified tag,vtype,value to the record.
Definition: SamRecord.cpp:791
uint16_t getBin()
Get the BAM bin for the record.
Definition: SamRecord.cpp:1347
bool isValid(SamFileHeader &header)
Returns whether or not the record is valid, setting the status to indicate success or failure.
Definition: SamRecord.cpp:161
int32_t getMateReferenceID()
Get the mate reference id of the record (BAM format: mate_rid/next_refID).
Definition: SamRecord.cpp:1438
bool getFields(bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality)
Returns the values of all fields except the tags.
Definition: SamRecord.cpp:1866
bool set0BasedMatePosition(int32_t matePosition)
Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value.
Definition: SamRecord.cpp:328
void resetRecord()
Reset the fields of the record to a default value.
Definition: SamRecord.cpp:91
bool setFlag(uint16_t flag)
Set the bitwise FLAG to the specified value.
Definition: SamRecord.cpp:215
bool set1BasedPosition(int32_t position)
Set the leftmost position (POS) using the specified 1-based (SAM format) value.
Definition: SamRecord.cpp:236
SamStatus::Status setBufferFromFile(IFILE filePtr, SamFileHeader &header)
Read the BAM record from a file.
Definition: SamRecord.cpp:558
uint16_t getFlag()
Get the flag (FLAG).
Definition: SamRecord.cpp:1384
const void * getRecordBuffer()
Get a const pointer to the buffer that contains the BAM representation of the record.
Definition: SamRecord.cpp:1204
void setSequenceTranslation(SequenceTranslation translation)
Set the type of sequence translation to use when getting the sequence.
Definition: SamRecord.cpp:187
int32_t get1BasedMatePosition()
Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT).
Definition: SamRecord.cpp:1445
int32_t get0BasedUnclippedEnd()
Returns the 0-based inclusive right-most position adjusted for clipped bases.
Definition: SamRecord.cpp:1526
bool shiftIndelsLeft()
Shift the indels (if any) to the left by updating the CIGAR.
Definition: SamRecord.cpp:368
int * getIntegerTag(const char *tag)
Get the integer value for the specified tag, DEPRECATED, use one that returns a bool (success/failure...
Definition: SamRecord.cpp:2216
const SamStatus & getStatus()
Returns the status associated with the last method that sets the status.
Definition: SamRecord.cpp:2403
static bool isCharType(char vtype)
Returns whether or not the specified vtype is a char type.
Definition: SamRecord.cpp:2062
bool setCigar(const char *cigar)
Set the CIGAR to the specified SAM formatted cigar string.
Definition: SamRecord.cpp:259
int32_t get1BasedUnclippedEnd()
Returns the 1-based inclusive right-most position adjusted for clipped bases.
Definition: SamRecord.cpp:1535
uint32_t getNumOverlaps(int32_t start, int32_t end)
Return the number of bases in this read that overlap the passed in region.
Definition: SamRecord.cpp:1853
const char * getMateReferenceName()
Get the mate/next fragment's reference sequence name (RNEXT).
Definition: SamRecord.cpp:1410
bool checkTag(const char *tag, char type)
Check if the specified tag contains a value of the specified vtype.
Definition: SamRecord.cpp:2381
bool getNextSamTag(char *tag, char &vtype, void **value)
Get the next tag from the record.
Definition: SamRecord.cpp:1962
void setReference(GenomeSequence *reference)
Set the reference to the specified genome sequence object.
Definition: SamRecord.cpp:178
bool setSequence(const char *seq)
Sets the sequence (SEQ) to the specified SAM formatted sequence string.
Definition: SamRecord.cpp:344
int32_t get0BasedUnclippedStart()
Returns the 0-based inclusive left-most position adjusted for clipped bases.
Definition: SamRecord.cpp:1506
int32_t getReadLength()
Get the length of the read.
Definition: SamRecord.cpp:1391
int32_t get0BasedAlignmentEnd()
Returns the 0-based inclusive rightmost position of the clipped sequence.
Definition: SamRecord.cpp:1467
const String * getStringTag(const char *tag)
Get the string value for the specified tag.
Definition: SamRecord.cpp:2180
bool set1BasedMatePosition(int32_t matePosition)
Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value...
Definition: SamRecord.cpp:322
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
Definition: SamRecord.cpp:1319
const char * getCigar()
Returns the SAM formatted CIGAR string.
Definition: SamRecord.cpp:1555
uint8_t getMapQuality()
Get the mapping quality (MAPQ) of the record.
Definition: SamRecord.cpp:1340
const String & getString(const char *tag)
Get the string value for the specified tag.
Definition: SamRecord.cpp:2314
bool set0BasedPosition(int32_t position)
Set the leftmost position using the specified 0-based (BAM format) value.
Definition: SamRecord.cpp:242
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
Definition: SamRecord.cpp:1542
void resetTagIter()
Reset the tag iterator to the beginning of the tags.
Definition: SamRecord.cpp:2034
bool setQuality(const char *quality)
Sets the quality (QUAL) to the specified SAM formatted quality string.
Definition: SamRecord.cpp:357
bool setReferenceName(SamFileHeader &header, const char *referenceName)
Set the reference sequence name (RNAME) to the specified name, using the header to determine the refe...
Definition: SamRecord.cpp:223
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
Definition: SamRecord.cpp:1638
~SamRecord()
Destructor.
Definition: SamRecord.cpp:72
const char * getSequence()
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTran...
Definition: SamRecord.cpp:1568
bool rmTags(const char *tags)
Remove tags.
Definition: SamRecord.cpp:1083
static bool isStringType(char vtype)
Returns whether or not the specified vtype is a string type.
Definition: SamRecord.cpp:2072
The SamValidationErrors class is a container class that holds SamValidationError Objects,...
void getErrorString(std::string &errorString) const
Append the error messages contained in this container to the passed in string.
static bool isValid(SamFileHeader &samHeader, SamRecord &samRecord, SamValidationErrors &validationErrors)
Validates whether or not the specified SamRecord is valid, calling all of the other validations.
This class is used to track the status results of some methods in the BAM classes.
Definition: StatGenStatus.h:27
Status
Return value enum for StatGenFile methods.
Definition: StatGenStatus.h:32
@ FAIL_MEM
fail a memory allocation.
Definition: StatGenStatus.h:45
@ NO_MORE_RECS
NO_MORE_RECS: failed to read a record since there are no more to read either in the file or section i...
Definition: StatGenStatus.h:36
@ SUCCESS
method completed successfully.
Definition: StatGenStatus.h:32
@ FAIL_IO
method failed due to an I/O issue.
Definition: StatGenStatus.h:37
@ INVALID
invalid other than for sorting.
Definition: StatGenStatus.h:44
@ FAIL_PARSE
failed to parse a record/header - invalid format.
Definition: StatGenStatus.h:42
@ FAIL_ORDER
FAIL_ORDER: method failed because it was called out of order, like trying to read a file without open...
Definition: StatGenStatus.h:41
void setStatus(Status newStatus, const char *newMessage)
Set the status with the specified status enum and message.
Status getStatus() const
Return the enum for this status object.
void addError(Status newStatus, const char *newMessage)
Add the specified error message to the status message, setting the status to newStatus if the current...
Structure of a BAM record.
Definition: SamRecord.h:34