libStatGen Software 1
SamFilter.h
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __SAM_FILTER_H__
19#define __SAM_FILTER_H__
20
21#include "SamRecord.h"
22#include "GenomeSequence.h"
23
24/// Class for helping to filter a SAM/BAM record.
25class SamFilter
26{
27public:
28 /// Enum describing what sort of filtering was done.
29 enum FilterStatus {
30 NONE, ///< The filter did not affect the read.
31 CLIPPED, ///< Filtering clipped the read.
32 FILTERED ///< Filtering caused the read to be modified to unmapped.
33 };
34
35 /// Clip the read based on the specified mismatch threshold.
36 /// \return how the read was affected,
37 /// NONE if the read was not modified,
38 /// CLIPPED if the read was clipped,
39 /// FILTERED if the whole read would have been clipped so instead the
40 /// read was modified to unmapped.
41 static FilterStatus clipOnMismatchThreshold(SamRecord& record,
42 GenomeSequence& refSequence,
43 double mismatchThreshold);
44
45 /// Soft clip the record from the front and/or the back.
46 /// \param record record to be clipped (input/output parameter).
47 /// \param numFrontClips number of bases that should be clipped from the
48 /// front of the sequence read. (total count, including any that are
49 /// already clipped.)
50 /// \param numBackClips number of bases that should be clipped from the
51 /// back of the sequence read. (total count, including any that are
52 /// already clipped.)
53 static FilterStatus softClip(SamRecord& record,
54 int32_t numFrontClips,
55 int32_t numBackClips);
56
57 /// Soft clip the cigar from the front and/or the back, writing the value
58 /// into the new cigar. updatedCigar & startPos are only updated if
59 /// the returned FilterStatus is CLIPPED.
60 /// \param oldCigar cigar prior to clipping
61 /// \param numFrontClips number of bases that should be clipped from the
62 /// front of the sequence read. (total count, including any that are
63 /// already clipped.)
64 /// \param numBackClips number of bases that should be clipped from the
65 /// back of the sequence read. (total count, including any that are
66 /// already clipped.)
67 /// \param startPos 0-based start position associated with the
68 /// cigar prior to updating (input) and set to the 0-based start position
69 /// after updating (output) the cigar if it was CLIPPED.
70 /// \param updatedCigar set to the clipped cigar if CLIPPED (output param).
71 static FilterStatus softClip(Cigar& oldCigar,
72 int32_t numFrontClips,
73 int32_t numBackClips,
74 int32_t& startPos,
75 CigarRoller& updatedCigar);
76
77 /// Filter the read based on the specified quality threshold.
78 /// \return how the read was affected,
79 /// NONE if the read was not modified,
80 /// FILTERED if the read was modified to unmapped because it was over
81 /// the quality threshold.
82 static FilterStatus filterOnMismatchQuality(SamRecord& record,
83 GenomeSequence& refSequence,
84 uint32_t qualityThreshold,
85 uint8_t defaultQualityInt);
86
87 /// Get the sum of the qualities of all mismatches in the record.
88 /// \param record record on which to calculate the sum of the mismatch qualities
89 /// \param refSequence reference to use to check for mismatches.
90 /// \param defaultQualityInt default value to use for the quality if no
91 /// quality was specified in the read.
92 /// \return sum of the qualities of mismatches
93 static uint32_t sumMismatchQuality(SamRecord& record,
94 GenomeSequence& refSequence,
95 uint8_t defaultQualityInt);
96
97 /// Filter the read by marking it as unmapped.
98 static void filterRead(SamRecord& record);
99};
100
101#endif
102
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object....
Definition: CigarRoller.h:67
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
Definition: Cigar.h:84
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
Class for helping to filter a SAM/BAM record.
Definition: SamFilter.h:26
FilterStatus
Enum describing what sort of filtering was done.
Definition: SamFilter.h:29
@ NONE
The filter did not affect the read.
Definition: SamFilter.h:30
@ FILTERED
Filtering caused the read to be modified to unmapped.
Definition: SamFilter.h:32
@ CLIPPED
Filtering clipped the read.
Definition: SamFilter.h:31
static FilterStatus softClip(SamRecord &record, int32_t numFrontClips, int32_t numBackClips)
Soft clip the record from the front and/or the back.
Definition: SamFilter.cpp:155
static uint32_t sumMismatchQuality(SamRecord &record, GenomeSequence &refSequence, uint8_t defaultQualityInt)
Get the sum of the qualities of all mismatches in the record.
Definition: SamFilter.cpp:451
static FilterStatus clipOnMismatchThreshold(SamRecord &record, GenomeSequence &refSequence, double mismatchThreshold)
Clip the read based on the specified mismatch threshold.
Definition: SamFilter.cpp:27
static void filterRead(SamRecord &record)
Filter the read by marking it as unmapped.
Definition: SamFilter.cpp:486
static FilterStatus filterOnMismatchQuality(SamRecord &record, GenomeSequence &refSequence, uint32_t qualityThreshold, uint8_t defaultQualityInt)
Filter the read based on the specified quality threshold.
Definition: SamFilter.cpp:430
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition: SamRecord.h:52