libStatGen Software
1
SamFile.h
1
/*
2
* Copyright (C) 2010 Regents of the University of Michigan
3
*
4
* This program is free software: you can redistribute it and/or modify
5
* it under the terms of the GNU General Public License as published by
6
* the Free Software Foundation, either version 3 of the License, or
7
* (at your option) any later version.
8
*
9
* This program is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
* GNU General Public License for more details.
13
*
14
* You should have received a copy of the GNU General Public License
15
* along with this program. If not, see <http://www.gnu.org/licenses/>.
16
*/
17
18
#ifndef __SAM_FILE_H__
19
#define __SAM_FILE_H__
20
21
#include "SamStatus.h"
22
#include "InputFile.h"
23
#include "SamFileHeader.h"
24
#include "SamRecord.h"
25
#include "GenericSamInterface.h"
26
#include "BamIndex.h"
27
#include "SamStatistics.h"
28
29
/// Allows the user to easily read/write a SAM/BAM file.
/// The SamFile class contains additional functionality that allows a user
/// to read specific sections of sorted & indexed BAM files. In order to
/// take advantage of this capability, the index file must be read prior to
/// setting the read section. This logic saves the time of having to read
/// the entire file and takes advantage of the seeking capability of BGZF.
35
class
SamFile
36
{
37
public
:
38
/// Mode in which a file is opened.
enum OpenType {
    READ,  ///< open the file for reading.
    WRITE  ///< open the file for writing.
};
43
44
45
/// Sort order expected of the records in the file.
enum SortedType {
    UNSORTED = 0, ///< file is not sorted.
    FLAG,         ///< the SO flag from the header indicates the sort type.
    COORDINATE,   ///< file is sorted by coordinate.
    QUERY_NAME    ///< file is sorted by queryname.
};
52
53
/// Default Constructor, initializes the variables, but does not open
54
/// any files.
55
SamFile
();
56
57
/// Constructor that sets the error handling type.
58
/// \param errorHandlingType how to handle errors.
59
SamFile
(
ErrorHandler::HandlingType
errorHandlingType);
60
61
/// Constructor that opens the specified file based on the specified mode
62
/// (READ/WRITE), aborts if the file could not be opened.
63
/// \param filename name of the file to open.
64
/// \param mode mode to use for opening the file.
65
SamFile
(
const
char
* filename,
OpenType
mode);
66
67
/// Constructor that opens the specified file based on the specified mode
68
/// (READ/WRITE) and handles errors per the specified handleType.
69
/// \param filename name of the file to open.
70
/// \param mode mode to use for opening the file.
71
/// \param errorHandlingType how to handle errors.
72
SamFile
(
const
char
* filename,
OpenType
mode,
73
ErrorHandler::HandlingType
errorHandlingType);
74
75
/// Constructor that opens the specified file based on the specified mode
76
/// (READ/WRITE) and reads the header, aborts if the file could not be
77
/// opened or the header not read.
78
/// \param filename name of the file to open.
79
/// \param mode mode to use for opening the file.
80
/// \param header to read into or write from
81
SamFile
(
const
char
* filename,
OpenType
mode,
SamFileHeader
* header);
82
83
/// Constructor that opens the specified file based on the specified mode
84
/// (READ/WRITE) and reads the header, handling errors per the specified
85
/// handleType.
86
/// \param filename name of the file to open.
87
/// \param mode mode to use for opening the file.
88
/// \param errorHandlingType how to handle errors.
89
/// \param header to read into or write from
90
SamFile
(
const
char
* filename,
OpenType
mode,
91
ErrorHandler::HandlingType
errorHandlingType,
92
SamFileHeader
* header);
93
94
/// Destructor
95
virtual
~SamFile
();
96
97
/// Open a sam/bam file for reading with the specified filename,
98
/// determing the type of file and SAM/BAM by reading the file
99
/// (if not stdin).
100
/// \param filename the sam/bam file to open for reading.
101
/// \param header to read into or write from (optional)
102
/// \return true = success; false = failure.
103
bool
OpenForRead
(
const
char
* filename,
SamFileHeader
* header = NULL);
104
105
/// Open a sam/bam file for writing with the specified filename,
106
/// determining SAM/BAM from the extension (.bam = BAM).
107
/// \param filename the sam/bam file to open for writing.
108
/// \param header to read into or write from (optional)
109
/// \return true = success; false = failure.
110
bool
OpenForWrite
(
const
char
* filename,
SamFileHeader
* header = NULL);
111
112
/// Read the specified bam index file. The index must be read before
/// setting a read section, for seeking and reading portions of a bam file.
/// \param filename the name of the bam index file to be read.
/// \return true = success; false = failure.
bool ReadBamIndex(const char* filename);
117
118
/// Read the bam index file, deriving its name from the BAM filename.
/// The index must be read before setting a read section, for seeking
/// and reading portions of a bam file.
/// Must be called after opening the BAM file, since the BAM filename
/// is used as the base name for the index file.
/// First filename.bam.bai is tried. If that fails, the name without the
/// .bam extension, filename.bai, is tried.
/// \return true = success; false = failure.
bool ReadBamIndex();
127
128
/// Sets the reference to the specified genome sequence object.
129
/// \param reference pointer to the GenomeSequence object.
130
void
SetReference
(
GenomeSequence
* reference);
131
132
/// Set the type of sequence translation to use when reading
133
/// the sequence. Passed down to the SamRecord when it is read.
134
/// The default type (if this method is never called) is
135
/// NONE (the sequence is left as-is).
136
/// \param translation type of sequence translation to use.
137
void
SetReadSequenceTranslation
(
SamRecord::SequenceTranslation
translation);
138
139
/// Set the type of sequence translation to use when writing
140
/// the sequence. Passed down to the SamRecord when it is written.
141
/// The default type (if this method is never called) is
142
/// NONE (the sequence is left as-is).
143
/// \param translation type of sequence translation to use.
144
void
SetWriteSequenceTranslation
(
SamRecord::SequenceTranslation
translation);
145
146
/// Close the file if there is one open.
void Close();
148
149
/// Reports whether or not the file has been opened successfully.
/// \return true = open; false = not open.
bool IsOpen();
152
153
/// Reports whether or not the end of the file has been reached.
/// If the file is not open, true is returned.
/// \return true = EOF; false = not eof.
bool IsEOF();
157
158
/// Reports whether or not the file has been opened for streaming
/// input/output.
/// \return true = stream; false = not a stream.
bool IsStream();
162
163
/// Reads the header section from the file and stores it in
164
/// the passed in header.
165
/// \return true = success; false = failure.
166
bool
ReadHeader
(
SamFileHeader
& header);
167
168
/// Writes the specified header into the file.
169
/// \return true = success; false = failure.
170
bool
WriteHeader
(
SamFileHeader
& header);
171
172
/// Reads the next record from the file & stores it in the passed in record.
173
///
174
/// If it is an indexed BAM file and SetReadSection was called,
175
/// only alignments in the section specified by SetReadSection are read.
176
/// If they all have already been read, this method returns false.
177
///
178
/// Validates that the record is sorted according to the value set by
179
/// setSortedValidation. No sorting validation is done if specified to be
180
/// unsorted, or setSortedValidation was never called.
181
/// \return true = record was successfully set (and sorted if applicable),
182
/// false = record was not successfully set
183
/// (or not sorted as expected).
184
bool
ReadRecord
(
SamFileHeader
& header,
SamRecord
& record);
185
186
/// Writes the specified record into the file.
187
/// Validates that the record is sorted according to the value set by
188
/// setSortedValidation. No sorting validation is done if specified to
189
/// be unsorted, or setSortedValidation was never called. Returns false
190
/// and does not write the record if the record was not properly sorted.
191
/// \return true = success; false = failure.
192
bool
WriteRecord
(
SamFileHeader
& header,
SamRecord
& record);
193
194
/// Set the flag to validate that the file is sorted as it is read/written.
195
/// Must be called after the file has been opened.
196
/// Sorting validation is reset everytime SetReadPosition is called since
197
/// it can jump around in the file.
198
/// \param sortType specifies the type of sort to be checked for.
199
void
setSortedValidation
(
SortedType
sortType);
200
201
/// Return the number of records that have been read/written so far.
uint32_t GetCurrentRecordCount();
203
204
/// Deprecated, get the Status of the last call that sets status.
205
/// To remain backwards compatable - will be removed later.
206
inline
SamStatus::Status
GetFailure
()
207
{
208
return
(
GetStatus
());
209
}
210
211
/// Get the Status of the last call that sets status.
212
inline
SamStatus::Status
GetStatus
()
213
{