libStatGen Software 1
SamFile.h
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __SAM_FILE_H__
19#define __SAM_FILE_H__
20
21#include "SamStatus.h"
22#include "InputFile.h"
23#include "SamFileHeader.h"
24#include "SamRecord.h"
25#include "GenericSamInterface.h"
26#include "BamIndex.h"
27#include "SamStatistics.h"
28
29/// Allows the user to easily read/write a SAM/BAM file.
30/// The SamFile class contains additional functionality that allows a user
31/// to read specific sections of sorted & indexed BAM files. In order to
32/// take advantage of this capability, the index file must be read prior to
33/// setting the read section. This logic saves the time of having to read
34/// the entire file and takes advantage of the seeking capability of BGZF.
36{
37public:
/// Enum for indicating whether to open the file for read or write.
/// This is the type of the `mode` argument taken by the SamFile
/// constructors that open a file.
enum OpenType {
    READ, ///< open for reading.
    WRITE ///< open for writing.
};
43
44
/// Enum for indicating the type of sort expected in the file.
/// Passed to setSortedValidation to choose which ordering is checked as
/// records are read/written; no sorting validation is done for UNSORTED.
enum SortedType {
    UNSORTED = 0, ///< file is not sorted.
    FLAG,         ///< SO flag from the header indicates the sort type.
    COORDINATE,   ///< file is sorted by coordinate.
    QUERY_NAME    ///< file is sorted by queryname.
};
52
53 /// Default Constructor, initializes the variables, but does not open
54 /// any files.
55 SamFile();
56
57 /// Constructor that sets the error handling type.
58 /// \param errorHandlingType how to handle errors.
59 SamFile(ErrorHandler::HandlingType errorHandlingType);
60
61 /// Constructor that opens the specified file based on the specified mode
62 /// (READ/WRITE), aborts if the file could not be opened.
63 /// \param filename name of the file to open.
64 /// \param mode mode to use for opening the file.
65 SamFile(const char* filename, OpenType mode);
66
67 /// Constructor that opens the specified file based on the specified mode
68 /// (READ/WRITE) and handles errors per the specified errorHandlingType.
69 /// \param filename name of the file to open.
70 /// \param mode mode to use for opening the file.
71 /// \param errorHandlingType how to handle errors.
72 SamFile(const char* filename, OpenType mode,
73 ErrorHandler::HandlingType errorHandlingType);
74
75 /// Constructor that opens the specified file based on the specified mode
76 /// (READ/WRITE) and reads the header, aborts if the file could not be
77 /// opened or the header not read.
78 /// \param filename name of the file to open.
79 /// \param mode mode to use for opening the file.
80 /// \param header to read into or write from
81 SamFile(const char* filename, OpenType mode, SamFileHeader* header);
82
83 /// Constructor that opens the specified file based on the specified mode
84 /// (READ/WRITE) and reads the header, handling errors per the specified
85 /// errorHandlingType.
86 /// \param filename name of the file to open.
87 /// \param mode mode to use for opening the file.
88 /// \param errorHandlingType how to handle errors.
89 /// \param header to read into or write from
90 SamFile(const char* filename, OpenType mode,
91 ErrorHandler::HandlingType errorHandlingType,
92 SamFileHeader* header);
93
94 /// Destructor
95 virtual ~SamFile();
96
97 /// Open a sam/bam file for reading with the specified filename,
98 /// determining the type of file and SAM/BAM by reading the file
99 /// (if not stdin).
100 /// \param filename the sam/bam file to open for reading.
101 /// \param header to read into or write from (optional)
102 /// \return true = success; false = failure.
103 bool OpenForRead(const char * filename, SamFileHeader* header = NULL);
104
105 /// Open a sam/bam file for writing with the specified filename,
106 /// determining SAM/BAM from the extension (.bam = BAM).
107 /// \param filename the sam/bam file to open for writing.
108 /// \param header to read into or write from (optional)
109 /// \return true = success; false = failure.
110 bool OpenForWrite(const char * filename, SamFileHeader* header = NULL);
111
112 /// Read the specified bam index file. It must be read prior to setting a
113 /// read section, for seeking and reading portions of a bam file.
114 /// \param filename the name of the bam index file to be read.
115 /// \return true = success; false = failure.
116 bool ReadBamIndex(const char * filename);
117
118 /// Read the bam index file using the BAM filename as a base.
119 /// It must be read prior to setting a read section, for seeking
120 /// and reading portions of a bam file.
121 /// Must be read after opening the BAM file since it uses the
122 /// BAM filename as a base name for the index file.
123 /// First it tries filename.bam.bai. If that fails, it tries
124 /// it without the .bam extension, filename.bai.
125 /// \return true = success; false = failure.
126 bool ReadBamIndex();
127
128 /// Sets the reference to the specified genome sequence object.
129 /// \param reference pointer to the GenomeSequence object.
130 void SetReference(GenomeSequence* reference);
131
132 /// Set the type of sequence translation to use when reading
133 /// the sequence. Passed down to the SamRecord when it is read.
134 /// The default type (if this method is never called) is
135 /// NONE (the sequence is left as-is).
136 /// \param translation type of sequence translation to use.
138
139 /// Set the type of sequence translation to use when writing
140 /// the sequence. Passed down to the SamRecord when it is written.
141 /// The default type (if this method is never called) is
142 /// NONE (the sequence is left as-is).
143 /// \param translation type of sequence translation to use.
145
146 /// Close the file if there is one open.
147 void Close();
148
149 /// Returns whether or not the file has been opened successfully.
150 /// \return true = open; false = not open.
151 bool IsOpen();
152
153 /// Returns whether or not the end of the file has been reached.
154 /// \return true = EOF; false = not eof.
155 /// If the file is not open, true is returned.
156 bool IsEOF();
157
158 /// Returns whether or not the file has been opened for streaming
159 /// input/output.
160 /// \return true = stream; false = not a stream.
161 bool IsStream();
162
163 /// Reads the header section from the file and stores it in
164 /// the passed in header.
165 /// \return true = success; false = failure.
166 bool ReadHeader(SamFileHeader& header);
167
168 /// Writes the specified header into the file.
169 /// \return true = success; false = failure.
170 bool WriteHeader(SamFileHeader& header);
171
172 /// Reads the next record from the file & stores it in the passed in record.
173 ///
174 /// If it is an indexed BAM file and SetReadSection was called,
175 /// only alignments in the section specified by SetReadSection are read.
176 /// If they all have already been read, this method returns false.
177 ///
178 /// Validates that the record is sorted according to the value set by
179 /// setSortedValidation. No sorting validation is done if specified to be
180 /// unsorted, or setSortedValidation was never called.
181 /// \return true = record was successfully set (and sorted if applicable),
182 /// false = record was not successfully set
183 /// (or not sorted as expected).
184 bool ReadRecord(SamFileHeader& header, SamRecord& record);
185
186 /// Writes the specified record into the file.
187 /// Validates that the record is sorted according to the value set by
188 /// setSortedValidation. No sorting validation is done if specified to
189 /// be unsorted, or setSortedValidation was never called. Returns false
190 /// and does not write the record if the record was not properly sorted.
191 /// \return true = success; false = failure.
192 bool WriteRecord(SamFileHeader& header, SamRecord& record);
193
194 /// Set the flag to validate that the file is sorted as it is read/written.
195 /// Must be called after the file has been opened.
196 /// Sorting validation is reset every time SetReadPosition is called since
197 /// it can jump around in the file.
198 /// \param sortType specifies the type of sort to be checked for.
199 void setSortedValidation(SortedType sortType);
200
201 /// Return the number of records that have been read/written so far.
202 uint32_t GetCurrentRecordCount();
203
204 /// Deprecated, get the Status of the last call that sets status.
205 /// To remain backwards compatible - will be removed later.
207 {
208 return(GetStatus());
209 }
210
211 /// Get the Status of the last call that sets status.
213 {