libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
mzidentmlreader.h
Go to the documentation of this file.
1/**
2 * \file src/input/mzidentml/mzidentmlreader.h
3 * \date 24/11/2022
4 * \author Olivier Langella
5 * \brief new method to read mzIdentML XML files
6 */
7
8
9/*******************************************************************************
10 * Copyright (c) 2022 Olivier Langella
11 *<Olivier.Langella@universite-paris-saclay.fr>.
12 *
13 * This file is part of i2MassChroQ.
14 *
15 * i2MassChroQ is free software: you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation, either version 3 of the License, or
18 * (at your option) any later version.
19 *
20 * i2MassChroQ is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with i2MassChroQ. If not, see <http://www.gnu.org/licenses/>.
27 *
28 ******************************************************************************/
29#pragma once
30
31#include <QFileInfo>
32
38
39namespace pappso
40{
41namespace cbor
42{
43namespace psm
44{
45
46
47/**
48 * @brief reader for mzIdentML XML files (@todo write complete docs)
49 */
51{
52 public:
53 /**
54 * Constructor taking the UI monitor, the CBOR output writer and the mzIdentML file
55 */
58 const QFileInfo &mzident_file);
59 /**
60 * Destructor
61 */
62 virtual ~MzIdentMlReader();
63
64
65 protected:
66 virtual void readStream() override;
67
68
69 private:
74 void readDBSequence();
75 void readPeptide();
78 void readInputs();
79 void readAnalysisData();
80 bool readSearchDatabase();
81 void readSpectraData();
84
85 void finalDebrief();
86
87
88 /** \brief identification engine (search software) identifier
89 *
90 */
91 enum class IdentificationEngine : std::int8_t
92 {
93 unknown = 0, ///< unknown identification engine
94 XTandem = 1, ///< MS:1001476 X!Tandem was used to analyze the spectra.
95 mascot = 2, ///< MS:1001207 The name of the Mascot search engine.
96 peptider = 3, ///< peptider
97 OMSSA = 4, ///< MS:1001475 Open Mass Spectrometry Search Algorithm was used to
98 ///< analyze the spectra.
99 SEQUEST = 5, ///< MS:1001208 The name of the SEQUEST search engine.
100 Comet = 6, ///< MS:1002251 Comet open-source sequence search engine developed
101 ///< at the University of Washington. PMID:23148064
102 Morpheus = 7, ///< MS:1002661 "Morpheus search engine." [PMID:23323968]
103 MSGFplus = 8, ///< MS:1002048 "MS-GF+ software used to analyze the spectra." [PSI:PI]
104 SpecOMS = 9, ///< SpecOMS C++ implementation
105 sage = 10, ///< sage
106 PEAKS_Studio = 11, ///< PEAKS Studio
107 };
108
109
110 struct CvParam
111 {
112 QString cvRef;
113 QString accession;
114 QString name;
115 QString value;
117 QString unitName;
118 QString unitCvRef;
119
120 QString toString() const;
121 };
122
124 {
126 std::size_t location;
128 };
129
131 {
132 QString accession;
134 QString sequence;
135 QString description;
136 std::shared_ptr<Protein> protein_sp;
138 std::vector<CvParam> cvParamList;
139 };
140
142 {
143 QString file;
144 };
145
147 {
148 QString file;
149 QString name;
150 };
152 {
155 std::size_t start;
156 std::size_t end;
158 };
159
161 {
162 QString name;
163 QString value;
164 QString toString() const;
165 };
166
168 {
169 unsigned int chargeState;
173 std::vector<MzidPeptideEvidence> mzidPeptideEvidenceList;
174
175 std::vector<CvParam> cvParamList;
176 std::vector<UserParam> userParamList;
177 };
178
180 {
181 QString id;
182 QString spectrumID;
183 // IdentificationMzIdentMlFileSp mzident_source_sp;
184 // IdentificationGroup *identification_group_p;
185 std::size_t scanNum;
186 std::size_t spectrumIndex;
187 bool isSpectrumIndex = false;
189 std::vector<SpectrumIdentificationItem> spectrumIdentificationItemList;
190
191 std::vector<CvParam> cvParamList;
192 std::vector<UserParam> userParamList;
193 };
194
196
198
199
200 void readSpectrumIdentificationItem(SpectrumIdentificationResult &spectrum_identification_result);
201
202 // void
203 // processSpectrumIdentificationItem(SpectrumIdentificationResult
204 // &spectrum_identification_result,
205 // const SpectrumIdentificationItem
206 // &spectrumIdentificationItem);
207
209 const SpectrumIdentificationResult &spectrum_identificatio_result);
210
211
212 bool writeTandemEval(const std::vector<CvParam> &cv_param_list);
213
214 void
215 writeSpectrumIdentificationItem(const SpectrumIdentificationItem &spectrum_identification_item);
216
217 private:
219 // Project *mp_project;
224
225
226 /** @brief store association between xml ID and an identification engine
227 */
228 std::map<QString, IdentificationEngine> m_IdentificationEngineMap;
229
230
231 /** @brief store association between xml ID and fasta files
232 */
233 std::map<QString, MzidSearchDatabase> m_mzidSearchDatabaseIdMap;
234
235
236 /** @brief store association between xml ID and peptide sequence
237 */
238 std::map<QString, PeptideSp> m_PeptideIdMap;
239
240
241 /** @brief store association between xml ID and peptide evidence
242 */
243 std::map<QString, MzidPeptideEvidence> m_MzidPeptideEvidenceIdMap;
244
245
246 /** @brief store association between xml ID and SpectraData
247 */
248 std::map<QString, MzidSpectraData> m_mzidSpectraDataIdMap;
249
250 /** @brief store association between xml ID and DBSequence
251 */
252 std::map<QString, MzidDBSequence> m_MzidDBSequenceIdMap;
253
254 /** @brief associates database ref id to protein shared pointer
255 * because the search database id is not described before the protein (silly
256 * idea IMHO), we keep the association of protein to database in this map until
257 * the real search database definition appears. We then have to reprocess each
258 * protein to set the right fasta file pointer
259 */
260 std::map<QString, std::vector<ProteinSp>> m_searchDatabase_ref2proteinList;
261
262
263 /** @brief store all identification results by spectra xml id
264 */
265 std::map<QString, std::vector<SpectrumIdentificationResult>>
267
268 QFileInfo m_mzidentFile;
270};
271} // namespace psm
272} // namespace cbor
273} // namespace pappso
overrides QCborStreamWriter base class to provide convenient functions
pappso::cbor::CborStreamWriter * mp_cborWriter
std::map< QString, PeptideSp > m_PeptideIdMap
store association between xml ID and peptide sequence
MzIdentMlReader(pappso::UiMonitorInterface *p_monitor, pappso::cbor::CborStreamWriter *p_output, const QFileInfo &mzident_file)
IdentificationEngine m_identificationEngine
@ MSGFplus
MS:1002048 "MS-GF+ software used to analyze the spectra." [PSI:PI].
@ SEQUEST
MS:1001208 The name of the SEQUEST search engine.
@ XTandem
MS:1001476 X!Tandem was used to analyze the spectra.
@ Morpheus
MS:1002661 "Morpheus search engine." [PMID:23323968].
@ mascot
MS:1001207 The name of the Mascot search engine.
bool writeTandemEval(const std::vector< CvParam > &cv_param_list)
std::map< QString, std::vector< ProteinSp > > m_searchDatabase_ref2proteinList
associates database ref id to protein shared pointer because the search database id is not described ...
void writeSpectrumIdentificationItem(const SpectrumIdentificationItem &spectrum_identification_item)
std::map< QString, MzidPeptideEvidence > m_MzidPeptideEvidenceIdMap
store association between xml ID and peptide evidence
pappso::UiMonitorInterface * mp_monitor
void writeSpectrumIdentificationResult(const SpectrumIdentificationResult &spectrum_identificatio_result)
std::map< QString, IdentificationEngine > m_IdentificationEngineMap
store association between xml ID and an identification engine
std::map< QString, MzidSpectraData > m_mzidSpectraDataIdMap
store association between xml ID and SpectraData
std::map< QString, MzidSearchDatabase > m_mzidSearchDatabaseIdMap
store association between xml ID and fasta files
std::map< QString, std::vector< SpectrumIdentificationResult > > m_spectrumIdentificationResultBySpectraIdMap
store all identification results by spectra xml id
void readSpectrumIdentificationItem(SpectrumIdentificationResult &spectrum_identification_result)
std::map< QString, MzidDBSequence > m_MzidDBSequenceIdMap
store association between xml ID and DBSequence
store PsmProtein in a map with accession as key
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< const Peptide > PeptideSp
std::shared_ptr< const Protein > ProteinSp
shared pointer on a Protein object
Definition protein.h:47
std::vector< SpectrumIdentificationItem > spectrumIdentificationItemList