libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
mzmlconvert.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/cbor/mzcbor/mzmlconvert.cpp
3 * \date 19/11/2025
4 * \author Olivier Langella
5 * \brief convert mzML to mzcbor
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "mzmlconvert.h"
29#include <QDebug>
30#include <qlogging.h>
31#include <QObject>
32#include <zlib.h>
34#include "pappsomspp/config.h"
35#include "binarydataarray.h"
36#include "cvparam.h"
37
38
// Constructor of MzmlConvert. The signature line is not part of this listing;
// the cross-reference index of this page gives it as
// MzmlConvert(pappso::UiMonitorInterface *p_monitor, pappso::cbor::CborStreamWriter *p_output).
// Both pointers are stored as given. Some body lines (43, 52, 55, 56) are
// also missing from this listing.
41 : mp_monitor(p_monitor), mp_cborWriter(p_output)
42{
// Names of the XML elements that insideElement() writes as items of a CBOR
// array (consecutive siblings with the same name share one array) rather
// than as a single keyed entry.
44 m_elementToStoreInArray << "cv" << "userParam" << "cvParam" << "binaryDataArray" << "spectrum"
45 << "sourceFile"
46 << "referenceableParamGroup" << "software" << "instrumentConfiguration"
47 << "processingMethod" << "dataProcessing" << "scan" << "scanWindow"
48 << "precursor" << "selectedIon";
49
50
// Start from empty working containers.
51 m_elementStash.clear();
53 m_doubleArray.clear();
54 m_runIdList.clear();
57}
58
62
63
64void
66{
67 writer->startMap();
68
69 writer->writeInformations(PAPPSOMSPP_NAME, PAPPSOMSPP_VERSION, "mzCBORindex", "mzMLconvert");
70
71 writer->append("from");
72 writer->append(m_uuid);
73
74 writer->append("runIdList");
75 writer->writeArray(m_runIdList);
76
77
78 std::vector<std::size_t> msrun_size;
79
80
81 writer->append("runSpectrumIndexList");
82 writer->startArray(m_runAndSpectrumOffsetList.size());
83 for(auto &spectrum_offset_list : m_runAndSpectrumOffsetList)
84 {
85 msrun_size.push_back(spectrum_offset_list.size());
86 writer->writeArray(spectrum_offset_list);
87 }
88 writer->endArray();
89
90
91 writer->append("runSpectrumNativeIdList");
92 writer->startArray(m_runAndSpectrumIdList.size());
93 for(auto &spectrum_id_list : m_runAndSpectrumIdList)
94 {
95 writer->writeArray(spectrum_id_list);
96 }
97 writer->endArray();
98
99 // TIC
100 bool size_ok = true;
101 for(std::size_t i = 0; i < msrun_size.size(); i++)
102 {
103 if(m_runAndSpectrumTotalIonCountList.at(i).size() != msrun_size.at(i))
104 {
105 size_ok = false;
106 }
107 }
108 if(size_ok)
109 {
110 writer->append("runSpectrumTotalIonCountList");
111 writer->startArray(m_runAndSpectrumIdList.size());
112 for(auto &spectrum_tic_list : m_runAndSpectrumTotalIonCountList)
113 {
114 writer->writeArray(spectrum_tic_list);
115 }
116 writer->endArray();
117 }
118
119 // retention time
120 size_ok = true;
121 for(std::size_t i = 0; i < msrun_size.size(); i++)
122 {
123 if(m_runAndSpectrumRtList.at(i).size() != msrun_size.at(i))
124 {
125 size_ok = false;
126 }
127 }
128 if(size_ok)
129 {
130
131 writer->append("runSpectrumRtList");
132 writer->startArray(m_runAndSpectrumRtList.size());
133 for(auto &spectrum_rt_list : m_runAndSpectrumRtList)
134 {
135 writer->writeArray(spectrum_rt_list);
136 }
137 writer->endArray();
138 }
139 else
140 {
141 // qFatal() << "wrong size";
142 }
143
144 // MS Level
145 size_ok = true;
146 for(std::size_t i = 0; i < msrun_size.size(); i++)
147 {
148 if(m_runAndSpectrumMsLevelList.at(i).size() != msrun_size.at(i))
149 {
150 size_ok = false;
151 }
152 }
153 if(size_ok)
154 {
155 writer->append("runSpectrumMsLevelList");
156 writer->startArray(m_runAndSpectrumMsLevelList.size());
157 for(auto &spectrum_mslevel_list : m_runAndSpectrumMsLevelList)
158 {
159 writer->writeArray(spectrum_mslevel_list);
160 }
161 writer->endArray();
162 }
163
164 writer->endMap();
165}
166
167
// readStream(): the signature line is not part of this listing (the
// cross-reference index declares it as "virtual void readStream() override").
// Converts the XML stream held by m_qxmlStreamReader into CBOR written through
// mp_cborWriter. The whole output is wrapped in one top level CBOR map.
168void
170{
171 mp_cborWriter->startMap();
172 if(m_qxmlStreamReader.readNextStartElement())
173 {
174 qDebug() << m_qxmlStreamReader.name().toString();
// an "indexedmzML" wrapper is stepped over: move to its first child element
175 if(m_qxmlStreamReader.name().toString() == "indexedmzML")
176 {
177 m_qxmlStreamReader.readNextStartElement();
178 }
179 if(m_qxmlStreamReader.name().toString() == "mzML")
180 {
181 // write mzCBOR header
182 mp_cborWriter->append("mzCBOR");
183 mp_cborWriter->startMap();
184
185 mp_cborWriter->append("mode");
186 mp_cborWriter->append(0);
187
188 mp_cborWriter->writeInformations(
189 PAPPSOMSPP_NAME, PAPPSOMSPP_VERSION, "mzCBOR", "mzMLconvert");
190
// keep the uuid of this document, writeMzcborIndex() stores it under "from"
191 m_uuid = mp_cborWriter->getUuid();
192 mp_cborWriter->endMap();
193
// key named after the root element, its map starts with the XML namespace
// (line 199 of the original file is missing from this listing)
194 mp_cborWriter->append(m_qxmlStreamReader.name().toString());
195
196 mp_cborWriter->startMap();
197 mp_cborWriter->append("xmlns");
198 mp_cborWriter->append(m_qxmlStreamReader.namespaceUri());
200 mp_cborWriter->endMap();
201
202
// convert each child element; last_element and array_started carry the
// sibling state that insideElement() uses to open and close CBOR arrays
203 bool array_started = false;
204 QString last_element;
205 while(m_qxmlStreamReader.readNextStartElement())
206 {
207 qDebug();
208 insideElement(last_element, array_started);
209 last_element = m_qxmlStreamReader.name().toString();
210 qDebug();
211 }
// NOTE(review): an array still open here (array_started == true) is not
// closed before the final endMap() - confirm this cannot happen for the
// direct children of mzML
212 }
213 else
214 {
// any other root element: flag the reader as in error and skip the element
215 m_qxmlStreamReader.raiseError(QObject::tr("Not an mzML input file"));
216 m_qxmlStreamReader.skipCurrentElement();
217 }
218 }
219 mp_cborWriter->endMap();
220}
221
222
// Member function whose signature line is not part of this listing.
// Stores the "defaultArrayLength" attribute of the current XML element in
// m_currentSpectrumSize (parsed as an unsigned integer).
223void
225{
226 // defaultArrayLength 1552
227
228 m_currentSpectrumSize = m_qxmlStreamReader.attributes().value("defaultArrayLength").toULongLong();
229}
230
231
// Member function whose signature line is not part of this listing;
// presumably readBinaryDataArrayList(), the name traced by insideElement()
// - TODO confirm.
// Writes the key "binaryDataArray" followed by a CBOR array whose length is
// the "count" attribute of the current element, then converts every child
// start element through BinaryDataArray (fromMzml then toCbor).
232void
234{
235
236 qDebug() << m_qxmlStreamReader.name();
237 // m_qxmlStreamReader.skipCurrentElement();
238 /*
239 *
240 <binaryDataArray encodedLength="9092">
241 <cvParam cvRef="MS" accession="MS:1000515" value="" name="intensity array"
242 unitAccession="MS:1000131" unitName="number of counts" unitCvRef="MS" /> <cvParam cvRef="MS"
243 accession="MS:1000523" value="" name="64-bit float" /> <cvParam cvRef="MS" accession="MS:1000574"
244 value="" name="zlib compression" /> <binary>*/
245 std::size_t count = m_qxmlStreamReader.attributes().value("count").toULongLong();
246
247 mp_cborWriter->append("binaryDataArray");
// NOTE(review): the array length is declared up front from "count"; the loop
// below does not check that the same number of items is written
248 mp_cborWriter->startArray(count);
// NOTE(review): readNext() returns a token type; presumably the invalid token
// is non-zero, so a parse error would not end this loop - confirm against the
// Qt documentation
249 while(m_qxmlStreamReader.readNext() && !m_qxmlStreamReader.isEndElement())
250 {
251 if(m_qxmlStreamReader.isStartElement())
252 {
253 BinaryDataArray binary_data_array;
254 binary_data_array.fromMzml(m_qxmlStreamReader);
255 binary_data_array.toCbor(*mp_cborWriter);
256 // writeZlibDataArray();
257 }
258 }
259 mp_cborWriter->endArray();
260 qDebug() << m_qxmlStreamReader.name();
261}
262
263
// Converts the XML element the reader is currently positioned on, and
// recursively its children, to CBOR.
//
// last_element_in  : name of the previous sibling element handled by the caller
// array_started_in : true while the caller has a CBOR array open for a run of
//                    same-named siblings; updated here when an array is
//                    opened or closed
//
// The element name is pushed on m_elementStash on entry and popped on exit, so
// the stash holds the path of open elements. Besides writing CBOR, the function
// fills the per run index vectors (spectrum ids, offsets, MS levels, total ion
// counts, retention times).
// Lines 277, 294, 334, 341 and 381 of the original file are missing from this
// listing.
264void
265pappso::cbor::mzcbor::MzmlConvert::insideElement(QString &last_element_in, bool &array_started_in)
266{
267 m_elementStash.push_back(m_qxmlStreamReader.name().toString());
268
269
270 qDebug() << m_qxmlStreamReader.name();
271
// record the native id of each spectrum in the list of the current run
// NOTE(review): back() requires that a per run list exists; those lists are
// created by attributeListToCbor() when it sees the "id" attribute of a "run"
// element - confirm a spectrum can never be met before that
272 if(m_elementStash.back() == "spectrum")
273 {
274 // qDebug() << m_qxmlStreamReader.attributes().value("id").toString();
275 m_runAndSpectrumIdList.back().push_back(
276 m_qxmlStreamReader.attributes().value("id").toString());
278 // qFatal();
279 }
280
281
282 // stop an array ?
283 qDebug() << "current element=" << m_elementStash.back();
284 qDebug() << "last_element=" << last_element_in;
// an open array is closed as soon as the element name differs from the
// previous sibling
285 if(array_started_in && (last_element_in != m_qxmlStreamReader.name().toString()))
286 {
287 mp_cborWriter->endArray();
288 qDebug() << "mp_cborWriter->endArray()";
289 array_started_in = false;
290 }
// "binaryDataArrayList" has a dedicated branch; only its debug trace is
// visible in this listing
291 if(m_elementStash.back() == "binaryDataArrayList")
292 {
293 qDebug() << "readBinaryDataArrayList()";
295 }
296
297 else
298 {
299
300
301 // start an array ?
302 if(m_elementToStoreInArray.contains(m_elementStash.back()))
303 {
304 // start an array ?
305 if((!array_started_in) && (last_element_in != m_elementStash.back()))
306 {
// the element name is the key, the array is opened without a declared length
307 mp_cborWriter->append(m_elementStash.back());
308 mp_cborWriter->startArray();
309
310 qDebug() << "mp_cborWriter->startArray()";
311 array_started_in = true;
312 }
313 }
314
// remember the position of the output device just before the spectrum item is
// written (skipped when the writer has no device)
315 if(m_elementStash.back() == "spectrum")
316 {
317 if(mp_cborWriter->device() != nullptr)
318 m_runAndSpectrumOffsetList.back().push_back(mp_cborWriter->device()->pos());
319 }
320
321
// outside an array the element is written as a keyed entry
322 if(!array_started_in)
323 mp_cborWriter->append(m_elementStash.back());
324
// array state for the children of this element
325 bool array_started = false;
326
327
328 if(m_elementStash.back() == "cvParam")
329 {
330
331 // *********** special treatment for cvParam **********
332 if(!array_started_in)
333 {
335 QObject::tr("unable to write cvParam outside CBOR array"));
336 }
337 // array_started = true;
// NOTE(review): at(size() - 2) throws std::out_of_range when cvParam is the
// only entry of the stash (the unsigned subtraction wraps around)
338 qDebug() << m_qxmlStreamReader.name() << " "
339 << m_elementStash.at(m_elementStash.size() - 2);
340 CvParam cv_param;
342 cv_param.toCbor(*mp_cborWriter);
343 qDebug() << cv_param.name;
344
// cvParam directly under "spectrum": feed the MS level and TIC index lists
345 if(m_elementStash.at(m_elementStash.size() - 2) == "spectrum")
346 {
347 qDebug() << "cvparam in spectrum";
348 if(cv_param.accession == "MS:1000511")
349 {
350 m_runAndSpectrumMsLevelList.back().push_back(cv_param.getExpectedUint8());
351 qDebug() << m_runAndSpectrumMsLevelList.back().back();
352 }
353 else if(cv_param.accession == "MS:1000285")
354 { // TIC
// NOTE(review): getExpectedDouble() returns a double but the TIC list stores
// qint64, the value is converted implicitly here
355 m_runAndSpectrumTotalIonCountList.back().push_back(cv_param.getExpectedDouble());
356 }
357 }
// cvParam directly under "scan": feed the retention time index list
358 else if(m_elementStash.at(m_elementStash.size() - 2) == "scan")
359 {
360 if(cv_param.accession == "MS:1000016")
361 { // rt
362 double rt = cv_param.getExpectedDouble();
363
// values with unit accession UO:0000031 are multiplied by 60 before storage
364 if(cv_param.unitAccession == "UO:0000031")
365 {
366 // // minutes
367 rt = rt * 60;
368 }
369 m_runAndSpectrumRtList.back().push_back(rt);
370 }
371 }
372 qDebug();
373 // *********** special treatment for cvParam **********
374 //
375 qDebug() << "finish cvParam " << cv_param.accession;
376 }
377 else
378 {
379
// generic element: written as a CBOR map
380 mp_cborWriter->startMap();
382
383 QString last_element;
384
// walk the content of the element up to the next end element
// NOTE(review): readNext() returns a token type; presumably the invalid token
// is non-zero, so a parse error would not end this loop - confirm against the
// Qt documentation
385 while(m_qxmlStreamReader.readNext() && !m_qxmlStreamReader.isEndElement())
386 {
387 qDebug() << m_qxmlStreamReader.name();
388 if(m_qxmlStreamReader.isCharacters())
389 {
390 // clean content:
391 QStringView content = m_qxmlStreamReader.text().trimmed();
392 if((m_qxmlStreamReader.text().toString() == "\n") ||
393 (m_qxmlStreamReader.text().toString() == "\n\t"))
394 {
395 }
396 else
397 {
398 // text node
// non empty trimmed text is stored under the reserved key "@text@"
399 if(!content.isEmpty())
400 {
401 qDebug() << "text isCharacters" << content.mid(0, 10);
402 mp_cborWriter->append("@text@");
403 mp_cborWriter->append(content);
404 }
405 }
406 }
407 else if(m_qxmlStreamReader.isStartElement())
408 {
// child element: recurse, then remember its name as the previous sibling
409 QString tmp_element = m_qxmlStreamReader.name().toString();
410 qDebug() << tmp_element;
411 insideElement(last_element, array_started);
412 last_element = tmp_element;
413 }
414 }
415
// close the array left open by the last run of same-named children
416 if(array_started)
417 {
418 mp_cborWriter->endArray();
419 }
420
421 mp_cborWriter->endMap();
422
423 qDebug() << m_qxmlStreamReader.name();
424 }
425 }
426
427 qDebug() << m_elementStash.back();
428 m_elementStash.pop_back();
429}
430
431
432void
434{
435 bool ok(false);
436 double d = value_str.toDouble(&ok);
437 if(ok)
438 {
439 if(value_str.contains('.'))
440 {
441 mp_cborWriter->append(d);
442 }
443 else
444 {
445 qint64 bigint = value_str.toLongLong(&ok);
446 if(ok)
447 {
448 mp_cborWriter->append(bigint);
449 }
450 }
451 }
452 else
453 {
454 mp_cborWriter->append(value_str);
455 }
456}
457
458
459void
460pappso::cbor::mzcbor::MzmlConvert::attributeListToCbor(const QXmlStreamAttributes &xml_attributes)
461{
462 for(auto &xml_attribute : xml_attributes)
463 {
464 qDebug() << xml_attribute.name() << " " << xml_attribute.value();
465 mp_cborWriter->append(xml_attribute.name());
466 attributeValueToCbor(xml_attribute.value());
467
468 if((m_elementStash.size() > 0) && (m_elementStash.back() == "run") &&
469 (xml_attribute.name() == "id"))
470 {
471 m_runAndSpectrumOffsetList.push_back(std::vector<qint64>());
472 m_runAndSpectrumIdList.push_back(std::vector<QString>());
473 m_runAndSpectrumMsLevelList.push_back(std::vector<std::uint8_t>());
474 m_runAndSpectrumRtList.push_back(std::vector<double>());
475 m_runAndSpectrumTotalIonCountList.push_back(std::vector<qint64>());
476 m_runIdList.push_back(xml_attribute.value().toString());
477 }
478 }
479}
480
481const std::vector<QString> &
487
488const std::vector<std::vector<qint64>> &
493
494const std::vector<std::vector<QString>> &
PSI BinaryDataArray object for mzML/mzCBOR.
overrides QCborStreamWriter base class to provide convenient functions
void writeInformations(const QString &software_name, const QString &software_version, const QString &type, const QString &operation)
automatically produces an informations CBOR map the "informations" map contains default parameters : ...
void writeArray(const std::vector< std::size_t > &int_list)
virtual void readStream() override
std::vector< QString > m_elementStash
Definition mzmlconvert.h:84
std::vector< std::vector< QString > > m_runAndSpectrumIdList
Definition mzmlconvert.h:93
std::vector< std::vector< qint64 > > m_runAndSpectrumTotalIonCountList
Definition mzmlconvert.h:94
void writeMzcborIndex(pappso::cbor::CborStreamWriter *writer) const
std::vector< std::vector< std::uint8_t > > m_runAndSpectrumMsLevelList
Definition mzmlconvert.h:95
const std::vector< std::vector< qint64 > > & getRunAndSpectrumOffsetList() const
void insideElement(QString &last_element, bool &array_started)
const std::vector< QString > & getRunIdList() const
std::vector< std::vector< double > > m_runAndSpectrumRtList
Definition mzmlconvert.h:96
void attributeListToCbor(const QXmlStreamAttributes &xml_attributes)
const std::vector< std::vector< QString > > & getRunAndSpectrumIdList() const
std::vector< QString > m_runIdList
Definition mzmlconvert.h:91
pappso::UiMonitorInterface * mp_monitor
Definition mzmlconvert.h:81
void attributeValueToCbor(const QStringView &value_str)
MzmlConvert(pappso::UiMonitorInterface *p_monitor, pappso::cbor::CborStreamWriter *p_output)
std::vector< std::vector< qint64 > > m_runAndSpectrumOffsetList
Definition mzmlconvert.h:92
pappso::cbor::CborStreamWriter * mp_cborWriter
Definition mzmlconvert.h:82
#define PAPPSOMSPP_VERSION
Definition config.h:6
#define PAPPSOMSPP_NAME
Definition config.h:5
PSI cvParam object for mzML/mzCBOR.
void fromMzml(QXmlStreamReader &reader)
void toCbor(CborStreamWriter &writer)
void fromMzml(QXmlStreamReader &reader)
reads the XML attributes of the cvParam element Inside the cvParam XML element, reads the attributes ...
Definition cvparam.cpp:121
void toCbor(CborStreamWriter &writer)
Definition cvparam.cpp:161
std::uint8_t getExpectedUint8() const
Definition cvparam.cpp:271
double getExpectedDouble() const
Definition cvparam.cpp:303