4.7. Example Scripts
4.7.1. Parsing a mzML file (new syntax)
- simple_parser.main(mzml_file)[source]
Basic example script to demonstrate the usage of pymzML. Requires a mzML file as first argument.
usage:
./simple_parser.py <path_to_mzml_file>
Note
This script uses the new syntax with the MS level being a property of the spectrum class (Spectrum.ms_level). The old syntax can be found in the script simple_parser_v2.py, where the MS level can be queried as a key (Spectrum['ms level']).
#!/usr/bin/env python
import sys
import pymzml
def main(mzml_file):
    """
    Basic example script to demonstrate the usage of pymzML. Requires a mzML
    file as first argument.

    usage:

        ./simple_parser.py <path_to_mzml_file>

    Note:

        This script uses the new syntax with the MS level being a property of
        the spectrum class ( Spectrum.ms_level ). The old syntax can be found in
        the script simple_parser_v2.py where the MS level can be queried as a key
        (Spectrum['ms level'])

    Args:
        mzml_file (str): path to the mzML file to parse

    """
    run = pymzml.run.Reader(mzml_file)
    # Count from 1 and pre-initialise: the last enumerate() index would be one
    # less than the number of spectra, and the loop variable would stay
    # unbound if the run yields nothing.
    spectrum_count = 0
    for spectrum_count, spec in enumerate(run, start=1):
        print(
            "Spectrum {0}, MS level {ms_level} @ RT {scan_time:1.2f}".format(
                spec.ID, ms_level=spec.ms_level, scan_time=spec.scan_time_in_minutes()
            )
        )
    print("Parsed {0} spectra from file {1}".format(spectrum_count, mzml_file))
    print()


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(main.__doc__)
        # sys.exit() instead of exit(): the latter is injected by the site
        # module for interactive use and is not guaranteed to exist.
        sys.exit()
    mzml_file = sys.argv[1]
    main(mzml_file)
4.7.2. Parsing a mzML file (old syntax)
- simple_parser_v2.main(mzml_file)[source]
Basic example script to demonstrate the usage of pymzML. Requires a mzML file as first argument.
- usage:
./simple_parser_v2.py <path_to_mzml_file>
Note
This script uses the old syntax where the MS level can be queried as a key (Spectrum['ms level']). The current syntax can be found in simple_parser.py.
#!/usr/bin/env python
import sys
import pymzml
from collections import defaultdict as ddict
def main(mzml_file):
    """
    Basic example script to demonstrate the usage of pymzML. Requires a mzML
    file as first argument.

    usage:

        ./simple_parser_v2.py <path_to_mzml_file>

    Note:

        This script uses the old syntax where the MS level can be queried as a
        key (Spectrum['ms level']). The current syntax can be found in
        simple_parser.py

    Args:
        mzml_file (str): path to the mzML file to parse

    """
    run = pymzml.run.Reader(mzml_file)
    # Maps spectrum ID -> number of centroided peaks
    stats = ddict(int)
    for n, spec in enumerate(run):
        # end="\r" keeps rewriting the same terminal line as a progress display
        print(
            "Spectrum {0}, MS level {ms_level}".format(n, ms_level=spec["ms level"]),
            end="\r",
        )
        # the old method to obtain peaks from the Spectrum class
        stats[spec.ID] = len(spec.centroidedPeaks)
    print("Parsed {0} spectra from file {1}".format(len(stats), mzml_file))
    print()


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(main.__doc__)
        # sys.exit() instead of exit(): the latter is injected by the site
        # module for interactive use and is not guaranteed to exist.
        sys.exit()
    mzml_file = sys.argv[1]
    main(mzml_file)
4.7.3. Query the obo files
- queryOBO.main(args)[source]
Use this script to interrogate the OBO database files.
usage:
./queryOBO.py [-h] [-v VERSION] query
Example:
$ ./queryOBO.py 'scan time' MS:1000016 scan time 'The time taken for an acquisition by scanning analyzers.' [PSI:MS] Is a: MS:1000503 ! scan attribute
Example:
$ ./queryOBO.py 1000016 MS:1000016 scan time "The time taken for an acquisition by scanning analyzers." [PSI:MS] MS:1000503 ! scan attribute
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
from collections import defaultdict
import pymzml.obo
FIELDNAMES = ["id", "name", "def", "is_a"]
def main(args):
    """
    Use this script to interrogate the OBO database files.

    usage:

        ./queryOBO.py [-h] [-v VERSION] query

    Example::

        $ ./queryOBO.py 'scan time'
        MS:1000016
        scan time
        'The time taken for an acquisition by scanning analyzers.' [PSI:MS]
        Is a: MS:1000503 ! scan attribute

    Example::

        $ ./queryOBO.py 1000016
        MS:1000016
        scan time
        "The time taken for an acquisition by scanning analyzers." [PSI:MS]
        MS:1000503 ! scan attribute

    Args:
        args: parsed command line arguments providing ``query`` and
            ``version`` attributes

    """
    translator = pymzml.obo.OboTranslator(version=args.version)
    translator.parseOBO()
    query = args.query

    # A purely numeric query is treated as an accession number
    if query.isdigit():
        print(search_by_id(translator, query))
        return

    # Anything else is matched against the term names
    for index, hit in enumerate(search_by_name(translator, query)):
        print("#{0}".format(index))
        for field in ("id", "name", "def"):
            print(hit[field])
        if "is_a" in hit:
            print("Is a:", hit["is_a"])
def search_by_name(obo, name):
    """
    Collect all OBO entries whose lookup key contains ``name``.

    The comparison is a case-insensitive substring test against every key of
    every dictionary in ``obo.lookups``.

    Args:
        obo: translator object exposing a ``lookups`` sequence of dictionaries
        name (str): text to search for

    Returns:
        list: one ``defaultdict(str)`` per hit, holding those FIELDNAMES
        entries that are present for the matched key
    """
    needle = name.lower()
    print("Searching for {0}".format(needle))
    hits = []
    for table in obo.lookups:
        for term in table.keys():
            if needle not in term.lower():
                continue
            entry = table[term]
            hit = defaultdict(str)
            hit.update(
                (field, entry[field]) for field in FIELDNAMES if field in entry.keys()
            )
            hits.append(hit)
    return hits
def search_by_id(obo, id):
    """
    Look up an accession number and format the entry as text.

    Args:
        obo: translator object exposing ``lookups`` and ``MS_tag_regex``
        id (str): numeric part of the accession; ``MS:`` is prepended

    Returns:
        str: the available FIELDNAMES values, one per line, for every lookup
        that contains the accession; an empty string if nothing was found
    """
    accession = "MS:{0}".format(id)
    lines = []
    for table in obo.lookups:
        if accession not in table:
            continue
        if not obo.MS_tag_regex.match(accession):
            continue
        entry = table[accession]
        lines.extend(
            "{0}\n".format(entry[field])
            for field in FIELDNAMES
            if field in entry.keys()
        )
    return "".join(lines)
if __name__ == "__main__":
    # This script has no module docstring, so the help text is taken from
    # main(); the raw formatter keeps the example output in its docstring
    # from being re-wrapped.
    argparser = argparse.ArgumentParser(
        description=main.__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    argparser.add_argument(
        "query", help="an accession or part of an OBO term name to look for"
    )
    argparser.add_argument(
        "-v",
        "--version",
        default="1.1.0",
        help=(
            "the version of the OBO to use; valid options are 1.0.0, 1.1.0, "
            "and 1.2, default is 1.1.0"
        ),
    )
    args = argparser.parse_args()
    main(args)
4.7.4. Plotting a chromatogram
- plot_chromatogram.main(mzml_file)[source]
Plots a chromatogram for the given mzML file. File is saved as ‘chromatogram_<mzml_file>.html’.
usage:
./plot_chromatogram.py <path_to_mzml_file>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import pymzml
from pymzml.plot import Factory
def main(mzml_file):
    """
    Plots a chromatogram for the given mzML file. File is saved as
    'chromatogram_<mzml_file>.html'.

    usage:

        ./plot_chromatogram.py <path_to_mzml_file>

    Args:
        mzml_file (str): path to the mzML file whose TIC should be plotted

    """
    run = pymzml.run.Reader(mzml_file)
    # Only the file name (without directories) goes into the plot title and
    # the name of the output file.
    mzml_basename = os.path.basename(mzml_file)
    pf = Factory()
    pf.new_plot()
    pf.add(run["TIC"].peaks(), color=(0, 0, 0), style="lines", title=mzml_basename)
    pf.save(
        "chromatogram_{0}.html".format(mzml_basename),
        layout={"xaxis": {"title": "Retention time"}, "yaxis": {"title": "TIC"}},
    )


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(main.__doc__)
        # sys.exit() instead of exit(): the latter is injected by the site
        # module for interactive use and is not guaranteed to exist.
        sys.exit()
    mzml_file = sys.argv[1]
    main(mzml_file)
4.7.5. Plotting a spectrum
- plot_spectrum.main()[source]
This function shows how to plot a simple spectrum. It can be directly plotted via this script or using the python console.
usage:
./plot_spectrum.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import pymzml
def main():
    """
    This function shows how to plot a simple spectrum. It can be directly
    plotted via this script or using the python console.

    usage:

        ./plot_spectrum.py

    """
    data_dir = os.path.join(os.path.dirname(__file__), os.pardir, "tests", "data")
    example_file = os.path.join(data_dir, "example.mzML")
    run = pymzml.run.Reader(example_file)
    plotter = pymzml.plot.Factory()

    # Only the first spectrum of the run is plotted
    first_spectrum = next(iter(run), None)
    if first_spectrum is None:
        return

    plotter.new_plot()
    plotter.add(
        first_spectrum.peaks("centroided"),
        color=(0, 0, 0),
        style="sticks",
        name="peaks",
    )
    filename = "example_plot_{0}_{1}.html".format(
        os.path.basename(example_file), first_spectrum.ID
    )
    plotter.save(filename=filename)
    print("Plotted file: {0}".format(filename))


if __name__ == "__main__":
    main()
4.7.6. Plotting a spectrum with annotation
- plot_spectrum_with_annotation.main()[source]
This script shows how to plot multiple spectra in one plot and how to use label for the annotation of spectra. The first plot is an MS1 spectrum with the annotated precursor ion. The second plot is a zoom into the precursor isotope pattern. The third plot is an annotated fragmentation spectrum (MS2) of the peptide HLVDEPQNLIK from BSA. These examples also show the use of ‘layout’ to define the appearance of a plot.
usage:
./plot_spectrum_with_annotation.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import copy
import os
import pymzml
def main():
    """
    This script shows how to plot multiple spectra in one plot and
    how to use label for the annotation of spectra.
    The first plot is an MS1 spectrum with the annotated precursor ion.
    The second plot is a zoom into the precursor isotope pattern.
    The third plot is an annotated fragmentation spectrum (MS2) of the
    peptide HLVDEPQNLIK from BSA.
    These examples also show the use of 'layout' to define the appearance
    of a plot.

    usage:

        ./plot_spectrum_with_annotation.py

    """
    # General layout attributes; both axes share most of their settings
    shared_axis_settings = {
        "tickmode": "auto",
        "showticklabels": True,
        "ticklen": 5,
        "tickwidth": 1,
        "ticks": "outside",
        "showline": True,
        "showgrid": False,
    }
    layout = {
        "xaxis": dict(title="<i>m/z</i>", **shared_axis_settings),
        "yaxis": dict(color="#000000", **shared_axis_settings),
    }
    plot_layout = {}

    def add_axes_to_layout(subplot_number):
        # The x- and y-axes of subplots are numbered, starting at 1.
        # Every subplot gets its own copy so it can be adjusted separately.
        for axis, settings in layout.items():
            key = "{0}{1}".format(axis, subplot_number)
            plot_layout[key] = copy.copy(settings)

    # The example BSA file will be used
    example_file = os.path.join(
        os.path.dirname(__file__), os.pardir, "tests", "data", "BSA1.mzML.gz"
    )

    # Define different precisions for MS1 and MS2
    run = pymzml.run.Reader(example_file, MS_precisions={1: 5e-6, 2: 5e-4})
    plotter = pymzml.plot.Factory()

    # --- Plot 1: the MS1 spectrum (spectrum ID 1574) ---
    plotter.new_plot(title="MS1 Spectrum")
    ms1_spectrum = run[1574]

    # The measured peaks are added as first trace
    plotter.add(
        ms1_spectrum.peaks("centroided"),
        color=(0, 0, 0),
        opacity=1,
        style="sticks",
        name="raw data plot 1",
    )

    # The label for the precursor ion is added as a separate trace.
    # Note that triangle.MS_precision is used here as a label.
    # By zooming in at this peak one can therefore check if the measured
    # peak fits into the defined mass accuracy range.
    precursor_mz_calc = 435.9102
    precursor_label = [(precursor_mz_calc, "max_intensity", "theoretical precursor")]
    plotter.add(
        precursor_label,
        color=(255, 0, 0),
        opacity=0.6,
        style="label.triangle.MS_precision",
        name="theoretical precursor plot 1",
    )
    add_axes_to_layout(1)

    # --- Plot 2: same data as above, but as a zoom-in via mz_range ---
    zoom_range = [435.7, 437]
    plotter.new_plot(title="MS1 Spectrum Zoom")
    plotter.add(
        ms1_spectrum.peaks("centroided"),
        color=(0, 0, 0),
        opacity=1,
        style="sticks",
        name="raw data plot 2",
        plot_num=1,
        mz_range=zoom_range,
    )
    plotter.add(
        [(precursor_mz_calc, "max_intensity", "theoretical precursor")],
        color=(255, 0, 0),
        opacity=0.3,
        plot_num=1,
        style="label.triangle.MS_precision",
        name="theoretical precursor plot 2",
    )

    # The mz_range can be included in the layout as well.
    # In contrast to mz_range in the add() function, which limits the included
    # datapoints, the layout range only defines the area that is depicted
    # (i.e. the zoom)
    add_axes_to_layout(2)
    plot_layout["xaxis2"].update(autorange=False, range=[435.7, 437])

    # --- Plot 3: a fragmentation spectrum of HLVDEPQNLIK ---
    ms2_spectrum = run[3542]

    # The MS_precision for the plotting option label.triangle.MS_precision
    # needs to be defined
    plotter.new_plot(title="MS2 Spectrum Annotated: HLVDEPQNLIK", MS_precision=5e-4)
    plotter.add(
        ms2_spectrum.peaks("centroided"),
        color=(0, 0, 0),
        opacity=1,
        style="sticks",
        name="raw data plot 3",
        plot_num=2,
    )

    theoretical_b_ions = {
        "b<sub>2</sub><sup>+2</sup>": 126.0788,
        "b<sub>3</sub><sup>+2</sup>": 175.6130,
        "b<sub>4</sub><sup>+2</sup>": 233.1264,
        "b<sub>2</sub>": 251.1503,
        "b<sub>5</sub><sup>+2</sup>": 297.6477,
        "b<sub>6</sub><sup>+2</sup>": 346.1741,
        "b<sub>3</sub>": 350.2187,
        "b<sub>7</sub><sup>+2</sup>": 410.2034,
        "b<sub>4</sub>": 465.2456,
        "b<sub>8</sub><sup>+2</sup>": 467.2249,
        "b<sub>9</sub><sup>+2</sup>": 523.7669,
        "b<sub>10</sub><sup>+2</sup>": 580.3089,
        "b<sub>5</sub>": 594.2882,
        "b<sub>6</sub>": 691.341,
        "b<sub>7</sub>": 819.3995,
        "b<sub>8</sub>": 933.4425,
        "b<sub>9</sub>": 1046.5265,
        "b<sub>10</sub>": 1159.6106,
    }
    theoretical_y_ions = {
        "y<sub>1</sub><sup>+2</sup>": 74.0600,
        "y<sub>2</sub><sup>+2</sup>": 130.6021,
        "y<sub>1</sub>": 147.1128,
        "y<sub>3</sub><sup>+2</sup>": 187.1441,
        "y<sub>4</sub><sup>+2</sup>": 244.1656,
        "y<sub>2</sub>": 260.1969,
        "y<sub>5</sub><sup>+2</sup>": 308.1949,
        "y<sub>6</sub><sup>+2</sup>": 356.7212,
        "y<sub>3</sub>": 373.2809,
        "y<sub>7</sub><sup>+2</sup>": 421.2425,
        "y<sub>8</sub><sup>+2</sup>": 478.7560,
        "y<sub>4</sub>": 487.3239,
        "y<sub>9</sub><sup>+2</sup>": 528.2902,
        "y<sub>10</sub><sup>+2</sup>": 584.8322,
        "y<sub>5</sub>": 615.3824,
        "y<sub>6</sub>": 712.4352,
        "y<sub>7</sub>": 841.4778,
        "y<sub>8</sub>": 956.5047,
        "y<sub>9</sub>": 1055.5732,
        "y<sub>10</sub>": 1168.6572,
    }

    # Check which theoretical fragments are present in the spectrum
    # using the has_peak() function; b ions are drawn in blue, y ions in green
    for ion_series, color in (
        (theoretical_b_ions, (0, 0, 255)),
        (theoretical_y_ions, (0, 255, 0)),
    ):
        label_list = []
        for fragment, theoretical_mz in ion_series.items():
            peak = ms2_spectrum.has_peak(theoretical_mz)
            if len(peak) > 0:
                # label position: theoretical m/z at the matched intensity
                label_list.append((theoretical_mz, peak[0][1], fragment))
        plotter.add(
            label_list,
            color=color,
            style="label.triangle.MS_precision",
            name="theoretical fragment ions plot 3",
        )
    add_axes_to_layout(3)

    # Save the plot in a file using the defined plot_layout
    filename = "example_plot_{0}_annotation.html".format(os.path.basename(example_file))
    plotter.save(filename=filename, layout=plot_layout)
    print("Plotted file: {0}".format(filename))


if __name__ == "__main__":
    main()
4.7.7. Extracting highest peaks
- highest_peaks.main()[source]
Testscript to isolate the n-highest peaks from an example file.
Usage:
./highest_peaks.py
Parses the file ‘../tests/data/example.mzML’ and extracts the 2 highest intensities from each spectrum.
#!/usr/bin/env python
import pymzml
from collections import defaultdict as ddict
import os
def main():
    """
    Testscript to isolate the n-highest peaks from an example file.

    Usage:

        ./highest_peaks.py

    Parses the file '../tests/data/example.mzML' and extracts the 2 highest
    intensities from each spectrum.

    """
    number_of_peaks_to_extract = 2
    example_file = os.path.join(
        os.path.dirname(__file__), os.pardir, "tests", "data", "example.mzML"
    )
    run = pymzml.run.Reader(example_file)

    # Maps spectrum ID -> list of the highest intensities of that spectrum
    highest_i_dict = ddict(list)
    for spectrum in run:
        if spectrum.ms_level != 1:
            continue
        for _mz, intensity in spectrum.highest_peaks(number_of_peaks_to_extract):
            highest_i_dict[spectrum.ID].append(intensity)

    for spectrum_id, intensities in highest_i_dict.items():
        # Every collected spectrum must have yielded the requested peak count
        assert len(intensities) == number_of_peaks_to_extract
        print(
            "Spectrum {0}; highest intensities: {1}".format(spectrum_id, intensities)
        )


if __name__ == "__main__":
    main()
4.7.8. Compare spectra
- compare_spectra.main()[source]
Compare multiple spectra and return the cosine similarity between them. The returned value is between 0 and 1; a returned value of 1 represents the highest similarity.
usage:
./compare_spectra.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import pymzml
def main():
    """
    Compare multiple spectra and print the cosine similarity between them.
    The score is between 0 and 1, a value of 1 represents highest
    similarity.

    usage:

        ./compare_spectra.py

    """
    example_file = os.path.join(
        os.path.dirname(__file__), os.pardir, "tests", "data", "example.mzML"
    )
    print("\nComparing spectra\n")
    run = pymzml.run.Reader(example_file)

    # Collect at most the first three MS1 spectra of the run
    tmp = []
    for spec in run:
        if spec.ms_level == 1:
            print("Parsing spectrum lvl 1 has id {0}".format(spec.ID))
            tmp.append(spec)
            if len(tmp) >= 3:
                break
    print("Print total number of specs collected {0}".format(len(tmp)))

    # Build the index pairs from what was actually collected, so a file with
    # fewer than three MS1 spectra does not raise an IndexError. For three
    # spectra this yields (0, 1), (0, 2), (1, 2).
    index_pairs = [
        (first, second)
        for first in range(len(tmp))
        for second in range(first + 1, len(tmp))
    ]
    for first, second in index_pairs:
        print(
            "Cosine between spectra {0} & {1} is {2:1.4f}".format(
                first + 1, second + 1, tmp[first].similarity_to(tmp[second])
            )
        )

    # The self-comparison needs at least one collected spectrum
    if tmp:
        print(
            "Cosine score between first spectrum against itself: {0:1.4f}".format(
                tmp[0].similarity_to(tmp[0])
            )
        )


if __name__ == "__main__":
    main()
4.7.9. Find m/z values
- has_peak.main()[source]
Testscript to demonstrate functionality of function
pymzml.spec.Spectrum.has_peak()
usage:
./has_peak.py
#!/usr/bin/env python
import pymzml
import os
def main():
    """
    Testscript to demonstrate functionality of function :py:func:`pymzml.spec.Spectrum.has_peak`

    usage:

        ./has_peak.py

    """
    mz_to_find = 820.7711792
    data_dir = os.path.join(os.path.dirname(__file__), os.pardir, "tests", "data")
    run = pymzml.run.Reader(os.path.join(data_dir, "example.mzML"))

    for spectrum in run:
        found_peaks = spectrum.has_peak(mz_to_find)
        # Spectra without a matching peak are not reported
        if found_peaks == []:
            continue
        print("Found peaks: {0} in spectrum {1}".format(found_peaks, spectrum.ID))


if __name__ == "__main__":
    main()
4.7.10. Extract ion chromatogram
- extract_ion_chromatogram.main()[source]
Demonstration of the extraction of a specific ion chromatogram, i.e. XIC or EIC
All intensities and m/z values for a target m/z are extracted.
usage:
./extract_ion_chromatogram.py
#!/usr/bin/env python
import os
import pymzml
def main():
    """
    Demonstration of the extraction of a specific ion chromatogram, i.e. XIC or EIC

    All intensities and m/z values for a target m/z are extracted.

    usage:

        ./extract_ion_chromatogram.py

    """
    MZ_2_FOLLOW = 70.06575775
    example_file = os.path.join(
        os.path.dirname(__file__), os.pardir, "tests", "data", "example.mzML"
    )
    run = pymzml.run.Reader(example_file)

    # Rows of [retention time, intensity, m/z] for every matching MS1 peak
    time_dependent_intensities = []
    for spectrum in run:
        if spectrum.ms_level != 1:
            continue
        has_peak_matches = spectrum.has_peak(MZ_2_FOLLOW)
        if has_peak_matches == []:
            continue
        time_dependent_intensities.extend(
            [spectrum.scan_time_in_minutes(), intensity, mz]
            for mz, intensity in has_peak_matches
        )

    print("RT \ti \tmz")
    for row in time_dependent_intensities:
        rt, i, mz = row
        print("{0:5.3f}\t{1:13.4f}\t{2:10}".format(rt, i, mz))


if __name__ == "__main__":
    main()
4.7.11. Find abundant precursors
- get_precursors.main()[source]
Extract the 10 most often fragmented precursors from the BSA example file.
This can e.g. be used for defining exclusion lists for further MS runs.
usage:
./get_precursors.py
#!/usr/bin/env python
import os
from operator import itemgetter
import pymzml
def main():
    """
    Extract the 10 most often fragmented precursors from the BSA example file.

    This can e.g. be used for defining exclusion lists for further MS runs.

    usage:

        ./get_precursors.py

    """
    example_file = os.path.join(
        os.path.dirname(__file__), os.pardir, "tests", "data", "BSA1.mzML.gz"
    )
    run = pymzml.run.Reader(example_file)

    # Maps rounded precursor m/z -> IDs of the MS2 spectra fragmenting it
    fragmented_precursors = {}
    for spectrum in run:
        if spectrum.ms_level != 2:
            continue
        # Read the property once and reuse the result
        selected_precursors = spectrum.selected_precursors
        if selected_precursors is None:
            continue
        for precursor_dict in selected_precursors:
            # Only the m/z is needed; the intensity is deliberately not
            # looked up, so a precursor entry without an "i" key cannot
            # abort the run with a KeyError.
            rounded_precursor_mz = round(precursor_dict["mz"], 3)
            fragmented_precursors.setdefault(rounded_precursor_mz, []).append(
                spectrum.ID
            )

    # Sorting the tuples in reverse orders by number of spectra first and
    # by m/z for equal counts.
    precursor_info_list = [
        (len(spectra_list), rounded_precursor_mz, spectra_list)
        for rounded_precursor_mz, spectra_list in fragmented_precursors.items()
    ]
    top_ten = sorted(precursor_info_list, reverse=True)[:10]
    for _number_of_spectra, rounded_precursor_mz, spectra_list in top_ten:
        print(
            "Found precursor: {0} in spectra: {1}".format(
                rounded_precursor_mz, spectra_list
            )
        )


if __name__ == "__main__":
    main()
4.7.12. Access polarity of spectra
- polarity.main()[source]
Accessing positive or negative polarity of scan using obo 1.1.0
usage:
./polarity.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pymzml
import get_example_file
def _term_present(value):
    """
    Translate the result of a value-less term lookup into a boolean.

    None (term not found) and any other value map to False; an empty string
    maps to True.
    NOTE(review): True is accepted as well because newer pymzML releases
    appear to return True instead of '' for value-less terms - confirm.
    """
    return value == "" or value is True


def main():
    """
    Accessing positive or negative polarity of scan using obo 1.1.0

    usage:

        ./polarity.py

    """
    example_file = get_example_file.open_example("small.pwiz.1.1.mzML")
    run = pymzml.run.Reader(example_file, obo_version="1.1.0")
    for spec in run:
        # Both polarities are normalised the same way
        negative_polarity = _term_present(spec["negative scan"])
        positive_polarity = _term_present(spec["positive scan"])
        print(
            "Polarity negative {0} - Polarity positive {1}".format(
                negative_polarity, positive_polarity
            )
        )
        # Only the first spectrum is inspected. Leave the loop instead of
        # calling exit(1), which would report a failure to the shell.
        break


if __name__ == "__main__":
    main()
4.7.13. Check old to new function name mapping
- deprecation_check.main()[source]
Testscript to highlight the function name changes in the Spectrum class.
Note
Please adjust any old scripts to the new syntax.
usage:
./deprecation_check.py
#!/usr/bin/env python3
import os
import pymzml
def main():
    """
    Testscript to highlight the function name changes in the Spectrum class.

    Note:

        Please adjust any old scripts to the new syntax.

    usage:

        ./deprecation_check.py

    """
    data_dir = os.path.join(os.path.dirname(__file__), os.pardir, "tests", "data")
    run = pymzml.run.Reader(os.path.join(data_dir, "example.mzML"))

    mz = 813.19073486
    # Old-style method names together with their positional arguments,
    # in the order in which they are called on every spectrum
    deprecated_calls = (
        ("hasPeak", (mz,)),
        ("extremeValues", ("mz",)),
        ("hasOverlappingPeak", (mz,)),
        ("highestPeaks", (1,)),
        ("estimatedNoiseLevel", ()),
        ("removeNoise", ()),
        ("transformMZ", (mz,)),
    )

    seen_spectra = []
    for pos, spectrum in enumerate(run):
        seen_spectra.append(spectrum)
        for method_name, call_args in deprecated_calls:
            getattr(spectrum, method_name)(*call_args)
        # Two spectra are needed for the comparison; stop afterwards
        if pos == 1:
            spectrum.similarityTo(seen_spectra[0])
            break


if __name__ == "__main__":
    main()
4.7.14. Convert mzML(.gz) to mzML.gz (igzip)
- gzip_mzml.main(mzml_path, out_path)[source]
Create an indexed gzip mzML file from a plain mzML.
Usage: python3 gzip_mzml.py <path/to/mzml> <path/to/output>
#!/usr/bin/env python3.4
import sys
import os
from pymzml.utils.utils import index_gzip
from pymzml.run import Reader
def main(mzml_path, out_path):
    """
    Create an indexed gzip mzML file from a plain mzML.

    Usage: python3 gzip_mzml.py <path/to/mzml> <path/to/output>

    Args:
        mzml_path (str): path to the input mzML file
        out_path (str): path of the indexed gzip file to write
    """
    # The file size in bytes determines how many digits an offset can have.
    # os.path.getsize() reads it from the file system; tell() on a text-mode
    # file only returns an opaque position value.
    max_offset_len = os.path.getsize(mzml_path)
    # Keep a margin of 10 index entries above the reported spectrum count
    max_spec_no = Reader(mzml_path).get_spectrum_count() + 10
    index_gzip(
        mzml_path, out_path, max_idx=max_spec_no, idx_len=len(str(max_offset_len))
    )


if __name__ == "__main__":
    if len(sys.argv) > 2:
        main(sys.argv[1], sys.argv[2])
    else:
        print(main.__doc__)
4.7.15. Multi threading conversion of mzML(.gz) to mzML.gz (igzip)
- multi_threading_file_compression.main(folder, num_cpus=1)[source]
Creates indexed gzip mzML files from all mzML files in the given folder using a given number of threads.
- Usage:
python multi_threading_file_compression.py <folder> <threads>
Note
If the number of threads is larger than the number of actual possible threads, all possible threads will be used.
4.7.16. Access run info
- access_run_info.main(mzml_file)[source]
Basic example script to access basic run info of an mzML file. Requires a mzML file as first command line argument.
usage:
./access_run_info.py <path_to_mzml_file>
>>> run.info = { 'encoding': 'utf-8', 'file_name': '/Users/joe/Dev/pymzml_2.0/tests/data/BSA1.mzML.gz', 'file_object': <pymzml.file_interface.FileInterface object at 0x1039a3f28>, 'obo_version': '1.1.0', 'offset_dict': None, 'run_id': 'ru_0', 'spectrum_count': 1684, 'start_time': '2009-08-09T22:32:31' }
#!/usr/bin/env python
import sys
import pymzml
def main(mzml_file):
    """
    Basic example script to access basic run info of an mzML file. Requires a
    mzML file as first command line argument.

    usage:

        ./access_run_info.py <path_to_mzml_file>

    >>> run.info =
        {
            'encoding': 'utf-8',
            'file_name': '/Users/joe/Dev/pymzml_2.0/tests/data/BSA1.mzML.gz',
            'file_object': <pymzml.file_interface.FileInterface object at 0x1039a3f28>,
            'obo_version': '1.1.0',
            'offset_dict': None,
            'run_id': 'ru_0',
            'spectrum_count': 1684,
            'start_time': '2009-08-09T22:32:31'
        }

    Args:
        mzml_file (str): path to the mzML file to summarise

    """
    run = pymzml.run.Reader(mzml_file)
    # The placeholders are filled from the keys of run.info
    summary = (
        "\n"
        "Summary for mzML file:\n"
        "{file_name}\n"
        "Run was measured on {start_time} using obo version {obo_version}\n"
        "File contains {spectrum_count} spectra\n"
    )
    print(summary.format(**run.info))


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(main.__doc__)
        # sys.exit() instead of exit(): the latter is injected by the site
        # module for interactive use and is not guaranteed to exist.
        sys.exit()
    mzml_file = sys.argv[1]
    main(mzml_file)
4.7.17. Creating a custom Filehandler
4.7.17.1. Introduction
It is also possible to create your own API for different forms of mzML files. For this, a new class needs to be written which implements a read and a __getitem__ function.
4.7.17.2. Implementation of the API Class
Example:
class SQLiteDatabase(object):
    """
    Example implementation of a database connector,
    which can be used to make run accept paths to
    sqlite db files.

    We initialize with a path to a database and implement
    a custom __getitem__ function to retrieve the spectra
    """

    def __init__(self, path, encoding=None):
        """
        Open the SQLite database and keep a cursor for later queries.

        Args:
            path (str): path to the sqlite db file
            encoding (str, optional): stored but not used by this class; it
                is accepted so the class can be created with the same
                ``(path, encoding)`` arguments as the other file handlers.
        """
        self.encoding = encoding
        self.connection = sqlite3.connect(path)
        self.cursor = self.connection.cursor()

    def __getitem__(self, key):
        """
        Execute a SQL request, process the data and return a spectrum object.

        Args:
            key (str or int): unique identifier for the given spectrum in the
                database

        Returns:
            the Spectrum or Chromatogram object built from the stored XML

        Raises:
            KeyError: if no row with the given id exists
            ValueError: if the stored element is neither a spectrum nor a
                chromatogram
        """
        # The parameters must be passed as a sequence; a bare string would be
        # interpreted as one parameter per character.
        self.cursor.execute("SELECT * FROM spectra WHERE id=?", (key,))
        row = self.cursor.fetchone()
        if row is None:
            raise KeyError(key)
        ID, element = row
        element = et.XML(element)
        if "spectrum" in element.tag:
            return spec.Spectrum(element)
        if "chromatogram" in element.tag:
            return spec.Chromatogram(element)
        raise ValueError(
            "Unsupported element '{0}' stored for id {1!r}".format(element.tag, key)
        )

    def get_spectrum_count(self):
        """
        Return the number of rows in the spectra table.
        """
        self.cursor.execute("SELECT COUNT(*) from spectra")
        num = self.cursor.fetchone()[0]
        return num

    def read(self, size=-1):
        """
        Return a fixed dummy spectrum element; ``size`` is ignored.
        """
        # implement read so it starts reading in first ID,
        # if end reached switches to next id and so on ...
        return '<spectrum index="0" id="controllerType=0 controllerNumber=1 scan=1" defaultArrayLength="917"></spectrum>\n'

    def close(self):
        """
        Close the underlying database connection.
        """
        self.connection.close()
4.7.17.3. Enabling the new API Class in File Interface
In order to make the run class accept the new file class, one needs to edit the _open() function in file_interface.py.
Example:
def _open(self, path):
    """
    Create the file handler matching the extension of ``path``.

    The handler is stored in ``self.file_handler`` and returned.

    Args:
        path (str): path to the file that should be opened

    Returns:
        the created file handler object
    """
    if path.endswith(".gz"):
        if self._indexed_gzip(path):
            self.file_handler = indexedGzip.IndexedGzip(path, self.encoding)
        else:
            self.file_handler = standardGzip.StandardGzip(path, self.encoding)
    # Insert a new condition to enable your new fileclass
    elif path.endswith(".db"):
        # The example SQLiteDatabase class is constructed from the database
        # path alone, so no encoding is passed here.
        self.file_handler = utils.SQLiteConnector.SQLiteDatabase(path)
    else:
        self.file_handler = standardMzml.StandardMzml(path, self.encoding)
    return self.file_handler
4.7.18. Moby Dick as indexed Gzip
Example of how to use the GSGW and GSGR classes to create and access indexed Gzip files:
python3 index_moby_dick.py
python3 read_moby_dick.py 10