4.7. Example Scripts

4.7.1. Parsing a mzML file (new syntax)

simple_parser.main(mzml_file)[source]

Basic example script to demonstrate the usage of pymzML. Requires a mzML file as first argument.

usage:

./simple_parser.py <path_to_mzml_file>

Note

This script uses the new syntax with the MS level being a property of the spectrum class ( Spectrum.ms_level ). The old syntax can be found in the script simple_parser_v2.py where the MS level can be queried as a key (Spectrum[‘ms level’])

#!/usr/bin/env python
import sys
import pymzml


def main(mzml_file):
    """
    Basic example script to demonstrate the usage of pymzML. Requires a mzML
    file as first argument.

    usage:

        ./simple_parser.py <path_to_mzml_file>

    Note:

        This script uses the new syntax with the MS level being a property of
        the spectrum class ( Spectrum.ms_level ). The old syntax can be found in
        the script simple_parser_v2.py where the MS level can be queried as a key
        (Spectrum['ms level'])

    Args:
        mzml_file (str): path to the mzML file that is parsed

    """
    run = pymzml.run.Reader(mzml_file)
    # Count from 1 so the summary reports how many spectra were seen instead
    # of the last zero-based index; the initial 0 covers a run that yields
    # nothing at all.
    spectrum_count = 0
    for spectrum_count, spec in enumerate(run, start=1):
        print(
            "Spectrum {0}, MS level {ms_level} @ RT {scan_time:1.2f}".format(
                spec.ID, ms_level=spec.ms_level, scan_time=spec.scan_time_in_minutes()
            )
        )
    print("Parsed {0} spectra from file {1}".format(spectrum_count, mzml_file))
    print()


if __name__ == "__main__":
    # Without a file argument there is nothing to parse: show the usage text.
    if len(sys.argv) < 2:
        print(main.__doc__)
        # sys.exit() rather than exit(): the latter is injected by the site
        # module for interactive use and is not guaranteed to exist.
        sys.exit()
    main(sys.argv[1])

4.7.2. Parsing a mzML file (old syntax)

simple_parser_v2.main(mzml_file)[source]

Basic example script to demonstrate the usage of pymzML. Requires a mzML file as first argument.

usage:

./simple_parser_v2.py <path_to_mzml_file>

Note

This script uses the old syntax where the MS level can be queried as a key (Spectrum[‘ms level’]). The current syntax can be found in simple_parser.py

#!/usr/bin/env python
import sys
import pymzml
from collections import defaultdict as ddict


def main(mzml_file):
    """
    Basic example script to demonstrate the usage of pymzML. Requires a mzML
    file as first argument.

    usage:
        ./simple_parser_v2.py <path_to_mzml_file>

    Note:

        This script uses the old syntax where the MS level can be queried as a
        key (Spectrum['ms level']). The current syntax can be found in
        simple_parser.py

    """
    reader = pymzml.run.Reader(mzml_file)
    peak_counts = ddict(int)
    progress_template = "Spectrum {0}, MS level {ms_level}"
    for index, spectrum in enumerate(reader):
        progress = progress_template.format(index, ms_level=spectrum["ms level"])
        # the carriage return keeps the progress output on one terminal line
        print(progress, end="\r")
        # the old method to obtain peaks from the Spectrum class
        peak_counts[spectrum.ID] = len(spectrum.centroidedPeaks)

    summary = "Parsed {0} spectra from file {1}".format(len(peak_counts), mzml_file)
    print(summary)
    print()


if __name__ == "__main__":
    # Without a file argument there is nothing to parse: show the usage text.
    if len(sys.argv) < 2:
        print(main.__doc__)
        # sys.exit() rather than exit(): the latter is injected by the site
        # module for interactive use and is not guaranteed to exist.
        sys.exit()
    main(sys.argv[1])

4.7.3. Query the obo files

queryOBO.main(args)[source]

Use this script to interrogate the OBO database files.

usage:

./queryOBO.py [-h] [-v VERSION] query

Example:

$ ./queryOBO.py 'scan time'
MS:1000016
scan time
'The time taken for an acquisition by scanning analyzers.' [PSI:MS]
Is a: MS:1000503 ! scan attribute

Example:

$ ./queryOBO.py 1000016
MS:1000016
scan time
"The time taken for an acquisition by scanning analyzers." [PSI:MS]
MS:1000503 ! scan attribute

#!/usr/bin/env python
# -*- coding: utf-8 -*-


import argparse
from collections import defaultdict

import pymzml.obo


# Fields of an OBO entry that the search helpers copy out, in output order.
FIELDNAMES = ["id", "name", "def", "is_a"]


def main(args):
    """
    Use this script to interrogate the OBO database files.

    usage:

        ./queryOBO.py [-h] [-v VERSION] query

    Example::

        $ ./queryOBO.py 'scan time'
        MS:1000016
        scan time
        'The time taken for an acquisition by scanning analyzers.' [PSI:MS]
        Is a: MS:1000503 ! scan attribute

    Example::

        $ ./queryOBO.py 1000016
        MS:1000016
        scan time
        "The time taken for an acquisition by scanning analyzers." [PSI:MS]
        MS:1000503 ! scan attribute

    Args:
        args: parsed command line arguments; the attributes ``version`` and
            ``query`` are read.

    """
    translator = pymzml.obo.OboTranslator(version=args.version)
    translator.parseOBO()

    # a purely numeric query is looked up as an accession
    if args.query.isdigit():
        print(search_by_id(translator, args.query))
        return

    # everything else is matched against the term names
    for number, hit in enumerate(search_by_name(translator, args.query)):
        print("#{0}".format(number))
        print(hit["id"])
        print(hit["name"])
        print(hit["def"])
        if "is_a" in hit:
            print("Is a:", hit["is_a"])


def search_by_name(obo, name):
    """
    Collect every entry whose lookup key contains ``name``, ignoring case.

    Args:
        obo: object exposing ``lookups``, an iterable of mappings from key
            to entry
        name (str): text to look for inside the lookup keys

    Returns:
        list: one ``defaultdict(str)`` per hit, holding those FIELDNAMES
        that the matching entry provides
    """
    needle = name.lower()
    print("Searching for {0}".format(needle))
    hits = []
    for lookup in obo.lookups:
        for key, entry in lookup.items():
            if needle not in key.lower():
                continue
            hit = defaultdict(str)
            for fieldname in FIELDNAMES:
                if fieldname in entry.keys():
                    hit[fieldname] = entry[fieldname]
            hits.append(hit)
    return hits


def search_by_id(obo, id):
    """
    Look up the accession ``MS:<id>`` and return its fields as text.

    Args:
        obo: object exposing ``lookups`` (iterable of mappings) and
            ``MS_tag_regex``
        id (str): numeric part of the accession

    Returns:
        str: the available FIELDNAMES values, one per line; empty if the
        accession is not found
    """
    accession = "MS:{0}".format(id)
    lines = []
    for lookup in obo.lookups:
        if accession not in lookup:
            continue
        if not obo.MS_tag_regex.match(accession):
            continue
        entry = lookup[accession]
        for fieldname in FIELDNAMES:
            if fieldname in entry.keys():
                lines.append("{0}\n".format(entry[fieldname]))
    return "".join(lines)


if __name__ == "__main__":
    # NOTE(review): no module docstring is visible in this script, so __doc__
    # is presumably None and argparse falls back to its generated usage line.
    parser = argparse.ArgumentParser(usage=__doc__)
    parser.add_argument(
        "query", help="an accession or part of an OBO term name to look for"
    )
    parser.add_argument(
        "-v",
        "--version",
        default="1.1.0",
        help="""
            the version of the OBO to use; valid options are 1.0.0, 1.1.0, and 1.2,
            default is 1.1.0
        """,
    )

    main(parser.parse_args())

4.7.4. Plotting a chromatogram

plot_chromatogram.main(mzml_file)[source]

Plots a chromatogram for the given mzML file. File is saved as ‘chromatogram_<mzml_file>.html’.

usage:

./plot_chromatogram.py <path_to_mzml_file>

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys

import pymzml
from pymzml.plot import Factory


def main(mzml_file):
    """
    Plots a chromatogram for the given mzML file. File is saved as
    'chromatogram_<mzml_file>.html'.

    usage:

        ./plot_chromatogram.py <path_to_mzml_file>

    """
    reader = pymzml.run.Reader(mzml_file)
    file_label = os.path.basename(mzml_file)
    axis_titles = {"xaxis": {"title": "Retention time"}, "yaxis": {"title": "TIC"}}
    output_name = "chromatogram_{0}.html".format(file_label)

    plot_factory = Factory()
    plot_factory.new_plot()
    # the entry stored under "TIC" provides the peaks that are drawn as a line
    tic_peaks = reader["TIC"].peaks()
    plot_factory.add(tic_peaks, color=(0, 0, 0), style="lines", title=file_label)
    plot_factory.save(output_name, layout=axis_titles)


if __name__ == "__main__":
    # Without a file argument there is nothing to plot: show the usage text.
    if len(sys.argv) < 2:
        print(main.__doc__)
        # sys.exit() rather than exit(): the latter is injected by the site
        # module for interactive use and is not guaranteed to exist.
        sys.exit()
    main(sys.argv[1])

4.7.5. Plotting a spectrum

plot_spectrum.main()[source]

This function shows how to plot a simple spectrum. It can be directly plotted via this script or using the python console.

usage:

./plot_spectrum.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

import pymzml


def main():
    """
    This function shows how to plot a simple spectrum. It can be directly
    plotted via this script or using the python console.

    usage:

        ./plot_spectrum.py

    """
    script_dir = os.path.dirname(__file__)
    example_file = os.path.join(
        script_dir, os.pardir, "tests", "data", "example.mzML"
    )
    reader = pymzml.run.Reader(example_file)
    factory = pymzml.plot.Factory()
    for spectrum in reader:
        factory.new_plot()
        centroided_peaks = spectrum.peaks("centroided")
        factory.add(centroided_peaks, color=(0, 0, 0), style="sticks", name="peaks")
        output_name = "example_plot_{0}_{1}.html".format(
            os.path.basename(example_file), spectrum.ID
        )
        factory.save(filename=output_name)
        print("Plotted file: {0}".format(output_name))
        # only the first spectrum of the run is plotted
        break


# Run the example only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

4.7.6. Plotting a spectrum with annotation

plot_spectrum_with_annotation.main()[source]

This script shows how to plot multiple spectra in one plot and how to use label for the annotation of spectra. The first plot is an MS1 spectrum with the annotated precursor ion. The second plot is a zoom into the precursor isotope pattern. The third plot is an annotated fragmentation spectrum (MS2) of the peptide HLVDEPQNLIK from BSA. These examples also show the use of ‘layout’ to define the appearance of a plot.

usage:

./plot_spectrum_with_annotation.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import copy
import os

import pymzml


def main():
    """
    This script shows how to plot multiple spectra in one plot and
    how to use label for the annotation of spectra.
    The first plot is an MS1 spectrum with the annotated precursor ion.
    The second plot is a zoom into the precursor isotope pattern.
    The third plot is an annotated fragmentation spectrum (MS2) of the
    peptide HLVDEPQNLIK from BSA.
    These examples also show the use of 'layout' to define the appearance
    of a plot.

    usage:

        ./plot_spectrum_with_annotation.py

    """

    # First we define some general layout attributes
    layout = {
        "xaxis": {
            "title": "<i>m/z</i>",
            "tickmode": "auto",
            "showticklabels": True,
            "ticklen": 5,
            "tickwidth": 1,
            "ticks": "outside",
            "showline": True,
            "showgrid": False,
        },
        "yaxis": {
            "color": "#000000",
            "tickmode": "auto",
            "showticklabels": True,
            "ticklen": 5,
            "tickwidth": 1,
            "ticks": "outside",
            "showline": True,
            "showgrid": False,
        },
    }

    # The example BSA file will be used
    example_file = os.path.join(
        os.path.dirname(__file__), os.pardir, "tests", "data", "BSA1.mzML.gz"
    )

    # Define different precisions for MS1 and MS2
    run = pymzml.run.Reader(example_file, MS_precisions={1: 5e-6, 2: 5e-4})
    p = pymzml.plot.Factory()
    # Collects the numbered per-subplot axis settings that are passed to save()
    plot_layout = {}

    # Now that everything is set up, we can plot the MS1 spectrum
    # Spectrum ID: 1574
    p.new_plot(title="MS1 Spectrum")
    ms1_spectrum = run[1574]

    # The measured peaks are added as first trace
    p.add(
        ms1_spectrum.peaks("centroided"),
        color=(0, 0, 0),
        opacity=1,
        style="sticks",
        name="raw data plot 1",
    )

    # The label for the precursor ion is added as a separate trace.
    # Note that triangle.MS_precision is used here as a label.
    # By zooming in at this peak one can therefore check if the measured
    # peak fits into the defined mass accuracy range.
    precursor_mz_calc = 435.9102
    p.add(
        [(precursor_mz_calc, "max_intensity", "theoretical precursor")],
        color=(255, 0, 0),
        opacity=0.6,
        style="label.triangle.MS_precision",
        name="theoretical precursor plot 1",
    )

    # Define the layout for the first subplot.
    # The x- and y-axes of subplots are numbered, starting at 1.
    # Each subplot gets its own (shallow) copy so later changes to one axis
    # entry do not leak into the shared template.
    for axis in layout.keys():
        plot_layout["{0}1".format(axis)] = copy.copy(layout[axis])

    # Now we can add a second plot, the same way as above but as a zoom-in.
    # Therefore, we define a mz_range
    p.new_plot(title="MS1 Spectrum Zoom")
    p.add(
        ms1_spectrum.peaks("centroided"),
        color=(0, 0, 0),
        opacity=1,
        style="sticks",
        name="raw data plot 2",
        plot_num=1,
        mz_range=[435.7, 437],
    )

    p.add(
        [(precursor_mz_calc, "max_intensity", "theoretical precursor")],
        color=(255, 0, 0),
        opacity=0.3,
        plot_num=1,
        style="label.triangle.MS_precision",
        name="theoretical precursor plot 2",
    )

    # The mz_range can be included in the layout as well.
    # In contrast to mz_range in the add() function, which limits the included
    # datapoints, the layout range only defines the area that is depicted (i.e. the zoom)
    for axis in layout.keys():
        plot_layout["{0}2".format(axis)] = copy.copy(layout[axis])
    plot_layout["xaxis2"]["autorange"] = False
    plot_layout["xaxis2"]["range"] = [435.7, 437]

    # Now the third plot will be added, a fragmentation spectrum of HLVDEPQNLIK
    ms2_spectrum = run[3542]

    # The MS_precision for the plotting option label.triangle.MS_precision
    # needs to be defined
    p.new_plot(title="MS2 Spectrum Annotated: HLVDEPQNLIK", MS_precision=5e-4)
    p.add(
        ms2_spectrum.peaks("centroided"),
        color=(0, 0, 0),
        opacity=1,
        style="sticks",
        name="raw data plot 3",
        plot_num=2,
    )

    # Label (HTML markup) -> theoretical m/z of the b ions
    theoretical_b_ions = {
        "b<sub>2</sub><sup>+2</sup>": 126.0788,
        "b<sub>3</sub><sup>+2</sup>": 175.6130,
        "b<sub>4</sub><sup>+2</sup>": 233.1264,
        "b<sub>2</sub>": 251.1503,
        "b<sub>5</sub><sup>+2</sup>": 297.6477,
        "b<sub>6</sub><sup>+2</sup>": 346.1741,
        "b<sub>3</sub>": 350.2187,
        "b<sub>7</sub><sup>+2</sup>": 410.2034,
        "b<sub>4</sub>": 465.2456,
        "b<sub>8</sub><sup>+2</sup>": 467.2249,
        "b<sub>9</sub><sup>+2</sup>": 523.7669,
        "b<sub>10</sub><sup>+2</sup>": 580.3089,
        "b<sub>5</sub>": 594.2882,
        "b<sub>6</sub>": 691.341,
        "b<sub>7</sub>": 819.3995,
        "b<sub>8</sub>": 933.4425,
        "b<sub>9</sub>": 1046.5265,
        "b<sub>10</sub>": 1159.6106,
    }

    # Label (HTML markup) -> theoretical m/z of the y ions
    theoretical_y_ions = {
        "y<sub>1</sub><sup>+2</sup>": 74.0600,
        "y<sub>2</sub><sup>+2</sup>": 130.6021,
        "y<sub>1</sub>": 147.1128,
        "y<sub>3</sub><sup>+2</sup>": 187.1441,
        "y<sub>4</sub><sup>+2</sup>": 244.1656,
        "y<sub>2</sub>": 260.1969,
        "y<sub>5</sub><sup>+2</sup>": 308.1949,
        "y<sub>6</sub><sup>+2</sup>": 356.7212,
        "y<sub>3</sub>": 373.2809,
        "y<sub>7</sub><sup>+2</sup>": 421.2425,
        "y<sub>8</sub><sup>+2</sup>": 478.7560,
        "y<sub>4</sub>": 487.3239,
        "y<sub>9</sub><sup>+2</sup>": 528.2902,
        "y<sub>10</sub><sup>+2</sup>": 584.8322,
        "y<sub>5</sub>": 615.3824,
        "y<sub>6</sub>": 712.4352,
        "y<sub>7</sub>": 841.4778,
        "y<sub>8</sub>": 956.5047,
        "y<sub>9</sub>": 1055.5732,
        "y<sub>10</sub>": 1168.6572,
    }

    # Check which theoretical fragments are present in the spectrum
    # using the has_peak() function
    for ion_list in [theoretical_b_ions, theoretical_y_ions]:
        label_list = []
        for fragment in ion_list.keys():
            peak = ms2_spectrum.has_peak(ion_list[fragment])
            if len(peak) != 0:
                # The label is placed at the theoretical m/z; its height is
                # the second field of the first match (presumably the
                # measured intensity - confirm against has_peak()).
                label_list.append((ion_list[fragment], peak[0][1], fragment))
        # b ions are drawn in blue, y ions in green
        if ion_list == theoretical_b_ions:
            color = (0, 0, 255)
        else:
            color = (0, 255, 0)
        p.add(
            label_list,
            color=color,
            style="label.triangle.MS_precision",
            name="theoretical fragment ions plot 3",
        )

    for axis in layout.keys():
        plot_layout["{0}3".format(axis)] = copy.copy(layout[axis])

    # Save the plot in a file using the defined plot_layout
    filename = "example_plot_{0}_annotation.html".format(os.path.basename(example_file))
    p.save(filename=filename, layout=plot_layout)
    print("Plotted file: {0}".format(filename))


# Run the example only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

4.7.7. Extracting highest peaks

highest_peaks.main()[source]

Testscript to isolate the n-highest peaks from an example file.

Usage:

./highest_peaks.py

Parses the file ‘../tests/data/example.mzML’ and extracts the 2 highest intensities from each spectrum.

#!/usr/bin/env python

import pymzml
from collections import defaultdict as ddict
import os


def main():
    """
    Testscript to isolate the n-highest peaks from an example file.

    Usage:

        ./highest_peaks.py

    Parses the file '../tests/data/example.mzML' and extracts the 2 highest
    intensities from each spectrum.

    """
    script_dir = os.path.dirname(__file__)
    example_file = os.path.join(
        script_dir, os.pardir, "tests", "data", "example.mzML"
    )
    reader = pymzml.run.Reader(example_file)

    peaks_per_spectrum = 2
    intensities_by_id = ddict(list)

    for spectrum in reader:
        # only MS1 spectra are evaluated
        if spectrum.ms_level != 1:
            continue
        for _mz, intensity in spectrum.highest_peaks(peaks_per_spectrum):
            intensities_by_id[spectrum.ID].append(intensity)

    for spectrum_id in intensities_by_id:
        intensities = intensities_by_id[spectrum_id]
        # every collected spectrum must have delivered the requested number
        assert len(intensities) == peaks_per_spectrum
        message = "Spectrum {0}; highest intensities: {1}".format(
            spectrum_id, intensities
        )
        print(message)


# Run the example only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

4.7.8. Compare spectra

compare_spectra.main()[source]

Compare multiple spectra and return the cosine distance between them. The returned value is between 0 and 1, a returned value of 1 represents highest similarity.

usage:

./compare_spectra.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

import pymzml


def main():
    """
    Compare multiple spectra and return the cosine distance between them.
    The returned value is between 0 and 1, a returned value of 1
    represents highest similarity.

    usage:

        ./compare_spectra.py

    """
    script_dir = os.path.dirname(__file__)
    example_file = os.path.join(
        script_dir, os.pardir, "tests", "data", "example.mzML"
    )
    # banner with the same surrounding whitespace as the original literal
    print("\n" + " " * 12 + "Comparing spectra\n" + " " * 8)

    reader = pymzml.run.Reader(example_file)
    ms1_spectra = []
    for spectrum in reader:
        if spectrum.ms_level != 1:
            continue
        print("Parsing spectrum lvl 1 has id {0}".format(spectrum.ID))
        ms1_spectra.append(spectrum)
        # three MS1 spectra are enough for the pairwise comparison below
        if len(ms1_spectra) >= 3:
            break

    print("Print total number of specs collected {0}".format(len(ms1_spectra)))
    for left, right in [(0, 1), (0, 2), (1, 2)]:
        score = ms1_spectra[left].similarity_to(ms1_spectra[right])
        print(
            "Cosine between spectra {0} & {1} is {2:1.4f}".format(
                left + 1, right + 1, score
            )
        )

    self_score = ms1_spectra[0].similarity_to(ms1_spectra[0])
    print(
        "Cosine score between first spectrum against itself: {0:1.4f}".format(
            self_score
        )
    )


# Run the example only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

4.7.9. Find m/z values

has_peak.main()[source]

Testscript to demonstrate functionality of function pymzml.spec.Spectrum.has_peak()

usage:

./has_peak.py

#!/usr/bin/env python

import pymzml
import os


def main():
    """
    Testscript to demonstrate functionality of function :py:func:`pymzml.spec.Spectrum.has_peak`

    usage:

        ./has_peak.py

    """
    script_dir = os.path.dirname(__file__)
    example_file = os.path.join(
        script_dir, os.pardir, "tests", "data", "example.mzML"
    )
    target_mz = 820.7711792
    reader = pymzml.run.Reader(example_file)
    for spectrum in reader:
        matches = spectrum.has_peak(target_mz)
        # an empty list means the m/z value was not found in this spectrum
        if matches == []:
            continue
        print("Found peaks: {0} in spectrum {1}".format(matches, spectrum.ID))


# Run the example only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

4.7.10. Extract ion chromatogram

extract_ion_chromatogram.main()[source]

Demonstration of the extraction of a specific ion chromatogram, i.e. XIC or EIC

All intensities and m/z values for a target m/z are extracted.

usage:

./extract_ion_chromatogram.py

#!/usr/bin/env python

import os

import pymzml


def main():
    """
    Demonstration of the extraction of a specific ion chromatogram, i.e. XIC or EIC

    All intensities and m/z values for a target m/z are extracted.

    usage:

        ./extract_ion_chromatogram.py

    """
    script_dir = os.path.dirname(__file__)
    example_file = os.path.join(
        script_dir, os.pardir, "tests", "data", "example.mzML"
    )
    reader = pymzml.run.Reader(example_file)

    target_mz = 70.06575775
    xic_rows = []

    for spectrum in reader:
        # only MS1 spectra contribute to the chromatogram
        if spectrum.ms_level != 1:
            continue
        matches = spectrum.has_peak(target_mz)
        if matches == []:
            continue
        for mz, intensity in matches:
            xic_rows.append([spectrum.scan_time_in_minutes(), intensity, mz])

    print("RT   \ti   \tmz")
    for row in xic_rows:
        # each row holds retention time, intensity and m/z, in that order
        print("{0:5.3f}\t{1:13.4f}\t{2:10}".format(*row))


# Run the example only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

4.7.11. Find abundant precursors

get_precursors.main()[source]

Extract the 10 most often fragmented precursors from the BSA example file.

This can e.g. be used for defining exclusion lists for further MS runs.

usage:

./get_precursors.py

#!/usr/bin/env python

import os
from operator import itemgetter

import pymzml


def main():
    """
    Extract the 10 most often fragmented precursors from the BSA example file.

    This can e.g. be used for defining exclusion lists for further MS runs.

    usage:

        ./get_precursors.py

    """

    example_file = os.path.join(
        os.path.dirname(__file__), os.pardir, "tests", "data", "BSA1.mzML.gz"
    )
    run = pymzml.run.Reader(example_file)

    # rounded precursor m/z -> IDs of the MS2 spectra that fragmented it
    fragmented_precursors = {}
    for spectrum in run:
        if spectrum.ms_level != 2:
            continue
        # Read the property once and keep the guard against None.
        selected_precursors = spectrum.selected_precursors
        if selected_precursors is None:
            continue
        for precursor_dict in selected_precursors:
            # Only the m/z is needed; the intensity entry is not read, so
            # precursors without one no longer abort the run.
            rounded_precursor_mz = round(precursor_dict["mz"], 3)
            fragmented_precursors.setdefault(rounded_precursor_mz, []).append(
                spectrum.ID
            )

    # The spectrum count comes first so that sorting ranks by frequency.
    precursor_info_list = [
        (len(spectra_list), rounded_precursor_mz, spectra_list)
        for rounded_precursor_mz, spectra_list in fragmented_precursors.items()
    ]

    top_precursors = sorted(precursor_info_list, reverse=True)[:10]
    for _number_of_spectra, rounded_precursor_mz, spectra_list in top_precursors:
        print(
            "Found precursor: {0} in spectra: {1}".format(
                rounded_precursor_mz, spectra_list
            )
        )


# Run the example only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

4.7.12. Access polarity of spectra

polarity.main()[source]

Accessing positive or negative polarity of scan using obo 1.1.0

usage:

./polarity.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-


import pymzml
import get_example_file


def _is_flag_set(value):
    """Return True when a polarity lookup result marks the term as present."""
    # None is treated as "term absent" and an empty string as "term present
    # without a value"; any other result counts by its truthiness.
    return value == "" or bool(value)


def main():
    """
    Accessing positive or negative polarity of scan using obo 1.1.0

    usage:

        ./polarity.py

    """
    example_file = get_example_file.open_example("small.pwiz.1.1.mzML")
    run = pymzml.run.Reader(example_file, obo_version="1.1.0")
    for spec in run:
        # Both polarities go through the same conversion; previously the
        # positive branch mapped every non-empty result to False.
        negative_polarity = _is_flag_set(spec["negative scan"])
        positive_polarity = _is_flag_set(spec["positive scan"])
        print(
            "Polarity negative {0} - Polarity positive {1}".format(
                negative_polarity, positive_polarity
            )
        )
        # Only the first spectrum is inspected. Leave the loop instead of
        # calling exit(1), which reported a failure status on success.
        break


# Run the example only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

4.7.13. Check old to new function name mapping

deprecation_check.main()[source]

Testscript to highlight the function name changes in the Spectrum class.

Note

Please adjust any old scripts to the new syntax.

usage:

./deprecation_check.py

#!/usr/bin/env python3

import os

import pymzml


def main():
    """
    Testscript to highlight the function name changes in the Spectrum class.

    Note:
        Please adjust any old scripts to the new syntax.

    usage:

        ./deprecation_check.py

    """
    script_dir = os.path.dirname(__file__)
    example_file = os.path.join(
        script_dir, os.pardir, "tests", "data", "example.mzML"
    )
    reader = pymzml.run.Reader(example_file)
    probe_mz = 813.19073486
    seen_spectra = []
    for index, spectrum in enumerate(reader):
        seen_spectra.append(spectrum)
        # call every old-style method name once
        spectrum.hasPeak(probe_mz)
        spectrum.extremeValues("mz")
        spectrum.hasOverlappingPeak(probe_mz)
        spectrum.highestPeaks(1)
        spectrum.estimatedNoiseLevel()
        spectrum.removeNoise()
        spectrum.transformMZ(probe_mz)
        if index != 1:
            continue
        # the second spectrum is compared against the first, then we stop
        spectrum.similarityTo(seen_spectra[0])
        break


# Run the example only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

4.7.14. Convert mzML(.gz) to mzML.gz (igzip)

gzip_mzml.main(mzml_path, out_path)[source]

Create an indexed gzip mzML file from a plain mzML.

Usage: python3 gzip_mzml.py <path/to/mzml> <path/to/output>

#!/usr/bin/env python3.4

import sys
import os
from pymzml.utils.utils import index_gzip
from pymzml.run import Reader


def main(mzml_path, out_path):
    """
    Create an indexed gzip mzML file from a plain mzML.

    Usage: python3 gzip_mzml.py <path/to/mzml> <path/to/output>

    Args:
        mzml_path (str): path to the input mzML file
        out_path (str): path of the indexed gzip file that is written
    """
    # Ask the file system for the size in bytes. tell() on a text-mode handle
    # is an opaque number and not guaranteed to be a byte count.
    max_offset_len = os.path.getsize(mzml_path)
    # keep the margin of 10 on top of the reported spectrum count
    max_spec_no = Reader(mzml_path).get_spectrum_count() + 10

    index_gzip(
        mzml_path, out_path, max_idx=max_spec_no, idx_len=len(str(max_offset_len))
    )


if __name__ == "__main__":
    # input and output path are both required; otherwise show the usage text
    if len(sys.argv) <= 2:
        print(main.__doc__)
    else:
        mzml_path, out_path = sys.argv[1], sys.argv[2]
        main(mzml_path, out_path)

4.7.15. Multi threading conversion of mzML(.gz) to mzML.gz (igzip)

multi_threading_file_compression.main(folder, num_cpus=1)[source]

Creates indexed gzip mzML files from all mzML files in the given folder using a given number of threads.

Usage:

python multi_threading_file_compression.py <folder> <threads>

Note

If the number of threads is larger than the number of actual possible threads, all possible threads will be used.

4.7.16. Access run info

access_run_info.main(mzml_file)[source]

Basic example script to access basic run info of an mzML file. Requires a mzML file as first command line argument.

usage:

./access_run_info.py <path_to_mzml_file>

>>> run.info =
        {
            'encoding': 'utf-8',
             'file_name': '/Users/joe/Dev/pymzml_2.0/tests/data/BSA1.mzML.gz',
             'file_object': <pymzml.file_interface.FileInterface object at 0x1039a3f28>,
             'obo_version': '1.1.0',
             'offset_dict': None,
             'run_id': 'ru_0',
             'spectrum_count': 1684,
             'start_time': '2009-08-09T22:32:31'
         }

#!/usr/bin/env python
import sys
import pymzml


def main(mzml_file):
    """
    Basic example script to access basic run info of an mzML file. Requires a
    mzML file as first command line argument.

    usage:

        ./access_run_info.py <path_to_mzml_file>

    Example content of run.info::

        {
            'encoding': 'utf-8',
            'file_name': '/Users/joe/Dev/pymzml_2.0/tests/data/BSA1.mzML.gz',
            'file_object': <pymzml.file_interface.FileInterface object at 0x1039a3f28>,
            'obo_version': '1.1.0',
            'offset_dict': None,
            'run_id': 'ru_0',
            'spectrum_count': 1684,
            'start_time': '2009-08-09T22:32:31'
        }

    """
    reader = pymzml.run.Reader(mzml_file)
    # the placeholders are filled from the keys of reader.info
    summary_template = (
        "\n"
        "Summary for mzML file:\n"
        "    {file_name}\n"
        "Run was measured on {start_time} using obo version {obo_version}\n"
        "File contains {spectrum_count} spectra\n"
        "        "
    )
    print(summary_template.format(**reader.info))


if __name__ == "__main__":
    # Without a file argument there is nothing to inspect: show the usage text.
    if len(sys.argv) < 2:
        print(main.__doc__)
        # sys.exit() rather than exit(): the latter is injected by the site
        # module for interactive use and is not guaranteed to exist.
        sys.exit()
    main(sys.argv[1])

4.7.17. Creating a custom Filehandler

4.7.17.1. Introduction

It is also possible to create your own API for different forms of mzML files. For this, a new class needs to be written that implements a read and a __getitem__ function.

4.7.17.2. Implementation of the API Class

Example:

class SQLiteDatabase(object):
    """
    Example implementation of a database connector,
    which can be used to make run accept paths to
    sqlite db files.

    We initialize with a path to a database and implement
    a custom __getitem__ function to retrieve the spectra.
    """

    def __init__(self, path, encoding=None):
        """
        Open the sqlite database at ``path``.

        Args:
            path (str): path to the sqlite database file
            encoding (str, optional): accepted so the class can be created
                with the same ``(path, encoding)`` call as the other file
                handlers; it is stored but not used for database access.
        """
        self.path = path
        self.encoding = encoding
        # keep a reference to the connection next to the cursor
        self.connection = sqlite3.connect(path)
        self.cursor = self.connection.cursor()

    def _fetch_row(self, key):
        """
        Return the database row stored for ``key``.

        Raises:
            KeyError: if no row with this id exists
        """
        # The parameters must be a sequence with one item per placeholder; a
        # bare string would be split into characters and an int is rejected.
        self.cursor.execute("SELECT * FROM spectra WHERE id=?", (key,))
        row = self.cursor.fetchone()
        if row is None:
            raise KeyError(key)
        return row

    def __getitem__(self, key):
        """
        Execute a SQL request, process the data and return a spectrum object.

        Args:
            key (str or int): unique identifier for the given spectrum in the
                database

        Returns:
            spec.Spectrum or spec.Chromatogram, depending on the element tag

        Raises:
            KeyError: if the database holds no entry for ``key``
            ValueError: if the stored element is neither a spectrum nor a
                chromatogram
        """
        ID, element = self._fetch_row(key)

        element = et.XML(element)
        if "spectrum" in element.tag:
            return spec.Spectrum(element)
        if "chromatogram" in element.tag:
            return spec.Chromatogram(element)
        raise ValueError(
            "Unsupported element <{0}> stored for id {1}".format(element.tag, ID)
        )

    def get_spectrum_count(self):
        """Return the number of rows in the spectra table."""
        self.cursor.execute("SELECT COUNT(*) from spectra")
        num = self.cursor.fetchone()[0]
        return num

    def read(self, size=-1):
        """
        Placeholder for stream-like reading; ``size`` is ignored and a fixed
        example spectrum element is returned.
        """
        # implement read so it starts reading in first ID,
        # if end reached switches to next id and so on ...

        return '<spectrum index="0" id="controllerType=0 controllerNumber=1 scan=1" defaultArrayLength="917"></spectrum>\n'

4.7.17.3. Enabling the new API Class in File Interface

In order to make the run class accept the new file class, one needs to edit the _open() function in file_interface.py

Example:

def _open(self, path):
    """
    Select the file handler class from the extension of ``path``, store it
    in ``self.file_handler`` and return it.

    Args:
        path (str): path to the file that should be opened

    Returns:
        the file handler object created for ``path``
    """
    if path.endswith(".gz"):
        if self._indexed_gzip(path):
            self.file_handler = indexedGzip.IndexedGzip(path, self.encoding)
        else:
            self.file_handler = standardGzip.StandardGzip(path, self.encoding)
    # Insert a new condition to enable your new fileclass
    elif path.endswith(".db"):
        # The example SQLiteDatabase class is created from the path alone.
        self.file_handler = utils.SQLiteConnector.SQLiteDatabase(path)
    else:
        self.file_handler = standardMzml.StandardMzml(path, self.encoding)
    return self.file_handler

4.7.18. Moby Dick as indexed Gzip

Example of how to use the GSGW and GSGR classes to create and access indexed Gzip files

python3 index_moby_dick.py

python3 read_moby_dick.py 10