Source code for cobra.flux_analysis.double_deletion

from warnings import warn
from itertools import chain, product

from six import iteritems
import numpy

from ..solvers import get_solver_name, solver_dict
from ..manipulation.delete import (find_gene_knockout_reactions,
                                   get_compiled_gene_reaction_rules)
from .deletion_worker import CobraDeletionPool, CobraDeletionMockPool

try:
    import scipy
except ImportError:
    moma = None
else:
    from . import moma

try:
    from pandas import DataFrame
except:
    DataFrame = None


# Utility functions
[docs]def generate_matrix_indexes(ids1, ids2):
    """map an identifier to an entry in the square result matrix"""
    return {id: index for index, id in enumerate(set(chain(ids1, ids2)))}


[docs]def yield_upper_tria_indexes(ids1, ids2, id_to_index):
    """gives the necessary indexes in the upper triangle

    ids1 and ids2 are lists of the identifiers i.e. gene id's or reaction
    indexes to be knocked out. id_to_index maps each identifier to its index
    in the result matrix.

    Note that this does not return indexes for the diagonal. Those have
    to be computed separately."""
    # sets to check for inclusion in o(1)
    id_set1 = set(ids1)
    id_set2 = set(ids2)
    for id1, id2 in product(ids1, ids2):
        # indexes in the result matrix
        index1 = id_to_index[id1]
        index2 = id_to_index[id2]
        # upper triangle
        if index2 > index1:
            yield ((index1, index2), (id1, id2))
        # lower triangle but would be skipped, so return in upper triangle
        elif id2 not in id_set1 or id1 not in id_set2:
            yield((index2, index1), (id2, id1))  # note that order flipped


def _format_upper_triangular_matrix(row_indexes, column_indexes, matrix):
    """reformat the square upper-triangular result matrix

    For example, results may look like this
    [[ A  B  C  D]
     [ -  -  -  -]
     [ -  -  E  F]
     [ -  -  -  G]]
    In this case, the second row was skipped. This means we have
    row_indexes [0, 2, 3] and column_indexes [0, 1, 2, 3]

    First, it will reflect the upper triangle into the lower triangle
    [[ A  B  C  D]
     [ B  -  -  -]
     [ C  -  E  F]
     [ D  -  F  G]]

    Finally, it will remove the missing rows and return
    [[ A  B  C  D]
     [ C  -  E  F]
     [ D  -  F  G]]
    """
    results = matrix.copy()
    # Thse select the indexes for the upper triangle. However, switching
    # the order selects the lower triangle.
    triu1, triu2 = numpy.triu_indices(matrix.shape[0])
    # This makes reflection pretty easy
    results[triu2, triu1] = results[triu1, triu2]
    # Remove the missing rows and  return.
    return results[row_indexes, :][:, column_indexes]


[docs]def format_results_frame(row_ids, column_ids, matrix, return_frame=False):
    """format results as a pandas.DataFrame if desired/possible

    Otherwise returns a dict of
    {"x": row_ids, "y": column_ids", "data": result_matrx}"""
    if return_frame and DataFrame:
        return DataFrame(data=matrix, index=row_ids, columns=column_ids)
    elif return_frame and not DataFrame:
        warn("could not import pandas.DataFrame")
    return {"x": row_ids, "y": column_ids, "data": matrix}


[docs]def double_deletion(cobra_model, element_list_1=None, element_list_2=None,
                    element_type='gene', **kwargs):
    """Wrapper for double_gene_deletion and double_reaction_deletion

    .. deprecated :: 0.4
        Use double_reaction_deletion and double_gene_deletion
    """
    warn("deprecated - use single_reaction_deletion and single_gene_deletion")
    if element_type == "reaction":
        return double_reaction_deletion(cobra_model, element_list_1,
                                        element_list_2, **kwargs)
    elif element_type == "gene":
        return double_gene_deletion(cobra_model, element_list_1,
                                    element_list_2, **kwargs)
    else:
        raise Exception("unknown element type")


[docs]def double_reaction_deletion(cobra_model,
                             reaction_list1=None, reaction_list2=None,
                             method="fba", return_frame=False,
                             solver=None, zero_cutoff=1e-12,
                             **kwargs):
    """sequentially knocks out pairs of reactions in a model

    cobra_model : :class:`~cobra.core.Model.Model`
        cobra model in which to perform deletions

    reaction_list1 : [:class:`~cobra.core.Reaction.Reaction`:] (or their id's)
        Reactions to be deleted. These will be the rows in the result.
        If not provided, all reactions will be used.

    reaction_list2 : [:class:`~cobra.core.Reaction`:] (or their id's)
        Reactions to be deleted. These will be the rows in the result.
        If not provided, reaction_list1 will be used.

    method: "fba" or "moma"
        Procedure used to predict the growth rate

    solver: str for solver name
        This must be a QP-capable solver for MOMA. If left unspecified,
        a suitable solver will be automatically chosen.

    zero_cutoff: float
        When checking to see if a value is 0, this threshold is used.

    return_frame: bool
        If true, formats the results as a pandas.Dataframe. Otherwise
        returns a dict of the form:
        {"x": row_labels, "y": column_labels", "data": 2D matrix}
    """
    # handle arguments which need to be passed on
    if solver is None:
        solver = get_solver_name(qp=(method == "moma"))
        kwargs["solver"] = solver
    kwargs["zero_cutoff"] = zero_cutoff

    # generate other arguments

    # identifiers for reactions are their indexes
    if reaction_list1 is None:
        reaction_indexes1 = range(len(cobra_model.reactions))
    else:
        reaction_indexes1 = [cobra_model.reactions.index(r)
                             for r in reaction_list1]
    if reaction_list2 is None:
        reaction_indexes2 = reaction_indexes1
    else:
        reaction_indexes2 = [cobra_model.reactions.index(r)
                             for r in reaction_list2]
    reaction_to_result = generate_matrix_indexes(reaction_indexes1,
                                                 reaction_indexes2)

    # Determine 0 flux reactions. If an optimal solution passes no flux
    # through the deleted reactions, then we know removing them will
    # not change the solution.
    wt_solution = solver_dict[solver].solve(cobra_model)
    if wt_solution.status == "optimal":
        kwargs["wt_growth_rate"] = wt_solution.f
        kwargs["no_flux_reaction_indexes"] = \
            {i for i, v in enumerate(wt_solution.x) if abs(v) < zero_cutoff}
    else:
        warn("wild-type solution status is '%s'" % wt_solution.status)

    # call the computing functions
    if method == "fba":
        results = _double_reaction_deletion_fba(
            cobra_model, reaction_indexes1, reaction_indexes2,
            reaction_to_result, **kwargs)
    elif method == "moma":
        results = _double_reaction_deletion_moma(
            cobra_model, reaction_indexes1, reaction_indexes2,
            reaction_to_result, **kwargs)
    else:
        raise ValueError("Unknown deletion method '%s'" % method)

    # convert upper triangular matrix to full matrix
    full_result = _format_upper_triangular_matrix(
        [reaction_to_result[i] for i in reaction_indexes1],  # row indexes
        [reaction_to_result[i] for i in reaction_indexes2],  # col indexes
        results)

    # format appropriately with labels
    row_ids = [cobra_model.reactions[i].id for i in reaction_indexes1]
    column_ids = [cobra_model.reactions[i].id for i in reaction_indexes2]
    return format_results_frame(row_ids, column_ids,
                                full_result, return_frame)


[docs]def double_gene_deletion(cobra_model,
                         gene_list1=None, gene_list2=None,
                         method="fba", return_frame=False,
                         solver=None, zero_cutoff=1e-12,
                         **kwargs):
    """sequentially knocks out pairs of genes in a model

    cobra_model : :class:`~cobra.core.Model.Model`
        cobra model in which to perform deletions

    gene_list1 : [:class:`~cobra.core.Gene.Gene`:] (or their id's)
        Genes to be deleted. These will be the rows in the result.
        If not provided, all reactions will be used.

    gene_list1 : [:class:`~cobra.core.Gene.Gene`:] (or their id's)
        Genes to be deleted. These will be the rows in the result.
        If not provided, reaction_list1 will be used.

    method: "fba" or "moma"
        Procedure used to predict the growth rate

    solver: str for solver name
        This must be a QP-capable solver for MOMA. If left unspecified,
        a suitable solver will be automatically chosen.

    zero_cutoff: float
        When checking to see if a value is 0, this threshold is used.

    number_of_processes: int for number of processes to use.
        If unspecified, the number of parallel processes to use will be
        automatically determined. Setting this to 1 explicitly disables used
        of the multiprocessing library.

    .. note:: multiprocessing is not supported with method=moma

    return_frame: bool
        If true, formats the results as a pandas.Dataframe. Otherwise
        returns a dict of the form:
        {"x": row_labels, "y": column_labels", "data": 2D matrix}
    """
    # handle arguments which need to be passed on
    if solver is None:
        solver = get_solver_name(qp=(method == "moma"))
        kwargs["solver"] = solver
    kwargs["zero_cutoff"] = zero_cutoff

    # generate other arguments

    # identifiers for genes
    if gene_list1 is None:
        gene_ids1 = cobra_model.genes.list_attr("id")
    else:
        gene_ids1 = [str(i) for i in gene_list1]
    if gene_list2 is None:
        gene_ids2 = gene_ids1
    else:
        gene_ids2 = [str(i) for i in gene_list2]

    # The gene_id_to_result dict will map each gene id to the index
    # in the result matrix.
    gene_id_to_result = generate_matrix_indexes(gene_ids1, gene_ids2)

    # Determine 0 flux reactions. If an optimal solution passes no flux
    # through the deleted reactions, then we know removing them will
    # not change the solution.
    wt_solution = solver_dict[solver].solve(cobra_model)
    if wt_solution.status == "optimal":
        kwargs["wt_growth_rate"] = wt_solution.f
        kwargs["no_flux_reaction_indexes"] = \
            {i for i, v in enumerate(wt_solution.x) if abs(v) < zero_cutoff}
    else:
        warn("wild-type solution status is '%s'" % wt_solution.status)

    if method == "fba":
        result = _double_gene_deletion_fba(cobra_model, gene_ids1, gene_ids2,
                                           gene_id_to_result, **kwargs)
    elif method == "moma":
        result = _double_gene_deletion_moma(cobra_model, gene_ids1, gene_ids2,
                                            gene_id_to_result, **kwargs)
    else:
        raise ValueError("Unknown deletion method '%s'" % method)

    # convert upper triangular matrix to full matrix
    full_result = _format_upper_triangular_matrix(
        [gene_id_to_result[id] for id in gene_ids1],  # row indexes
        [gene_id_to_result[id] for id in gene_ids2],  # col indexes,
        result)

    # format as a Dataframe if required
    return format_results_frame(gene_ids1, gene_ids2,
                                full_result, return_frame)


def _double_reaction_deletion_fba(cobra_model, reaction_indexes1,
                                  reaction_indexes2, reaction_to_result,
                                  solver, number_of_processes=None,
                                  zero_cutoff=1e-15, wt_growth_rate=None,
                                  no_flux_reaction_indexes=set(), **kwargs):
    """compute double reaction deletions using fba

    cobra_model: model

    reaction_indexes1, reaction_indexes2: reaction indexes (used as unique
        identifiers)

    reaction_to_result: maps each reaction identifier to the entry in
        the result matrix

    no_flux_reaction_indexes: set of indexes for reactions in the model
        which carry no flux in an optimal solution. For deletions only in
        this set, the result will beset to wt_growth_rate.

    returns an upper triangular square matrix
    """
    if solver is None:
        solver = get_solver_name()

    # generate the square result matrix
    n_results = len(reaction_to_result)
    results = numpy.empty((n_results, n_results))
    results.fill(numpy.nan)

    PoolClass = CobraDeletionMockPool if number_of_processes == 1 \
        else CobraDeletionPool  # explicitly disable multiprocessing

    with PoolClass(cobra_model, n_processes=number_of_processes,
                   solver=solver, **kwargs) as pool:

        # precompute all single deletions in the pool and store them along
        # the diagonal
        for reaction_index, result_index in iteritems(reaction_to_result):
            pool.submit((reaction_index, ), label=result_index)
        for result_index, value in pool.receive_all():
            # if singly lethal, set everything in row and column to 0
            value = value if abs(value) > zero_cutoff else 0.
            if value == 0.:
                results[result_index, :] = 0.
                results[:, result_index] = 0.
            else:  # only the diagonal needs to be set
                results[result_index, result_index] = value

        # Run double knockouts in the upper triangle
        index_selector = yield_upper_tria_indexes(
            reaction_indexes1, reaction_indexes2, reaction_to_result)
        for result_index, (r1_index, r2_index) in index_selector:
            # skip if the result was already computed to be lethal
            if results[result_index] == 0:
                continue
            # reactions removed carry no flux
            if r1_index in no_flux_reaction_indexes and \
                    r2_index in no_flux_reaction_indexes:
                results[result_index] = wt_growth_rate
                continue
            pool.submit((r1_index, r2_index), label=result_index)
        # get results
        for result in pool.receive_all():
            results[result[0]] = result[1]

    return results


def _double_gene_deletion_fba(cobra_model, gene_ids1, gene_ids2,
                              gene_id_to_result, solver,
                              number_of_processes=None, zero_cutoff=1e-12,
                              wt_growth_rate=None,
                              no_flux_reaction_indexes=set(), **kwargs):
    """compute double gene deletions using fba

    cobra_model: model

    gene_ids1, gene_ids2: lists of id's to be knocked out

    gene_id_to_result: maps each gene identifier to the entry in
        the result matrix

    no_flux_reaction_indexes: set of indexes for reactions in the model
        which carry no flux in an optimal solution. For deletions only in
        this set, the result will beset to wt_growth_rate.

    returns an upper triangular square matrix
    """
    # Because each gene reaction rule will be evaluated multiple times
    # the reaction has multiple associated genes being deleted, compiling
    # the gene reaction rules ahead of time increases efficiency greatly.
    compiled_rules = get_compiled_gene_reaction_rules(cobra_model)

    n_results = len(gene_id_to_result)
    results = numpy.empty((n_results, n_results))
    results.fill(numpy.nan)

    if number_of_processes == 1:  # explicitly disable multiprocessing
        PoolClass = CobraDeletionMockPool
    else:
        PoolClass = CobraDeletionPool
    with PoolClass(cobra_model, n_processes=number_of_processes,
                   solver=solver, **kwargs) as pool:
        # precompute all single deletions in the pool and store them along
        # the diagonal
        for gene_id, gene_result_index in iteritems(gene_id_to_result):
            ko_reactions = find_gene_knockout_reactions(
                cobra_model, (cobra_model.genes.get_by_id(gene_id),))
            ko_indexes = [cobra_model.reactions.index(i) for i in ko_reactions]
            pool.submit(ko_indexes, label=gene_result_index)
        for result_index, value in pool.receive_all():
            # if singly lethal, set everything in row and column to 0
            value = value if abs(value) > zero_cutoff else 0.
            if value == 0.:
                results[result_index, :] = 0.
                results[:, result_index] = 0.
            else:  # only the diagonal needs to be set
                results[result_index, result_index] = value

        # Run double knockouts in the upper triangle
        index_selector = yield_upper_tria_indexes(gene_ids1, gene_ids2,
                                                  gene_id_to_result)
        for result_index, (gene1, gene2) in index_selector:

            # if singly lethal the results have already been set
            if results[result_index] == 0:
                continue
            ko_reactions = find_gene_knockout_reactions(
                cobra_model, (gene1, gene2), compiled_rules)
            ko_indexes = [cobra_model.reactions.index(i)
                          for i in ko_reactions]
            # if all removed gene indexes carry no flux
            if len(set(ko_indexes) - no_flux_reaction_indexes) == 0:
                results[result_index] = wt_growth_rate
                continue
            pool.submit(ko_indexes, label=result_index)

        for result in pool.receive_all():
            value = result[1]
            if value < zero_cutoff:
                value = 0
            results[result[0]] = value

    return results


def _double_reaction_deletion_moma(cobra_model, reaction_indexes1,
                                   reaction_indexes2, reaction_to_result,
                                   solver, number_of_processes=1,
                                   zero_cutoff=1e-15, wt_growth_rate=None,
                                   no_flux_reaction_indexes=set(), **kwargs):
    """compute double reaction deletions using moma

    cobra_model: model

    reaction_indexes1, reaction_indexes2: reaction indexes (used as unique
        identifiers)

    reaction_to_result: maps each reaction identifier to the entry in
        the result matrix

    no_flux_reaction_indexes: set of indexes for reactions in the model
        which carry no flux in an optimal solution. For deletions only in
        this set, the result will beset to wt_growth_rate.

    number_of_processes: must be 1. Parallel MOMA not yet implmemented

    returns an upper triangular square matrix
    """
    if number_of_processes > 1:
        raise NotImplementedError("parallel MOMA not implemented")
    if moma is None:
        raise RuntimeError("scipy required for MOMA")

    # generate the square result matrix
    n_results = len(reaction_to_result)
    results = numpy.empty((n_results, n_results))
    results.fill(numpy.nan)

    # function to compute reaction knockouts with moma
    moma_model, moma_obj = moma.create_euclidian_moma_model(cobra_model)

    def run(indexes):
        # If all the reactions carry no flux, deletion will have no effect.
        if no_flux_reaction_indexes.issuperset(indexes):
            return wt_growth_rate
        return moma.moma_knockout(moma_model, moma_obj, indexes,
                                  solver=solver, **kwargs).f

    # precompute all single deletions and store them along the diagonal
    for reaction_index, result_index in iteritems(reaction_to_result):
        value = run((reaction_index,))
        value = value if abs(value) > zero_cutoff else 0.
        results[result_index, result_index] = value
        # if singly lethal, the entire row and column are set to 0
        if value == 0.:
            results[result_index, :] = 0.
            results[:, result_index] = 0.

    # Run double knockouts in the upper triangle
    index_selector = yield_upper_tria_indexes(
        reaction_indexes1, reaction_indexes2, reaction_to_result)
    for result_index, (r1_index, r2_index) in index_selector:
        # skip if the result was already computed to be lethal
        if results[result_index] == 0:
            continue
        else:
            results[result_index] = run((r1_index, r2_index))

    return results


def _double_gene_deletion_moma(cobra_model, gene_ids1, gene_ids2,
                               gene_id_to_result, solver,
                               number_of_processes=1,
                               zero_cutoff=1e-12, wt_growth_rate=None,
                               no_flux_reaction_indexes=set(), **kwargs):
    """compute double gene deletions using moma

    cobra_model: model

    gene_ids1, gene_ids2: lists of id's to be knocked out

    gene_id_to_result: maps each gene identifier to the entry in
        the result matrix

    number_of_processes: must be 1. Parallel MOMA not yet implemented

    no_flux_reaction_indexes: set of indexes for reactions in the model
        which carry no flux in an optimal solution. For deletions only in
        this set, the result will beset to wt_growth_rate.

    returns an upper triangular square matrix
    """
    if number_of_processes > 1:
        raise NotImplementedError("parallel MOMA not implemented")
    if moma is None:
        raise RuntimeError("scipy required for MOMA")

    # Because each gene reaction rule will be evaluated multiple times
    # the reaction has multiple associated genes being deleted, compiling
    # the gene reaction rules ahead of time increases efficiency greatly.
    compiled_rules = get_compiled_gene_reaction_rules(cobra_model)

    # function to compute reaction knockouts with moma
    moma_model, moma_obj = moma.create_euclidian_moma_model(cobra_model)

    def run(gene_ids):
        ko_reactions = find_gene_knockout_reactions(cobra_model, gene_ids)
        ko_indexes = map(cobra_model.reactions.index, ko_reactions)
        # If all the reactions carry no flux, deletion will have no effect.
        if no_flux_reaction_indexes.issuperset(gene_ids):
            return wt_growth_rate
        return moma.moma_knockout(moma_model, moma_obj, ko_indexes,
                                  solver=solver, **kwargs).f

    n_results = len(gene_id_to_result)
    results = numpy.empty((n_results, n_results))
    results.fill(numpy.nan)

    # precompute all single deletions and store them along the diagonal
    for gene_id, result_index in iteritems(gene_id_to_result):
        value = run((gene_id,))
        value = value if abs(value) > zero_cutoff else 0.
        results[result_index, result_index] = value
        # If singly lethal, the entire row and column are set to 0.
        if value == 0.:
            results[result_index, :] = 0.
            results[:, result_index] = 0.

    # Run double knockouts in the upper triangle
    index_selector = yield_upper_tria_indexes(gene_ids1, gene_ids2,
                                              gene_id_to_result)
    for result_index, (gene1, gene2) in index_selector:
        # if singly lethal the results have already been set
        if results[result_index] == 0:
            continue
        results[result_index] = run((gene1, gene2))

    return results