Fixed failing tests; added `fva_type` and `obj_optimality` options (regular or fast FVA) to compute_nmpcs

ym2877 · ym2877 · commit ec5f260b002e · 2023-03-15T16:52:41.000-04:00
diff --git a/pymgpipe/nmpc.py b/pymgpipe/nmpc.py
@@ -9,6 +9,7 @@
 from .fva import regularFVA
 from .utils import load_dataframe, load_model, set_objective
 from .io import suppress_stdout
+from .vffva import veryFastFVA
 
 
 def compute_nmpcs(
@@ -27,7 +28,13 @@ def compute_nmpcs(
     force=False,
     threshold=1e-5,
     write_to_file=True,
+    fva_type="regular",
+    obj_optimality=100,
 ):
+    assert fva_type == "regular" or fva_type == "fast", (
+        "FVA type must be either `regular` or `fast`! Received %s" % fva_type
+    )
+
     start = time.time()
     out_dir = out_dir + "/" if out_dir[-1] != "/" else out_dir
     Path(out_dir).mkdir(exist_ok=True)
@@ -36,9 +43,15 @@ def compute_nmpcs(
     objective_out_file = out_dir + objective_out_file
     fluxes_out_file = out_dir + fluxes_out_file
 
-    nmpcs = pd.DataFrame() if force or not write_to_file else load_dataframe(out_file, return_empty=True)
+    nmpcs = (
+        pd.DataFrame()
+        if force or not write_to_file
+        else load_dataframe(out_file, return_empty=True)
+    )
     all_fluxes = (
-        pd.DataFrame() if force or not write_to_file else load_dataframe(fluxes_out_file, return_empty=True)
+        pd.DataFrame()
+        if force or not write_to_file
+        else load_dataframe(fluxes_out_file, return_empty=True)
     )
     obj_values = (
         pd.DataFrame()
@@ -51,6 +64,11 @@ def compute_nmpcs(
 
     try:
         models = [load_model(samples)]
+
+        # Skip models that already exist
+        if models[0].name in list(nmpcs.columns) and not force:
+            print("NMPCs for %s already exist in file!" % models[0].name)
+            return
     except Exception:
         models = (
             samples
@@ -60,12 +78,14 @@ def compute_nmpcs(
                 for m in os.listdir(os.path.dirname(samples))
             ]
         )
-    models = [
-        f
-        for f in models
-        if not isinstance(f, str)
-        or os.path.basename(f).split(".")[0] not in list(nmpcs.columns)
-    ]
+
+        # Skip models that already exist
+        models = [
+            f
+            for f in models
+            if not isinstance(f, str)
+            or os.path.basename(f).split(".")[0] not in list(nmpcs.columns)
+        ]
     threads = os.cpu_count() - 1 if threads == -1 else threads
 
     print("Computing NMPCs on %s models..." % len(models))
@@ -80,6 +100,11 @@ def compute_nmpcs(
     print("------------------------------------------")
 
     for s in tqdm.tqdm(models, total=len(models)):
+        if fva_type == "fast":
+            assert isinstance(
+                s, str
+            ), "For fast fva, `samples` param must be directory or list of model paths."
+            model_path = s
         with suppress_stdout():
             m = load_model(path=s, solver=solver)
             if not isinstance(m, optlang.interface.Model):
@@ -93,43 +118,53 @@ def compute_nmpcs(
         m.variables["communityBiomass"].set_bounds(0.4, 1)
         set_objective(m, m.variables["communityBiomass"], direction="max")
 
-        with suppress_stdout():
-            m.optimize()
-        if m.status == "infeasible":
-            logging.warning("%s model is infeasible!" % m.name)
-            continue
-
-        obj_val = round(m.objective.value, 5)
-        obj_values.loc[m.name] = obj_val
-        if "ObjectiveConstraint" in m.constraints:
-            m.remove(m.constraints["ObjectiveConstraint"])
-            m.update()
-        obj_const = m.interface.Constraint(
-            expression=m.objective.expression,
-            lb=obj_val,
-            ub=obj_val,
-            name="ObjectiveConstraint",
-        )
-        m.add(obj_const)
-        m.update()
-
         # Now perform FVA under constrained objective value
-        with suppress_stdout():
-            try:
+        try:
+            if fva_type == "regular":
+                with suppress_stdout():
+                    m.optimize()
+                if m.status == "infeasible":
+                    logging.warning("%s model is infeasible!" % m.name)
+                    continue
+
+                obj_val = round(m.objective.value, 5)
+                obj_values.loc[m.name] = obj_val
+                if "ObjectiveConstraint" in m.constraints:
+                    m.remove(m.constraints["ObjectiveConstraint"])
+                    m.update()
+                obj_const = m.interface.Constraint(
+                    expression=m.objective.expression,
+                    lb=obj_val * (obj_optimality / 100),
+                    ub=obj_val,
+                    name="ObjectiveConstraint",
+                )
+                m.add(obj_const)
+                m.update()
                 res = regularFVA(
                     m,
                     reactions=reactions,
                     regex=regex,
                     ex_only=ex_only,
                     solver=solver,
-                    threads=threads,
+                    threads=threads if parallel else 1,
                     parallel=parallel,
                     write_to_file=False,
-                    threshold=threshold
+                    threshold=threshold,
                 )
-            except Exception:
-                logging.warning("Cannot solve %s model!" % m.name)
-                continue
+            elif fva_type == "fast":
+                res = veryFastFVA(
+                    model=m,
+                    path=model_path,
+                    reactions=reactions,
+                    regex=regex,
+                    nCores=threads if parallel else 1,
+                    nThreads=1,
+                    optPerc=obj_optimality,
+                    threshold=threshold,
+                )
+        except Exception as e:
+            logging.warning(f"Cannot solve {m.name} model!\n{e}")
+            continue
         if res is None:
             return
         res["sample_id"] = m.name
@@ -158,9 +193,9 @@ def compute_nmpcs(
             all_fluxes.to_csv(fluxes_out_file)
 
     res = namedtuple("res", "nmpc objectives fluxes")
-    
+
     print("-------------------------------------------------------")
-    print('Finished computing NMPCs!')
-    print('Process took %s minutes to run...'%round((time.time()-start)/60,3))
+    print("Finished computing NMPCs!")
+    print("Process took %s minutes to run..." % round((time.time() - start) / 60, 3))
 
-    return res(nmpcs, obj_values, all_fluxes)
+    return res(nmpcs, obj_values, all_fluxes)
diff --git a/pymgpipe/tests/test_build.py b/pymgpipe/tests/test_build.py
@@ -30,7 +30,6 @@ def test_build_diet_fecal():
         taxonomy=sample_df,
         rel_threshold=1e-6,
         solver="gurobi",
-        coupling_constraints=True,
         diet_fecal_compartments=True,
     )
 
@@ -39,7 +38,6 @@ def test_build_diet_fecal():
         assert (
             "fe" in pymgpipe_model.compartments and "d" in pymgpipe_model.compartments
         )
-        assert len([k for k in pymgpipe_model.constraints if re.match(".*_cp$", k.name)]) > 0
 
     built_abundances = get_abundances(pymgpipe_model).to_dict()["A test model"]
     true_abundances = sample_df.set_index("strain")["abundance"].to_dict()
@@ -68,7 +66,6 @@ def test_build():
         taxonomy=sample_df,
         rel_threshold=1e-6,
         solver="gurobi",
-        coupling_constraints=True,
         diet_fecal_compartments=False,
     )
 
@@ -78,8 +75,6 @@ def test_build():
             "fe" not in pymgpipe_model.compartments
             and "d" not in pymgpipe_model.compartments
         )
-        assert len([k for k in pymgpipe_model.constraints if re.match(".*_cp$", k.name)]) > 0
-
 
     built_abundances = get_abundances(pymgpipe_model).to_dict()["A test model"]
     true_abundances = sample_df.set_index("strain")["abundance"].to_dict()
diff --git a/pymgpipe/tests/test_e2e.py b/pymgpipe/tests/test_e2e.py
@@ -4,20 +4,27 @@
 import numpy as np
 import cobra
 from pkg_resources import resource_filename
-from pymgpipe import add_coupling_constraints, compute_nmpcs, get_abundances, build_models
+from pymgpipe import (
+    add_coupling_constraints,
+    compute_nmpcs,
+    get_abundances,
+    build_models,
+    remove_reverse_vars,
+    load_dataframe,
+    get_reverse_id,
+)
 from pymgpipe.build import _build
 from pytest_check import check
 import re
 import tempfile
 
 
-
 def test_build_models():
-    samples = ['sample%s'%i for i in range(5)]
-    cov = pd.DataFrame(columns=samples,index=['TaxaA','TaxaB','TaxaC','TaxaD'])
+    samples = ["sample%s" % i for i in range(5)]
+    cov = pd.DataFrame(columns=samples, index=["TaxaA", "TaxaB", "TaxaC", "TaxaD"])
 
     for t in cov.columns:
-        cov[t]=np.random.dirichlet(np.ones(4),size=1)[0]
+        cov[t] = np.random.dirichlet(np.ones(4), size=1)[0]
 
     with tempfile.TemporaryDirectory() as tmpdirname:
         build_models(
@@ -30,18 +37,19 @@ def test_build_models():
             coupling_constraints=True,
             compute_metrics=True,
             compress=True,
-            write_lp=True
         )
-        models_out = os.listdir(tmpdirname+'/models/')
-        problems_out = os.listdir(tmpdirname+'/problems/')
+        models_out = os.listdir(tmpdirname + "/models/")
+        problems_out = os.listdir(tmpdirname + "/problems/")
+
+        assert len(models_out) == 5 and len(problems_out) == 5
 
-        assert len(models_out)==5 and len(problems_out) == 5
+        assert (
+            os.path.exists(tmpdirname + "/metabolic_diversity.png")
+            and os.path.exists(tmpdirname + "/reaction_abundance.csv")
+            and os.path.exists(tmpdirname + "/reaction_content.csv")
+            and os.path.exists(tmpdirname + "/sample_label_conversion.csv")
+        )
 
-        assert \
-            os.path.exists(tmpdirname+'/metabolic_diversity.png') and \
-            os.path.exists(tmpdirname+'/reaction_abundance.csv') and \
-            os.path.exists(tmpdirname+'/reaction_content.csv') and \
-            os.path.exists(tmpdirname+'/sample_label_conversion.csv')
 
 def test_full_diet_fecal_compartments():
     sample_data = [
@@ -62,12 +70,13 @@ def test_full_diet_fecal_compartments():
         taxonomy=sample_df,
         rel_threshold=1e-6,
         solver="gurobi",
-        coupling_constraints=False,
         diet_fecal_compartments=True,
     )
 
     add_coupling_constraints(pymgpipe_model)
-    assert len([k for k in pymgpipe_model.constraints if re.match(".*_cp$", k.name)]) > 0
+    assert (
+        len([k for k in pymgpipe_model.constraints if re.match(".*_cp$", k.name)]) > 0
+    )
 
     built_abundances = get_abundances(pymgpipe_model).to_dict()["A test model"]
     true_abundances = sample_df.set_index("strain")["abundance"].to_dict()
@@ -108,12 +117,13 @@ def test_full_single_compartment():
         taxonomy=sample_df,
         rel_threshold=1e-6,
         solver="gurobi",
-        coupling_constraints=False,
         diet_fecal_compartments=False,
     )
 
     add_coupling_constraints(pymgpipe_model)
-    assert len([k for k in pymgpipe_model.constraints if re.match(".*_cp$", k.name)]) > 0
+    assert (
+        len([k for k in pymgpipe_model.constraints if re.match(".*_cp$", k.name)]) > 0
+    )
 
     built_abundances = get_abundances(pymgpipe_model).to_dict()["A test model"]
     true_abundances = sample_df.set_index("strain")["abundance"].to_dict()
@@ -133,3 +143,59 @@ def test_full_single_compartment():
     os.remove("community_objectives.csv")
 
     assert len(nmpc_res.nmpc) == 20
+
+
+def test_remove_variables():
+    """Check NMPCs are unaffected by removing reverse variables.
+
+    Runs compute_nmpcs before removal, after soft removal, and after hard removal.
+    """
+    strains = ["TaxaA", "TaxaB", "TaxaC", "TaxaD"]
+    abundances = [0.1, 0.2, 0.3, 0.4]
+    sample_data = [["mc1", a, s] for a, s in zip(abundances, strains)]
+    sample_df = pd.DataFrame(sample_data, columns=["sample_id", "abundance", "strain"])
+    sample_df["id"] = sample_df["strain"]
+    taxa_dir = resource_filename("pymgpipe", "resources/miniTaxa/")
+    sample_df["file"] = taxa_dir + sample_df.id + ".xml.gz"
+
+    model = _build(
+        name="A test model",
+        taxonomy=sample_df,
+        rel_threshold=1e-6,
+        solver="gurobi",
+        diet_fecal_compartments=True,
+    )
+    reverse_var_id = get_reverse_id(model.variables[100].name)
+
+    def nmpcs():
+        return compute_nmpcs(samples=model, write_to_file=False, threads=-1).nmpc
+
+    baseline = nmpcs()
+
+    # Soft removal is expected to keep the reverse variable with both bounds at zero
+    remove_reverse_vars(model, hard_remove=False)
+    reverse_var = model.variables[reverse_var_id]
+    assert reverse_var.lb == 0 and reverse_var.ub == 0
+    after_soft = nmpcs()
+
+    # Hard removal is expected to drop the reverse variable from the model
+    remove_reverse_vars(model, hard_remove=True)
+    assert reverse_var_id not in model.variables
+    after_hard = nmpcs()
+
+    assert len(_compare(baseline, after_soft)) == 0
+    assert len(_compare(baseline, after_hard)) == 0
+
+
+def _compare(first, second, threshold=1e-10):
+    """List (index, column, |diff|) for cells differing by more than threshold."""
+    left, right = load_dataframe(first), load_dataframe(second)
+    mismatches = []
+    for idx, left_row in left.iterrows():
+        right_row = right.loc[idx].to_dict()
+        for col in left_row.to_dict():
+            # A cell is a mismatch when the absolute difference exceeds threshold
+            delta = abs(left_row[col] - right_row[col])
+            if delta > threshold:
+                mismatches.append((idx, col, delta))
+    return mismatches
diff --git a/pymgpipe/tests/test_utils.py b/pymgpipe/tests/test_utils.py
@@ -6,7 +6,11 @@ def test_remove_reverse_reactions(mini_optlang_model):
     reverse_var_id = get_reverse_id(some_var.name)
     assert reverse_var_id in mini_optlang_model.variables
 
-    remove_reverse_vars(mini_optlang_model)
-    new_reactions = len(mini_optlang_model.variables)
+    remove_reverse_vars(mini_optlang_model,hard_remove=False)
+    assert len(mini_optlang_model.variables) == num_reactions and \
+        mini_optlang_model.variables[reverse_var_id].lb == 0 and mini_optlang_model.variables[reverse_var_id].ub == 0
+
+    remove_reverse_vars(mini_optlang_model,hard_remove=True)
+    assert len(mini_optlang_model.variables) == num_reactions/2 and reverse_var_id not in mini_optlang_model.variables
+
 
-    assert new_reactions == num_reactions/2 and reverse_var_id not in mini_optlang_model.variables
diff --git a/pymgpipe/utils.py b/pymgpipe/utils.py
@@ -68,7 +68,8 @@ def _get_fluxes_from_model(model, reactions=None, regex=None, threshold=1e-5):
             reverse = model.variables[r_id]
             flux = float(forward.primal - reverse.primal)
         flux = 0 if flux == -0.0 else flux
-        flux = flux if abs(flux) > threshold else 0
+        if threshold is not None:
+            flux = flux if abs(flux) > threshold else 0
         fluxes[forward.name] = flux
     return fluxes
 
diff --git a/pymgpipe/vffva.py b/pymgpipe/vffva.py