|
14 | 14 | from pandas.testing import assert_frame_equal
|
15 | 15 | from scanpy import settings as s
|
16 | 16 | from scanpy.datasets import blobs
|
| 17 | +from scipy.sparse import csc_matrix |
17 | 18 |
|
18 | 19 | from squidpy._constants._pkg_constants import Key
|
19 | 20 | from squidpy.gr import ligrec
|
@@ -461,3 +462,104 @@ def test_none_source_target(self, adata: AnnData):
|
461 | 462 | )
|
462 | 463 | assert isinstance(pt.interactions, pd.DataFrame)
|
463 | 464 | assert len(pt.interactions) == 1
|
| 465 | + |
| 466 | + def test_ligrec_nan_counts(self): |
| 467 | + """ |
| 468 | + For the test case with 2 clusters (A, B) and 3 gene pairs (Gene1→Gene2, Gene2→Gene3, Gene3→Gene1): |
| 469 | +
|
| 470 | + The mask is computed for each gene in each cluster as: |
| 471 | + mask[gene, cluster] = (number of cells with value > 0) / (total cells in cluster) >= threshold |
| 472 | +
|
| 473 | + Number of cells with value > 0 in each cluster: |
| 474 | + Cluster A: [1, 3, 0] |
| 475 | + Cluster B: [1, 0, 3] |
| 476 | +
|
| 477 | + Number of cells with value > 0 in each cluster divided by total number of cells in the cluster: |
| 478 | + Cluster A: [1/3, 3/3, 0/3] = [0.33, 1.0, 0.0] |
| 479 | + Cluster B: [1/3, 0/3, 3/3] = [0.33, 0.0, 1.0] |
| 480 | +
|
| 481 | + Using threshold=0.8 on this data, the mask is: |
| 482 | + Cluster A: [False, True, False] |
| 483 | + Cluster B: [False, False, True] |
| 484 | +
|
| 485 | + A value in the result becomes NaN if either: |
| 486 | + - The ligand's mask is False in the source cluster, OR |
| 487 | + - The receptor's mask is False in the target cluster |
| 488 | +
|
| 489 | + Only in one combination, the mask is both True in the source and target cluster. |
| 490 | + This is the case for Gene2→Gene3 in A→B. |
| 491 | +
|
| 492 | + This means from all the possible cluster pairs (A→A, A→B, B→A, B→B) and gene pairs (Gene1→Gene2, Gene2→Gene3, Gene3→Gene1), |
| 493 | + (4 cluster pairs * 3 gene pairs = 12 combinations) only one combination is non-NaN. |
| 494 | +
|
| 495 | + Therefore, the total number of NaNs is 11. |
| 496 | +
|
| 497 | + The expected p-values are: |
| 498 | + cluster_1 A B |
| 499 | + cluster_2 A B A B |
| 500 | + source target |
| 501 | + GENE1 GENE2 NaN NaN NaN NaN |
| 502 | + GENE2 GENE3 NaN 0.0 NaN NaN |
| 503 | + GENE3 GENE1 NaN NaN NaN NaN |
| 504 | +
|
| 505 | + """ |
| 506 | + # only Gene2→Gene3 is non-NaN |
| 507 | + # |
| 508 | + |
| 509 | + expected_pvalues = np.array( |
| 510 | + [ |
| 511 | + [ |
| 512 | + np.nan, |
| 513 | + np.nan, |
| 514 | + np.nan, |
| 515 | + np.nan, |
| 516 | + ], |
| 517 | + [ |
| 518 | + np.nan, |
| 519 | + 0.0, |
| 520 | + np.nan, |
| 521 | + np.nan, |
| 522 | + ], |
| 523 | + [ |
| 524 | + np.nan, |
| 525 | + np.nan, |
| 526 | + np.nan, |
| 527 | + np.nan, |
| 528 | + ], |
| 529 | + ] |
| 530 | + ) |
| 531 | + |
| 532 | + expected_nans = 11 |
| 533 | + # Setup test data |
| 534 | + threshold = 0.8 |
| 535 | + interactions = pd.DataFrame({"source": ["Gene1", "Gene2", "Gene3"], "target": ["Gene2", "Gene3", "Gene1"]}) |
| 536 | + |
| 537 | + # Create sparse matrix with test data |
| 538 | + X = csc_matrix( |
| 539 | + [ |
| 540 | + [1.0, 0.1, 0.0], # A1 |
| 541 | + [0.0, 1.0, 0.0], # A2 |
| 542 | + [0.0, 1.0, 0.0], # A3 |
| 543 | + [0.1, 0.0, 1.0], # B1 |
| 544 | + [0.0, 0.0, 1.0], # B2 |
| 545 | + [0.0, 0.0, 1.0], # B3 |
| 546 | + ] |
| 547 | + ) |
| 548 | + |
| 549 | + # Create AnnData object |
| 550 | + adata = AnnData( |
| 551 | + X=X, |
| 552 | + obs=pd.DataFrame({"cluster": ["A"] * 3 + ["B"] * 3}, index=[f"cell{i}" for i in range(1, 7)]), |
| 553 | + var=pd.DataFrame(index=["Gene1", "Gene2", "Gene3"]), |
| 554 | + ) |
| 555 | + adata.obs["cluster"] = adata.obs["cluster"].astype("category") |
| 556 | + |
| 557 | + # Run ligrec and compare NaN counts |
| 558 | + res = ligrec( |
| 559 | + adata, cluster_key="cluster", interactions=interactions, threshold=threshold, use_raw=False, copy=True |
| 560 | + ) |
| 561 | + |
| 562 | + actual_nans = np.sum(np.isnan(res["pvalues"].values)) |
| 563 | + |
| 564 | + assert actual_nans == expected_nans, f"NaN count mismatch: expected {expected_nans}, got {actual_nans}" |
| 565 | + np.testing.assert_array_equal(res["pvalues"].values, expected_pvalues) |
0 commit comments