Commit

Merge branch 'main' into ancova_updated

enryH authored Nov 28, 2024
2 parents 8b9adb6 + b9b035f commit 48a37df
Showing 13 changed files with 299 additions and 35 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -104,3 +104,6 @@ ENV/
# IDE settings
.vscode/
.idea/

# Mac
.DS_Store
3 changes: 3 additions & 0 deletions acore/__init__.py
@@ -1,3 +1,6 @@
from importlib.metadata import version

import dsp_pandas # sets up pandas formatting options

__all__ = ["dsp_pandas"]
__version__ = version("acore")
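
As a quick illustration of what the updated `__init__.py` does on import (a minimal sketch; it assumes `acore` and its `dsp_pandas` dependency are installed in the current environment):

# Minimal sketch: importing acore now also imports dsp_pandas (which adjusts
# pandas display formatting as a side effect) and exposes the package version.
import acore

print(acore.__version__)  # resolved at import time via importlib.metadata
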
10 changes: 10 additions & 0 deletions acore/correlation_analysis.py
@@ -10,6 +10,16 @@
from acore.multiple_testing import apply_pvalue_correction


def corr_lower_triangle(df: pd.DataFrame, **kwargs) -> pd.DataFrame:
"""Compute the correlation matrix, returning only unique values (lower triangle).
Passes kwargs to pandas.DataFrame.corr method.
"""
corr_df = df.corr(**kwargs)
lower_triangle = pd.DataFrame(np.tril(np.ones(corr_df.shape), -1)).astype(bool)
lower_triangle.index, lower_triangle.columns = corr_df.index, corr_df.columns
return corr_df.where(lower_triangle)


def calculate_correlations(x, y, method="pearson"):
"""
Calculates a Spearman (nonparametric) or a Pearson (parametric) correlation coefficient and p-value to test for non-correlation.
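
A short usage sketch for the new `corr_lower_triangle` helper; the toy DataFrame and the `method="spearman"` choice are illustrative, not part of the commit:

import pandas as pd

from acore.correlation_analysis import corr_lower_triangle

df = pd.DataFrame(
    {
        "a": [1.0, 2.0, 3.0, 4.0, 5.0],
        "b": [2.1, 3.9, 6.2, 8.1, 9.9],
        "c": [5.0, 1.0, 4.0, 2.0, 3.0],
    }
)
# extra keyword arguments are passed through to pandas.DataFrame.corr
corr = corr_lower_triangle(df, method="spearman")
# the diagonal and the upper triangle are NaN, so each pair appears exactly once
print(corr)
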
17 changes: 1 addition & 16 deletions acore/sklearn/pca.py → acore/decomposition/pca.py
@@ -1,10 +1,8 @@
from typing import Optional

import matplotlib
import pandas as pd
import sklearn.decomposition


# ! also a version in exploratory analysis
def run_pca(
df_wide: pd.DataFrame, n_components: int = 2
) -> tuple[pd.DataFrame, sklearn.decomposition.PCA]:
@@ -35,16 +33,3 @@ def run_pca(
]
PCs = pd.DataFrame(PCs, index=df_wide.index, columns=cols)
return PCs, pca


# ! move to aviz
def plot_explained_variance(
pca: sklearn.decomposition.PCA, ax: Optional[matplotlib.axes.Axes] = None
) -> matplotlib.axes.Axes:
"""Plot explained variance of PCA from scikit-learn."""
exp_var = pd.Series(pca.explained_variance_ratio_).to_frame("explained variance")
exp_var.index += 1 # start at 1
exp_var["explained variance (cummulated)"] = exp_var["explained variance"].cumsum()
exp_var.index.name = "PC"
ax = exp_var.plot(ax=ax)
return ax
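
After the rename, `run_pca` lives under `acore.decomposition`; a minimal sketch with random data (the data and column names are illustrative only):

import numpy as np
import pandas as pd

from acore.decomposition.pca import run_pca

rng = np.random.default_rng(42)
df_wide = pd.DataFrame(rng.normal(size=(20, 5)), columns=list("abcde"))

# returns the principal-component scores and the fitted sklearn PCA estimator
PCs, pca = run_pca(df_wide, n_components=2)
print(PCs.head())
print(pca.explained_variance_ratio_)
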
10 changes: 5 additions & 5 deletions acore/differential_regulation.py
@@ -37,16 +37,16 @@ def calc_means_between_groups(


def calc_ttest(
df: pd.DataFrame, boolean_array: pd.Series, vars: list[str]
df: pd.DataFrame, boolean_array: pd.Series, variables: list[str]
) -> pd.DataFrame:
"""Calculate t-test for each variable in `vars` between two groups defined
"""Calculate t-test for each variable in `variables` between two groups defined
by boolean array."""
ret = []
for var in vars:
for var in variables:
_ = pg.ttest(df.loc[boolean_array, var], df.loc[~boolean_array, var])
ret.append(_)
ret = pd.concat(ret)
ret = ret.set_index(vars)
ret = ret.set_index(variables)
ret.columns.name = "ttest"
ret.columns = pd.MultiIndex.from_product(
[["ttest"], ret.columns], names=("test", "var")
@@ -65,7 +65,7 @@ def run_diff_analysis(
ret = calc_means_between_groups(
df, boolean_array=boolean_array, event_names=event_names
)
ttests = calc_ttest(df, boolean_array=boolean_array, vars=ret.index)
ttests = calc_ttest(df, boolean_array=boolean_array, variables=ret.index)
ret = ret.join(ttests.loc[:, pd.IndexSlice[:, ttest_vars]])
return ret

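A usage sketch for the renamed `variables` keyword of `calc_ttest` (requires `pingouin`; the group assignment and measurement values below are made up). Following `run_diff_analysis`, the variable names are passed as an Index, since they become the row index of the combined t-test table:

import pandas as pd

from acore.differential_regulation import calc_ttest

df = pd.DataFrame(
    {
        "protein_a": [1.2, 1.4, 1.3, 2.1, 2.4, 2.2],
        "protein_b": [0.9, 1.1, 1.0, 1.0, 0.9, 1.1],
    }
)
is_case = pd.Series([True, True, True, False, False, False], index=df.index)

# keyword is now `variables` (formerly `vars`, which shadowed the builtin);
# pass an Index of names, as run_diff_analysis does with ret.index
ttests = calc_ttest(df, boolean_array=is_case, variables=df.columns)
print(ttests)
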
206 changes: 206 additions & 0 deletions acore/plotting/__init__.py
@@ -0,0 +1,206 @@
"""
This module contains functions to plot data. It will be moved to a separate
visualization package.
"""

import logging
import pathlib
from typing import Iterable

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

plt.rcParams["figure.figsize"] = [4.0, 3.0]
plt.rcParams["pdf.fonttype"] = 42
plt.rcParams["ps.fonttype"] = 42

plt.rcParams["figure.dpi"] = 147

figsize_a4 = (8.3, 11.7)

logger = logging.getLogger(__name__)


def savefig(
fig: matplotlib.figure.Figure,
name: str,
folder: pathlib.Path = ".",
pdf=True,
tight_layout=True,
dpi=300,
):
"""Save matplotlib Figure (having method `savefig`) as pdf and png."""
folder = pathlib.Path(folder)
fname = folder / name
folder = fname.parent # in case name specifies folders
folder.mkdir(exist_ok=True, parents=True)
if not fig.get_constrained_layout() and tight_layout:
fig.tight_layout()
fig.savefig(fname.with_suffix(".png"), bbox_inches="tight", dpi=dpi)
if pdf:
fig.savefig(fname.with_suffix(".pdf"), bbox_inches="tight", dpi=dpi)
logger.info(f"Saved Figures to {fname}")


def select_xticks(ax: matplotlib.axes.Axes, max_ticks: int = 50) -> list:
"""Limit the number of xticks displayed.
Parameters
----------
ax : matplotlib.axes.Axes
Axes object to manipulate
max_ticks : int, optional
maximum number of set ticks on x-axis, by default 50
Returns
-------
list
list of current x-axis ticks, either the newly set
or the original ones (depending on whether they were changed).
"""
x_ticks = ax.get_xticks()
offset = len(x_ticks) // max_ticks
if offset > 1: # if larger than 1
return ax.set_xticks(x_ticks[::offset])
return x_ticks


def select_dates(date_series: pd.Series, max_ticks=30) -> np.ndarray:
"""Get unique dates (single days) for selection in pd.plot.line
with xticks argument.
Parameters
----------
date_series : pd.Series
datetime series to use (values, not index)
max_ticks : int, optional
maximum number of unique ticks to select, by default 30
Returns
-------
np.ndarray
array of selected dates
"""
xticks = date_series.dt.date.unique()
offset = len(xticks) // max_ticks
if offset > 1:
return xticks[::offset]
else:
return xticks


def make_large_descriptors(size="xx-large"):
"""Helper function to set very large titles, labels and tick texts by
default for matplotlib plots.
size: str
fontsize or an allowed size category, by default 'xx-large'
"""
plt.rcParams.update(
{
k: size
for k in [
"xtick.labelsize",
"ytick.labelsize",
"axes.titlesize",
"axes.labelsize",
"legend.fontsize",
"legend.title_fontsize",
]
}
)


set_font_sizes = make_large_descriptors


def add_prop_as_second_yaxis(
ax: matplotlib.axes.Axes, n_samples: int, format_str: str = "{x:,.3f}"
) -> matplotlib.axes.Axes:
"""Add proportion as second axis. Try to align cleverly
Parameters
----------
ax : matplotlib.axes.Axes
Axes for which you want to add a second y-axis
n_samples : int
Number of total samples (to normalize against)
Returns
-------
matplotlib.axes.Axes
Second layover twin Axes with right-hand side y-axis
"""
ax2 = ax.twinx()
n_min, n_max = np.round(ax.get_ybound())
logger.info(f"{n_min = }, {n_max = }")
lower_prop = n_min / n_samples + (ax.get_ybound()[0] - n_min) / n_samples
upper_prop = n_max / n_samples + (ax.get_ybound()[1] - n_max) / n_samples
logger.info(f"{lower_prop = }, {upper_prop = }")
ax2.set_ybound(lower_prop, upper_prop)
# _ = ax2.set_yticks(np.linspace(n_min/n_samples,
# n_max /n_samples, len(ax.get_yticks())-2))
_ = ax2.set_yticks(ax.get_yticks()[1:-1] / n_samples)
ax2.yaxis.set_major_formatter(matplotlib.ticker.StrMethodFormatter(format_str))
return ax2


def add_height_to_barplot(
ax: matplotlib.axes.Axes, size: int = 15
) -> matplotlib.axes.Axes:
"""Add height of bar to each bar in a barplot."""
for bar in ax.patches:
ax.annotate(
text=format(bar.get_height(), ".2f"),
xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
xytext=(0, 7),
ha="center",
va="center",
size=size,
textcoords="offset points",
)
return ax


def add_text_to_barplot(
ax: matplotlib.axes.Axes, text: Iterable[str], size=15
) -> matplotlib.axes.Axes:
"""Add custom text from Iterable to each bar in a barplot."""
for bar, text_bar in zip(ax.patches, text):
msg = f"{bar = }, {text = }, {bar.get_height() = }"
logger.debug(msg)
ax.annotate(
text=text_bar,
xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
xytext=(0, -5),
rotation=90,
ha="center",
va="top",
size=size,
textcoords="offset points",
)
return ax


def format_large_numbers(
ax: matplotlib.axes.Axes, format_str: str = "{x:,.0f}"
) -> matplotlib.axes.Axes:
"""Format large integer numbers to be read more easily.
Parameters
----------
ax : matplotlib.axes.Axes
Axes whose labels should be manipulated.
format_str : str, optional
Default float format string, by default '{x:,.0f}'
Returns
-------
matplotlib.axes.Axes
Return reference to modified input Axes object.
"""
ax.xaxis.set_major_formatter(matplotlib.ticker.StrMethodFormatter(format_str))
ax.yaxis.set_major_formatter(matplotlib.ticker.StrMethodFormatter(format_str))
return ax
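
A short sketch exercising two of the new helpers, `add_height_to_barplot` and `savefig` (the bar data and the `figures/` output folder are illustrative):

import pandas as pd

from acore.plotting import add_height_to_barplot, savefig

counts = pd.Series({"group A": 125.0, "group B": 98.0, "group C": 43.0})
ax = counts.plot.bar(rot=0)
ax = add_height_to_barplot(ax)  # annotate each bar with its height ("125.00", ...)
# writes figures/bar_counts.png and figures/bar_counts.pdf
savefig(ax.get_figure(), name="bar_counts", folder="figures")
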
19 changes: 19 additions & 0 deletions acore/plotting/decomposition.py
@@ -0,0 +1,19 @@
"""Decompositon plots like pca, umap, tsne, etc."""

from typing import Optional

import matplotlib
import pandas as pd
import sklearn.decomposition


def plot_explained_variance(
pca: sklearn.decomposition.PCA, ax: Optional[matplotlib.axes.Axes] = None
) -> matplotlib.axes.Axes:
"""Plot explained variance of PCA from scikit-learn."""
exp_var = pd.Series(pca.explained_variance_ratio_).to_frame("explained variance")
exp_var.index += 1 # start at 1
exp_var["explained variance (cummulated)"] = exp_var["explained variance"].cumsum()
exp_var.index.name = "PC"
ax = exp_var.plot(ax=ax)
return ax
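
A sketch that wires the relocated pieces together: fit a PCA with `acore.decomposition.pca.run_pca` and plot its explained variance with the relocated `plot_explained_variance` (random data for illustration):

import numpy as np
import pandas as pd

from acore.decomposition.pca import run_pca
from acore.plotting.decomposition import plot_explained_variance

rng = np.random.default_rng(0)
df_wide = pd.DataFrame(rng.normal(size=(30, 6)))

PCs, pca = run_pca(df_wide, n_components=4)
ax = plot_explained_variance(pca)  # per-PC and cumulative explained variance
ax.set_xlabel("principal component")
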
17 changes: 16 additions & 1 deletion docs/api_examples/exploratory_analysis.ipynb
@@ -8,6 +8,20 @@
"# Exploratory Analysis"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2958fb55",
"metadata": {
"tags": [
"hide-output"
]
},
"outputs": [],
"source": [
"%pip install acore"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -16,6 +30,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"import acore.exploratory_analysis as ea\n",
"\n",
"data = pd.DataFrame(\n",
@@ -129,7 +144,7 @@
"metadata": {},
"outputs": [],
"source": [
"result['umap']"
"result[\"umap\"]"
]
},
{
6 changes: 5 additions & 1 deletion docs/api_examples/exploratory_analysis.py
@@ -1,8 +1,12 @@
# %% [markdown]
# # Exploratory Analysis

# %%
# %pip install acore

# %%
import pandas as pd

import acore.exploratory_analysis as ea

data = pd.DataFrame(
@@ -52,7 +56,7 @@
)

# %%
result['umap']
result["umap"]

# %%
annotation

0 comments on commit 48a37df
