oiv_ld_phenotyping / src /leaf_patch_gen_diff.py
treizh's picture
Upload folder using huggingface_hub (#5)
edacdfe verified
raw
history blame contribute delete
No virus
7.09 kB
import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.regression.linear_model import RegressionResultsWrapper
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from matplotlib.figure import Figure
import seaborn as sns
import panel as pn
import com_const as cc
import com_func as cf
import com_image as ci
stars = [-np.log(0.05), -np.log(0.01), -np.log(0.001), -np.log(0.0001)]
def plot_single_progression(
ax,
df,
target,
title: str,
hue="gen",
style="gen",
show_legend: bool = False,
):
lp = sns.lineplot(
df.sort_values(hue),
x="dpi",
y=target,
hue=hue,
markers=True,
style=style,
dashes=False,
palette="tab10",
markersize=12,
ax=ax,
)
lp.set_yticklabels(["", "3", "", "5", "", "7", "", "9"])
ax.set_title(title)
if show_legend is True:
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
else:
ax.get_legend().set_visible(False)
def get_model(
df: pd.DataFrame, target: str, formula: str, dpi: int = None
) -> RegressionResultsWrapper:
df_ = df[df.dpi == dpi] if dpi is not None else df
return ols(f"{target} {formula}", data=df_).fit()
def anova_table(aov, add_columns: bool = True):
"""
The function below was created specifically for the one-way ANOVA table
results returned for Type II sum of squares
"""
if add_columns is True:
aov["mean_sq"] = aov[:]["sum_sq"] / aov[:]["df"]
aov["eta_sq"] = aov[:-1]["sum_sq"] / sum(aov["sum_sq"])
aov["omega_sq"] = (
aov[:-1]["sum_sq"] - (aov[:-1]["df"] * aov["mean_sq"][-1])
) / (sum(aov["sum_sq"]) + aov["mean_sq"][-1])
cols = ["sum_sq", "df", "mean_sq", "F", "PR(>F)", "eta_sq", "omega_sq"]
aov = aov[cols]
return aov
def plot_assumptions(models: list, titles: list, figsize=(12, 4)):
fig = Figure(figsize=figsize)
fig.suptitle("Probability plot of model residual's", fontsize="x-large")
axii = fig.subplots(1, len(models))
for ax, model, title in zip(axii, models, titles):
_ = stats.probplot(model.resid, plot=ax, rvalue=True)
ax.set_title(title)
return fig
def hghlight_rejection(s):
df = pd.DataFrame(columns=s.columns, index=s.index)
df.loc[s["reject_pred"].ne(s["reject_obs"]), ["group1", "group2"]] = (
"background: red"
)
df.loc[s["reject_pred"].eq(s["reject_obs"]), ["group1", "group2"]] = (
"background: green"
)
df.loc[s.reject_pred, ["reject_pred"]] = "background: green"
df.loc[~s.reject_pred, ["reject_pred"]] = "background: red"
df.loc[s.reject_obs, ["reject_obs"]] = "background: green"
df.loc[~s.reject_obs, ["reject_obs"]] = "background: red"
return df
def get_tuckey_df(endog, groups, df_genotypes) -> pd.DataFrame:
tukey = pairwise_tukeyhsd(endog=endog, groups=groups)
df_tuc = pd.DataFrame(tukey._results_table)
df_tuc.columns = [str(c) for c in df_tuc.iloc[0]]
ret = (
df_tuc.drop(df_tuc.index[0])
.assign(group1=lambda s: s.group1.astype(str))
.assign(group2=lambda s: s.group2.astype(str))
.assign(reject=lambda s: s.reject.astype(str) == "True")
)
ret["p-adj"] = tukey.pvalues
if df_genotypes is None:
return ret
else:
return (
ret.merge(right=df_genotypes, how="left", left_on="group1", right_on="gen")
.drop(["gen"], axis=1)
.rename(columns={"rpvloci": "group1_rpvloci"})
.merge(right=df_genotypes, how="left", left_on="group2", right_on="gen")
.drop(["gen"], axis=1)
.rename(columns={"rpvloci": "group2_rpvloci"})
)
def get_tuckey_compare(df, df_genotypes=None, groups: str = "gen"):
merge_on = (
["group1", "group2"]
if df_genotypes is None
else ["group1", "group2", "group1_rpvloci", "group2_rpvloci"]
)
df_poiv = get_tuckey_df(df.p_oiv, df[groups], df_genotypes=df_genotypes)
df_oiv = get_tuckey_df(df.oiv, df[groups], df_genotypes=df_genotypes)
df = pd.merge(left=df_poiv, right=df_oiv, on=merge_on, suffixes=["_pred", "_obs"])
return df
def df_tukey_cmp_plot(df, groups):
df_tukey = (
get_tuckey_compare(df=df, groups=groups, df_genotypes=None)
.assign(pair_groups=lambda s: s.group1 + "\n" + s.group2)
.sort_values("p-adj_obs")
)
df_tukey_reject = df_tukey[df_tukey.reject_obs & df_tukey.reject_pred]
df_tukey_accept = df_tukey[~df_tukey.reject_obs & ~df_tukey.reject_pred]
df_tukey_diverge = df_tukey[df_tukey.reject_obs != df_tukey.reject_pred]
fig = Figure(figsize=(20, 6))
ax_reject, ax_diverge, ax_accept = fig.subplots(
1,
3,
gridspec_kw={
"width_ratios": [
len(df_tukey_reject),
len(df_tukey_diverge),
len(df_tukey_accept),
]
},
sharey=True,
)
for ax in [ax_reject, ax_accept, ax_diverge]:
ax.set_yticks(ticks=stars, labels=["*", "**", "***", "****"])
ax.grid(False)
ax_reject.set_title("Rejected")
ax_diverge.set_title("Conflict")
ax_accept.set_title("Accepted")
for ax, df in zip(
[ax_reject, ax_accept, ax_diverge],
[df_tukey_reject, df_tukey_accept, df_tukey_diverge],
):
for star in stars:
ax.axhline(y=star, linestyle="-", color="black", alpha=0.5)
ax.bar(
x=df["pair_groups"],
height=-np.log(df["p-adj_pred"]),
width=-0.4,
align="edge",
color="green",
label="predictions",
)
ax.bar(
x=df["pair_groups"],
height=-np.log(df["p-adj_obs"]),
width=0.4,
align="edge",
color="blue",
label="scorings",
)
ax.margins(0.01)
ax_accept.legend(loc="upper left", bbox_to_anchor=[0, 1], ncols=1, fancybox=True)
ax_reject.set_ylabel("-log(p value)")
ax_reject.tick_params(axis="y", which="major", labelsize=16)
fig.subplots_adjust(wspace=0.05, hspace=0.05)
return fig
def plot_patches(df, diff_only: bool = True):
if diff_only is True:
df = df[(df.oiv != df.p_oiv)]
df = df.assign(diff=lambda s: s.oiv != s.p_oiv).sort_values(
["diff", "oiv", "p_oiv"]
)
return pn.GridBox(
*[
pn.Column(
pn.pane.Markdown(f"### {row.file_name}|{row.oiv}->p{row.p_oiv}"),
pn.pane.Image(
object=ci.enhance_pil_image(
image=ci.load_image(
file_name=row.file_name,
path_to_images=cc.path_to_leaf_patches,
),
brightness=1.5,
)
),
)
for _, row in df.iterrows()
],
ncols=len(df),
)