import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.regression.linear_model import RegressionResultsWrapper
from statsmodels.stats.multicomp import pairwise_tukeyhsd

from matplotlib.figure import Figure
import seaborn as sns
import panel as pn

import com_const as cc
import com_func as cf
import com_image as ci

# Positions on the -log(p) axis of the p = 0.05, 0.01, 0.001 and 0.0001
# thresholds; used as y ticks ("*" .. "****") in ``df_tukey_cmp_plot``.
stars = [-np.log(0.05), -np.log(0.01), -np.log(0.001), -np.log(0.0001)]


def plot_single_progression(
    ax,
    df,
    target,
    title: str,
    hue="gen",
    style="gen",
    show_legend: bool = False,
):
    """Draw a seaborn line plot of ``target`` against the ``dpi`` column.

    Args:
        ax: Matplotlib axes to draw on.
        df: Data frame holding the ``dpi`` column plus ``target``, ``hue``
            and ``style`` columns. It is sorted by ``hue`` before plotting.
        target: Name of the column plotted on the y axis.
        title: Title set on ``ax``.
        hue: Column used for line colour.
        style: Column used for marker style.
        show_legend: When ``True`` the legend is moved outside the axes
            (upper left, anchored at (1, 1)); otherwise it is hidden.
    """
    lp = sns.lineplot(
        df.sort_values(hue),
        x="dpi",
        y=target,
        hue=hue,
        markers=True,
        style=style,
        dashes=False,
        palette="tab10",
        markersize=12,
        ax=ax,
    )
    # NOTE(review): labels are hard-coded and rely on the automatically
    # chosen tick positions -- presumably a 2..9 scale; confirm with callers.
    lp.set_yticklabels(["", "3", "", "5", "", "7", "", "9"])
    ax.set_title(title)

    if show_legend is True:
        sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
    else:
        # get_legend() returns None when no legend was created; guard so
        # hiding a non-existent legend does not raise AttributeError.
        legend = ax.get_legend()
        if legend is not None:
            legend.set_visible(False)


def get_model(
    df: pd.DataFrame, target: str, formula: str, dpi: int = None
) -> RegressionResultsWrapper:
    """Fit an OLS model for ``target`` on ``df``.

    Args:
        df: Input data.
        target: Left-hand side of the model formula.
        formula: Remainder of the formula; it is appended to ``target``
            separated by a single space, so it must carry the ``~`` itself.
        dpi: When given, only rows whose ``dpi`` column equals this value
            are used; ``None`` uses the whole frame.

    Returns:
        The fitted statsmodels OLS results.
    """
    data = df if dpi is None else df[df.dpi == dpi]
    return ols(f"{target} {formula}", data=data).fit()


def anova_table(aov, add_columns: bool = True):
    """Extend and reorder a one-way ANOVA table (Type II sum of squares).

    Args:
        aov: ANOVA table with ``sum_sq``, ``df``, ``F`` and ``PR(>F)``
            columns. The last row is treated as the residual row.
        add_columns: When ``True``, ``mean_sq``, ``eta_sq`` and ``omega_sq``
            are computed and added to ``aov`` IN PLACE. When not ``True``
            those columns must already be present.

    Returns:
        A frame restricted to the columns ``sum_sq``, ``df``, ``mean_sq``,
        ``F``, ``PR(>F)``, ``eta_sq``, ``omega_sq`` in that order.
    """
    if add_columns is True:
        total_sum_sq = sum(aov["sum_sq"])
        aov["mean_sq"] = aov["sum_sq"] / aov["df"]
        # Positional access must go through .iloc: the table is indexed by
        # term names, and ``series[-1]`` on a non-integer index is
        # deprecated (an error in recent pandas).
        residual_mean_sq = aov["mean_sq"].iloc[-1]
        # Effect sizes are defined for every row except the residual one;
        # index alignment leaves NaN in the residual row.
        effects = aov[:-1]
        aov["eta_sq"] = effects["sum_sq"] / total_sum_sq
        aov["omega_sq"] = (
            effects["sum_sq"] - (effects["df"] * residual_mean_sq)
        ) / (total_sum_sq + residual_mean_sq)

    cols = ["sum_sq", "df", "mean_sq", "F", "PR(>F)", "eta_sq", "omega_sq"]
    return aov[cols]


def plot_assumptions(models: list, titles: list, figsize=(12, 4)):
    """Plot one probability plot of the residuals per model, side by side.

    Args:
        models: Fitted models exposing a ``resid`` attribute.
        titles: One axes title per model.
        figsize: Size of the created figure.

    Returns:
        The matplotlib ``Figure`` holding the plots.
    """
    fig = Figure(figsize=figsize)
    fig.suptitle("Probability plot of model residual's", fontsize="x-large")
    # squeeze=False always yields a 2-D array of axes, so a single model
    # no longer produces a bare (non-iterable) Axes object.
    axes_row = fig.subplots(1, len(models), squeeze=False)[0]
    for ax, model, title in zip(axes_row, models, titles):
        stats.probplot(model.resid, plot=ax, rvalue=True)
        ax.set_title(title)
    return fig


def hghlight_rejection(s):
    """Build a frame of CSS background styles for a Tukey comparison table.

    ``group1``/``group2`` cells are green where ``reject_pred`` equals
    ``reject_obs`` and red where they differ; each ``reject_*`` cell is
    green when its value is true and red otherwise. Other cells stay empty.

    Args:
        s: Frame with ``group1``, ``group2`` and boolean ``reject_pred`` /
            ``reject_obs`` columns.

    Returns:
        A frame with the same index and columns as ``s`` holding the style
        strings (presumably for ``Styler.apply(..., axis=None)`` -- confirm
        against the caller).
    """
    styles = pd.DataFrame(columns=s.columns, index=s.index)
    group_cols = ["group1", "group2"]

    styles.loc[s["reject_pred"].ne(s["reject_obs"]), group_cols] = "background: red"
    styles.loc[s["reject_pred"].eq(s["reject_obs"]), group_cols] = "background: green"

    for col in ("reject_pred", "reject_obs"):
        styles.loc[s[col], [col]] = "background: green"
        styles.loc[~s[col], [col]] = "background: red"
    return styles


def get_tuckey_df(endog, groups, df_genotypes) -> pd.DataFrame:
    """Run a pairwise Tukey HSD test and return its result table as a frame.

    Args:
        endog: Response values.
        groups: Group label of each response value.
        df_genotypes: Optional frame with ``gen`` and ``rpvloci`` columns.
            When given, ``rpvloci`` is merged in for both groups as
            ``group1_rpvloci`` and ``group2_rpvloci``.

    Returns:
        One row per compared pair, with ``group1``/``group2`` as strings,
        ``reject`` as booleans and ``p-adj`` taken from ``tukey.pvalues``.
    """
    tukey = pairwise_tukeyhsd(endog=endog, groups=groups)
    # NOTE(review): relies on the private ``_results_table`` attribute.
    df_tuc = pd.DataFrame(tukey._results_table)
    # The first row of the table carries the column headers.
    df_tuc.columns = [str(c) for c in df_tuc.iloc[0]]
    ret = (
        df_tuc.drop(df_tuc.index[0])
        .assign(group1=lambda s: s.group1.astype(str))
        .assign(group2=lambda s: s.group2.astype(str))
        .assign(reject=lambda s: s.reject.astype(str) == "True")
    )
    # Replace the table's p-values with the values from the test result.
    ret["p-adj"] = tukey.pvalues

    if df_genotypes is None:
        return ret

    return (
        ret.merge(right=df_genotypes, how="left", left_on="group1", right_on="gen")
        .drop(["gen"], axis=1)
        .rename(columns={"rpvloci": "group1_rpvloci"})
        .merge(right=df_genotypes, how="left", left_on="group2", right_on="gen")
        .drop(["gen"], axis=1)
        .rename(columns={"rpvloci": "group2_rpvloci"})
    )


def get_tuckey_compare(df, df_genotypes=None, groups: str = "gen"):
    """Compare Tukey HSD results of the ``p_oiv`` and ``oiv`` columns.

    Args:
        df: Frame with ``p_oiv`` and ``oiv`` columns and the ``groups``
            column.
        df_genotypes: Optional genotype frame forwarded to
            ``get_tuckey_df``.
        groups: Name of the grouping column.

    Returns:
        The two Tukey tables merged on the group pair (plus the
        ``*_rpvloci`` columns when ``df_genotypes`` is given); ``p_oiv``
        columns get the ``_pred`` suffix and ``oiv`` columns ``_obs``.
    """
    merge_on = ["group1", "group2"]
    if df_genotypes is not None:
        merge_on = merge_on + ["group1_rpvloci", "group2_rpvloci"]

    df_poiv = get_tuckey_df(df.p_oiv, df[groups], df_genotypes=df_genotypes)
    df_oiv = get_tuckey_df(df.oiv, df[groups], df_genotypes=df_genotypes)
    return pd.merge(
        left=df_poiv, right=df_oiv, on=merge_on, suffixes=("_pred", "_obs")
    )


def df_tukey_cmp_plot(df, groups):
    """Bar-plot -log(p) of predicted vs. observed Tukey comparisons.

    Pairs are split into three panels: both tests reject ("Rejected"),
    the tests disagree ("Conflict"), neither rejects ("Accepted"). Panel
    widths are proportional to the number of pairs they contain.

    Args:
        df: Frame passed to ``get_tuckey_compare``.
        groups: Name of the grouping column.

    Returns:
        The matplotlib ``Figure`` holding the three panels.
    """
    df_tukey = (
        get_tuckey_compare(df=df, groups=groups, df_genotypes=None)
        .assign(pair_groups=lambda s: s.group1 + "\n" + s.group2)
        .sort_values("p-adj_obs")
    )
    df_tukey_reject = df_tukey[df_tukey.reject_obs & df_tukey.reject_pred]
    df_tukey_accept = df_tukey[~df_tukey.reject_obs & ~df_tukey.reject_pred]
    df_tukey_diverge = df_tukey[df_tukey.reject_obs != df_tukey.reject_pred]

    fig = Figure(figsize=(20, 6))
    # NOTE(review): a category with no pairs gets a width ratio of 0.
    ax_reject, ax_diverge, ax_accept = fig.subplots(
        1,
        3,
        gridspec_kw={
            "width_ratios": [
                len(df_tukey_reject),
                len(df_tukey_diverge),
                len(df_tukey_accept),
            ]
        },
        sharey=True,
    )

    panels = [ax_reject, ax_accept, ax_diverge]
    for ax in panels:
        ax.set_yticks(ticks=stars, labels=["*", "**", "***", "****"])
        ax.grid(False)

    ax_reject.set_title("Rejected")
    ax_diverge.set_title("Conflict")
    ax_accept.set_title("Accepted")

    panel_data = [df_tukey_reject, df_tukey_accept, df_tukey_diverge]
    # ``df_part`` rather than ``df`` so the function argument is not shadowed.
    for ax, df_part in zip(panels, panel_data):
        for star in stars:
            ax.axhline(y=star, linestyle="-", color="black", alpha=0.5)
        # Negative width with align="edge" puts the prediction bar to the
        # left of the tick and the scoring bar to the right of it.
        ax.bar(
            x=df_part["pair_groups"],
            height=-np.log(df_part["p-adj_pred"]),
            width=-0.4,
            align="edge",
            color="green",
            label="predictions",
        )
        ax.bar(
            x=df_part["pair_groups"],
            height=-np.log(df_part["p-adj_obs"]),
            width=0.4,
            align="edge",
            color="blue",
            label="scorings",
        )
        ax.margins(0.01)

    ax_accept.legend(loc="upper left", bbox_to_anchor=[0, 1], ncols=1, fancybox=True)
    ax_reject.set_ylabel("-log(p value)")
    ax_reject.tick_params(axis="y", which="major", labelsize=16)
    fig.subplots_adjust(wspace=0.05, hspace=0.05)
    return fig


def plot_patches(df, diff_only: bool = True):
    """Lay out one image per row of ``df`` in a single-row panel grid.

    Args:
        df: Frame with ``file_name``, ``oiv`` and ``p_oiv`` columns.
        diff_only: When ``True`` only rows where ``oiv`` differs from
            ``p_oiv`` are shown.

    Returns:
        A ``pn.GridBox`` with one column per row of the (filtered) frame;
        each cell holds a Markdown header and the image loaded through
        ``ci.load_image`` / ``ci.enhance_pil_image``.
    """
    if diff_only is True:
        df = df[(df.oiv != df.p_oiv)]
    df = df.assign(diff=lambda s: s.oiv != s.p_oiv).sort_values(
        ["diff", "oiv", "p_oiv"]
    )

    cells = [
        pn.Column(
            pn.pane.Markdown(f"### {row.file_name}|{row.oiv}->p{row.p_oiv}"),
            pn.pane.Image(
                object=ci.enhance_pil_image(
                    image=ci.load_image(
                        file_name=row.file_name,
                        path_to_images=cc.path_to_leaf_patches,
                    ),
                    brightness=1.5,
                )
            ),
        )
        for _, row in df.iterrows()
    ]
    return pn.GridBox(*cells, ncols=len(df))