Python snippets#

Pandas tricks#

DF Transformations#

Wide to long#

tdf = tdf.melt(id_vars = ["Session", "PROLIFICID"], value_vars=columns)

Lambda function#

Take first value of each subgroup

df.groupby(by="Session")["date"].apply(lambda x: np.array(x.reset_index().iloc[0,1]))

Plotting (Seaborn and MPL)#

Scatter plot with lmplot and correlation for each group#

f, ax = plt.subplots(1,1,figsize=(5,5))
cols= ["black", "red"]
for lidx, lvl in enumerate(["high", "low"]):
    ttdf = behdf.loc[behdf["TF2_PhysiolAnx_ms"].isin([lvl])]
    sns.regplot(data=ttdf, x="prob_safe", y="shnosh_diff_mflr", color=cols[lidx], ax=ax)
    corrfunc(ttdf["prob_safe"], ttdf["shnosh_diff_mflr"], tests=[ "spearman"], drop_missing=True, xanchor=0.4, yanchor=0+lidx*0.06, boxcolor=cols[lidx])

Legend manipulation#

Change labels on existing legend

for t, l  in zip(ax.get_legend().texts, new_labels):
    t.set_text(l)

Remove legend border

ax.get_legend().get_frame().set_linewidth(0.0)

Statistics in Python#

Universal correlation function to add to plots#

def corrfunc(x, y, tests=["pearson"], drop_missing=False, ax=None, xanchor=0.4, yanchor = 0.1, randomanchor=False, boxcolor='purple', **kws):
    if (ax is None):
        ax = plt.gca()
    
    if randomanchor:
        yanchor=0.1 + np.random.normal(0, 0.2)
        
        
    if drop_missing:
        d = pd.DataFrame({'x':np.array(x), 'y':np.array(y)})
        d = d.dropna()
        x = d["x"]
        y = d["y"]

    #["pearson", "spearman", "kendall", "distcor"]
    
    ycoord = yanchor*len(tests) + 0.05
    if "pearson" in tests:
        r,p = stats.pearsonr(x, y)
        t = plt.text(xanchor, ycoord, "Pearson r = {:.2f}, p={:.2g}".format(r,p), transform=ax.transAxes, fontsize=10)
        t.set_bbox(dict(facecolor='white', alpha=1, edgecolor=boxcolor))
        ycoord = ycoord-0.1
        
    if "spearman" in tests:
        r,p = stats.spearmanr(x, y)
        t = plt.text(xanchor, ycoord, "Spearman r = {:.2f}, p={:.2g}".format(r,p), transform=ax.transAxes, fontsize=10)
        t.set_bbox(dict(facecolor='white', alpha=1, edgecolor=boxcolor))
        ycoord = ycoord-0.1

    if "xicor" in tests:
        r = xicor(x,y, ties=True)
        t = plt.text(xanchor, 0.05, "xicor. xi= {:.2f}".format(r), transform=ax.transAxes, fontsize=10)
        t.set_bbox(dict(facecolor='white', alpha=1, edgecolor=boxcolor))
        ycoord = ycoord-0.1
    
    if "distcor" in tests:
        r2 = Dcorr(x,y)
        t = plt.text(xanchor, 0.05, "Dist. corr = {:.2f}".format(r2), transform=ax.transAxes, fontsize=10)
        t.set_bbox(dict(facecolor='white', alpha=1, edgecolor=boxcolor))

# More info in xicor: https://arxiv.org/abs/1909.10140
def xicor(X, Y, ties=True):
    random.seed(42)
    n = len(X)
    order = array([i[0] for i in sorted(enumerate(X), key=lambda x: x[1])])
    if ties:
        l = array([sum(y >= Y[order]) for y in Y[order]])
        r = l.copy()
        for j in range(n):
            if sum([r[j] == r[i] for i in range(n)]) > 1:
                tie_index = array([r[j] == r[i] for i in range(n)])
                r[tie_index] = random.choice(r[tie_index] - arange(0, sum([r[j] == r[i] for i in range(n)])), sum(tie_index), replace=False)
        return 1 - n*sum( abs(r[1:] - r[:n-1]) ) / (2*sum(l*(n - l)))
    else:
        r = array([sum(y >= Y[order]) for y in Y[order]])
        return 1 - 3 * sum( abs(r[1:] - r[:n-1]) ) / (n**2 - 1)

Mixed ANOVA with posthocs in Python#

import pingouin as pg
from statsmodels.stats.multitest import multipletests
aov = pg.mixed_anova(data=stdf, dv='value', between='cl', within='variable',
                     subject='id', correction=False, effsize="np2")
pg.print_table(aov)
resdf = pd.DataFrame()
for c in [0,1]:
    tdf = stdf.loc[stdf["cl"]==c,:]
    res = pg.ttest( x=tdf.loc[tdf["variable"]=="TF3_NegativeAffect","value"],
                    y=tdf.loc[tdf["variable"]=="TF2_PhysiolAnx","value"],
                    paired=True)
    res["contrast"] = "TF3>TF2, cluster "+str(c)
    resdf = pd.concat([resdf, res], axis=0)

    res = pg.ttest( x=tdf.loc[tdf["variable"]=="TF3_NegativeAffect","value"],
                    y=tdf.loc[tdf["variable"]=="TF1_CognAnxDepr","value"],
                    paired=True)
    res["contrast"] = "TF3>TF1, cluster "+str(c)
    resdf = pd.concat([resdf, res], axis=0)

    res = pg.ttest( x=tdf.loc[tdf["variable"]=="TF1_CognAnxDepr","value"],
                    y=tdf.loc[tdf["variable"]=="TF2_PhysiolAnx","value"],
                    paired=True)
    res["contrast"] = "TF1>TF2, cluster "+str(c)
    resdf = pd.concat([resdf, res], axis=0)



for t in trait_factor_names:
    tdf = stdf.loc[stdf["variable"]==t,:]
    res = pg.ttest( x=tdf.loc[tdf["cl"]==0,"value"],
                    y=tdf.loc[tdf["cl"]==1,"value"],
                    paired=False)
    res["contrast"] = "cl0>cl1"+t
    resdf = pd.concat([resdf, res], axis=0)

resdf["p-holm"] = np.round(multipletests(resdf["p-val"], alpha=0.05, method="holm")[1],4)
pg.print_table(resdf)