Testing differences in means with a permutation test (1,000 resamples) and a t-test

Goals

1. Determine which leadership constructs changed significantly between the pre-Covid and during-Covid periods

2. Hypothesis testing: difference in means

png

3. Find the average differences

	changes = exp - covid	p_values_permutation	p_values_t_test	significant
AHC	0.345929	0.000	0.0000	1
Bout	-0.182161	0.000	0.0000	1
CO	0.256943	0.000	0.0000	1
GC	0.355430	0.000	0.0000	1
IPC	0.174999	0.000	0.0000	1
LMX	0.021892	0.096	0.0846	0
LS	0.004266	0.447	0.4497	0
MF	0.395901	0.000	0.0000	1
PS	-0.009199	0.383	0.3950	0
RE	0.045658	0.002	0.0016	1
WES	0.182599	0.000	0.0000	1
WLC	-0.894634	0.000	0.0000	1
WLSP	0.819718	0.000	0.0000	1

import pandas as pd
import numpy as np
import random
from scipy import stats
import matplotlib.pyplot as plt

Leadership constructs are referred to by abbreviation, e.g. AHC = after-hour connectivity, Bout = burnout, CO = collaborative overload, GC = goal clarity, etc. For more information, please contact My Bui.

# Abbreviations of the constructs under test. Each entry is also the stem of
# the Excel workbook ('<abbreviation>.xlsx') that holds that construct's data.
label = [
    'AHC',
    'Bout',
    'CO',
    'GC',
    'IPC',
    'LMX',
    'LS',
    'MF',
    'PS',
    'RE',
    'WES',
    'WLC',
    'WLSP',
]

def _flatten_non_nan(frame):
    """Return every non-NaN cell of ``frame`` as a flat 1-D NumPy array."""
    flat = frame.to_numpy().ravel()
    return flat[~np.isnan(flat)]


def read_data(construct):
    """Load both samples of one construct from its Excel workbook.

    The workbook ``<construct>.xlsx`` is opened once and its ``'exp'`` and
    ``'covid'`` sheets are read together. Every cell of each sheet is pooled
    into a single 1-D array (column structure is discarded) and NaN cells are
    dropped.

    Parameters
    ----------
    construct : str
        Workbook stem; ``'.xlsx'`` is appended to build the file path.

    Returns
    -------
    tuple of numpy.ndarray
        ``(array_A, array_B)`` where ``array_A`` holds the ``'exp'`` sheet
        values and ``array_B`` the ``'covid'`` sheet values.
    """
    # Passing a list of sheet names returns a dict of DataFrames and parses
    # the workbook a single time instead of once per sheet.
    sheets = pd.read_excel(construct + '.xlsx', sheet_name=['covid', 'exp'])
    array_A = _flatten_non_nan(sheets['exp'])
    array_B = _flatten_non_nan(sheets['covid'])

    return (array_A, array_B)


def combine(array_A, array_B):
    """Pool two samples into one plain Python list.

    Both arguments are converted with ``.tolist()``; the values of
    ``array_A`` come first, followed by those of ``array_B``.
    """
    return [*array_A.tolist(), *array_B.tolist()]


def permutation_A_B(x, n_B, n_A):
    """Return ``mean(A*) - mean(B*)`` for one random relabelling of ``x``.

    ``n_A`` positions of the pooled data are drawn uniformly at random without
    replacement to form the resampled group A*; the remaining positions form
    B*. The input list is left untouched.

    Parameters
    ----------
    x : sequence of float
        Pooled observations of both groups; expected to hold
        ``n_A + n_B`` values.
    n_B : int
        Size of group B. Note that it comes *before* ``n_A`` in the signature.
    n_A : int
        Size of group A.

    Returns
    -------
    float
        Difference of the resampled group means, A* minus B*.
    """
    pooled = pd.Series(x)
    n = n_A + n_B
    # random.sample already yields a uniformly random subset, so no separate
    # shuffle is needed (the previous in-place shuffle mutated the caller's
    # list on every call).
    idx_A = random.sample(range(n), n_A)
    in_A = set(idx_A)
    # Index with lists: pandas >= 2.0 rejects set indexers in .loc.
    idx_B = [k for k in range(n) if k not in in_A]

    return pooled.loc[idx_A].mean() - pooled.loc[idx_B].mean()


def permutation_B_A(x, n_B, n_A):
    """Return ``mean(B*) - mean(A*)`` for one random relabelling of ``x``.

    ``n_A`` positions of the pooled data are drawn uniformly at random without
    replacement to form the resampled group A*; the remaining positions form
    B*. The input list is left untouched.

    Parameters
    ----------
    x : sequence of float
        Pooled observations of both groups; expected to hold
        ``n_A + n_B`` values.
    n_B : int
        Size of group B. Note that it comes *before* ``n_A`` in the signature.
    n_A : int
        Size of group A.

    Returns
    -------
    float
        Difference of the resampled group means, B* minus A*.
    """
    pooled = pd.Series(x)
    n = n_A + n_B
    # random.sample already yields a uniformly random subset, so no separate
    # shuffle is needed (the previous in-place shuffle mutated the caller's
    # list on every call).
    idx_A = random.sample(range(n), n_A)
    in_A = set(idx_A)
    # Index with lists: pandas >= 2.0 rejects set indexers in .loc.
    idx_B = [k for k in range(n) if k not in in_A]

    return pooled.loc[idx_B].mean() - pooled.loc[idx_A].mean()

# One-sided p-values per construct, in the same order as `label`.
p_perm_list = []
p_t_test = []

# Number of random relabellings drawn for each permutation test.
n_permutations = 1000

for i in label:
    array_A, array_B = read_data(i)
    diff = np.mean(array_A) - np.mean(array_B)
    x = combine(array_A, array_B)
    n_A, n_B = len(array_A), len(array_B)

    # Orient the test so the observed difference is non-negative; the
    # permutation differences are computed with the same orientation.
    if diff < 0:
        print('For ' + i + ': Covid > Exp')
        diff = np.mean(array_B) - np.mean(array_A)
        perm_diffs = [permutation_B_A(x, n_B, n_A) for _ in range(n_permutations)]
    else:
        print('For ' + i + ': Exp > Covid')
        perm_diffs = [permutation_A_B(x, n_B, n_A) for _ in range(n_permutations)]

    # Histogram of the permutation distribution with the observed difference
    # marked in red.
    fig, ax = plt.subplots()
    ax.hist(perm_diffs, bins=11, rwidth=0.9)
    # Label the line itself so the legend entry cannot be attached to a
    # histogram bar by artist order.
    ax.axvline(x=diff, color='red', lw=2, label='Observed\ndifference')
    ax.set_xlabel('Differences')
    ax.set_ylabel('Frequencies')
    ax.legend()
    ax.set_title(i)
    plt.show()

    # Share of permutation differences at least as large as the observed one.
    # Convert to an array explicitly rather than relying on NumPy's reflected
    # comparison between a Python list and a scalar.
    p_perm = np.mean(np.asarray(perm_diffs) >= diff)
    p_perm_list.append(p_perm)
    print(f'p_perm: {p_perm}')

    # Welch's t-test (unequal variances). The two-sided p-value is halved to
    # get a one-sided value.
    # NOTE(review): the direction of the one-sided test is chosen after
    # looking at the data - confirm this is the intended reporting convention.
    res = stats.ttest_ind(array_A, array_B, equal_var=False)
    p_t_test.append(res.pvalue / 2)
    print(f'p_t_test: {res.pvalue / 2:.4f}')
    print('\n')

For AHC: Exp > Covid

png

p_perm: 0.0
p_t_test: 0.0000


For Bout: Covid > Exp

png

p_perm: 0.0
p_t_test: 0.0000


For CO: Exp > Covid

png

p_perm: 0.0
p_t_test: 0.0000


For GC: Exp > Covid

png

p_perm: 0.0
p_t_test: 0.0000


For IPC: Exp > Covid

png

p_perm: 0.0
p_t_test: 0.0000


For LMX: Exp > Covid

png

p_perm: 0.096
p_t_test: 0.0846


For LS: Exp > Covid

png

p_perm: 0.447
p_t_test: 0.4497


For MF: Exp > Covid

png

p_perm: 0.0
p_t_test: 0.0000


For PS: Covid > Exp

png

p_perm: 0.383
p_t_test: 0.3950


For RE: Exp > Covid

png

p_perm: 0.002
p_t_test: 0.0016


For WES: Exp > Covid

png

p_perm: 0.0
p_t_test: 0.0000


For WLC: Covid > Exp

png

p_perm: 0.0
p_t_test: 0.0000


For WLSP: Exp > Covid

png

p_perm: 0.0
p_t_test: 0.0000

# Observed mean difference (exp - covid) per construct, keyed by abbreviation.
changes = {}

for i in label:
    # Read each workbook once; the pooled list is not needed for the means.
    array_A, array_B = read_data(i)
    diff = np.mean(array_A) - np.mean(array_B)
    changes[i] = diff

changes = pd.DataFrame.from_dict(data=changes, orient='index', columns=['changes = exp - covid'])

# The p-value lists were filled in `label` order, which is also the row order
# of the frame built above.
changes['p_values_permutation'] = p_perm_list
changes['p_values_t_test'] = p_t_test
changes['p_values_t_test'] = changes['p_values_t_test'].round(4)

# 1 when the permutation p-value is at or below the 0.05 level, else 0.
changes['significant'] = (changes['p_values_permutation'] <= 0.05).astype(int)

changes

	changes = exp - covid	p_values_permutation	p_values_t_test	significant
AHC	0.345929	0.000	0.0000	1
Bout	-0.182161	0.000	0.0000	1
CO	0.256943	0.000	0.0000	1
GC	0.355430	0.000	0.0000	1
IPC	0.174999	0.000	0.0000	1
LMX	0.021892	0.096	0.0846	0
LS	0.004266	0.447	0.4497	0
MF	0.395901	0.000	0.0000	1
PS	-0.009199	0.383	0.3950	0
RE	0.045658	0.002	0.0016	1
WES	0.182599	0.000	0.0000	1
WLC	-0.894634	0.000	0.0000	1
WLSP	0.819718	0.000	0.0000	1