// This is a do-file for the analysis performed by Prateek Chandra Bhan for a doctoral thesis in Economics in The University of Glasgow
// Project title: Do role models increase student hope and effort? Evidence from India

// Date: 20 Oct 2020

// I have a merged file with information on: Covariates (Parents, Social Networks and Children, Secondary data on student scores) and Indicators (Hope, Effort and Achievement)
// An example of the merging commands used: 
// On Date: 27 March 2020:-			 merge m:m id studentname using "/Users/apple/Documents/PhD Economics/Year 3/Main experiment/Data/Stata files/AttendanceDataThesis_PCB_Aug21.dta"

** Note that some of the following commands are purely for intermediate analysis; their outputs are not reported in the thesis.

// Using the merged dataset.
// The `clear` option lets the do-file be re-run from the top even when a modified
// dataset is already in memory (without it, -use- stops with "no; data in memory would be lost").
use "/Users/apple/Documents/PhD Economics/Year 3/Main experiment/Data/Stata files/MergedData_BLParentsKidsTeachers_Indicators_Rev5.dta", clear

//use "/Users/apple/Documents/PhD Economics/Year 4/PhD Thesis 2021/Data/MergedDataThesis_PCB_Aug21.dta"

// sorting
// gsort id session

// dataset was cleaned and variables were labelled in Feb-Sep 2020

// Sample summary statistics (mean, s.d., median, n & range), baseline rows only (session == 1), by treatment arm
// In the order: age, gender, past performance in class 3 (percentage), taking pvt tuition, tuition hours/wk, religion, native, hh size, hrs study/week, hope score, remedial class attendance, substitution class score, aser scores in math, english and total, hope score of father and mother
// NOTE(review): sem1_result is also in the variable list below but is not mentioned in the description above.

tabstat age gender  past_record sem1_result c_4 c_6 c_7_1 c_10 c_12 c_19 chs rc sc aser_m aser_e aser_t p1_h p2_h if session == 1, statistics( count mean sd min max median ) by(tr) columns(statistics)


// Table for balancing checks
// Two-sample t-tests of each baseline variable across treatment arms (by(tr)),
// restricted to baseline rows (session == 1). Each test is run through the
// -asdoc- prefix exactly as before; only the repetition has been folded into loops.

// A. Using survey information on baseline characteristics
foreach bvar in age gender c_4 c_6 c_7_1 c_10 c_12 c_19 f_1_2 july_fraction p1_h p2_h {
	asdoc ttest `bvar' if session == 1, by(tr)
}

// B. Using information on baseline indicators & secondary data
foreach bvar in past_record chs rc sc {
	asdoc ttest `bvar' if session == 1, by(tr)
}

// gls_index is only generated further down in this do-file (Anderson's index section).
// On a clean top-to-bottom run it does not exist yet, so the test is guarded instead of
// aborting the whole do-file; re-run this line after the index has been built.
capture confirm variable gls_index
if _rc == 0 {
	asdoc ttest gls_index if session == 1, by(tr)
}
else {
	display as error "balance check for gls_index skipped: variable not found (build the GLS index first, then re-run)"
}

// The original script ran the aser_t test twice; the duplicate has been removed.
foreach bvar in aser_m aser_e aser_t {
	asdoc ttest `bvar' if session == 1, by(tr)
}


// Balance illustrated graphically: one discrete frequency histogram per
// baseline variable, drawn separately for each treatment arm (baseline rows only).
foreach hvar in age age1 gender c_7_1 c_4 c_10 {
	histogram `hvar' if session == 1, discrete frequency by(tr)
}

// One can draw many other figures to illustrate the balance across the two groups (tr = 0 & tr = 1)

// Baseline correlation matrices (session == 1 rows only)
// - Pearson (corr / pwcorr) for continuous variables
// - Spearman (non-parametric) for ordinal variables such as Likert scales or indices

// Full variable set shared by the three "all variables" matrices below
local corr_all chs rc sc aser_m aser_e aser_t age gender past_record total_sem1 c_4 c_7_1 c_10 c_12 c_19

// general correlation matrix: possible relationships between variables
corr `corr_all' if session == 1

// Pearson's pairwise correlations: a first look at possible multicollinearity
pwcorr `corr_all' if session == 1, obs sig star(5)

pwcorr aser_m aser_e aser_t age past_record july_fraction c_12 c_19 if session == 1, obs sig star(5)

// Spearman's rank correlations
spearman chs aser_t rc sc c_4 c_7_1 c_10 gender if session == 1, stats(rho obs p) star(0.05)
spearman `corr_all' if session == 1, stats(rho obs p) star(0.05)

// Declare the data as a panel: id is the panel identifier, session the time variable.
xtset id session

// Generating the outcome variables from the drawings
// Exploratory factor analysis (EFA) on the drawing items.
// $port holds the item list (v_1 through v_30) and $ncomp the number of factors kept;
// both are globals, so they remain defined after this do-file finishes.
global port v_1 - v_30
global ncomp 3
// Unrestricted factor analysis, followed by a scree plot of the eigenvalues
factor $port 
screeplot
// Same analysis, with mineigen(1) as the retention threshold on eigenvalues
factor $port, mineigen(1)
// Final specification: $ncomp factors; blanks(.3) suppresses the display of small loadings
factor $port, comp($ncomp) blanks(.3)
// Varimax rotation (run twice; the second call only changes the display via blanks(.3))
rotate, varimax
rotate, varimax blanks(.3)
estat common
// Factor scores from the most recent (rotated) solution; f1 and f3 are used as outcomes later on
predict f1 f2 f3
// Sampling-adequacy (KMO) statistic and scale reliability (alpha) for the same item list
estat kmo
alpha $port

// Preparing Kling's index
// Each drawing item is converted to an effect size relative to the control group
// (tr == 0, all sessions): z = (x - control mean) / control s.d.
// The original code computed `x - mean/sd` because of operator precedence
// (division binds tighter than subtraction); the parentheses below fix that.
foreach item in v_5 v_6 v_8 v_18 v_19 v_20 v_24 {
	quietly summarize `item' if tr == 0
	gen `item'_es = (`item' - r(mean)) / r(sd)
}

// Equally weighted average of the seven effect sizes.
// rsum() treats a missing component as zero, and the total is always divided by 7.
egen k1 = rsum(v_5_es v_6_es v_8_es v_18_es v_19_es v_20_es v_24_es)
replace k1 = k1/7

// Preparing Anderson's index (inverse-covariance-weighted summary index)

// Step 1: standardise each item against the control group at baseline
// (tr == 0 & session == 1): z = (x - mean) / s.d.
// Two fixes relative to the original loop:
//   * the v_*_es variables already exist (created for Kling's index above), so a plain
//     -gen- stops with "variable already defined"; they are dropped first.
//     The drop happens BEFORE -summarize- because -drop- overwrites r().
//   * `x - mean / sd` was evaluated as x - (mean/sd); parentheses added.
foreach a1 in v_5 v_6 v_8 v_18 v_19 v_20 v_24 {
	capture drop `a1'_es
	quietly summarize `a1' if tr == 0 & session == 1
	gen `a1'_es = (`a1' - r(mean)) / r(sd)
}

// Step 2: generate and invert the correlation matrix
matrix accum R = v_5_es v_6_es v_8_es v_18_es v_19_es v_20_es v_24_es, nocons dev // Form cross-product (Y'Y) matrix from outcome variables
matrix R = R / r(N) // Generate correlation matrix for outcomes
matrix R = syminv(R) // Invert correlation matrix
local counter = 1
matrix J = J(colsof(R),1,1) // Column vector of correct dimension
while `counter' <= colsof(R) {
    matrix T = R[`counter',1..colsof(R)]
    matrix A = T * J // Sum of row of inverted correlation matrix
    // weight1 ... weight7: row sums of the inverted matrix, one per item (counter is post-incremented)
    local weight`counter++' = A[1,1]
}

// Step 3: per-observation sum of the weights of the non-missing items;
// missing standardised items are then set to zero so they drop out of the weighted sum
gen sample_p1 = 0
local counter = 1
foreach a1 in v_5 v_6 v_8 v_18 v_19 v_20 v_24 {
    replace sample_p1 = sample_p1 + `weight`counter++''  if  `a1' != .
    replace `a1'_es = 0  if `a1' == .
}


// Step 4: the GLS index = weighted sum of standardised items / sum of applicable weights
quietly gen gls_index = 0
local counter = 1
foreach a1 in v_5 v_6 v_8 v_18 v_19 v_20 v_24 {
    replace gls_index = `a1'_es *  `weight`counter++''  +  gls_index
}
replace gls_index = gls_index / sample_p1

// Baseline associations for the drawing characteristics
// (Kling's index is not included: the first EFA factor score f1 and Anderson's
// gls_index are used as the two drawing-based measures.)
// Each outcome (chs, sc, aser_t) is regressed on the common controls, the other
// outcomes, and one drawing measure at a time, on rows with session2 == 0 and
// non-missing v_5 and chs.

eststo clear

local bl_controls gender age past_record july_fraction c_4 c_7_1 c_10 c_12 c_19 f_1_2
// `chs! = .` in the original is written here as the standard `!=` operator
local bl_sample   v_5 != . & chs != . & session2 == 0

foreach drawing in f1 gls_index {
	eststo: reg chs `bl_controls' aser_t sc `drawing' if `bl_sample'
}

foreach drawing in f1 gls_index {
	eststo: reg sc `bl_controls' chs aser_t rc `drawing' if `bl_sample'
}

foreach drawing in f1 gls_index {
	eststo: reg aser_t `bl_controls' chs rc sc `drawing' if `bl_sample'
}

// Star levels aligned with every other table in this do-file (10%, 5%, 1%);
// the original used 0.001 for three stars here only.
esttab using table1.rtf, starlevels(* 0.10 ** 0.05  *** 0.01) se label

*** The table numbers used to save the tables do not correspond to those used in the thesis.

// Mean comparison using DiD (before/after): treatment indicator tr, period indicator session2

// Survey-based and secondary outcomes: rows with a defined period only
foreach y in chs sc rc aser_m aser_e {
	diff `y' if session2 != ., t(tr) p(session2)
}

// Drawing-based outcomes: rows with drawing data (v_5 non-missing)
foreach y in f1 k1 gls_index {
	diff `y' if v_5!= ., t(tr) p(session2)
}


// Main results

// Standardised outcome indicators are created for hope (chs), effort (sc & rc) and
// achievement (aser_m = Mathematics, aser_e = English):
//     <y>_d = (y - baseline mean) / baseline s.d.
// The baseline mean and s.d. applied to each outcome are the hard-coded constants
// listed below, position by position (-summ- prints the session == 1 statistics for
// reference). NOTE(review): if the dataset changes, these constants must be updated
// by hand to match the -summ- output.
// For every outcome the same sequence is run: build <y>_sd, <y>_bl_mean and <y>_d,
// then three random-effects DiD regressions (bl/el, and the two follow-up contrasts).

local std_outcomes chs       sc       rc       aser_m   aser_e
local std_sds      5.409283  1.068302 .4773766 11.77973 15.57966
local std_means    27.331081 1.108796 .349537  32.28704 31.0162

forvalues j = 1/5 {
	local y     : word `j' of `std_outcomes'
	local bl_sd : word `j' of `std_sds'
	local bl_mu : word `j' of `std_means'

	summ `y' if session == 1

	// s.d. variable: created from the baseline rows, then set to the constant on every row
	egen `y'_sd = sd(`y') if session == 1
	replace `y'_sd = `bl_sd'

	// mean variable: same pattern; blanked where the raw outcome is missing
	egen `y'_bl_mean = mean(`y') if session == 1
	replace `y'_bl_mean = `bl_mu'
	replace `y'_bl_mean = . if `y' == .

	// demean, then scale (two steps, as in the original)
	gen `y'_d = `y' - `y'_bl_mean
	replace `y'_d = `y'_d/`y'_sd
	replace `y'_d = . if `y' == .

	// rows with a missing outcome are dropped by xtreg itself, so the sample
	// condition is just the period indicator of each contrast
	xtreg `y'_d interact session2 tr if session2 != ., re cl(id) robust
	xtreg `y'_d interact1 ses tr if ses != . , re cl(id) robust
	xtreg `y'_d interact2 ses2 tr if ses2 != ., re cl(id) robust
}

*** Instead of doing the above manually, one can use the command 'global' or 'collapse'.
*** Because I have multiple outcome variables, for robustness, I decided to handle each variable manually, one at a time, to avoid any ambiguity.


// Plotting point estimates: group means by treatment arm over time.
// Each plot collapses the data to arm-by-period means inside preserve/restore,
// so the working dataset is left untouched. Solid line = tr == 1, dashed = tr == 0.

foreach y in chs_d gls_index f3 rc_d {
	preserve
	collapse `y', by(tr session)
	scatter `y' session if tr == 1, connect(l) lpattern(solid) || scatter `y' session if tr == 0, connect(l) lpattern(dash)
	restore
}


// Attendance is plotted against ses rather than session.
// NOTE(review): -ed- opens the interactive Data Editor; it will interrupt an unattended run.
ed id attending m1 m8 ses
preserve
collapse attending, by(tr ses)
scatter attending ses if tr == 1, connect(l) lpattern(solid) || scatter attending ses if tr == 0, connect(l) lpattern(dash)
restore

foreach y in sc_d aser_e_d aser_m_d {
	preserve
	collapse `y', by(tr session)
	scatter `y' session if tr == 1, connect(l) lpattern(solid) || scatter `y' session if tr == 0, connect(l) lpattern(dash)
	restore
}


// Plotting point estimates with 90% confidence intervals (by treatment) for all sessions.
// For each outcome the data are collapsed to the mean (y) and standard error of the
// mean (se_y) per session-by-arm cell; the interval is y +/- 1.65*se_y.
// (For a 95% CI one can use 1.96 instead of 1.65.)

// For hope -- the only plot that sets explicit cap line widths
preserve
local varname chs_d
local group session tr

collapse (mean) y = `varname' (semean) se_y = `varname', by(`group')

gen yu = y + 1.65*se_y
gen yl = y - 1.65*se_y

twoway (scatter y session if tr == 1, connect(l) lpattern(solid)) (rcap yu yl session, lwidth(.2)) || (scatter y session if tr == 0, connect(l) lpattern(dash)) (rcap yu yl session, lwidth(.1))

restore


// Remaining outcomes, in order: SE/Optimism (gls_index), RC (rc_d),
// Attendance (attend), Mathematics (aser_m_d), English (aser_e_d)
foreach series in gls_index rc_d attend aser_m_d aser_e_d {
	preserve
	local varname `series'
	local group session tr
	collapse (mean) y = `varname' (semean) se_y = `varname', by(`group')
	gen yu = y + 1.65*se_y
	gen yl = y - 1.65*se_y
	twoway (scatter y session if tr == 1, connect(l) lpattern(solid)) (rcap yu yl session) || (scatter y session if tr == 0, connect(l) lpattern(dash)) (rcap yu yl session)
	restore
}


// Estimations for treatment effects using difference-in-differences (DiD) and (Ancova)

// DiD
// For every outcome four random-effects models (s.e. clustered on id) are stored, in this order:
//   1. baseline vs endline (interact, session2), no controls
//   2. the same with controls
//   3. interact1 / ses contrast, with controls
//   4. interact2 / ses2 contrast, with controls
// NOTE(review): rc_d_w is used as the remedial-class outcome here; it is not created
// in the visible part of this do-file (only rc_d is) -- confirm it exists in the dataset.

eststo clear

local did_controls age gender c_19 c_12 c_7_1 past_record

foreach y in chs_d f1 k1 gls_index f3 sc_d rc_d_w aser_m_d aser_e_d {
	eststo: xtreg `y' interact session2 tr if session2!=., re cl(id) robust
	eststo: xtreg `y' interact session2 tr `did_controls' if session2!=., re cl(id) robust
	eststo: xtreg `y' interact1 ses tr `did_controls' if ses!=., re cl(id) robust
	eststo: xtreg `y' interact2 ses2 tr `did_controls' if ses2!=., re cl(id) robust
}

esttab using table2.rtf, starlevels(* 0.10 ** 0.05  *** 0.01) se label

// Ancova

// Ancova main results

// bl to el
// For each outcome the endline value is placed on the child's baseline row
// (new variable <outcome>1, e.g. chs_d1) and regressed on the baseline value and treatment.
//
// The original code fetched the later value positionally with [_n+1]. That only works
// if the data happen to be sorted by id and session with no missing rows; otherwise a
// baseline row silently receives the value of a different child or a different round.
// The lead operator F1. uses the panel declaration made earlier (xtset id session)
// and therefore always refers to the same id one session later (missing if absent).
// assumes session2 == 0 marks the baseline rows and endline is the next session -- TODO confirm
preserve
sort id session
foreach y in chs_d sc_d rc_d aser_m_d aser_e_d {
	gen `y'1 = F1.`y' if session2 == 0
	reg `y'1 `y' tr if session2 == 0
}
restore

// bl to fus1
// Ancova of the first follow-up value on the baseline value and treatment, for
// Hope, SE/Optimism (f1), Effort (sc, rc), Mathematics and English.
//
// Changes relative to the original:
//   * the follow-up value is fetched with the panel lead operator F2. (same id, two
//     sessions after the baseline row) instead of the positional [_n+2], which depends
//     on sort order and crosses child boundaries when a child has missing rounds;
//   * stored estimates are cleared first -- otherwise table3 would also contain every
//     model stored since the last -eststo clear-.
// assumes session2 == 0 marks the baseline rows -- TODO confirm
preserve
eststo clear
sort id session

foreach y in chs_d f1 sc_d rc_d aser_m_d aser_e_d {
	// name of the lead variable: <outcome>1, except f1 which becomes f1_d1
	local lead `y'1
	if "`y'" == "f1" local lead f1_d1

	gen `lead' = F2.`y' if session2 == 0
	eststo: reg `lead' `y' tr if session2 == 0
}

esttab using table3.rtf, starlevels(* 0.10 ** 0.05  *** 0.01) se label

restore


// bl to fus2
// Ancova of the second follow-up value on the baseline value and treatment, for
// Hope, SE/Optimism (f1), Effort (sc, rc), Mathematics and English.
//
// Changes relative to the original:
//   * the follow-up value is fetched with the panel lead operator F3. (same id, three
//     sessions after the baseline row) instead of the positional [_n+3], which depends
//     on sort order and crosses child boundaries when a child has missing rounds;
//   * stored estimates are cleared first -- otherwise table4 would also contain every
//     model stored since the last -eststo clear-.
// assumes session2 == 0 marks the baseline rows -- TODO confirm
preserve
eststo clear
sort id session

foreach y in chs_d f1 sc_d rc_d aser_m_d aser_e_d {
	// name of the lead variable: <outcome>1, except f1 which becomes f1_d1
	local lead `y'1
	if "`y'" == "f1" local lead f1_d1

	gen `lead' = F3.`y' if session2 == 0
	eststo: reg `lead' `y' tr if session2 == 0
}

esttab using table4.rtf, starlevels(* 0.10 ** 0.05  *** 0.01) se label

restore


// I estimate the overall average treatment effects using a pooled sample
// (Sessions: 1 - baseline, 2 - endline, 3 - follow-up survey 1, and 4 - follow-up survey 2)
// First, I create session dummies
// Second, I repopulate a variable with all the follow-up 2 values of the outcome variables (Hope, SE/Optimism, RC, SC, Mathematics and English)
// Finally, I regress the outcome value on treatment controlling for the session dummies and the outcome values in different time periods
//
// Changes relative to the original:
//   * follow-up 2 values are copied onto the earlier rows of the SAME child with the
//     panel lead operators F3./F2./F1. (xtset id session) instead of positional
//     [_n+k] look-ups, which depend on sort order and break when a child misses a round;
//   * non-session-4 rows are blanked with an explicit condition instead of
//     -recode (0 = .)-, which would also have wiped a genuine value of exactly zero;
//   * stored estimates are cleared so table5 holds only the six pooled models.
preserve
eststo clear
sort id session

// Session dummies (0 when session is anything else, including missing)
gen s0 = (session == 1)
gen s1 = (session == 2)
gen s2 = (session == 3)
gen s3 = (session == 4)

// Hope: outcome interacted with each session dummy
gen chs_bl_d = s0*chs_d
gen chs_el_d = s1*chs_d
gen chs_f1_d = s2*chs_d
gen chs_f2_d = s3*chs_d

// Exploratory regression kept from the original.
// NOTE(review): chs_f2, chs_el and chs_f1 are not created in this do-file; the line
// presumably relies on variable-name abbreviation (chs_f2_d, chs_el_d, chs_f1_d) -- confirm.
reg chs_f2 chs tr chs_el chs_f1 

gen chs_eltr = chs_el_d*tr
gen chs_f1tr = chs_f1_d*tr

// SE/Optimism (f1 is used as is), substitution class, remedial class
foreach stub in f1 sc rc {
	local src `stub'_d
	if "`stub'" == "f1" local src f1

	gen `stub'_bl_d = s0*`src'
	gen `stub'_el_d = s1*`src'
	gen `stub'_f1_d = s2*`src'
	gen `stub'_f2_d = s3*`src'
}

// Mathematics and English (these use the suffixes bld / eld / f1d / f2d)
foreach stub in aser_m aser_e {
	gen `stub'_bld = s0*`stub'_d
	gen `stub'_eld = s1*`stub'_d
	gen `stub'_f1d = s2*`stub'_d
	gen `stub'_f2d = s3*`stub'_d
}

// Put each child's follow-up 2 value on that child's session 1, 2 and 3 rows.
// Each pair is "<target variable> <source outcome>".
foreach pair in "chs_f2_d chs_d" "f1_f2_d f1" "sc_f2_d sc_d" "rc_f2_d rc_d" "aser_m_f2d aser_m_d" "aser_e_f2d aser_e_d" {
	local target : word 1 of `pair'
	local source : word 2 of `pair'

	replace `target' = . if session != 4
	replace `target' = F3.`source' if session == 1
	replace `target' = F2.`source' if session == 2
	replace `target' = F1.`source' if session == 3
}


// Pooled regressions on the three pre-follow-up-2 rounds (session 3 is the omitted dummy)
eststo: reg chs_f2_d s0 s1 chs_bl_d chs_el_d chs_f1_d tr   if session!= 4
eststo: reg f1_f2_d s0 s1 f1_bl_d f1_el_d f1_f1_d tr    if session!= 4
eststo: reg sc_f2_d s0 s1 sc_bl_d sc_el_d sc_f1_d tr    if session!= 4
eststo: reg rc_f2_d s0 s1 rc_bl_d rc_el_d rc_f1_d tr    if session!= 4
eststo: reg aser_m_f2d s0 s1 aser_m_bld aser_m_eld aser_m_f1d tr    if session!= 4
eststo: reg aser_e_f2d s0 s1 aser_e_bld aser_e_eld aser_e_f1d tr   if session!= 4
esttab using table5.rtf, starlevels(* 0.10 ** 0.05  *** 0.01) se label


restore

// Heterogeneity analysis

// I give an example of how the dummies were created for above and below median age.
// The same is followed for Gender (male/female) and previous academic performance (above/below median performance)

tabstat age if session == 1, statistics( mean count sd min max median ) columns(statistics)

// age11 = 1 if age is above 9 (the cut-off used in the original), 0 otherwise.
// Stata treats missing as larger than any number, so the original `gen age11 = 1 if age>9`
// coded children with a MISSING age as "above median". They are now left missing,
// which also keeps the interaction age_int1 missing for them.
gen age11 = (age > 9) if !missing(age)
gen age_int1 = age11*tr 

// One random-effects model per outcome, treatment effect interacted with the age dummy
eststo clear
foreach y in chs_d f1 sc_d rc_d aser_m_d aser_e_d {
	eststo: xtreg `y' tr interact session2 age11 gender past_record c_19 age_int1 if session2!=., re cl(id) robust
}
esttab using table6.rtf, starlevels(* 0.10 ** 0.05  *** 0.01) se label

tabstat past_record if session == 1, statistics( mean count sd min max median ) columns(statistics)

// exam = 1 if past performance is above 80.25 (the cut-off used in the original), 0 otherwise.
// Stata treats missing as larger than any number, so the original
// `gen exam = 1 if past_record > 80.25` coded children with a MISSING record as
// high performers. They are now left missing (and so is exam_tr).
gen exam = (past_record > 80.25) if !missing(past_record)
gen exam_tr = exam*tr

// For psychological outcomes (Hope, SE/Optimism and Happiness)
// Three models per outcome, each adding one treatment interaction:
// age (age_int1), gender (male_tr), past performance (exam_tr).
eststo clear
foreach y in chs_d f1 f3 {
	foreach het in age_int1 male_tr exam_tr {
		eststo: xtreg `y' tr interact session2 age11 gender exam `het' if session2!=., re cl(id) robust
	}
}
esttab using table7.rtf, starlevels(* 0.10 ** 0.05  *** 0.01) se label


// For Effort (Substitution and Remedial Class) and Achievement (Mathematics and English)
// Three models per outcome, each adding one treatment interaction, stored in the
// order gender (male_tr), age (age_int1), past performance (exam_tr).
eststo clear
foreach y in sc_d rc_d aser_m_d aser_e_d {
	foreach het in male_tr age_int1 exam_tr {
		eststo: xtreg `y' tr interact session2 age11 gender exam `het' if session2!=., re cl(id) robust
	}
}
esttab using table8.rtf, starlevels(* 0.10 ** 0.05  *** 0.01) se label


// Exploring heterogeneity on the basis of baseline hope quantiles
// Other possible sources of heterogeneous effects that are not recorded in the PAP were explored. No such effect was detected.


// Quartile (nq(4)) of standardised hope among baseline rows; missing on all other rows.
// (The variable keeps its original name chs_d_quin although it holds quartiles.)
xtile chs_d_quin = chs_d if session ==1 , nq(4)

// hope_q1 ... hope_q4: 1 on EVERY row of a child whose baseline hope falls in that
// quartile, 0 otherwise; hopeq#_tr is the interaction with treatment.
// The original spread the baseline flag to the child's other rows with a running
// sum under `bys id:`, which only works when the baseline row happens to come first
// within id. Taking the within-child maximum gives the same result without depending
// on row order.
forvalues q = 1/4 {
	tempvar in_q
	gen byte `in_q' = (chs_d_quin == `q')
	bysort id (session): egen hope_q`q' = max(`in_q')
	gen hopeq`q'_tr = hope_q`q'*tr
	drop `in_q'
}


// Heterogeneity by baseline hope quartile: for each outcome, one random-effects
// model per quartile dummy and its treatment interaction (quartiles 1 to 4 in turn).
local q_controls age gender exam c_19 c_12

foreach y in chs_d f1 sc_d rc_d aser_m_d aser_e_d {
	forvalues q = 1/4 {
		xtreg `y' tr session2 interact hope_q`q' hopeq`q'_tr `q_controls' if session2!=., re cl(id) robust
	}
}


///// Robustness checks

// A. FWER-adjusted p-values (family-wise error rate adjustment) for the self-portrait analysis
** I follow the commands suggested by Prof. M.L. Anderson at UC Berkeley
** The instructions and explanations with the commands were also kindly offered by Prof. Anderson. I am thankful to him for this support.
// Everything below runs between preserve and restore, so the storage variables created here
// do not survive the section; the results are therefore listed just before restore.
preserve

rename tr treated

// Number of placebo draws: used as the loop bound and as the denominator of the adjusted p-values
local nsims = 10000

// Fix the seed so that the placebo assignments, and therefore the adjusted p-values, can be reproduced
set seed 20201020

// Storage variables: row k holds the results for the k-th outcome
local counter = 1
gen str20 varname = ""
gen float tstat = .
gen float act_pval = .
gen float tstatsim = .
gen float pvalsim = .
gen float pvals = .

// I run the original regressions for all of the different outcomes tested
// I store the actual (observed) p-vals/t-stats of the coefficient on interact
foreach lhsvar in f1 k1 gls_index {
    quietly xtreg `lhsvar' interact session2 treated if v_1 !=., re cl(id)
    quietly replace tstat = abs( _b[interact] / _se[interact] ) in `counter'
    quietly replace act_pval = 2 * ttail( e(N), abs( tstat ) ) in `counter'
    quietly replace varname = "`lhsvar'" in `counter'
    // per-outcome counter of placebo draws that beat the observed p-value
    local `lhsvar'_ct_0 = 0
    local counter = `counter' + 1
}

// Sort by the actual (observed) p-vals so that rows 1..`endvar' hold the outcomes, most significant first
// (missing values sort last). This reorders the observations, which does not matter here.
// It is also permissible to use "gsort act_pval id session2"
gsort act_pval

local endvar = `counter' - 1

// I create a variable that stores the simulated (placebo) treatments
gen byte simtreatment = .
gen float simtreatment_uni = .

// Run `nsims' iterations of the simulation and record results in the p-val storage counters
forvalues count = 1/`nsims' {
    // In this section I assign the placebo treatments and run regressions using the placebo treatments
    quietly replace simtreatment_uni = runiform()
    quietly replace simtreatment = ( simtreatment_uni > 0.5 )
    quietly replace tstatsim = .
    quietly replace pvalsim = .
    // NOTE(review): the placebo regression is a pooled OLS of the outcome on an observation-level
    // placebo dummy, whereas the observed statistic comes from the xtreg DiD on the v_1 != . sample
    // with id-clustered errors -- confirm that this mismatch is intended.
    forvalues row = 1/`endvar' {
        local depvar = varname[`row']
        quietly reg `depvar' simtreatment
        quietly replace tstatsim = abs( _b[simtreatment] / _se[simtreatment] ) in `row'
        quietly replace pvalsim = 2 * ttail( e(N), abs( tstatsim ) ) in `row'
    }
    // In this section I perform the "step down" procedure
    // It replaces simulated p-vals with the minimum of the set of simulated p-vals associated with outcomes
    // that had actual p-vals greater than or equal to the one being replaced.
    // For each outcome, I keep count of how many times the ultimate simulated p-val is less than the actual observed p-val.
    forvalues countdown = `endvar'(-1)1 {
        // for the last row pvalsim[_n+1] is missing, and min() ignores missing arguments
        quietly replace pvalsim = min( pvalsim, pvalsim[_n+1] ) in `countdown'
        local depvar = varname[`countdown']
        if pvalsim[`countdown'] <= act_pval[`countdown'] {
            local `depvar'_ct_0 = ``depvar'_ct_0' + 1
        }
    }
}

// I perform the final adjustment that ensures that the ordering of adjusted p-vals is the same as the original ordering of actual p-vals.
// Note: this code enforces monotonicity below by going from the smallest (most significant) to the largest (least significant) p-value, but the original 2008 JASA paper describes the algorithm on p. 1486 as going from the largest p-value to the smallest p-value. The code is correct; the paper has a typo.
forvalues row = 1/`endvar' {
    local depvar = varname[`row']
    // for the first row pvals[0] is missing, and max() ignores missing arguments
    quietly replace pvals = max( round( ``depvar'_ct_0' / `nsims', 0.001 ), pvals[`row'-1] ) in `row'
}

// Report the observed and FWER-adjusted p-values now: restore discards the storage variables
list varname tstat act_pval pvals in 1/`endvar', noobs

restore


// B. Attrition balance
// Attrition balance test
** Since the data collection on a given day was a continuous class-room exercise format, attrition on any given variable of interest is representative of other variables too.
// Observation-level attrition indicator: 1 when sc is (system) missing, 0 otherwise
gen byte attrition_sc = (sc == .)
// Number of attrited observations in each treatment-arm-by-session cell ...
bys tr session: egen att_sc1 = sum(attrition_sc)
// ... and the same count as a share of 226
// NOTE(review): 226 is presumably the number of sampled students -- confirm
gen att_sc2 = att_sc1/226
// att_sc2 is constant within each tr-by-session cell, so a t-test on it has zero within-group
// variance and cannot test balance; compare the observation-level indicator across arms instead
ttest attrition_sc if session == 1, by(tr)


// C. Winsorized sample


// C.1 For effort and achievement
** Winsorising at 5% and 95% to ensure that outliers are not driving the results
// The winsorised copies exist only between preserve and restore
preserve

// winsor2 adds a <var>_w copy of each outcome, winsorised at the 5th/95th percentiles
winsor2 sc_d rc_d aser_e_d aser_m_d, suffix(_w) cuts(5 95)

// Difference-in-differences (treated = tr, period = session2) on each winsorised outcome
foreach y in sc_d_w rc_d_w aser_m_d_w aser_e_d_w {
    diff `y', t(tr) p(session2)
}

// Covariates added from the second specification onwards
local w_controls age gender c_19 c_12 c_7_1 past_record

// Per outcome, four random-effects models with id-clustered errors:
// (1) interact/session2 without controls, (2) with controls,
// (3) interact1/ses with controls, (4) interact2/ses2 with controls
eststo clear
foreach y in sc_d_w rc_d_w {
    eststo: xtreg `y' interact session2 tr if session2!=., re cl(id) robust
    eststo: xtreg `y' interact session2 tr `w_controls' if session2!=., re cl(id) robust
    eststo: xtreg `y' interact1 ses tr `w_controls' if ses!=., re cl(id) robust
    eststo: xtreg `y' interact2 ses2 tr `w_controls' if ses2!=., re cl(id) robust
}
// replace: without it esttab stops with an error whenever table19.rtf already exists (e.g. on a re-run)
esttab using table19.rtf, replace starlevels(* 0.10 ** 0.05  *** 0.01) se label

eststo clear
foreach y in aser_m_d_w aser_e_d_w {
    eststo: xtreg `y' interact session2 tr if session2 !=., re cl(id) robust
    eststo: xtreg `y' interact session2 tr `w_controls' if session2!=., re cl(id) robust
    eststo: xtreg `y' interact1 ses tr `w_controls' if ses!=., re cl(id) robust
    eststo: xtreg `y' interact2 ses2 tr `w_controls' if ses2!=., re cl(id) robust
}
esttab using table9.rtf, replace starlevels(* 0.10 ** 0.05  *** 0.01) se label

restore

** Winsorising at 1% and 99% to ensure that outliers are not driving the results
// The winsorised copies exist only between preserve and restore
preserve

// winsor2 adds a <var>_w copy of each outcome, winsorised at the 1st/99th percentiles
winsor2 sc_d rc_d aser_e_d aser_m_d, suffix(_w) cuts(1 99)

// Difference-in-differences (treated = tr, period = session2) on each winsorised outcome
foreach y in sc_d_w rc_d_w aser_m_d_w aser_e_d_w {
    diff `y', t(tr) p(session2)
}

// Covariates added from the second specification onwards
local w_controls age gender c_19 c_12 c_7_1 past_record

// Per outcome, four random-effects models with id-clustered errors:
// (1) interact/session2 without controls, (2) with controls,
// (3) interact1/ses with controls, (4) interact2/ses2 with controls
eststo clear
foreach y in sc_d_w rc_d_w {
    eststo: xtreg `y' interact session2 tr if session2!=., re cl(id) robust
    eststo: xtreg `y' interact session2 tr `w_controls' if session2!=., re cl(id) robust
    eststo: xtreg `y' interact1 ses tr `w_controls' if ses!=., re cl(id) robust
    eststo: xtreg `y' interact2 ses2 tr `w_controls' if ses2!=., re cl(id) robust
}
// replace: without it esttab stops with an error whenever table21.rtf already exists (e.g. on a re-run)
esttab using table21.rtf, replace starlevels(* 0.10 ** 0.05  *** 0.01) se label

eststo clear
foreach y in aser_m_d_w aser_e_d_w {
    eststo: xtreg `y' interact session2 tr if session2 !=., re cl(id) robust
    eststo: xtreg `y' interact session2 tr `w_controls' if session2!=., re cl(id) robust
    eststo: xtreg `y' interact1 ses tr `w_controls' if ses!=., re cl(id) robust
    eststo: xtreg `y' interact2 ses2 tr `w_controls' if ses2!=., re cl(id) robust
}
esttab using table10.rtf, replace starlevels(* 0.10 ** 0.05  *** 0.01) se label

restore

// C.2 For psychological outcomes


** Winsorising at 5% and 95% to ensure that outliers are not driving the results
// The winsorised copies exist only between preserve and restore
preserve

// winsor2 adds f3_w, f1_w and chs_d_w, winsorised at the 5th/95th percentiles
winsor2 f3 f1 chs_d, suffix(_w) cuts(5 95)

// Difference-in-differences (treated = tr, period = session2) on each winsorised outcome
foreach y in f3_w f1_w chs_d_w {
    diff `y', t(tr) p(session2)
}

// Covariates added from the second specification onwards
local w_controls age gender c_19 c_12 c_7_1 past_record

// Per outcome, four random-effects models with id-clustered errors:
// (1) interact/session2 without controls, (2) with controls,
// (3) interact1/ses with controls, (4) interact2/ses2 with controls
eststo clear
foreach y in f3_w f1_w chs_d_w {
    eststo: xtreg `y' interact session2 tr if session2!=., re cl(id) robust
    eststo: xtreg `y' interact session2 tr `w_controls' if session2!=., re cl(id) robust
    eststo: xtreg `y' interact1 ses tr `w_controls' if ses!=., re cl(id) robust
    eststo: xtreg `y' interact2 ses2 tr `w_controls' if ses2!=., re cl(id) robust
}

// replace: without it esttab stops with an error whenever table11.rtf already exists (e.g. on a re-run)
esttab using table11.rtf, replace starlevels(* 0.10 ** 0.05  *** 0.01) se label
restore

** Winsorising at 1% and 99% to ensure that outliers are not driving the results
// The winsorised copies exist only between preserve and restore
preserve

// winsor2 adds f3_w, f1_w and chs_d_w, winsorised at the 1st/99th percentiles.
// The third variable must be chs_d (not chs): every command below uses chs_d_w, and
// winsorising chs would create chs_w instead, leaving chs_d_w undefined.
winsor2 f3 f1 chs_d, suffix(_w) cuts(1 99)

// Difference-in-differences (treated = tr, period = session2) on each winsorised outcome
foreach y in f3_w f1_w chs_d_w {
    diff `y', t(tr) p(session2)
}

// Covariates added from the second specification onwards
local w_controls age gender c_19 c_12 c_7_1 past_record

// Per outcome, four random-effects models with id-clustered errors:
// (1) interact/session2 without controls, (2) with controls,
// (3) interact1/ses with controls, (4) interact2/ses2 with controls
eststo clear
foreach y in f3_w f1_w chs_d_w {
    eststo: xtreg `y' interact session2 tr if session2!=., re cl(id) robust
    eststo: xtreg `y' interact session2 tr `w_controls' if session2!=., re cl(id) robust
    eststo: xtreg `y' interact1 ses tr `w_controls' if ses!=., re cl(id) robust
    eststo: xtreg `y' interact2 ses2 tr `w_controls' if ses2!=., re cl(id) robust
}

// replace: without it esttab stops with an error whenever table12.rtf already exists (e.g. on a re-run)
esttab using table12.rtf, replace starlevels(* 0.10 ** 0.05  *** 0.01) se label


restore


// D. Attendance data (Worked upon in a separate file)
*** Schools offered this administrative data at a later date (April-July 2020)
*** This was not recorded in the PAP
** Attendance data is available from July 2019 to February 2020
**** Open the relevant file
// clear: the dataset in memory has been modified earlier in this do-file (e.g. the attrition
// variables), and -use- refuses to load a new file over modified data unless told to discard it
use "/Users/apple/Documents/PhD Economics/Year 3/Main experiment/Data/Stata files/AttendanceTrialAncova.dta", clear

// Data was cleaned and useful variables for the analysis were created
** The main outcome variable is a fraction of number of days that a student attended the school in a given month and the number of days that the school was open in that month

// Month-by-month regressions of the attendance fraction on the August fraction, treatment, age and gender
// NOTE(review): m1-m8 look like indicators for the month a row refers to -- confirm
foreach mth in sep oct nov dec jan feb {
    reg `mth'_f aug_f tr age gender if m1 == 1
}

// ses: 1 on rows flagged m7, 0 on rows flagged m2, missing elsewhere; interact is its product with tr
gen ses = 1 if m7 == 1
replace ses = 0 if m2 == 1
gen interact = ses*tr
ttest aug_f if m1==1, by(tr)

// I perform a DiD analysis between the month of August 2019 (pre-treatment) and January 2020 (post-treatment)
xtset id ses
egen attend = rsum (august january)
diff attend if ses!=., t(tr) p(ses)
xtreg attend ses tr interact gender age if ses!=.
xtreg attend ses tr interact gender age if ses!=. , cl(id) robust

// ses1 numbers the rows flagged m2..m7 as 1..6 and leaves m8 rows missing
gen ses1 = .
forvalues k = 2/7 {
    replace ses1 = `k' - 1 if m`k' == 1
}
replace ses1 = .  if m8 == 1
// (the original also set ses1 to missing on m1 rows at this point; that assignment is dropped
// because ses1 is not read before the m1 rows are set to 0 below)

gen attending1 = attending*100
// NOTE(review): ses_j is not generated anywhere in the visible part of this do-file; this
// replace fails unless the variable already exists in the attendance dataset -- confirm
replace ses_j = 1 if m7 == 1
egen attending2 = rsum(july august sept october novem decem january)
replace ses1 = 0 if m1== 1

// For overall effect on student attendance I perform ancova
// Sample: rows with ses1 equal to 1 through 5 (missing, 0 and 6 are excluded)
reg jan1 august sept october novem decem  m2 m3 m4 m5 tr if ses1!= . & ses1!=0 & ses1!= 6


// Data on teachers was collected at baseline (including their hope scores)
// Administrative data on teachers' subjects was received in April 2021
** Using this data, information on English and Math teachers is segregated
// Some steps in cleaning
// Purpose of this section: give each teacher slot k a subject code t<k>_sub next to its hope
// score t<k>_h. Individual teachers are picked out by the pair (school, hope score).
// Further down, code 1 is treated as English and code 2 as Mathematics.
// NOTE(review): the last -use- visible above this section loads the attendance file; confirm that
// the merged dataset holding session, school and t1_h-t10_h is in memory before running this.
// ed (= edit) opens the Data Editor on the listed variables; it is an interactive inspection step.
ed id school tr t1_h t2_h t3_h t4_h t5_h t6_h t7_h t8_h t9_h t10_h if session == 1
// Trial version of a subject code for teacher slot 1: default 1, then overridden for
// specific (school, hope score) pairs
gen sub_codetry = 1
order sub_codetry, after (t1_h)
replace sub_codetry = 2 if school == 6 & t1_h == 36
replace sub_codetry = 6 if school == 4 & t1_h == 59
replace sub_codetry = 1 if school == 1 & t1_h == 55

// Teacher slot 7: default code 1, placed right after its hope score in the variable order
gen t7_sub = 1
order t7_sub, after(t7_h)
ed school  t1_h t1_sub t2_h t2_sub t3_h t3_sub t4_h t4_sub t5_h t5_sub t6_h t6_sub t7_h t7_sub t8_h t9_h t10_h if session == 1
// Code 0 marks rows where this slot has no hope score recorded
 replace t7_sub = 0 if t7_h == .
replace t7_sub = 2 if school == 1 & t7_h == 59
replace t7_sub = 5 if school == 2 & t7_h == 54
replace t7_sub = 2 if school == 5 & t7_h == 52
// Teacher slots 8 and 9: same pattern (default 1, 0 when the hope score is missing)
gen t8_sub = 1
order t8_sub, after(t8_h)
gen t9_sub = 1
order t9_sub, after(t9_h)
replace t8_sub = 0 if t8_h == .
replace t9_sub = 0 if t9_h == .
ed school  t1_h t1_sub t2_h t2_sub t3_h t3_sub t4_h t4_sub t5_h t5_sub t6_h t6_sub t7_h t7_sub t8_h t8_sub t9_h t9_sub t10_h if session == 1
replace t8_sub = 5 if school == 1 & t8_h == 48
replace t9_sub = 7 if school == 2 & t9_h == 60
// Teacher slot 10: same pattern
gen t10_sub = 1
replace t10_sub = 0 if t10_h == .
 order t10_sub, after(t10_h)
replace t10_sub = 3 if school == 2 & t10_h == 56
// Remaining overrides for slots 8 and 9
replace t8_sub = 2 if school == 5 & t8_h == 60
replace t9_sub = 4 if school == 5 & t9_h == 55
// The steps above are followed for all 10 subject codes created per teacher, t1_sub to t10_sub
// (the corresponding commands for t1_sub-t6_sub are not shown in this section)
// Creating a dummy for English teachers (subject code 1), one per teacher slot t1-t10.
// Section headers here start with // rather than ///: in a do-file /// is the line-continuation
// marker, so a header written with it can absorb the command on the following line.
local eng_dummies
forvalues k = 1/10 {
    gen t`k'_sub1 = t`k'_sub
    order t`k'_sub1, after(t`k'_sub)
    // code 1 stays 1; codes 2-7 and 0 (set upstream where the hope score is missing) become 0
    recode t`k'_sub1 (1=1) (0 2 3 4 5 6 7 = 0)
    local eng_dummies `eng_dummies' t`k'_sub1
}

// Number of teacher slots coded as English on each row
egen eng_sub = rsum(`eng_dummies')

// Creating a dummy for Mathematics teachers (subject code 2), one per teacher slot t1-t10
local math_dummies
forvalues k = 1/10 {
    gen t`k'_sub2 = t`k'_sub
    order t`k'_sub2, after(t`k'_sub1)
    // code 2 becomes 1; codes 1, 3-7 and 0 become 0
    recode t`k'_sub2 (2=1) (0 1 3 4 5 6 7 = 0)
    local math_dummies `math_dummies' t`k'_sub2
}

// Number of teacher slots coded as Mathematics on each row
egen math_sub = rsum(`math_dummies')


// Creating the average hope score per English teacher for each student:
// e<k> is slot k's hope score when that slot is English and 0 when it is another subject
local eng_terms
forvalues k = 1/10 {
    gen e`k' = t`k'_sub1*t`k'_h
    local eng_terms `eng_terms' e`k'
}

// rsum treats missing terms as 0; dividing by eng_sub = 0 gives a missing average
egen eng_t_h = rsum(`eng_terms')
gen eng_t_hope = eng_t_h/eng_sub


// Creating the average hope score per Mathematics teacher for each student (same construction)
local math_terms
forvalues k = 1/10 {
    gen math`k' = t`k'_sub2*t`k'_h
    local math_terms `math_terms' math`k'
}

egen math_t_h = rsum(`math_terms')
gen math_t_hope = math_t_h/math_sub


// Summary statistics of the two averages on session-1 rows, overall and by treatment arm
summ eng_t_hope math_t_hope if session == 1
tabstat  eng_t_hope math_t_hope if session == 1, statistics( count mean sd min max median ) by(tr) columns(statistics)

// From these two summaries alone it is clear that English teachers' hope scores are higher than those of Mathematics teachers.
// Also, across the two treatment groups the teacher hope scores are balanced for both subjects.
// I do not find any heterogeneity across above-median hope score teachers (in English)
// I do not find any heterogeneity across above-median hope score teachers (in Mathematics)

// gen eng_abovemedian = 1 if eng_t_hope>56
// replace eng_abovemedian = 0 if eng_t_hope<= 56
// gen eng_interact = eng_abovemedian*tr 

// gen math_abovemedian = 1 if math_t_hope>56
// replace math_abovemedian = 0 if math_t_hope<= 56
// gen math_interact = math_abovemedian*tr 


// gen eng_abovemedian = 1 if eng_t_hope>56
// replace eng_abovemedian = 0 if eng_t_hope<= 56
// gen eng_interact = eng_abovemedian*tr 

// gen math_abovemedian = 1 if math_t_hope>56
// replace math_abovemedian = 0 if math_t_hope<= 56
// gen math_interact = math_abovemedian*tr 


**** Notes:
// This file is comprehensive and indicative, but not exhaustive of all the commands used in the cleaning and analysis process.
// The file was saved on 21 June 2021.
// Data is anonymous (following CoSS research ethics guidelines).
// Table and figure numbers in the thesis do not correspond to those in the file.  
// For any questions, please contact the author at: p.bhan.1@research.gla.ac.uk.


*** End of do-file ***