* Importing data into Stata.
* Every import uses the clear option: without it Stata refuses to replace
* unsaved data in memory ("no; dataset in memory has changed since last saved").

* Work relative to the project folder
cd "C:\Users\bailey\EPICODE"

* Excel: firstrow treats the first spreadsheet row as variable names
import excel "C:\Users\bailey\EPICODE\examplesurvey.xlsx", sheet("<sheetname>") firstrow clear

* Template: replace <location\...> and <sheetname> with your own values
import excel "<location\dataset.xlsx>", sheet("<sheetname>") firstrow clear

* Import several workbooks in turn, saving each one as a .dta file
import excel "<location\dataset1.xlsx>", sheet("<sheetname>") firstrow clear
save "<location\dataset1.dta>", replace
import excel "<location\dataset2.xlsx>", sheet("<sheetname>") firstrow clear
save "<location\dataset2.dta>", replace

* CSV: the path is quoted so folder names containing spaces still work
import delimited "C:\Users\bailey\EPICODE\examplesurvey.csv", clear

* CSV files addressed relative to the current working directory
cd "C:\Users\bailey\EPICODE"
import delimited dataset1.csv, clear
save "dataset1.dta", replace
cd "C:\Users\bailey\EPICODE\data"
import delimited dataset2.csv, clear
save "dataset2.dta", replace
/* Export the WORK copy of the survey to a Stata .dta file. */
PROC EXPORT DATA= WORK.EXAMPLESURVEY
    OUTFILE= "C:\Users\bailey\EPICODE\examplesurvey2.dta"
    DBMS=STATA REPLACE;
RUN;

/* Assign permanent libraries. Each LIBNAME is its own statement and must end
   with a semicolon, otherwise the next statement is parsed as part of it. */
libname data "C:\Users\bailey\EPICODE\data";
libname files "C:\Users\bailey\EPICODE\output";

/* Excel workbook -> WORK.examplesurvey. The statement name is GETNAMES. */
PROC IMPORT OUT= WORK.examplesurvey
    DATAFILE= "C:\Users\bailey\EPICODE\examplesurvey.xlsx"
    DBMS=XLSX REPLACE;
    SHEET="auto";
    GETNAMES=YES;
RUN;

/* Keep a permanent copy. The libref must be one that is actually assigned;
   "data" is the library declared above. */
DATA data.examplesurvey;
    SET WORK.examplesurvey;
RUN;

/* CSV file -> WORK.examplesurvey; DATAROW=2 starts reading data on the second line. */
PROC IMPORT OUT= WORK.examplesurvey
    DATAFILE= "C:\Users\bailey\EPICODE\examplesurvey.csv"
    DBMS=CSV REPLACE;
    GETNAMES=YES;
    DATAROW=2;
RUN;

DATA data.examplesurvey;
    SET WORK.examplesurvey;
RUN;

/* Stata .dta file -> WORK.EXAMPLESURVEY2.
   NOTE(review): this path is under C:\Users\baile\Dropbox, unlike the other
   examples - confirm it is intended. */
PROC IMPORT OUT= WORK.EXAMPLESURVEY2
    DATAFILE= "C:\Users\baile\Dropbox\EPICODE\survey\examplesurvey.dta"
    DBMS=STATA REPLACE;
RUN;

/* Copy the table that was just imported (EXAMPLESURVEY2), not the earlier CSV import. */
DATA data.examplesurvey;
    SET WORK.EXAMPLESURVEY2;
RUN;
# Work relative to the project folder.
setwd("C:/Users/bailey/EPICODE")

# Native R format: read an .rds file, then write the object back out.
# Assignment is `<-`; a bare `<` would be a comparison and leave nothing assigned.
examplesurvey <- readRDS("examplesurvey.rds")
saveRDS(examplesurvey, file="examplesurvey_final.rds")

# Excel workbook via readxl.
library(readxl)
example_survey_1 <- read_excel("C:/Users/bailey/EPICODE/examplesurvey1.xlsx")

# CSV via base R.
example.survey <- read.csv("C:/Users/bailey/EPICODE/examplesurvey.csv")

# SAS and Stata files via haven; the package only needs installing and loading once.
install.packages('haven')
library(haven)
examplesurvey <- read_sas("examplesurvey.sas7bdat")
examplesurvey <- read_dta("C:/Users/bailey/EPICODE/examplesurvey.dta")
* SYNTAX: reshape long <stub>, i(<id>) j(<time>)
* <stub> is the common prefix of the wide variables (status2010, status2011, ...),
* i() names the subject identifier and j() names the new time variable.
reshape long status, i(id) j(year)

* Declare the panel structure. xttrans tabulates transitions between consecutive
* time points, so the time variable created by reshape is declared as well.
xtset id year

* Transition probabilities for status
xttrans status
* Same table with frequencies added
xttrans status, freq
* NOTE(review): confirm that xttrans accepts a by() option in your Stata version;
* if it does not, restrict with -if- (one call per level of sex) instead.
xttrans status, by(sex)
* Research question:
* how is diabetes associated with hypertension stage,
* after adjusting for sex and age?
* SYNTAX: mlogit <outcome> <covariates>, base(#)
* base(#) tells Stata which level of the outcome variable
* is the reference - here it is htn = 0 (normal)
set more off
set linesize 200
* Fit the multinomial logistic model
mlogit htn diabetes female age, base(0)
* Fit it again, then replay the results as relative-risk ratios (exponentiated coefficients)
mlogit htn diabetes female age, base(0)
mlogit, rrr
* Is the effect of diabetes on Elevated BP vs normal BP similar to its effect on Stage 2 HTN vs normal BP?
test [1]diabetes = [3]diabetes
Bailey DeBarmore is a doctoral student at the University of North Carolina at Chapel Hill studying epidemiology. Find her on Twitter @BaileyDeBarmore and blogging for the American Heart Association on the Early Career Voice blog. 
***********************************************
* Calculating SMR weights where exposure = 0, 1
***********************************************;
/* Macro variables are assigned with %LET; replace the <...> placeholders. */
%let data=<data>;
%let y=<outcome>;
%let x=<exposure>;
%let id=<id>;
*Estimate the predicted probability of exposure given covariates;
proc logistic data=&data desc;
model &x=<covariates>;
output out=pred p=p1;
run;
*Generate the weights by exposure status. The exposed group is the target, so their weight is 1;
*The unexposed are weighted by the exposure odds p1/(1 - p1);
data <newdata>;
set pred;
p0 = 1 - p1;
odds = p1/p0;
if &x=1 then wt=1;
else wt=odds;
run;
*Final weighted analysis;
proc logistic data=<newdata> desc;
weight wt;
model &y = &x;
run;
***********************************************
* Calculating SMR weights where exposure = categorical
***********************************************;
/* Macro variables are assigned with %LET; replace the <...> placeholders. */
%let data=<data>;
%let y=<outcome>;
%let x=<exposure>;
%let id=<id>;
*Estimate the predicted probability of exposure given covariates (generalized logit link);
proc logistic data=&data desc;
model &x=<covariates> /LINK= glogit;
output out=pred p=p1;
run;
/* NOTE(review): with LINK=glogit the OUTPUT data set is expected to hold one
   predicted probability per exposure level for each observation; confirm that
   the p1 used below is the probability of the target level (&x=1). */
*Generate the weights by exposure status. The target group (&x=1) gets weight 1;
data <newdata>;
set pred;
p0 = 1 - p1;
odds = p1/p0;
if &x=1 then wt=1;
else wt=odds;
run;
*Final weighted analysis;
proc logistic data=<newdata> desc;
weight wt;
model &y = &x;
run;
******************************************
* Calculating SMR weights with teffects ipw
******************************************
* SYNTAX:
* teffects ipw (<outcome>) (<exposure> <covariates>), atet
* <outcome> is your outcome variable, <exposure> is your exposure variable,
* and <covariates> is the list of covariates used to generate the weights.
* The atet option requests the effect among the exposed, which is what SMR weighting targets.
*
* Example: Binary outcome
*   Outcome    = lowbirthwt
*   Exposure   = maternalsmoke
*   Covariates = maternalage nonwhite
* teffects builds the weights and applies them in a single step
teffects ipw (lowbirthwt) (maternalsmoke maternalage nonwhite), atet
* Example: Continuous outcome
*   Outcome    = birthwt
*   Exposure   = maternalsmoke
*   Covariates = maternalage nonwhite
* NOTE(review): probit sits inside the second (exposure) parentheses, so it
* presumably selects the model for the exposure rather than for the outcome -
* confirm against the teffects ipw documentation.
teffects ipw (birthwt) (maternalsmoke maternalage nonwhite, probit), atet
Unstabilized: Create a pseudopopulation 2x the size of our observed population - one copy where everyone is exposed and one where everyone is unexposed. Stabilized: Create a pseudopopulation that maintains the original population size, but adjust the covariate distribution within each stratum of exposure by upweighting and downweighting people to match the overall covariate distribution.
******************************************
* Calculating IPTW
*****************************************;
/* Macro variables are assigned with %LET; replace the <...> placeholders. */
%let data=<data>;
%let y=<outcome>;
%let x=<exposure>;
%let id=<id>;
*Estimate denominator - output a dataset with results of regression called denom, with the resulting probabilities stored in variable d;
proc logistic data=&data desc;
model &x = <covariates>;
output out=denom p=d;
run;
*Generate numerator for stabilized weights - output a dataset with results of regression called num, with the resulting probabilities stored in variable n - note that there is nothing on the right side of the equation because the numerator will simply be P(A=a), where a = observed exposure status;
proc logistic data=&data desc;
model &x=;
output out=num p=n;
run;
*Generate stabilized and unstabilized weights by merging the datasets with regression output (merge on the unique identifier in your dataset, &id);
*A BY-merge needs every input dataset sorted by &id;
data <newdata>;
merge &data denom num;
by &id;
if &x=1 then do;
uw = 1/d;
sw = n/d;
end;
*Remember we can use 1 - P(exposed) for the unexposed weight components;
else if &x=0 then do;
uw=1/(1-d);
sw=(1-n)/(1-d);
end;
run;
*Check the distribution of your IPTW - the mean should be 1. Is the sum for uw twice the sum of sw? why? is the range of uw greater than sw? why?;
proc means data=<newdata> mean sum min max;
var uw sw;
run;
*You can check to see if your exposure and covariates are associated in your new pseudopopulation (<newdata>);
proc logistic data=<newdata> desc;
weight sw;
model &x=<covariates>;
run;
*Now you can run your main analyses and apply the weights using the weight statement - use the sw variable for stabilized weights, and uw for unstabilized weights - you can use proc genmod, glm, logistic, etc. Shown below with logistic: we now model &y on &x and do not need the covariates, because the confounder -> x arrow is accounted for by the sw weights;
proc logistic data=<newdata> desc;
weight sw;
model &y = &x;
run;
******************************************
* Calculating IPTW with teffects ipw
******************************************
* SYNTAX:
* teffects ipw (<outcome>) (<exposure> <covariates>), ate
* <outcome> is your outcome variable, <exposure> is your exposure variable,
* and <covariates> is the list of covariates used to generate the weights.
* The ate option requests the effect in the whole population, which is what IPTW targets.
*
* Example: Binary outcome
*   Outcome    = lowbirthwt
*   Exposure   = maternalsmoke
*   Covariates = maternalage nonwhite
* teffects builds the weights and applies them in a single step
teffects ipw (lowbirthwt) (maternalsmoke maternalage nonwhite), ate
* Example: Continuous outcome
*   Outcome    = birthwt
*   Exposure   = maternalsmoke
*   Covariates = maternalage nonwhite
* NOTE(review): probit sits inside the second (exposure) parentheses, so it
* presumably selects the model for the exposure rather than for the outcome -
* confirm against the teffects ipw documentation.
teffects ipw (birthwt) (maternalsmoke maternalage nonwhite, probit), ate
* Manual IPTW for a binary exposure (treatment)
* Model the exposure, then store the predicted probability of treatment in p
logistic treatment vars
predict p
* Unstabilized weights: 1/P(treated) for the treated, 1/(1 - P(treated)) for the untreated
gen iptw = 1/p if treatment==1
replace iptw = 1/(1-p) if treatment==0
* To calculate stabilized IPTW, run tab treatment and substitute the
* proportion treated for X below
tab treatment
gen siptw = X/p if treatment==1
replace siptw = (1-X)/(1-p) if treatment==0
/* Denominator model for a multi-level treatment: generalized logit link.
   Predicted probabilities are written to data set denom_ipw in variable d.
   Replace the ... after DATA= with your input data set. */
PROC LOGISTIC DATA=...;
MODEL treatment = vars / LINK=glogit;
OUTPUT OUT=denom_ipw P=d;
RUN;
* IPTW for a four-level treatment (coded 0-3) from a multinomial logit model
mlogit treatment vars

* Predicted probability of each treatment level
forvalues k = 0/3 {
    predict p`k', outcome(`k')
}

* Unstabilized weight: inverse probability of the level actually received
gen iptw=.
forvalues k = 0/3 {
    replace iptw=1/p`k' if treatment==`k'
}

* Stabilized weight: marginal proportion of each level over its predicted probability
local prop0 0.6
local prop1 0.14
local prop2 0.2
local prop3 0.06
gen siptw=.
forvalues k = 0/3 {
    replace siptw=`prop`k''/p`k' if treatment==`k'
}
Calculating SMR and IPW - EPICODE - SAS.txt
Calculating SMR and IPW - EPICODE - Stata
Coding IPW and SMR in SAS and Stata - PDF for teachers
Bailey DeBarmore is a doctoral student at the University of North Carolina at Chapel Hill studying epidemiology. Find her on Twitter @BaileyDeBarmore and blogging for the American Heart Association on the Early Career Voice blog. 
Bailey DeBarmore is a doctoral student at the University of North Carolina at Chapel Hill studying epidemiology. Find her on Twitter @BaileyDeBarmore and blogging for the American Heart Association on the Early Career Voice blog.
/* Macro variables are assigned with %LET; replace the <...> placeholders.
   strata is used by the stratified example at the bottom. */
%let data=<dataset>;
%let outcome=<outcome>;
%let var=<var>;
%let strata=<strata>;

/* Continuous or binary predictor: odds ratio per 1-unit increase in &var. */
proc genmod data=&data descending;
model &outcome = &var / link=logit dist=binomial type3;
estimate "&var" &var 1 / exp;
run;

/* Categorical predictor, reference level '1': one ESTIMATE per non-reference
   level; the position of the 1 picks the level. */
proc genmod data=&data descending;
class &var (ref='1') / param=ref;
model &outcome = &var / link=logit dist=binomial type3;
estimate "OR 30-39" &var 1 / exp;
estimate "OR 40-49" &var 0 1 / exp;
estimate "OR 50-59" &var 0 0 1 / exp;
estimate "OR 60-69" &var 0 0 0 1 / exp;
estimate "OR 70-79" &var 0 0 0 0 1 / exp;
estimate "OR 80+" &var 0 0 0 0 0 1 / exp;
run;

/* Three-level predictor with reference level '2'. Uses &data like the other
   examples instead of a hard-coded data set name. */
proc genmod data=&data descending;
class &var (ref='2') / param=ref;
model &outcome = &var / link=logit dist=binomial;
estimate "OR Public" &var 1 / exp;
estimate "OR Other" &var 0 1 / exp;
run;

/* Stratified analysis: BY-group processing needs the data sorted by &strata. */
proc sort data=&data out=sort;
by &strata;
run;
proc genmod data=sort descending;
by &strata;
class &var (ref='1') / param=ref;
model &outcome = &var / link=logit dist=binomial type3;
estimate "OR q2" &var 1 / exp;
estimate "OR q3" &var 0 1 / exp;
estimate "OR q4" &var 0 0 1 / exp;
run;
Bailey DeBarmore is a doctoral student at the University of North Carolina at Chapel Hill studying epidemiology. Find her on Twitter @BaileyDeBarmore and blogging for the American Heart Association on the Early Career Voice blog.
import zepid as ze
import matplotlib.pyplot as plt
# Load the sample cohort bundled with zEpid
df = ze.load_sample_data(timevary=False)

# Functional form assessment of age0 against the outcome 'dead'
ze.graphics.func_form_plot(
    df,
    outcome='dead',
    var='age0',
    discrete=True,
)
plt.show()
Warning: missing observations of model variables are dropped
0 observations were dropped from the functional form assessment
Generalized Linear Model Regression Results
==============================================================================
Dep. Variable: dead No. Observations: 547
Model: GLM Df Residuals: 545
Model Family: Binomial Df Model: 1
Link Function: logit Scale: 1.0000
Method: IRLS Log-Likelihood: -239.25
Date: Tue, 26 Jun 2018 Deviance: 478.51
Time: 08:25:47 Pearson chi2: 553.
No. Iterations: 5 Covariance Type: nonrobust
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
Intercept -3.6271 0.537 -6.760 0.000 -4.679 -2.575
age0 0.0507 0.013 4.012 0.000 0.026 0.075
==============================================================================
AIC: 482.50783872152573
BIC: -2957.4167585984537
# Knot locations are defined once and shared by the spline and the guide lines,
# so the plotted guides always mark the knots actually used in the model.
knots = [30, 40, 50]
df[['rqs0','rqs1']] = ze.spline(df,var='age0',n_knots=len(knots),knots=knots,restricted=True)
# Functional form assessment with age0 modelled as a restricted spline
ze.graphics.func_form_plot(df,outcome='dead',var='age0',f_form='age0 + rqs0 + rqs1',discrete=True)
# Dashed vertical guides at each knot (an empty linestyle string draws nothing)
plt.vlines(knots,0,0.85,colors='gray',linestyles='--')
plt.show()
#Loading necessary packages to fit model
import zepid as ze
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.genmod.families import family,links
#Loading the example data within zEpid
df = ze.load_sample_data(timevary=False)
#Creating polynomial terms
df['cd40sq'] = df['cd40']**2
df['cd40cu'] = df['cd40']**3
#Generating stabilized IPTW for ART as exposure
model = 'male + age0 + cd40 + cd40sq + cd40cu + dvl0'
df['iptw'] = ze.ipw.iptw(df,treatment='art',model_denominator=model,stabilized=True)
#Fitting a GEE model with the statsmodels library to obtain the risk of death by ART exposure (Risk Difference)
#Independence working correlation, clustering on id
ind = sm.cov_struct.Independence()
#The family takes a link *instance*, so the identity link is instantiated here;
#passing the bare class is rejected by newer statsmodels releases
f = sm.families.family.Binomial(sm.families.links.identity())
linrisk = smf.gee('dead ~ art',df['id'],df,cov_struct=ind,family=f,weights=df['iptw']).fit()
print(linrisk.summary())
GEE Regression Results
===================================================================================
Dep. Variable: dead No. Observations: 547
Model: GEE No. clusters: 547
Method: Generalized Min. cluster size: 1
Estimating Equations Max. cluster size: 1
Family: Binomial Mean cluster size: 1.0
Dependence structure: Independence Num. iterations: 2
Date: Tue, 26 Jun 2018 Scale: 1.000
Covariance type: robust Time: 13:56:22
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 0.1817 0.018 10.008 0.000 0.146 0.217
art -0.0826 0.037 -2.205 0.027 -0.156 -0.009
==============================================================================
Skew: 1.7574 Kurtosis: 1.1278
Centered skew: 0.0000 Centered kurtosis: 3.0000
==============================================================================
pip install zepid
In the background, zEpid uses:
 If you are interested in conducting analyses in Python, I also recommend the packages:

Paul Zivich is an epidemiology PhD student at University of North Carolina at Chapel Hill. His interests include infectious disease epidemiology and causal inference in the presence of interference. To request features or ask questions, contact him on GitHub at /pzivich/zepid, on Twitter @zEpidpy, or by email. 
/* Pitfall: without OF, bp1-bp3 is a subtraction, so this is the mean of one number */
avg_bp=mean(bp1-bp3)
/* NOTE(review): AVERAGE does not appear to be a SAS function - shown as a non-working attempt; confirm */
avg_bp=average(bp1-bp3)
/* Correct: OF turns bp1-bp3 into the variable list bp1, bp2, bp3 */
avg_bp=mean(of bp1-bp3)
/* Manual alternative: sum of the list divided by the number of variables */
avg_bp=SUM(of bp1-bp3)/3
/* General forms: variable list with OF, or explicit comma-separated arguments */
NEWVAR = mean(of VAR1-VAR3)
NEWVAR = mean(VAR1, VAR2, VAR3)
Bailey DeBarmore is a doctoral student at the University of North Carolina at Chapel Hill studying epidemiology. Find her on Twitter @BaileyDeBarmore and blogging for the American Heart Association on the Early Career Voice blog. 
* Binary outcome: diabetes prevalence across age groups, with a nonparametric test for trend
tabulate diabetes agegrp, column
nptrend diabetes, by(agegrp)

* The same, separately by sex
bysort male: tabulate diabetes agegrp, column
nptrend diabetes if male==0, by(agegrp)
nptrend diabetes if male==1, by(agegrp)

* Continuous outcome: mean (SD) BMI across age groups, with a test for trend
tabstat bmi, by(agegrp) statistics(mean sd) format(%9.2f)
nptrend bmi, by(agegrp)

* The same, separately by sex
bysort male: tabstat bmi, by(agegrp) statistics(mean sd) format(%9.2f)
nptrend bmi if male==0, by(agegrp)
nptrend bmi if male==1, by(agegrp)

* Adjusted comparisons of BMI across age groups
anova bmi agegrp race
regress bmi i.agegrp race
* Contrasts after the linear model: r. reference, a. adjacent, ar. reverse adjacent,
* p. orthogonal polynomial (trend)
contrast r.agegrp
contrast a.agegrp
contrast ar.agegrp
contrast p.agegrp, noeffects

* Trend in diabetes across age groups from a logistic model (coefficients)
logit diabetes i.agegrp race bmi
contrast p.agegrp, noeffects
* The same model reported as odds ratios
logistic diabetes i.agegrp race bmi
contrast p.agegrp, noeffects
/* Template: trend test for a two-way table.
   Replace [data], row and col with your data set and variables. */
PROC FREQ data=[data];
TABLES row * col / trend;
run;
/* Example: trend in diabetes across age groups in the stroke data set. */
PROC FREQ data=stroke;
TABLES diabetes * agegrp / trend;
run;
/* Wilcoxon rank-based comparison of bmi across the agegrp classes.
   The EXACT statement below is commented out; uncomment it to request the exact test. */
PROC NPAR1WAY data=stroke WILCOXON;
CLASS agegrp;
VAR bmi;
*exact wilcoxon;
run;
/* Logistic model with agegrp entered as a single term, plus a TEST of that term.
   Replace [data] with your data set. */
PROC LOGISTIC data=[data];
MODEL diabetes = agegrp bmi race;
TEST agegrp;
run;
Bailey DeBarmore is a doctoral student at the University of North Carolina at Chapel Hill studying epidemiology. Find her on Twitter @BaileyDeBarmore and blogging for the American Heart Association on the Early Career Voice blog. 