#################################################################################################################################################################
# Purpose: Make forest plot using the EWAS results from multiple cohorts and their meta result (by rma). 
#          Note: CIs are calculated from the se values of the EWAS results and numbers of samples are used just for plot text
# Request: Stephanie London, Mi Kyeong, 11/18/2019, Forest plots using R. 
# Input: /ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/ARIC|GTP|Inchianti.....
#        /ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/off-methylation-smoke-all-models123.txt 
# Output: PACE-Charge_forestplot.pdf 
# Ref: SampleCode_ForestPlot_forJP_MKL_11182019.R; modified_to_Sinjini_rma.r 
# WDir: /ddn/gs1/home/jinj2/LondonData/PACE/PACE-CHARGE_smoking/Check/Jianping/Charge_cohort_forestPlot/ 
# Script: forestPlot_cg05575921.r
# Dec. 27, 2019, Jianping Jin
#################################################################################################################################################################

library(data.table)
library(metafor)
library(forestplot)

## Read in data (p-value, coef, se) from cohorts
fhs <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/off-methylation-smoke-all-models123.txt"))
fhs_red <- fhs[,which(colnames(fhs)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SD_SmokerVsNeverSmokerModel2Fave"))]
colnames(fhs_red) = c(".id","pvalue","coef","se")

aric <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/ARIC_smoking_EWAS_model2-combined.csv"))
aric_red <- aric[,which(colnames(aric)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(aric_red) <- c(".id","coef","se","pvalue")

gtp <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/GTP_smoking_model2-combined.csv"))
gtp_red <- gtp[,which(colnames(gtp)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(gtp_red) <- c(".id","pvalue","coef","se")

inchianti <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/Inchianti_smoking_EWAS_model2-combined.csv"))
inchianti_red <- inchianti[,which(colnames(inchianti)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(inchianti_red) <- c(".id","pvalue","coef","se")

kora <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/KORA_smoking_EWAS_model2-combined.csv"))
kora_red <- kora[,which(colnames(kora)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(kora_red) <- c(".id","pvalue","coef","se")

lbc1921 <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/LBC1921_smoking_EWAS_model2-combined.csv"))
lbc1921_red <- lbc1921[,which(colnames(lbc1921)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(lbc1921_red) <- c(".id","pvalue","coef","se")

lbc1936 <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/LBC1936_smoking_EWAS_model2-combined.csv"))
lbc1936_red <- lbc1936[,which(colnames(lbc1936)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(lbc1936_red) <- c(".id","pvalue","coef","se")

nas <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/NAS_smoking_EWAS_model2-combined.csv"))
nas_red <- nas[,which(colnames(nas)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(nas_red) <- c(".id","pvalue","coef","se")

rs <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/RS_smoking_EWAS_model2-combined.csv"))
rs_red <- rs[,which(colnames(rs)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(rs_red) <- c(".id","pvalue","coef","se")

goldn <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/GOLDN_smoking_EWAS_model2-combined.csv"))
goldn_red <- goldn[,which(colnames(goldn)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(goldn_red) <- c(".id","coef","se","pvalue")


mesa <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/MESA_smoking_model2-combined.csv"))
mesa_red <- mesa[,which(colnames(mesa)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(mesa_red) <- c(".id","coef","se","pvalue")

epic <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/EPIC_smoking_model2-combined.csv"))
epic_red <- epic[,which(colnames(epic)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(epic_red) <- c(".id","pvalue","coef","se")

norfolk <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/EPICNorfolk_smoking_model2-combined.csv"))
norfolk_red <- norfolk[,which(colnames(norfolk)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(norfolk_red) <- c(".id","pvalue","coef","se")


chsea <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/CHS-EA_smoking_model2-combined.csv"))
chsea_red <- chsea[,which(colnames(chsea)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(chsea_red) <- c(".id","pvalue","coef","se")

chsaa <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/CHS-AA_smoking_model2-combined.csv"))
chsaa_red <- chsaa[,which(colnames(chsaa)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(chsaa_red) <- c(".id","pvalue","coef","se")

genoa <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/GENOA_smoking_model2-combined.csv"))
genoa_red <- genoa[,which(colnames(genoa)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))]
colnames(genoa_red) <- c(".id","pvalue","coef","se")

########################################################################################################################
## generate all cpgs
options(digits=4)

all_cpgs <- Reduce(union, list(fhs_red$.id, aric_red$.id, gtp_red$.id, inchianti_red$.id, kora_red$.id, lbc1921_red$.id,
lbc1936_red$.id, nas_red$.id, rs_red$.id, goldn_red$.id, mesa_red$.id, epic_red$.id, norfolk_red$.id, chsea_red$.id,
chsaa_red$.id, genoa_red$.id));
length(all_cpgs)

## Take target CpG(s)
i <- which(all_cpgs=="cg05575921")
cpg <- all_cpgs[i]    
coef_cpg <- c(fhs_red[which(fhs_red$.id==cpg),"coef"],
aric_red[which(aric_red$.id==cpg),"coef"],
gtp_red[which(gtp_red$.id==cpg),"coef"],
inchianti_red[which(inchianti_red$.id==cpg),"coef"],
kora_red[which(kora_red$.id==cpg),"coef"],
lbc1921_red[which(lbc1921_red$.id==cpg),"coef"],
lbc1936_red[which(lbc1936_red$.id==cpg),"coef"],
nas_red[which(nas_red$.id==cpg),"coef"],
rs_red[which(rs_red$.id==cpg),"coef"],
goldn_red[which(goldn_red$.id==cpg),"coef"],
mesa_red[which(mesa_red$.id==cpg),"coef"],
epic_red[which(epic_red$.id==cpg),"coef"],
norfolk_red[which(norfolk_red$.id==cpg),"coef"],
chsea_red[which(chsea_red$.id==cpg),"coef"],
chsaa_red[which(chsaa_red$.id==cpg),"coef"],
genoa_red[which(genoa_red$.id==cpg),"coef"])

se_cpg <- c(fhs_red[which(fhs_red$.id==cpg),"se"],
aric_red[which(aric_red$.id==cpg),"se"],
gtp_red[which(gtp_red$.id==cpg),"se"],
inchianti_red[which(inchianti_red$.id==cpg),"se"],
kora_red[which(kora_red$.id==cpg),"se"],
lbc1921_red[which(lbc1921_red$.id==cpg),"se"],
lbc1936_red[which(lbc1936_red$.id==cpg),"se"],
nas_red[which(nas_red$.id==cpg),"se"],
rs_red[which(rs_red$.id==cpg),"se"],
goldn_red[which(goldn_red$.id==cpg),"se"],
mesa_red[which(mesa_red$.id==cpg),"se"],
epic_red[which(epic_red$.id==cpg),"se"],
norfolk_red[which(norfolk_red$.id==cpg),"se"],
chsea_red[which(chsea_red$.id==cpg),"se"],
chsaa_red[which(chsaa_red$.id==cpg),"se"],
genoa_red[which(genoa_red$.id==cpg),"se"])

coef_se <- data.frame(coef_cpg, se_cpg)
rownames(coef_se) <- c("fhs","aric","gtp","inchianti","kora","lbc1921","lbc1936","nas","rs","goldn","mesa","epic","norfolk","chsea","chsaa","genoa")

## Metal analysis using RMA (robust multiple average) 
res_fe <- rma(yi=coef_se$coef_cpg, sei=coef_se$se_cpg, method="FE",weighted=TRUE)
res_dl <- rma(yi=coef_se$coef_cpg, sei=coef_se$se_cpg, method="DL",weighted=TRUE)
#options(digits=4)

Mat <- rbind(coef_se, c(res_fe$beta, res_fe$se), c(res_dl$beta, res_dl$se))
rownames(Mat)[17:18] <- c("Meta_FE", "Meta_DL")

### Need to customize ntotal for each CpG 
## For now, just using the global total N for each cohort	##borrow from ML
## Number of samples isn't available in this data set, using the numbers below just for demonstration 
ntotal <- c(2268, 3205, 172,1155,181,214,716,107,246,193,432,903,2251,788,350,1135)
Mat$N <- c(ntotal, sum(ntotal), sum(ntotal))

## Calculate CIs
Mat$err <- qnorm(0.975)*Mat$se_cpg
Mat$lower <- Mat$coef_cpg - Mat$err
Mat$upper <- Mat$coef_cpg + Mat$err
names(Mat) <- c("Beta", "SE", "N", "Err", "CI.lb", "CI.ub")
dat <- Mat

##Data
meta.dat <- data.frame(
means  = c(NA, dat$Beta),
lower = c(NA, dat$CI.lb),
upper = c(NA, dat$CI.ub))
meta.dat

##For making table text 
meta <- data.frame(
means  = dat$Beta,
lower = dat$CI.lb,
upper = dat$CI.ub)
meta

## take N and cohort names
ntotal <- dat$N 
cohorts <- as.character(rownames(dat))

## Generate table text 
tabletext<-cbind(c("Study", paste0(cohorts,"(N=",ntotal,")")),
                 c("Coefficient (95% CI)", paste0(round(meta$means,4)," (",round(meta$lower,4),", ",round(meta$upper,4),")")))


## ploting out
pdf("PACE-Charge_forestplot_FE-DL.pdf")
forestplot(tabletext, meta.dat, new_page = TRUE,
           title = paste0("PACE-Charge",".","ForestPlot of cg055759212"),
           ##boxsize=0.25, ref.vline.at = numeric(0), 	#for fixed size of box
           txt_gp = fpTxtGp(ticks = gpar(cex=0.7), xlab  = gpar(cex = 0.9)), xlab ="Regression Coefficient",
           #fn.ci_norm=c("fpDrawDiamondCI",rep("fpDrawNormalCI",times=11),"fpDrawDiamondCI"),
           is.summary=c(TRUE,rep(FALSE,16),TRUE,TRUE),
           col=fpColors(box="royalblue",line="darkblue", summary="royalblue")) #
dev.off() 

##JP's script worked as well
#pdf("forestplot_paceCharge_cg05575921_forestplot.pdf")
#forestplot(rownames(dat), dat$estimate, dat$ci.lb, dat$ci.ub, is.summary=c(rep(FALSE,16),TRUE), title="cg055759212")
#dev.off()

q("no")