################################################################################################################################################################# # Purpose: Make forest plot using the EWAS results from multiple cohorts and their meta result (by rma). # Note: CIs are calculated from the se values of the EWAS results and numbers of samples are used just for plot text # Request: Stephanie London, Mi Kyeong, 11/18/2019, Forest plots using R. # Input: /ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/ARIC|GTP|Inchianti..... # /ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/off-methylation-smoke-all-models123.txt # Output: PACE-Charge_forestplot.pdf # Ref: SampleCode_ForestPlot_forJP_MKL_11182019.R; modified_to_Sinjini_rma.r # WDir: /ddn/gs1/home/jinj2/LondonData/PACE/PACE-CHARGE_smoking/Check/Jianping/Charge_cohort_forestPlot/ # Script: forestPlot_cg05575921.r # Dec. 27, 2019, Jianping Jin ################################################################################################################################################################# library(data.table) library(metafor) library(forestplot) ## Read in data (p-value, coef, se) from cohorts fhs <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/off-methylation-smoke-all-models123.txt")) fhs_red <- fhs[,which(colnames(fhs)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SD_SmokerVsNeverSmokerModel2Fave"))] colnames(fhs_red) = c(".id","pvalue","coef","se") aric <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/ARIC_smoking_EWAS_model2-combined.csv")) aric_red <- aric[,which(colnames(aric)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(aric_red) <- c(".id","coef","se","pvalue") gtp <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/GTP_smoking_model2-combined.csv")) gtp_red <- gtp[,which(colnames(gtp)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(gtp_red) <- c(".id","pvalue","coef","se") inchianti <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/Inchianti_smoking_EWAS_model2-combined.csv")) inchianti_red <- inchianti[,which(colnames(inchianti)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(inchianti_red) <- c(".id","pvalue","coef","se") kora <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/KORA_smoking_EWAS_model2-combined.csv")) kora_red <- kora[,which(colnames(kora)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(kora_red) <- c(".id","pvalue","coef","se") lbc1921 <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/LBC1921_smoking_EWAS_model2-combined.csv")) lbc1921_red <- lbc1921[,which(colnames(lbc1921)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(lbc1921_red) <- c(".id","pvalue","coef","se") lbc1936 <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/LBC1936_smoking_EWAS_model2-combined.csv")) lbc1936_red <- lbc1936[,which(colnames(lbc1936)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(lbc1936_red) <- c(".id","pvalue","coef","se") nas <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/NAS_smoking_EWAS_model2-combined.csv")) nas_red <- nas[,which(colnames(nas)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(nas_red) <- c(".id","pvalue","coef","se") rs <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/RS_smoking_EWAS_model2-combined.csv")) rs_red <- rs[,which(colnames(rs)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(rs_red) <- c(".id","pvalue","coef","se") goldn <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/GOLDN_smoking_EWAS_model2-combined.csv")) goldn_red <- goldn[,which(colnames(goldn)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(goldn_red) <- c(".id","coef","se","pvalue") mesa <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/MESA_smoking_model2-combined.csv")) mesa_red <- mesa[,which(colnames(mesa)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(mesa_red) <- c(".id","coef","se","pvalue") epic <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/EPIC_smoking_model2-combined.csv")) epic_red <- epic[,which(colnames(epic)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(epic_red) <- c(".id","pvalue","coef","se") norfolk <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/EPICNorfolk_smoking_model2-combined.csv")) norfolk_red <- norfolk[,which(colnames(norfolk)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(norfolk_red) <- c(".id","pvalue","coef","se") chsea <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/CHS-EA_smoking_model2-combined.csv")) chsea_red <- chsea[,which(colnames(chsea)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(chsea_red) <- c(".id","pvalue","coef","se") chsaa <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/CHS-AA_smoking_model2-combined.csv")) chsaa_red <- chsaa[,which(colnames(chsaa)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(chsaa_red) <- c(".id","pvalue","coef","se") genoa <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/GENOA_smoking_model2-combined.csv")) genoa_red <- genoa[,which(colnames(genoa)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] colnames(genoa_red) <- c(".id","pvalue","coef","se") ######################################################################################################################## ## generate all cpgs options(digits=4) all_cpgs <- Reduce(union, list(fhs_red$.id, aric_red$.id, gtp_red$.id, inchianti_red$.id, kora_red$.id, lbc1921_red$.id, lbc1936_red$.id, nas_red$.id, rs_red$.id, goldn_red$.id, mesa_red$.id, epic_red$.id, norfolk_red$.id, chsea_red$.id, chsaa_red$.id, genoa_red$.id)); length(all_cpgs) ## Take target CpG(s) i <- which(all_cpgs=="cg05575921") cpg <- all_cpgs[i] coef_cpg <- c(fhs_red[which(fhs_red$.id==cpg),"coef"], aric_red[which(aric_red$.id==cpg),"coef"], gtp_red[which(gtp_red$.id==cpg),"coef"], inchianti_red[which(inchianti_red$.id==cpg),"coef"], kora_red[which(kora_red$.id==cpg),"coef"], lbc1921_red[which(lbc1921_red$.id==cpg),"coef"], lbc1936_red[which(lbc1936_red$.id==cpg),"coef"], nas_red[which(nas_red$.id==cpg),"coef"], rs_red[which(rs_red$.id==cpg),"coef"], goldn_red[which(goldn_red$.id==cpg),"coef"], mesa_red[which(mesa_red$.id==cpg),"coef"], epic_red[which(epic_red$.id==cpg),"coef"], norfolk_red[which(norfolk_red$.id==cpg),"coef"], chsea_red[which(chsea_red$.id==cpg),"coef"], chsaa_red[which(chsaa_red$.id==cpg),"coef"], genoa_red[which(genoa_red$.id==cpg),"coef"]) se_cpg <- c(fhs_red[which(fhs_red$.id==cpg),"se"], aric_red[which(aric_red$.id==cpg),"se"], gtp_red[which(gtp_red$.id==cpg),"se"], inchianti_red[which(inchianti_red$.id==cpg),"se"], kora_red[which(kora_red$.id==cpg),"se"], lbc1921_red[which(lbc1921_red$.id==cpg),"se"], lbc1936_red[which(lbc1936_red$.id==cpg),"se"], nas_red[which(nas_red$.id==cpg),"se"], rs_red[which(rs_red$.id==cpg),"se"], goldn_red[which(goldn_red$.id==cpg),"se"], mesa_red[which(mesa_red$.id==cpg),"se"], epic_red[which(epic_red$.id==cpg),"se"], norfolk_red[which(norfolk_red$.id==cpg),"se"], chsea_red[which(chsea_red$.id==cpg),"se"], chsaa_red[which(chsaa_red$.id==cpg),"se"], genoa_red[which(genoa_red$.id==cpg),"se"]) coef_se <- data.frame(coef_cpg, se_cpg) rownames(coef_se) <- c("fhs","aric","gtp","inchianti","kora","lbc1921","lbc1936","nas","rs","goldn","mesa","epic","norfolk","chsea","chsaa","genoa") ## Metal analysis using RMA (robust multiple average) res_fe <- rma(yi=coef_se$coef_cpg, sei=coef_se$se_cpg, method="FE",weighted=TRUE) res_dl <- rma(yi=coef_se$coef_cpg, sei=coef_se$se_cpg, method="DL",weighted=TRUE) #options(digits=4) Mat <- rbind(coef_se, c(res_fe$beta, res_fe$se), c(res_dl$beta, res_dl$se)) rownames(Mat)[17:18] <- c("Meta_FE", "Meta_DL") ### Need to customize ntotal for each CpG ## For now, just using the global total N for each cohort ##borrow from ML ## Number of samples isn't available in this data set, using the numbers below just for demonstration ntotal <- c(2268, 3205, 172,1155,181,214,716,107,246,193,432,903,2251,788,350,1135) Mat$N <- c(ntotal, sum(ntotal), sum(ntotal)) ## Calculate CIs Mat$err <- qnorm(0.975)*Mat$se_cpg Mat$lower <- Mat$coef_cpg - Mat$err Mat$upper <- Mat$coef_cpg + Mat$err names(Mat) <- c("Beta", "SE", "N", "Err", "CI.lb", "CI.ub") dat <- Mat ##Data meta.dat <- data.frame( means = c(NA, dat$Beta), lower = c(NA, dat$CI.lb), upper = c(NA, dat$CI.ub)) meta.dat ##For making table text meta <- data.frame( means = dat$Beta, lower = dat$CI.lb, upper = dat$CI.ub) meta ## take N and cohort names ntotal <- dat$N cohorts <- as.character(rownames(dat)) ## Generate table text tabletext<-cbind(c("Study", paste0(cohorts,"(N=",ntotal,")")), c("Coefficient (95% CI)", paste0(round(meta$means,4)," (",round(meta$lower,4),", ",round(meta$upper,4),")"))) ## ploting out pdf("PACE-Charge_forestplot_FE-DL.pdf") forestplot(tabletext, meta.dat, new_page = TRUE, title = paste0("PACE-Charge",".","ForestPlot of cg055759212"), ##boxsize=0.25, ref.vline.at = numeric(0), #for fixed size of box txt_gp = fpTxtGp(ticks = gpar(cex=0.7), xlab = gpar(cex = 0.9)), xlab ="Regression Coefficient", #fn.ci_norm=c("fpDrawDiamondCI",rep("fpDrawNormalCI",times=11),"fpDrawDiamondCI"), is.summary=c(TRUE,rep(FALSE,16),TRUE,TRUE), col=fpColors(box="royalblue",line="darkblue", summary="royalblue")) # dev.off() ##JP's script worked as well #pdf("forestplot_paceCharge_cg05575921_forestplot.pdf") #forestplot(rownames(dat), dat$estimate, dat$ci.lb, dat$ci.ub, is.summary=c(rep(FALSE,16),TRUE), title="cg055759212") #dev.off() q("no")