R version 3.6.1 (2019-07-05) -- "Action of the Toes" Copyright (C) 2019 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ################################################################################################################################################################# > # Purpose: Make forest plot using the EWAS results from multiple cohorts and their meta result (by rma). > # Note: CIs are calculated from the se values of the EWAS results and numbers of samples are used just for plot text > # Request: Stephanie London, Mi Kyeong, 11/18/2019, Forest plots using R. > # Input: /ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/ARIC|GTP|Inchianti..... > # /ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/off-methylation-smoke-all-models123.txt > # Output: PACE-Charge_forestplot.pdf > # Ref: SampleCode_ForestPlot_forJP_MKL_11182019.R; modified_to_Sinjini_rma.r > # WDir: /ddn/gs1/home/jinj2/LondonData/PACE/PACE-CHARGE_smoking/Check/Jianping/Charge_cohort_forestPlot/ > # Script: forestPlot_cg05575921.r > # Dec. 27, 2019, Jianping Jin > ################################################################################################################################################################# > > library(data.table) > library(metafor) > library(forestplot) > > ## Read in data (p-value, coef, se) from cohorts > fhs <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/off-methylation-smoke-all-models123.txt")) > fhs_red <- fhs[,which(colnames(fhs)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SD_SmokerVsNeverSmokerModel2Fave"))] > colnames(fhs_red) = c(".id","pvalue","coef","se") > > aric <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/ARIC_smoking_EWAS_model2-combined.csv")) > aric_red <- aric[,which(colnames(aric)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(aric_red) <- c(".id","coef","se","pvalue") > > gtp <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/GTP_smoking_model2-combined.csv")) > gtp_red <- gtp[,which(colnames(gtp)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(gtp_red) <- c(".id","pvalue","coef","se") > > inchianti <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/Inchianti_smoking_EWAS_model2-combined.csv")) > inchianti_red <- inchianti[,which(colnames(inchianti)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(inchianti_red) <- c(".id","pvalue","coef","se") > > kora <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/KORA_smoking_EWAS_model2-combined.csv")) > kora_red <- kora[,which(colnames(kora)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(kora_red) <- c(".id","pvalue","coef","se") > > lbc1921 <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/LBC1921_smoking_EWAS_model2-combined.csv")) > lbc1921_red <- lbc1921[,which(colnames(lbc1921)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(lbc1921_red) <- c(".id","pvalue","coef","se") > > lbc1936 <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/LBC1936_smoking_EWAS_model2-combined.csv")) > lbc1936_red <- lbc1936[,which(colnames(lbc1936)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(lbc1936_red) <- c(".id","pvalue","coef","se") > > nas <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/NAS_smoking_EWAS_model2-combined.csv")) > nas_red <- nas[,which(colnames(nas)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(nas_red) <- c(".id","pvalue","coef","se") > > rs <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/RS_smoking_EWAS_model2-combined.csv")) > rs_red <- rs[,which(colnames(rs)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(rs_red) <- c(".id","pvalue","coef","se") > > goldn <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/GOLDN_smoking_EWAS_model2-combined.csv")) > goldn_red <- goldn[,which(colnames(goldn)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(goldn_red) <- c(".id","coef","se","pvalue") > > > mesa <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/MESA_smoking_model2-combined.csv")) > mesa_red <- mesa[,which(colnames(mesa)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(mesa_red) <- c(".id","coef","se","pvalue") > > epic <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/EPIC_smoking_model2-combined.csv")) > epic_red <- epic[,which(colnames(epic)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(epic_red) <- c(".id","pvalue","coef","se") > > norfolk <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/EPICNorfolk_smoking_model2-combined.csv")) > norfolk_red <- norfolk[,which(colnames(norfolk)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(norfolk_red) <- c(".id","pvalue","coef","se") > > > chsea <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/CHS-EA_smoking_model2-combined.csv")) > chsea_red <- chsea[,which(colnames(chsea)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(chsea_red) <- c(".id","pvalue","coef","se") > > chsaa <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/CHS-AA_smoking_model2-combined.csv")) > chsaa_red <- chsaa[,which(colnames(chsaa)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(chsaa_red) <- c(".id","pvalue","coef","se") > > genoa <- data.frame(fread("/ddn/gs1/group/london/CHARGE_Methylation/Smokers-methylation/other-cohorts/GENOA_smoking_model2-combined.csv")) > genoa_red <- genoa[,which(colnames(genoa)%in%c("V1","Fx_SmokerVsNeverSmokerModel2Fave","P_SmokerVsNeverSmokerModel2Fave","SE_SmokerVsNeverSmokerModel2Fave"))] > colnames(genoa_red) <- c(".id","pvalue","coef","se") > > ######################################################################################################################## > ## generate all cpgs > options(digits=4) > > all_cpgs <- Reduce(union, list(fhs_red$.id, aric_red$.id, gtp_red$.id, inchianti_red$.id, kora_red$.id, lbc1921_red$.id, + lbc1936_red$.id, nas_red$.id, rs_red$.id, goldn_red$.id, mesa_red$.id, epic_red$.id, norfolk_red$.id, chsea_red$.id, + chsaa_red$.id, genoa_red$.id)); > length(all_cpgs) [1] 485577 > > ## Take target CpG(s) > i <- which(all_cpgs=="cg05575921") > cpg <- all_cpgs[i] > coef_cpg <- c(fhs_red[which(fhs_red$.id==cpg),"coef"], + aric_red[which(aric_red$.id==cpg),"coef"], + gtp_red[which(gtp_red$.id==cpg),"coef"], + inchianti_red[which(inchianti_red$.id==cpg),"coef"], + kora_red[which(kora_red$.id==cpg),"coef"], + lbc1921_red[which(lbc1921_red$.id==cpg),"coef"], + lbc1936_red[which(lbc1936_red$.id==cpg),"coef"], + nas_red[which(nas_red$.id==cpg),"coef"], + rs_red[which(rs_red$.id==cpg),"coef"], + goldn_red[which(goldn_red$.id==cpg),"coef"], + mesa_red[which(mesa_red$.id==cpg),"coef"], + epic_red[which(epic_red$.id==cpg),"coef"], + norfolk_red[which(norfolk_red$.id==cpg),"coef"], + chsea_red[which(chsea_red$.id==cpg),"coef"], + chsaa_red[which(chsaa_red$.id==cpg),"coef"], + genoa_red[which(genoa_red$.id==cpg),"coef"]) > > se_cpg <- c(fhs_red[which(fhs_red$.id==cpg),"se"], + aric_red[which(aric_red$.id==cpg),"se"], + gtp_red[which(gtp_red$.id==cpg),"se"], + inchianti_red[which(inchianti_red$.id==cpg),"se"], + kora_red[which(kora_red$.id==cpg),"se"], + lbc1921_red[which(lbc1921_red$.id==cpg),"se"], + lbc1936_red[which(lbc1936_red$.id==cpg),"se"], + nas_red[which(nas_red$.id==cpg),"se"], + rs_red[which(rs_red$.id==cpg),"se"], + goldn_red[which(goldn_red$.id==cpg),"se"], + mesa_red[which(mesa_red$.id==cpg),"se"], + epic_red[which(epic_red$.id==cpg),"se"], + norfolk_red[which(norfolk_red$.id==cpg),"se"], + chsea_red[which(chsea_red$.id==cpg),"se"], + chsaa_red[which(chsaa_red$.id==cpg),"se"], + genoa_red[which(genoa_red$.id==cpg),"se"]) > > coef_se <- data.frame(coef_cpg, se_cpg) > rownames(coef_se) <- c("fhs","aric","gtp","inchianti","kora","lbc1921","lbc1936","nas","rs","goldn","mesa","epic","norfolk","chsea","chsaa","genoa") > > ## Metal analysis using RMA (robust multiple average) > res <- rma(yi=coef_se$coef_cpg, sei=coef_se$se_cpg, method="FE",weighted=TRUE) > #options(digits=4) > > Mat <- rbind(coef_se, c(res$beta, res$se)) > rownames(Mat)[17] <- "Meta" > > ### Need to customize ntotal for each CpG > ## For now, just using the global total N for each cohort ##borrow from ML > ## Number of samples isn't available in this data set, using the numbers below just for demonstration > ntotal <- c(2268, 3205, 172,1155,181,214,716,107,246,193,432,903,2251,788,350,1135) > Mat$N <- c(ntotal, sum(ntotal)) > > ## Calculate CIs > Mat$err <- qnorm(0.975)*Mat$se_cpg > Mat$lower <- Mat$coef_cpg - Mat$err > Mat$upper <- Mat$coef_cpg + Mat$err > names(Mat) <- c("Beta", "SE", "N", "Err", "CI.lb", "CI.ub") > dat <- Mat > > ##Data > meta.dat <- data.frame( + means = c(NA, dat$Beta), + lower = c(NA, dat$CI.lb), + upper = c(NA, dat$CI.ub)) > meta.dat means lower upper 1 NA NA NA 2 -0.190238 -0.199012 -0.18146 3 -0.147480 -0.155819 -0.13914 4 -0.161855 -0.187050 -0.13666 5 -0.161289 -0.176236 -0.14634 6 -0.218061 -0.228434 -0.20769 7 -0.221334 -0.245869 -0.19680 8 -0.275467 -0.291708 -0.25922 9 -0.264655 -0.290339 -0.23897 10 -0.208397 -0.223074 -0.19372 11 -0.108800 -0.119891 -0.09771 12 -0.262103 -0.278961 -0.24525 13 0.001317 -0.000365 0.00300 14 -0.162159 -0.176466 -0.14785 15 -0.140697 -0.175086 -0.10631 16 -0.179523 -0.206440 -0.15261 17 -0.187195 -0.204758 -0.16963 18 -0.031383 -0.032909 -0.02986 > > ##For making table text > meta <- data.frame( + means = dat$Beta, + lower = dat$CI.lb, + upper = dat$CI.ub) > meta means lower upper 1 -0.190238 -0.199012 -0.18146 2 -0.147480 -0.155819 -0.13914 3 -0.161855 -0.187050 -0.13666 4 -0.161289 -0.176236 -0.14634 5 -0.218061 -0.228434 -0.20769 6 -0.221334 -0.245869 -0.19680 7 -0.275467 -0.291708 -0.25922 8 -0.264655 -0.290339 -0.23897 9 -0.208397 -0.223074 -0.19372 10 -0.108800 -0.119891 -0.09771 11 -0.262103 -0.278961 -0.24525 12 0.001317 -0.000365 0.00300 13 -0.162159 -0.176466 -0.14785 14 -0.140697 -0.175086 -0.10631 15 -0.179523 -0.206440 -0.15261 16 -0.187195 -0.204758 -0.16963 17 -0.031383 -0.032909 -0.02986 > > ## take N and cohort names > ntotal <- dat$N > cohorts <- as.character(rownames(dat)) > > ## Generate table text > tabletext<-cbind(c("Study", paste0(cohorts,"(N=",ntotal,")")), + c("Coefficient (95% CI)", paste0(round(meta$means,4)," (",round(meta$lower,4),", ",round(meta$upper,4),")"))) > > > ## ploting out > #pdf("PACE-Charge_forestplot.pdf") > bitmap(file="PACE-Charge_forestplot.jpg") > forestplot(tabletext, meta.dat, new_page = TRUE, + title = paste0("PACE-Charge",".","ForestPlot of cg055759212"), + ##boxsize=0.25, ref.vline.at = numeric(0), #for fixed size of box + txt_gp = fpTxtGp(ticks = gpar(cex=0.7), xlab = gpar(cex = 0.9)), xlab ="Regression Coefficient", + #fn.ci_norm=c("fpDrawDiamondCI",rep("fpDrawNormalCI",times=11),"fpDrawDiamondCI"), + is.summary=c(TRUE,rep(FALSE,16),TRUE), + col=fpColors(box="royalblue",line="darkblue", summary="royalblue")) # > dev.off() null device 1 > > ##JP's script worked as well > #pdf("forestplot_paceCharge_cg05575921_forestplot.pdf") > #forestplot(rownames(dat), dat$estimate, dat$ci.lb, dat$ci.ub, is.summary=c(rep(FALSE,16),TRUE), title="cg055759212") > #dev.off() > > q("no")