Script: /ddn/gs1/home/jinj2/LondonData/alhs_hrc_pft_analysis_rvtests/concatenate_add_IDsRsq_qqPlot_rvtestalhs_untransform.sh WDir: /ddn/gs1/home/jinj2/LondonData/alhs_hrc_pft_analysis_rvtests/ controls ever yessmk ff 39140401 ch1_22.info.chr_pos_allele.rsq.txt Row number of alhs_untransform_controls_ratio_yessmk_chr1_22_awjj_2020_11_25.chr_pos.txt is 39140401 alhs_untransform_controls_ratio_yessmk_chr1_22_awjj_2020_11_25.chr_pos_All.txt 39140401 ch1_22.info.chr_pos_All.rsq.txt Difference between chr1_22_chr_pos_allele chr1_22_info_pos_allele is \n 39140401 alhs_untransform_controls_ratio_yessmk_chr1_22_awjj_2020_11_25.chr_pos_All_rsq.txt 39131579 HRC_chr_pos_allele_snpid_noX.txt 32873830 HRC_chr_pos_allele_snpid_noX_noMissing.txt 32873830 HRC_chr_pos_All_snpid_noX_noMissing_uniq 39140401 alhs_untransform_controls_ratio_yessmk_chr1_22_awjj_2020_11_25.chr_pos_All_rsq_snpid.txt 39140401 alhs_untransform_controls_ratio_yessmk_chr1_22_awjj_2020_11_25.chr_pos_All_rsq_snpid_final.txt Differene between alhs_untransform_cntrl_yessmk_chr_pos_allele_rsq_id & alhs_untransform_cntrl_yessmk_chr_pos_allele_rsq_final_id are: Check out the results in the final result with a few duplicated probes and a couple of probes with missing rsIDs. 1:949511:G:A rs139516378 1 949511 G A 637 0 NA NA NA NA NA NA NA 0.94805 1:949511:G:T rs139516378 1 949511 G T 637 0 NA NA NA NA NA NA NA 0.04084 1:94951105:C:T rs77904282 1 94951105 C T 637 0 NA NA NA NA NA NA NA 0.00871 11:94951194:C:G rs569780013 11 94951194 C G 637 0 NA NA NA NA NA NA NA 0.44696 7:142126531:C:G rs10258029 7 142126531 C G 637 0.997645 3.12276 238.363 0.0409109 + 1.06016 5.24143 0.83971 0.96513 7:142126531:C:T rs567481259 7 142126531 C T 637 0.518838 231.291 25399.8 2.10613 + 0.736881 0.507755 0.14671 0.58062 18:22759054:T:A rs76477121 18 22759054 T A 637 0.00235479 -12.0704 235.692 0.618159 - -4.14426 5.27105 0.431732 0.49748 18:22759054:T:C rs36170220 18 22759054 T C 637 0.0188383 -31.8307 1862.59 0.543969 - -1.38292 1.87504 0.460793 0.80280 1:16280:T:C 1:16280:T:C 1 16280 T C 637 0 NA NA NA NA NA NA NA 0.10117 22:51238513:C:G 22:51238513:C:G 22 51238513 C G 637 0 NA NA NA NA NA NA NA 0.00260 R version 3.6.1 (2019-07-05) -- "Action of the Toes" Copyright (C) 2019 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > STAT=c("controls"); ##(controls | cases) > SMOKE=c("ever"); ##(for path name: never | ever) > SMK=c("yessmk"); ##(for fileName: nosmk | yessmk) > PFT=c("ff"); ##(fev1 | fvc | ff) > FNAME=c("ratio"); ##(fev1 | fvc | raito) > print(STAT) [1] "controls" > print(SMOKE) [1] "ever" > print(SMK) [1] "yessmk" > print(PFT) [1] "ff" > > path <- c(paste0("/ddn/gs1/group/london/alhs_hrc_pft_analysis_rvtests/",STAT,"_",SMOKE,"_smoker_results/",PFT,"_untransform/combined_and_qqPlot/")) > > ### QQ plot and lambda value > dat <- read.table(paste0(path,"alhs_untransform_",STAT,"_",FNAME,"_",SMK,"_chr1_22_awjj_2020_11_25.chr_pos_All_rsq_snpid_final.txt.gz"), header=TRUE) > dim(dat) [1] 39140400 16 > head(dat) CHR.POS.Allele ID CHROM POS REF ALT N_INFORMATIVE AF 1 1:13380:C:G rs571093408 1 13380 C G 637 0.000000 2 1:16071:G:A rs541172944 1 16071 G A 637 0.000000 3 1:16141:C:T rs529651976 1 16141 C T 637 0.000000 4 1:16280:T:C 1:16280:T:C 1 16280 T C 637 0.000000 5 1:49298:T:C rs200943160 1 49298 T C 637 0.856358 6 1:54353:C:A rs140052487 1 54353 C A 637 0.000000 U V STAT DIRECTION EFFECT SE PVALUE Rsq 1 NA NA NA NA NA NA 0.00024 2 NA NA NA NA NA NA 0.21131 3 NA NA NA NA NA NA 0.00784 4 NA NA NA NA NA NA 0.10117 5 -30.5162 11562.5 0.0805397 - -0.213574 0.752565 0.776567 0.12335 6 NA NA NA NA NA NA 0.03243 > pval= as.vector(dat$"PVALUE") > p <- na.omit(pval) > length(p) [1] 15094648 > mp <- median(p) > std.mchi <- qchisq(mp, df = 1, lower.tail = F)/qchisq(0.5, 1) > std.mchi [1] 0.9032597 > lambda<-round(std.mchi,2) > lambda [1] 0.9 > > myQQ = function(pvalue){ + pvals <- na.omit(pvalue) + lgP <- log(pvals, base = 10) + N <- length(pvalue) + n <- length(pvals) + mp <- median(pvals) + std.mchi <- qchisq(mp, df = 1, lower.tail = F)/qchisq(0.5, 1) + par(pty = "s") + qqplot(-log((seq(1, n, 1) - 0.5)/n, base = 10), -lgP, xlim = range(-lgP), + ylim = range(-lgP), ylab = "-log_10 P (Label)", xlab = "Expected -log_10 P (Label)", + pch = 20, main = paste0("QQ Plot of ",STAT," ",SMK," ",PFT,"_chrPos_rsq_rsID")) + abline(0, 1) + text(mean(range(-lgP)), 1, substitute(lambda == x, list(x = round(std.mchi,2))), cex = 2) + } > #pdf(file=paste0("alhs_untransform_",STAT,"_",PFT,"_",SMK,"_awjj_20201125.chr-pos-All_rsq_snpid.qqplot_1M.pdf")) > #pdf(file=paste0("alhs_untransform_",STAT,"_",PFT,"_",SMK,"_awjj_20201125.chr-pos-All_rsq_snpid.qqplot.pdf")) > bitmap(file=paste0(path,"alhs_untransform_",STAT,"_",FNAME,"_",SMK,"_awjj_20201125.chr-pos-All_rsq_snpid.qqplot.jpg")) > > #myQQ(p[1:1000000]) > myQQ(p) > > dev.off() null device 1 > >