1 Star 0 Fork 0

尧小飞/Tipton_2015_analysis

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
Tipton_2015_analysis_part2_v01-09.Rmd 21.40 KB
一键复制 编辑 原始数据 按行查看 历史
wguesdon 提交于 2020-02-03 19:01 . update analysis
title author date output
Tipton_2015_analysis_part2_v01-01
William Guesdon
22/01/2020
html_document
```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ```{r warning=FALSE, message=FALSE} ################ # Load libraries ################ library(tidyverse) library(alakazam) library(shazam) library(cowplot) library(rstatix) library(cowplot) library(ggpubr) ``` ```{r echo=FALSE, warning=FALSE, message=FALSE, include=FALSE} ##################### #Set up ggplot theme #################### tmp_theme % # mutate(ISOTYPE = # ifelse(str_detect(SEQUENCE_INPUT, fixed('GGGAGTGCATCCGCCCC', ignore_case=TRUE)), "IgM", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCACCCACCAAGGCTCC', ignore_case=TRUE)), "IgD", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCATCCCCGACCAGCCC', ignore_case=TRUE)), "IgA", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCCACCAAGGGCCC', ignore_case=TRUE)), "IgG", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCTTCCACCAAGGGCCC', ignore_case=TRUE)), "IgG", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCCACACAGAGCCC', ignore_case=TRUE)), "IgE", NA # )))))) # ) ``` ```{r} # db % # mutate(ISOTYPE = # ifelse(str_detect(SEQUENCE_INPUT, fixed('CCTTTTCCCCCTCGTCTCCTG', ignore_case=TRUE)), "IgM", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCACCCACCAAGGCTCC', ignore_case=TRUE)), "IgD", # ifelse(str_detect(SEQUENCE_INPUT, fixed('CGACCAGCCCCAAGGTCTTC', ignore_case=TRUE)), "IgA", # ifelse(str_detect(SEQUENCE_INPUT, fixed('TCCACCAAGGGCCCATCGG', ignore_case=TRUE)), "IgG", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCCACACAGAGCCC', ignore_case=TRUE)), "IgE", NA # ))))) # ) ``` Issue with determining the isotype. Try to reduce the signature in case the problem is linked to shorted read. The issue is that some isotype could be not be correctly identified. ```{r} db % mutate(ISOTYPE = ifelse(str_detect(SEQUENCE_INPUT, fixed('GGGAGT', ignore_case=TRUE)), "IgM", ifelse(str_detect(SEQUENCE_INPUT, fixed('GCACCC', ignore_case=TRUE)), "IgD", ifelse(str_detect(SEQUENCE_INPUT, fixed('GCATCC', ignore_case=TRUE)), "IgA", ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCC', ignore_case=TRUE)), "IgG", ifelse(str_detect(SEQUENCE_INPUT, fixed('GCTTCC', ignore_case=TRUE)), "IgG", ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCC', ignore_case=TRUE)), "IgE", NA )))))) ) all_samples_isotype % filter(!is.na(ISOTYPE)) %>% filter(!is.na(D_CALL)) nrow(db) #View(db) ``` # Compare HC vs SLE IgM sequences ```{r} db %>% group_by(STATUS, ISOTYPE) %>% count() %>% knitr::kable() ``` ```{r message=FALSE} directory % filter(STATUS == "HC") db_SLE % filter(STATUS == "SLE") HC % tukey_hsd() %>% mutate(variable = "MU_FREQ") %>% select(variable, group1, group2, p.adj, p.adj.signif) %>% arrange(p.adj) write_csv(stat, "Output/MU_FREQ_stat.csv") #knitr::kable(stat) ``` # Physico-chemical properties analysis ```{r} db_props % group_by(STATUS, ISOTYPE) %>% select( STATUS, ISOTYPE, CDR3_AA_LENGTH, CDR3_AA_GRAVY, CDR3_AA_BASIC, CDR3_AA_ACIDIC, CDR3_AA_AROMATIC, CDR3_AA_ALIPHATIC, CDR3_AA_POLARITY ) write_csv(db_props_summary, "Output/db_props_summary.csv") #View(db_props_summary) ``` ```{r message=FALSE} #https://stackoverflow.com/questions/29678435/how-to-pass-dynamic-column-names-in-dplyr-into-custom-function # https://stackoverflow.com/questions/29678435/how-to-pass-dynamic-column-names-in-dplyr-into-custom-function # https://stackoverflow.com/questions/38511743/adding-missing-grouping-variables-message-in-dplyr-in-r #View(db_props) selected_variables % select(STATUS, ISOTYPE, !!as.name(i)) %>% group_by(STATUS, ISOTYPE) %>% summarise( mean = mean(!!as.name(i)), se = sd(!!as.name(i))/sqrt(n())) %>% ggplot( aes(x=STATUS, y=mean, fill = STATUS))+ geom_bar(stat="identity", colour="black", position=position_dodge(0.9)) + geom_errorbar(aes(ymin = mean - se, ymax = mean + se), position=position_dodge(0.9), width = 0.2)+ scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) + theme(text = element_text(size=20))+ ylab(i)+ tmp_theme + facet_wrap( .~ ISOTYPE) ggsave(g1, filename = paste0("Output/", i, ".png")) #plot(g1) } db_props % filter(ISOTYPE == "IgM") for (i in selected_variables){ #print(i) g1 % select(STATUS, ISOTYPE, !!as.name(i)) %>% group_by(STATUS) %>% summarise( mean = mean(!!as.name(i)), se = sd(!!as.name(i))/sqrt(n())) %>% ggplot( aes(x=STATUS, y=mean, fill = STATUS))+ geom_bar(stat="identity", colour="black", position=position_dodge(0.9)) + geom_errorbar(aes(ymin = mean - se, ymax = mean + se), position=position_dodge(0.9), width = 0.2)+ scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) + theme(text = element_text(size=20))+ ylab(i)+ tmp_theme ggsave(g1, filename = paste0("Output/IgM/", i, ".png")) #plot(g1) } db_props % filter(ISOTYPE == "IgD") for (i in selected_variables){ #print(i) g1 % select(STATUS, ISOTYPE, !!as.name(i)) %>% group_by(STATUS) %>% summarise( mean = mean(!!as.name(i)), se = sd(!!as.name(i))/sqrt(n())) %>% ggplot( aes(x=STATUS, y=mean, fill = STATUS))+ geom_bar(stat="identity", colour="black", position=position_dodge(0.9)) + geom_errorbar(aes(ymin = mean - se, ymax = mean + se), position=position_dodge(0.9), width = 0.2)+ scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) + theme(text = element_text(size=20))+ ylab(i)+ tmp_theme ggsave(g1, filename = paste0("Output/IgD/", i, ".png")) #plot(g1) } db_props % filter(ISOTYPE == "IgG") for (i in selected_variables){ #print(i) g1 % select(STATUS, ISOTYPE, !!as.name(i)) %>% group_by(STATUS) %>% summarise( mean = mean(!!as.name(i)), se = sd(!!as.name(i))/sqrt(n())) %>% ggplot( aes(x=STATUS, y=mean, fill = STATUS))+ geom_bar(stat="identity", colour="black", position=position_dodge(0.9)) + geom_errorbar(aes(ymin = mean - se, ymax = mean + se), position=position_dodge(0.9), width = 0.2)+ scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) + theme(text = element_text(size=20))+ ylab(i)+ tmp_theme ggsave(g1, filename = paste0("Output/IgG/", i, ".png")) #plot(g1) } db_props % filter(ISOTYPE == "IgA") for (i in selected_variables){ #print(i) g1 % select(STATUS, ISOTYPE, !!as.name(i)) %>% group_by(STATUS) %>% summarise( mean = mean(!!as.name(i)), se = sd(!!as.name(i))/sqrt(n())) %>% ggplot( aes(x=STATUS, y=mean, fill = STATUS))+ geom_bar(stat="identity", colour="black", position=position_dodge(0.9)) + geom_errorbar(aes(ymin = mean - se, ymax = mean + se), position=position_dodge(0.9), width = 0.2)+ scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) + theme(text = element_text(size=20))+ ylab(i)+ tmp_theme ggsave(g1, filename = paste0("Output/IgA/", i, ".png")) #plot(g1) } ``` # Compare V gene usage between HC and SLE ```{r message=FALSE} # D1D2_IGH_analysis_v01-01 # Quantify V family clonal usage by sample and isotype db_props % group_by(STATUS, ISOTYPE) %>% count() %>% knitr::kable() ``` ```{r} iso_count % group_by(STATUS, ISOTYPE) %>% summarise( count = n() ) iso_count %>% knitr::kable() ``` ```{r} HC_IgA % filter(STATUS == 'HC') %>% filter(ISOTYPE == 'IgA') #HC_IgA$count HC_IgD % filter(STATUS == 'HC') %>% filter(ISOTYPE == 'IgD') #HC_IgD$count HC_IgG % filter(STATUS == 'HC') %>% filter(ISOTYPE == 'IgG') #HC_IgG$count HC_IgM % filter(STATUS == 'HC') %>% filter(ISOTYPE == 'IgM') #HC_IgM$count HC_total % filter(STATUS == 'SLE') %>% filter(ISOTYPE == 'IgA') #SLE_IgA$count SLE_IgD % filter(STATUS == 'SLE') %>% filter(ISOTYPE == 'IgD') #SLE_IgD$count SLE_IgG % filter(STATUS == 'SLE') %>% filter(ISOTYPE == 'IgG') #SLE_IgG$count SLE_IgM % filter(STATUS == 'SLE') %>% filter(ISOTYPE == 'IgM') #SLE_IgM$count SLE_total % select(STATUS, GERMLINE_V_CALL) %>% filter( str_detect(GERMLINE_V_CALL, "4-34")) %>% group_by(STATUS) %>% summarise( count = n() ) %>% mutate( proportion = ifelse(STATUS == 'HC', count/HC_total*100, ifelse(STATUS == 'SLE', count/SLE_total*100, NA )) ) ggplot(db)+ aes( x = STATUS, y = proportion, fill = STATUS) + geom_col()+ scale_fill_manual(name="Status", values=cbPalette) + tmp_theme + ggtitle('VH4-34 percentage') #View(db) ``` ```{r} # db % # select(STATUS, GERMLINE_V_CALL, ISOTYPE) %>% # filter(str_detect(GERMLINE_V_CALL, "4-34")) %>% # filter(ISOTYPE == "IgM") %>% # group_by(STATUS) %>% # summarise( # count = n() # ) %>% # mutate( # proportion = # ifelse(STATUS == 'HC', count/HC_total*100, # ifelse(STATUS == 'SLE', count/SLE_total*100, NA # )) # ) # # ggplot(db)+ # aes( x = STATUS, y = proportion, fill = STATUS) + # geom_col()+ # scale_fill_manual(name="Status", values=cbPalette) + # tmp_theme + # ggtitle('VH4-34 percentage') # #View(db) ``` ```{r} db % select(STATUS, GERMLINE_V_CALL, ISOTYPE) %>% filter(str_detect(GERMLINE_V_CALL, "4-34")) %>% group_by(STATUS, ISOTYPE) %>% summarise( count = n() ) %>% mutate( proportion = ifelse(STATUS == 'HC', count/HC_total*100, ifelse(STATUS == 'SLE', count/SLE_total*100, NA )) ) ggplot(db)+ aes( x = STATUS, y = proportion, fill = STATUS) + geom_col()+ scale_fill_manual(name="Status", values=cbPalette) + tmp_theme + ggtitle('VH4-34 percentage') + facet_wrap( .~ ISOTYPE) # #View(db) ``` # Compare clonal diversity between HC and SLE ## clonal abundance curve ```{r} db % filter(ISOTYPE == "IgM") # Compare diversity curve across values in the "STATUS" column # q ranges from 0 (min_q=0) to 4 (max_q=4) in 0.05 incriments (step_q=0.05) # A 95% confidence interval will be calculated (ci=0.95) # 200 resampling realizations are performed (nboot=200) sample_curve % filter(ISOTYPE == "IgD") # Compare diversity curve across values in the "STATUS" column # q ranges from 0 (min_q=0) to 4 (max_q=4) in 0.05 incriments (step_q=0.05) # A 95% confidence interval will be calculated (ci=0.95) # 200 resampling realizations are performed (nboot=200) sample_curve % filter(ISOTYPE == "IgG") # Compare diversity curve across values in the "STATUS" column # q ranges from 0 (min_q=0) to 4 (max_q=4) in 0.05 incriments (step_q=0.05) # A 95% confidence interval will be calculated (ci=0.95) # 200 resampling realizations are performed (nboot=200) sample_curve % filter(ISOTYPE == "IgA") # Compare diversity curve across values in the "STATUS" column # q ranges from 0 (min_q=0) to 4 (max_q=4) in 0.05 incriments (step_q=0.05) # A 95% confidence interval will be calculated (ci=0.95) # 200 resampling realizations are performed (nboot=200) sample_curve
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/yao_xiao_fei2/Tipton_2015_analysis.git
git@gitee.com:yao_xiao_fei2/Tipton_2015_analysis.git
yao_xiao_fei2
Tipton_2015_analysis
Tipton_2015_analysis
master

搜索帮助