Tipton_2015_analysis_part2_v01-09.Rmd · 尧小飞/Tipton_2015_analysis

title	author	date	output
Tipton_2015_analysis_part2_v01-01	William Guesdon	22/01/2020	html_document

```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ```{r warning=FALSE, message=FALSE} ################ # Load libraries ################ library(tidyverse) library(alakazam) library(shazam) library(cowplot) library(rstatix) library(cowplot) library(ggpubr) ``` ```{r echo=FALSE, warning=FALSE, message=FALSE, include=FALSE} ##################### #Set up ggplot theme #################### tmp_theme % # mutate(ISOTYPE = # ifelse(str_detect(SEQUENCE_INPUT, fixed('GGGAGTGCATCCGCCCC', ignore_case=TRUE)), "IgM", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCACCCACCAAGGCTCC', ignore_case=TRUE)), "IgD", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCATCCCCGACCAGCCC', ignore_case=TRUE)), "IgA", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCCACCAAGGGCCC', ignore_case=TRUE)), "IgG", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCTTCCACCAAGGGCCC', ignore_case=TRUE)), "IgG", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCCACACAGAGCCC', ignore_case=TRUE)), "IgE", NA # )))))) # ) ``` ```{r} # db % # mutate(ISOTYPE = # ifelse(str_detect(SEQUENCE_INPUT, fixed('CCTTTTCCCCCTCGTCTCCTG', ignore_case=TRUE)), "IgM", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCACCCACCAAGGCTCC', ignore_case=TRUE)), "IgD", # ifelse(str_detect(SEQUENCE_INPUT, fixed('CGACCAGCCCCAAGGTCTTC', ignore_case=TRUE)), "IgA", # ifelse(str_detect(SEQUENCE_INPUT, fixed('TCCACCAAGGGCCCATCGG', ignore_case=TRUE)), "IgG", # ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCCACACAGAGCCC', ignore_case=TRUE)), "IgE", NA # ))))) # ) ``` Issue with determining the isotype. Try to reduce the signature in case the problem is linked to shorted read. The issue is that some isotype could be not be correctly identified. ```{r} db % mutate(ISOTYPE = ifelse(str_detect(SEQUENCE_INPUT, fixed('GGGAGT', ignore_case=TRUE)), "IgM", ifelse(str_detect(SEQUENCE_INPUT, fixed('GCACCC', ignore_case=TRUE)), "IgD", ifelse(str_detect(SEQUENCE_INPUT, fixed('GCATCC', ignore_case=TRUE)), "IgA", ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCC', ignore_case=TRUE)), "IgG", ifelse(str_detect(SEQUENCE_INPUT, fixed('GCTTCC', ignore_case=TRUE)), "IgG", ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCC', ignore_case=TRUE)), "IgE", NA )))))) ) all_samples_isotype % filter(!is.na(ISOTYPE)) %>% filter(!is.na(D_CALL)) nrow(db) #View(db) ``` # Compare HC vs SLE IgM sequences ```{r} db %>% group_by(STATUS, ISOTYPE) %>% count() %>% knitr::kable() ``` ```{r message=FALSE} directory % filter(STATUS == "HC") db_SLE % filter(STATUS == "SLE") HC % tukey_hsd() %>% mutate(variable = "MU_FREQ") %>% select(variable, group1, group2, p.adj, p.adj.signif) %>% arrange(p.adj) write_csv(stat, "Output/MU_FREQ_stat.csv") #knitr::kable(stat) ``` # Physico-chemical properties analysis ```{r} db_props % group_by(STATUS, ISOTYPE) %>% select( STATUS, ISOTYPE, CDR3_AA_LENGTH, CDR3_AA_GRAVY, CDR3_AA_BASIC, CDR3_AA_ACIDIC, CDR3_AA_AROMATIC, CDR3_AA_ALIPHATIC, CDR3_AA_POLARITY ) write_csv(db_props_summary, "Output/db_props_summary.csv") #View(db_props_summary) ``` ```{r message=FALSE} #https://stackoverflow.com/questions/29678435/how-to-pass-dynamic-column-names-in-dplyr-into-custom-function # https://stackoverflow.com/questions/29678435/how-to-pass-dynamic-column-names-in-dplyr-into-custom-function # https://stackoverflow.com/questions/38511743/adding-missing-grouping-variables-message-in-dplyr-in-r #View(db_props) selected_variables % select(STATUS, ISOTYPE, !!as.name(i)) %>% group_by(STATUS, ISOTYPE) %>% summarise( mean = mean(!!as.name(i)), se = sd(!!as.name(i))/sqrt(n())) %>% ggplot( aes(x=STATUS, y=mean, fill = STATUS))+ geom_bar(stat="identity", colour="black", position=position_dodge(0.9)) + geom_errorbar(aes(ymin = mean - se, ymax = mean + se), position=position_dodge(0.9), width = 0.2)+ scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) + theme(text = element_text(size=20))+ ylab(i)+ tmp_theme + facet_wrap( .~ ISOTYPE) ggsave(g1, filename = paste0("Output/", i, ".png")) #plot(g1) } db_props % filter(ISOTYPE == "IgM") for (i in selected_variables){ #print(i) g1 % select(STATUS, ISOTYPE, !!as.name(i)) %>% group_by(STATUS) %>% summarise( mean = mean(!!as.name(i)), se = sd(!!as.name(i))/sqrt(n())) %>% ggplot( aes(x=STATUS, y=mean, fill = STATUS))+ geom_bar(stat="identity", colour="black", position=position_dodge(0.9)) + geom_errorbar(aes(ymin = mean - se, ymax = mean + se), position=position_dodge(0.9), width = 0.2)+ scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) + theme(text = element_text(size=20))+ ylab(i)+ tmp_theme ggsave(g1, filename = paste0("Output/IgM/", i, ".png")) #plot(g1) } db_props % filter(ISOTYPE == "IgD") for (i in selected_variables){ #print(i) g1 % select(STATUS, ISOTYPE, !!as.name(i)) %>% group_by(STATUS) %>% summarise( mean = mean(!!as.name(i)), se = sd(!!as.name(i))/sqrt(n())) %>% ggplot( aes(x=STATUS, y=mean, fill = STATUS))+ geom_bar(stat="identity", colour="black", position=position_dodge(0.9)) + geom_errorbar(aes(ymin = mean - se, ymax = mean + se), position=position_dodge(0.9), width = 0.2)+ scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) + theme(text = element_text(size=20))+ ylab(i)+ tmp_theme ggsave(g1, filename = paste0("Output/IgD/", i, ".png")) #plot(g1) } db_props % filter(ISOTYPE == "IgG") for (i in selected_variables){ #print(i) g1 % select(STATUS, ISOTYPE, !!as.name(i)) %>% group_by(STATUS) %>% summarise( mean = mean(!!as.name(i)), se = sd(!!as.name(i))/sqrt(n())) %>% ggplot( aes(x=STATUS, y=mean, fill = STATUS))+ geom_bar(stat="identity", colour="black", position=position_dodge(0.9)) + geom_errorbar(aes(ymin = mean - se, ymax = mean + se), position=position_dodge(0.9), width = 0.2)+ scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) + theme(text = element_text(size=20))+ ylab(i)+ tmp_theme ggsave(g1, filename = paste0("Output/IgG/", i, ".png")) #plot(g1) } db_props % filter(ISOTYPE == "IgA") for (i in selected_variables){ #print(i) g1 % select(STATUS, ISOTYPE, !!as.name(i)) %>% group_by(STATUS) %>% summarise( mean = mean(!!as.name(i)), se = sd(!!as.name(i))/sqrt(n())) %>% ggplot( aes(x=STATUS, y=mean, fill = STATUS))+ geom_bar(stat="identity", colour="black", position=position_dodge(0.9)) + geom_errorbar(aes(ymin = mean - se, ymax = mean + se), position=position_dodge(0.9), width = 0.2)+ scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) + theme(text = element_text(size=20))+ ylab(i)+ tmp_theme ggsave(g1, filename = paste0("Output/IgA/", i, ".png")) #plot(g1) } ``` # Compare V gene usage between HC and SLE ```{r message=FALSE} # D1D2_IGH_analysis_v01-01 # Quantify V family clonal usage by sample and isotype db_props % group_by(STATUS, ISOTYPE) %>% count() %>% knitr::kable() ``` ```{r} iso_count % group_by(STATUS, ISOTYPE) %>% summarise( count = n() ) iso_count %>% knitr::kable() ``` ```{r} HC_IgA % filter(STATUS == 'HC') %>% filter(ISOTYPE == 'IgA') #HC_IgA$count HC_IgD % filter(STATUS == 'HC') %>% filter(ISOTYPE == 'IgD') #HC_IgD$count HC_IgG % filter(STATUS == 'HC') %>% filter(ISOTYPE == 'IgG') #HC_IgG$count HC_IgM % filter(STATUS == 'HC') %>% filter(ISOTYPE == 'IgM') #HC_IgM$count HC_total % filter(STATUS == 'SLE') %>% filter(ISOTYPE == 'IgA') #SLE_IgA$count SLE_IgD % filter(STATUS == 'SLE') %>% filter(ISOTYPE == 'IgD') #SLE_IgD$count SLE_IgG % filter(STATUS == 'SLE') %>% filter(ISOTYPE == 'IgG') #SLE_IgG$count SLE_IgM % filter(STATUS == 'SLE') %>% filter(ISOTYPE == 'IgM') #SLE_IgM$count SLE_total % select(STATUS, GERMLINE_V_CALL) %>% filter( str_detect(GERMLINE_V_CALL, "4-34")) %>% group_by(STATUS) %>% summarise( count = n() ) %>% mutate( proportion = ifelse(STATUS == 'HC', count/HC_total*100, ifelse(STATUS == 'SLE', count/SLE_total*100, NA )) ) ggplot(db)+ aes( x = STATUS, y = proportion, fill = STATUS) + geom_col()+ scale_fill_manual(name="Status", values=cbPalette) + tmp_theme + ggtitle('VH4-34 percentage') #View(db) ``` ```{r} # db % # select(STATUS, GERMLINE_V_CALL, ISOTYPE) %>% # filter(str_detect(GERMLINE_V_CALL, "4-34")) %>% # filter(ISOTYPE == "IgM") %>% # group_by(STATUS) %>% # summarise( # count = n() # ) %>% # mutate( # proportion = # ifelse(STATUS == 'HC', count/HC_total*100, # ifelse(STATUS == 'SLE', count/SLE_total*100, NA # )) # ) # # ggplot(db)+ # aes( x = STATUS, y = proportion, fill = STATUS) + # geom_col()+ # scale_fill_manual(name="Status", values=cbPalette) + # tmp_theme + # ggtitle('VH4-34 percentage') # #View(db) ``` ```{r} db % select(STATUS, GERMLINE_V_CALL, ISOTYPE) %>% filter(str_detect(GERMLINE_V_CALL, "4-34")) %>% group_by(STATUS, ISOTYPE) %>% summarise( count = n() ) %>% mutate( proportion = ifelse(STATUS == 'HC', count/HC_total*100, ifelse(STATUS == 'SLE', count/SLE_total*100, NA )) ) ggplot(db)+ aes( x = STATUS, y = proportion, fill = STATUS) + geom_col()+ scale_fill_manual(name="Status", values=cbPalette) + tmp_theme + ggtitle('VH4-34 percentage') + facet_wrap( .~ ISOTYPE) # #View(db) ``` # Compare clonal diversity between HC and SLE ## clonal abundance curve ```{r} db % filter(ISOTYPE == "IgM") # Compare diversity curve across values in the "STATUS" column # q ranges from 0 (min_q=0) to 4 (max_q=4) in 0.05 incriments (step_q=0.05) # A 95% confidence interval will be calculated (ci=0.95) # 200 resampling realizations are performed (nboot=200) sample_curve % filter(ISOTYPE == "IgD") # Compare diversity curve across values in the "STATUS" column # q ranges from 0 (min_q=0) to 4 (max_q=4) in 0.05 incriments (step_q=0.05) # A 95% confidence interval will be calculated (ci=0.95) # 200 resampling realizations are performed (nboot=200) sample_curve % filter(ISOTYPE == "IgG") # Compare diversity curve across values in the "STATUS" column # q ranges from 0 (min_q=0) to 4 (max_q=4) in 0.05 incriments (step_q=0.05) # A 95% confidence interval will be calculated (ci=0.95) # 200 resampling realizations are performed (nboot=200) sample_curve % filter(ISOTYPE == "IgA") # Compare diversity curve across values in the "STATUS" column # q ranges from 0 (min_q=0) to 4 (max_q=4) in 0.05 incriments (step_q=0.05) # A 95% confidence interval will be calculated (ci=0.95) # 200 resampling realizations are performed (nboot=200) sample_curve

尧小飞/Tipton_2015_analysis

简介

发行版

贡献者

近期动态

尧小飞/Tipton_2015_analysis .gitee-modal { width: 500px !important; }

简介

发行版

贡献者

近期动态

搜索帮助

尧小飞/Tipton_2015_analysis