title |
author |
date |
output |
Tipton_2015_analysis_part2_v01-01 |
William Guesdon |
22/01/2020 |
html_document |
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r warning=FALSE, message=FALSE}
################
# Load libraries
################
library(tidyverse)
library(alakazam)
library(shazam)
library(cowplot)
library(rstatix)
library(cowplot)
library(ggpubr)
```
```{r echo=FALSE, warning=FALSE, message=FALSE, include=FALSE}
#####################
#Set up ggplot theme
####################
tmp_theme %
# mutate(ISOTYPE =
# ifelse(str_detect(SEQUENCE_INPUT, fixed('GGGAGTGCATCCGCCCC', ignore_case=TRUE)), "IgM",
# ifelse(str_detect(SEQUENCE_INPUT, fixed('GCACCCACCAAGGCTCC', ignore_case=TRUE)), "IgD",
# ifelse(str_detect(SEQUENCE_INPUT, fixed('GCATCCCCGACCAGCCC', ignore_case=TRUE)), "IgA",
# ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCCACCAAGGGCCC', ignore_case=TRUE)), "IgG",
# ifelse(str_detect(SEQUENCE_INPUT, fixed('GCTTCCACCAAGGGCCC', ignore_case=TRUE)), "IgG",
# ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCCACACAGAGCCC', ignore_case=TRUE)), "IgE", NA
# ))))))
# )
```
```{r}
# db %
# mutate(ISOTYPE =
# ifelse(str_detect(SEQUENCE_INPUT, fixed('CCTTTTCCCCCTCGTCTCCTG', ignore_case=TRUE)), "IgM",
# ifelse(str_detect(SEQUENCE_INPUT, fixed('GCACCCACCAAGGCTCC', ignore_case=TRUE)), "IgD",
# ifelse(str_detect(SEQUENCE_INPUT, fixed('CGACCAGCCCCAAGGTCTTC', ignore_case=TRUE)), "IgA",
# ifelse(str_detect(SEQUENCE_INPUT, fixed('TCCACCAAGGGCCCATCGG', ignore_case=TRUE)), "IgG",
# ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCCACACAGAGCCC', ignore_case=TRUE)), "IgE", NA
# )))))
# )
```
Issue with determining the isotype. Try to reduce the signature in case the problem is linked to shorted read.
The issue is that some isotype could be not be correctly identified.
```{r}
db %
mutate(ISOTYPE =
ifelse(str_detect(SEQUENCE_INPUT, fixed('GGGAGT', ignore_case=TRUE)), "IgM",
ifelse(str_detect(SEQUENCE_INPUT, fixed('GCACCC', ignore_case=TRUE)), "IgD",
ifelse(str_detect(SEQUENCE_INPUT, fixed('GCATCC', ignore_case=TRUE)), "IgA",
ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCC', ignore_case=TRUE)), "IgG",
ifelse(str_detect(SEQUENCE_INPUT, fixed('GCTTCC', ignore_case=TRUE)), "IgG",
ifelse(str_detect(SEQUENCE_INPUT, fixed('GCCTCC', ignore_case=TRUE)), "IgE", NA
))))))
)
all_samples_isotype %
filter(!is.na(ISOTYPE)) %>%
filter(!is.na(D_CALL))
nrow(db)
#View(db)
```
# Compare HC vs SLE IgM sequences
```{r}
db %>%
group_by(STATUS, ISOTYPE) %>%
count() %>%
knitr::kable()
```
```{r message=FALSE}
directory %
filter(STATUS == "HC")
db_SLE %
filter(STATUS == "SLE")
HC %
tukey_hsd() %>%
mutate(variable = "MU_FREQ") %>%
select(variable, group1, group2, p.adj, p.adj.signif) %>%
arrange(p.adj)
write_csv(stat, "Output/MU_FREQ_stat.csv")
#knitr::kable(stat)
```
# Physico-chemical properties analysis
```{r}
db_props %
group_by(STATUS, ISOTYPE) %>%
select(
STATUS,
ISOTYPE,
CDR3_AA_LENGTH,
CDR3_AA_GRAVY,
CDR3_AA_BASIC,
CDR3_AA_ACIDIC,
CDR3_AA_AROMATIC,
CDR3_AA_ALIPHATIC,
CDR3_AA_POLARITY
)
write_csv(db_props_summary, "Output/db_props_summary.csv")
#View(db_props_summary)
```
```{r message=FALSE}
#https://stackoverflow.com/questions/29678435/how-to-pass-dynamic-column-names-in-dplyr-into-custom-function
# https://stackoverflow.com/questions/29678435/how-to-pass-dynamic-column-names-in-dplyr-into-custom-function
# https://stackoverflow.com/questions/38511743/adding-missing-grouping-variables-message-in-dplyr-in-r
#View(db_props)
selected_variables %
select(STATUS, ISOTYPE, !!as.name(i)) %>%
group_by(STATUS, ISOTYPE) %>%
summarise(
mean = mean(!!as.name(i)),
se = sd(!!as.name(i))/sqrt(n())) %>%
ggplot( aes(x=STATUS, y=mean, fill = STATUS))+
geom_bar(stat="identity",
colour="black",
position=position_dodge(0.9)) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se),
position=position_dodge(0.9), width = 0.2)+
scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) +
theme(text = element_text(size=20))+
ylab(i)+
tmp_theme +
facet_wrap( .~ ISOTYPE)
ggsave(g1, filename = paste0("Output/", i, ".png"))
#plot(g1)
}
db_props % filter(ISOTYPE == "IgM")
for (i in selected_variables){
#print(i)
g1 %
select(STATUS, ISOTYPE, !!as.name(i)) %>%
group_by(STATUS) %>%
summarise(
mean = mean(!!as.name(i)),
se = sd(!!as.name(i))/sqrt(n())) %>%
ggplot( aes(x=STATUS, y=mean, fill = STATUS))+
geom_bar(stat="identity",
colour="black",
position=position_dodge(0.9)) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se),
position=position_dodge(0.9), width = 0.2)+
scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) +
theme(text = element_text(size=20))+
ylab(i)+
tmp_theme
ggsave(g1, filename = paste0("Output/IgM/", i, ".png"))
#plot(g1)
}
db_props % filter(ISOTYPE == "IgD")
for (i in selected_variables){
#print(i)
g1 %
select(STATUS, ISOTYPE, !!as.name(i)) %>%
group_by(STATUS) %>%
summarise(
mean = mean(!!as.name(i)),
se = sd(!!as.name(i))/sqrt(n())) %>%
ggplot( aes(x=STATUS, y=mean, fill = STATUS))+
geom_bar(stat="identity",
colour="black",
position=position_dodge(0.9)) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se),
position=position_dodge(0.9), width = 0.2)+
scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) +
theme(text = element_text(size=20))+
ylab(i)+
tmp_theme
ggsave(g1, filename = paste0("Output/IgD/", i, ".png"))
#plot(g1)
}
db_props % filter(ISOTYPE == "IgG")
for (i in selected_variables){
#print(i)
g1 %
select(STATUS, ISOTYPE, !!as.name(i)) %>%
group_by(STATUS) %>%
summarise(
mean = mean(!!as.name(i)),
se = sd(!!as.name(i))/sqrt(n())) %>%
ggplot( aes(x=STATUS, y=mean, fill = STATUS))+
geom_bar(stat="identity",
colour="black",
position=position_dodge(0.9)) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se),
position=position_dodge(0.9), width = 0.2)+
scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) +
theme(text = element_text(size=20))+
ylab(i)+
tmp_theme
ggsave(g1, filename = paste0("Output/IgG/", i, ".png"))
#plot(g1)
}
db_props % filter(ISOTYPE == "IgA")
for (i in selected_variables){
#print(i)
g1 %
select(STATUS, ISOTYPE, !!as.name(i)) %>%
group_by(STATUS) %>%
summarise(
mean = mean(!!as.name(i)),
se = sd(!!as.name(i))/sqrt(n())) %>%
ggplot( aes(x=STATUS, y=mean, fill = STATUS))+
geom_bar(stat="identity",
colour="black",
position=position_dodge(0.9)) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se),
position=position_dodge(0.9), width = 0.2)+
scale_fill_manual(name="STATUS", values=cbPalette, guide=FALSE) +
theme(text = element_text(size=20))+
ylab(i)+
tmp_theme
ggsave(g1, filename = paste0("Output/IgA/", i, ".png"))
#plot(g1)
}
```
# Compare V gene usage between HC and SLE
```{r message=FALSE}
# D1D2_IGH_analysis_v01-01
# Quantify V family clonal usage by sample and isotype
db_props %
group_by(STATUS, ISOTYPE) %>%
count() %>%
knitr::kable()
```
```{r}
iso_count %
group_by(STATUS, ISOTYPE) %>%
summarise(
count = n()
)
iso_count %>%
knitr::kable()
```
```{r}
HC_IgA %
filter(STATUS == 'HC') %>%
filter(ISOTYPE == 'IgA')
#HC_IgA$count
HC_IgD %
filter(STATUS == 'HC') %>%
filter(ISOTYPE == 'IgD')
#HC_IgD$count
HC_IgG %
filter(STATUS == 'HC') %>%
filter(ISOTYPE == 'IgG')
#HC_IgG$count
HC_IgM %
filter(STATUS == 'HC') %>%
filter(ISOTYPE == 'IgM')
#HC_IgM$count
HC_total %
filter(STATUS == 'SLE') %>%
filter(ISOTYPE == 'IgA')
#SLE_IgA$count
SLE_IgD %
filter(STATUS == 'SLE') %>%
filter(ISOTYPE == 'IgD')
#SLE_IgD$count
SLE_IgG %
filter(STATUS == 'SLE') %>%
filter(ISOTYPE == 'IgG')
#SLE_IgG$count
SLE_IgM %
filter(STATUS == 'SLE') %>%
filter(ISOTYPE == 'IgM')
#SLE_IgM$count
SLE_total %
select(STATUS, GERMLINE_V_CALL) %>%
filter( str_detect(GERMLINE_V_CALL, "4-34")) %>%
group_by(STATUS) %>%
summarise(
count = n()
) %>%
mutate(
proportion =
ifelse(STATUS == 'HC', count/HC_total*100,
ifelse(STATUS == 'SLE', count/SLE_total*100, NA
))
)
ggplot(db)+
aes( x = STATUS, y = proportion, fill = STATUS) +
geom_col()+
scale_fill_manual(name="Status", values=cbPalette) +
tmp_theme +
ggtitle('VH4-34 percentage')
#View(db)
```
```{r}
# db %
# select(STATUS, GERMLINE_V_CALL, ISOTYPE) %>%
# filter(str_detect(GERMLINE_V_CALL, "4-34")) %>%
# filter(ISOTYPE == "IgM") %>%
# group_by(STATUS) %>%
# summarise(
# count = n()
# ) %>%
# mutate(
# proportion =
# ifelse(STATUS == 'HC', count/HC_total*100,
# ifelse(STATUS == 'SLE', count/SLE_total*100, NA
# ))
# )
#
# ggplot(db)+
# aes( x = STATUS, y = proportion, fill = STATUS) +
# geom_col()+
# scale_fill_manual(name="Status", values=cbPalette) +
# tmp_theme +
# ggtitle('VH4-34 percentage')
#
#View(db)
```
```{r}
db %
select(STATUS, GERMLINE_V_CALL, ISOTYPE) %>%
filter(str_detect(GERMLINE_V_CALL, "4-34")) %>%
group_by(STATUS, ISOTYPE) %>%
summarise(
count = n()
) %>%
mutate(
proportion =
ifelse(STATUS == 'HC', count/HC_total*100,
ifelse(STATUS == 'SLE', count/SLE_total*100, NA
))
)
ggplot(db)+
aes( x = STATUS, y = proportion, fill = STATUS) +
geom_col()+
scale_fill_manual(name="Status", values=cbPalette) +
tmp_theme +
ggtitle('VH4-34 percentage') +
facet_wrap( .~ ISOTYPE)
# #View(db)
```
# Compare clonal diversity between HC and SLE
## clonal abundance curve
```{r}
db %
filter(ISOTYPE == "IgM")
# Compare diversity curve across values in the "STATUS" column
# q ranges from 0 (min_q=0) to 4 (max_q=4) in 0.05 incriments (step_q=0.05)
# A 95% confidence interval will be calculated (ci=0.95)
# 200 resampling realizations are performed (nboot=200)
sample_curve %
filter(ISOTYPE == "IgD")
# Compare diversity curve across values in the "STATUS" column
# q ranges from 0 (min_q=0) to 4 (max_q=4) in 0.05 incriments (step_q=0.05)
# A 95% confidence interval will be calculated (ci=0.95)
# 200 resampling realizations are performed (nboot=200)
sample_curve %
filter(ISOTYPE == "IgG")
# Compare diversity curve across values in the "STATUS" column
# q ranges from 0 (min_q=0) to 4 (max_q=4) in 0.05 incriments (step_q=0.05)
# A 95% confidence interval will be calculated (ci=0.95)
# 200 resampling realizations are performed (nboot=200)
sample_curve %
filter(ISOTYPE == "IgA")
# Compare diversity curve across values in the "STATUS" column
# q ranges from 0 (min_q=0) to 4 (max_q=4) in 0.05 incriments (step_q=0.05)
# A 95% confidence interval will be calculated (ci=0.95)
# 200 resampling realizations are performed (nboot=200)
sample_curve