Commit 0ad33c81 authored by Uladzislava KHAURATOVICH's avatar Uladzislava KHAURATOVICH 💬
Browse files

Upload New File

parent 1044e001
---
author: "Julian Stopp & Saren Tascyian & Uladzislava Khauratovich"
date: <h4> 25 Fabrary 2021 </h4>
created: "2021-01-18"
updated: "2020-03-05"
output:
pdf_document:
highlight: default
always_allow_html: true
geometry: paperheight=6.5in,paperwidth=7in,margin=.5in
---
```{r Setup of Knitr & R, include = FALSE}
knitr::opts_chunk$set(echo = T, message = F, eval = T, fig.show = "markup", include = T, warning = F, results = "markup", tidy = T)
## Package Loading ##
Packages = c("dplyr" , "tidyr", "data.table" , "readxl", "ggplot2");
invisible(lapply(Packages, library, character.only = TRUE))
## Empty workspace ##
rm(list=ls())
```
### Julian's part
## Import Data
#It s just one file - we do not need the file name
```{r S Data Import I, message=F, warning=F}
setwd("D:/Heterozygosity_perfect_genome")
data_to_read = list.files(pattern = "\\.vcf_simple$",
recursive = F,
full.names = F)
Sdata1 <- data.table::rbindlist(
sapply(data_to_read, fread, simplify = FALSE),
use.names = FALSE)
```
```{r Dataexplorer}
# DataExplorer::create_report(Sdata)
```
### Name change
```{r}
Sdata <- Sdata1 %>%
rename(
Chromosome = 1,
FemaleState = 10,
MaleState = 11,
) %>%
mutate_at(
vars(Chromosome),
list(factor)
)
Sdata <- Sdata[-(2418541:2840638)]
```
### Saren's part
### Alternatively, without binning
```{r }
# Custom function by Saren
interpret = function(female, male){
state = rep(NA, length(female))
state[female == 1 & male == 1] = "HetMaintained"
state[(female == 1 & male == 0) | (female == 1 & male == 2)] = "LossOfHet"
state[(female == 0 & male == 1) | (female == 2 & male == 1)] = "GainOfHet"
state[(female == 0 & male == 2) | (female == 2 & male == 0)] = "SeqError"
return(state)
}
data = Sdata
# datatable if_else chaining
data2 = data %>% mutate(state = if_else(FemaleState == 1 & MaleState == 1, "HetMaintained",
if_else(FemaleState == 1 & MaleState == 0, "LossOfHet",
if_else(FemaleState == 1 & MaleState == 2, "LossOfHet",
if_else(FemaleState == 0 & MaleState == 1, "GainOfHet",
if_else(FemaleState == 2 & MaleState == 1, "GainOfHet",
if_else(FemaleState == 0 & MaleState == 2, "SeqError",
if_else(FemaleState == 2 & MaleState == 0, "SeqError",
"NA")))))))
)
data1 = data %>% mutate(state = interpret(FemaleState, MaleState))
ggplot(data1, aes(x = state, fill = state)) + geom_bar()
ggplot(data1, aes(x = Chromosome, fill = state)) + geom_bar(position="fill") + xlab("Chromosome") + ylab("Fraction of SNPs of each category") #fraction on each Chr
ggplot(data1 %>% filter(!is.na(Chromosome)), aes(x = Chromosome, fill = state)) + geom_bar() + xlab("LG number") + ylab("Number of SNPs of each category") # removed NAs to see absolute numbers
#ggplot(data1, aes(x = Chromosome, fill = state)) + geom_bar() # with NAs
```
### get sites with the lost heterozygosity Chr6
```{r}
LossOfHet_positions_LG6 <- data2 %>%
dplyr::filter(Chromosome == "LG6") %>%
dplyr::filter(state == "LossOfHet") %>%
droplevels()
#write.table(LossOfHet_positions_LG6, file = "LossOfHet_positions_LG6.txt", sep = " ", col.names = TRUE, row.names = FALSE, na = "NA")
write.csv(LossOfHet_positions_LG6, file = "LossOfHet_positions_LG6.csv", col.names = TRUE, row.names = FALSE, na = "NA")
```
### get all sites with the lost heterozygosity
```{r}
LossOfHet_all_positions <- data2 %>%
dplyr::filter(state == "LossOfHet") %>%
droplevels()
write.csv(LossOfHet_positions_LG6, file = "LossOfHet_all_positions.csv", col.names = TRUE, row.names = FALSE, na = "NA")
```
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment