Commit 84a18d54 authored by Uladzislava KHAURATOVICH's avatar Uladzislava KHAURATOVICH 💬
Browse files

Update full_chromosome-level_assembly_analysis.md

parent 88de820c
......@@ -395,7 +395,7 @@ filtered for
-got rid of NAs.
-not all sites heterozygous/homozygous.
-i lest all the columns.
-i removed grep -v '\W2\W' -\_O-O_/- who knows what it is...
-i removed grep -v '\W2\W' -\_O-O_/- who knows what it is...??
```
cat head_Gk1_Gs1_f_m_filtered2.vcf.simple | grep -v 'NA' | awk '($10 != "0" && $11 == "0")' | awk '{print $1, $2, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31}' | grep -v '\W2\W' | awk '( ($3+$4+$5+$6+$7+$8+$9+$10+$11+$12+$13+$14+$15+$16+$17+$18+$19+$20+$21+$22)>4 && ($3+$4+$5+$6+$7+$8+$9+$10+$11+$12+$13+$14+$15+$16+$17+$18+$19+$20+$21+$22)<16)' > head_Gk1_Gs1_f_m_filtered2.vcf.simple.filtered
......@@ -403,13 +403,65 @@ cat head_Gk1_Gs1_f_m_filtered2.vcf.simple | grep -v 'NA' | awk '($10 != "0" && $
scp -o ProxyJump=ukhaurat@login.ist.ac.at ukhaurat@bea81:/nfs/scistore03/vicosgrp/ukhaurat/full_assembly_analysis/head_Gk1_Gs1_f_m_filtered2.vcf.simple.filtered /Users/ukhaurat/Documents/full_assembly_analysis
```
### 10a. The heatmap for 10% of the markers (random sample) from each chromosome.
```
'R_MAX_VSIZE'=32000000000
setwd("/Users/ukhaurat/Documents/full_assembly_analysis")
library(dplyr)
library(stringr)
df<-read.table("head_Gk1_Gs1_f_m_filtered2.vcf.simple.filtered", header = F, sep = " ", col.names = c("Chromosome", "Coordinate", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "m1", "m2", "m3", "m4", "m5", "m6", "m7", "m8", "m9", "m10"))
df <- mutate(df, add_name = if_else(str_detect(`Chromosome`, "scaffold_"), "NoLG", Chromosome))
df <- df[!grepl("scaffold",df$Chromosome),]
df$add_name <- gsub("^.{0,2}", "", df$add_name)
df$add_name <- as.numeric(df$add_name)
#make a toy table
df_sample <- df %>% group_by(add_name) %>%
sample_frac(size = 0.1, replace = TRUE) %>%
arrange(add_name, by_group = TRUE)
library(tidyr)
#this part just to create coordinate lables for the ggplot
df_sample$num <- seq(1,3864,1)
breakss <- as.vector(as.numeric(seq(1,3864,50)))
b<-subset(df_sample,df_sample$num %in% breakss)
#b<-unite(b, "Label", Chromosome, Coordinate, sep = "_", remove = TRUE)
labelss <- as.vector(as.character(b$Chromosome))
xxx<-df_sample[,3:22]
#manhattan distance for x1,y1 and x2,y2 is |x1-x2|+|y1-y2|, which I think is what we want
distobject<-dist(xxx, method = "manhattan")
```
grep 'scaffold' head_Gk1_Gs1_f_m_filtered2.vcf.simple.filtered > head_Gk1_Gs1_f_m_filtered2.vcf.simple.filtered_scaffoldsplus
```
library(usethis)
library(devtools)
library(spaa)
distdf <- spaa::dist2list(distobject)
write.table(distdf, file = "/Users/ukhaurat/Documents/full_assembly_analysis/distdf_0.1_NoNoLG_2", append = FALSE, quote = TRUE, sep = " ", eol = "\n", na = "NA", dec = ".", row.names = TRUE, col.names = TRUE)
#distdf<-read.table("distdf_0.1")
distdf$row <- as.numeric(distdf$row)
distdf$col <- as.numeric(distdf$col)
scp -o ProxyJump=ukhaurat@login.ist.ac.at /Users/ukhaurat/Documents/full_assembly_analysis/heatmap_genome.R ukhaurat@bea81:/nfs/scistore03/vicosgrp/ukhaurat/full_assembly_analysis/
library(ggplot2)
library(hrbrthemes)
library(scico)
sbatch R_heatmap.sh
Submitted batch job 24697696
svg("heatmap_genome_0.1.svg")
ggplot(distdf, aes(row, col, fill= value)) +
geom_tile(aes(fill = value), colour = "transparent") +
scale_fill_gradient(trans = "log", low = "dark green", high = "white", breaks = c(0, 2, 5, 10, 20)) +
labs(title = "Distance between markers genome 0.1 sample") +
scale_x_continuous(breaks = breakss, labels = labelss) +
scale_y_continuous(breaks = breakss, labels = labelss) +
theme(axis.text.x= element_text(size=6, angle=45)) +
theme(axis.text.y = element_text(size=6))
# Close the graphics device
dev.off()
```
![](images/Heatmap_0.1_genome_disent_labels_NoNoLGs.png)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment