Commit 4682416e authored by Uladzislava KHAURATOVICH's avatar Uladzislava KHAURATOVICH 💬
Browse files

Update !full_chromosome-level_assembly_analysis.md

parent 81574b04
......@@ -402,6 +402,7 @@ cat head_Gk1_Gs1_f_m_filtered2.vcf.simple | grep -v 'NA' | awk '($10 != "0" && $
scp -o ProxyJump=ukhaurat@login.ist.ac.at ukhaurat@bea81:/nfs/scistore03/vicosgrp/ukhaurat/full_assembly_analysis/head_Gk1_Gs1_f_m_filtered2.vcf.simple.filtered /Users/ukhaurat/Documents/full_assembly_analysis
```
### 10a. The heatmap for 10% of the markers (random sample) from each chromosome.
......@@ -464,4 +465,132 @@ ggplot(distdf, aes(row, col, fill= value)) +
# Close the graphics device
dev.off()
```
![](images/Heatmap_0.1_genome_disent_labels_NoNoLGs.png)
![](images/Heatmap_0.1_genome_disent_labels_NoNoLGs.png)
### 10b. The heatmap for the whole genome.
To calculate matrix for the whole genome more computational power is needed. Thus, I create a slurm script for the cluster
Slurm sript
```
#!/bin/bash
#
#SBATCH --job-name=heatmap
#SBATCH --output=STAR.%j.out
#SBATCH --error=STAR.%j.err
#SBATCH -c 12
#SBATCH --time=96:00:00
#SBATCH --mem=350G
#SBATCH --no-requeue
#SBATCH --partition=defaultp
#SBATCH --mail-type=ALL
#SBATCH --mail-user=slava.khovratovich@gmail.com
#SBATCH --export=NONE
unset SLURM_EXPORT_ENV
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
cd /nfs/scistore03/vicosgrp/ukhaurat/full_assembly_analysis/
module load R
Rscript heatmap_genome.R
```
R script
```
setwd("/nfs/scistore03/vicosgrp/ukhaurat/full_assembly_analysis/")
library(dplyr)
library(stringr)
df<-read.table("head_Gk1_Gs1_f_m_filtered2.vcf.simple.filtered", header = F, sep = " ", col.names = c("Chromosome", "Coordinate", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "m1", "m2", "m3", "m4", "m5", "m6", "m7", "m8", "m9", "m10"))
df <- mutate(df, add_name = if_else(str_detect(`Chromosome`, "scaffold_"), "NoLG", Chromosome))
library(tidyr)
#this part just to create coordinate lables for the ggplot
df$num <- seq(1,45573,1)
breakss <- as.vector(as.numeric(seq(1,45573,1000)))
b<-subset(df,df$num %in% breakss)
#b<-unite(b, "Label", Chromosome, Coordinate, sep = "_", remove = TRUE)
labelss <- as.vector(as.character(b$add_name))
xxx<-df[,3:22]
#manhattan distance for x1,y1 and x2,y2 is |x1-x2|+|y1-y2|, which I think is what we want
distobject<-dist(xxx, method = "manhattan")
library(usethis)
library(devtools)
library(spaa)
distdf <- spaa::dist2list(distobject)
write.table(distdf, file = "/nfs/scistore03/vicosgrp/ukhaurat/full_assembly_analysis/distdf", append = FALSE, quote = TRUE, sep = " ",
eol = "\n", na = "NA", dec = ".", row.names = TRUE,
col.names = TRUE)
write.csv(distdf, file = "/nfs/scistore03/vicosgrp/ukhaurat/full_assembly_analysis/distdf.csv")
distdf$row <- as.numeric(distdf$row)
distdf$col <- as.numeric(distdf$col)
library(ggplot2)
pdf("heatmap_genome.pdf")
svg("heatmap_genome.svg")
ggplot(distdf, aes(row, col, fill= value)) +
geom_tile() +
scale_fill_gradient(low="red", high="yellow") +
labs(title = "Distance between markers") +
scale_x_continuous(breaks = breakss, labels = labelss) +
scale_y_continuous(breaks = breakss, labels = labelss) +
theme(axis.text.x= element_text(size=6, angle=45)) +
theme(axis.text.y = element_text(size=6, angle=45))
# Close the graphics device
dev.off()
```
i cannot get the figure, however matrix is calculated :(
```
setwd("/nfs/scistore03/vicosgrp/ukhaurat/full_assembly_analysis/")
library(dplyr)
library(stringr)
df<-read.table("head_Gk1_Gs1_f_m_filtered2.vcf.simple.filtered", header = F, sep = " ", col.names = c("Chromosome", "Coordinate", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "m1", "m2", "m3", "m4", "m5", "m6", "m7", "m8", "m9", "m10"))
df <- mutate(df, add_name = if_else(str_detect(`Chromosome`, "scaffold_"), "NoLG", Chromosome))
library(tidyr)
#this part just to create coordinate lables for the ggplot
df$num <- seq(1,45573,1)
breakss <- as.vector(as.numeric(seq(1,45573,1000)))
b<-subset(df,df$num %in% breakss)
#b<-unite(b, "Label", Chromosome, Coordinate, sep = "_", remove = TRUE)
labelss <- as.vector(as.character(b$add_name))
library(usethis)
library(devtools)
library(spaa)
distdf<-read.table("/nfs/scistore03/vicosgrp/ukhaurat/full_assembly_analysis/distdf", header = T)
distdf$row <- as.numeric(distdf$row)
distdf$col <- as.numeric(distdf$col)
library(ggplot2)
svg("heatmap_genome.svg")
ggplot(distdf, aes(row, col, fill= value)) +
geom_tile() +
scale_fill_gradient(low="red", high="yellow") +
labs(title = "Distance between markers") +
scale_x_continuous(breaks = breakss, labels = labelss) +
scale_y_continuous(breaks = breakss, labels = labelss) +
theme(axis.text.x= element_text(size=6, angle=45)) +
theme(axis.text.y = element_text(size=6, angle=45))
dev.off()
sbatch R_heatmap_reduced.sh
Submitted batch job 24828046
```
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment