Commit 59c7dcff authored by Marwan ELKREWI's avatar Marwan ELKREWI
Browse files

Delete FindZWSNPs_v2.pl

parent 11c04266
#!/usr/bin/perl
#10/11/2020 update: adapted the script for the new vcf created based on new male genome assembly
#24/06/2020 update: fixed bug that made it impossible to find Z-specific SNPs, as they are 0 in all individuals, but were selected after we filtered for min number of 1 = 5
#usage perl FindZWSNPs.pl simplified_vcf_file
#the simplified vcf file was produced with the command:
# cat head_filtered2_all.vcf | grep -v '^##' | perl -pi -e 's/:.*?\t/\t/gi' | perl -pi -e 's/:.*//gi' | perl -pi -e 's/\t/ /gi' | perl -pi -e 's/0\/0/0/gi' | perl -pi -e 's/1\/1/2/gi' | perl -pi -e 's/0\/1/1/gi' | perl -pi -e 's/\.\/\./NA/gi' > head_filtered2.vcf.simple
#the output is the file vcffile.ZWassign, which has the columns
#scaffold
#position
#grandmother GT (1 = het, 2 = hom)
#number of heterozygous F2 females
#number of heterozygous F2 males
#chromosomal assignment (0 is auto, 1 is a W-SNP [het in granmother], 2 is a ZW-SNP [homozygous in grandmother], 3 is Z-specific [hom. in grandmother and not passed on to F2])
print "Usage: perl FindZWSNPs_2.pl simplified_vcf_file\n";
print "Warning: check that the expected VCF columns match those you are using (currently GM, GF, 10 F2_f, 10 F2_m)\n";
print "Warning: currently keeps only SNPs found in 5 to 15 F2 individuals!\n";
#Updated: November 10th 2020
my $vcffile= $ARGV[0];
open(VCF, "$vcffile");
open (RESULTS, ">$vcffile.ZWassign");
###Give information on what the different columns are (count from 0)
###This example is for 10 columns of females, 10 columns of males, 2 granmothers, 2 granfathers (we ignore one of each grandparent)
###CHANGE IF VCF IS DIFFERENT!!!!
@female_columns = (2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
@male_columns = (12, 13, 14, 15, 16, 17, 18, 19, 20, 21);
$grannycolumn = 0;
$granpacolumn = 1;
while ($line = <VCF>)
{
#skip first line
next if $. == 1;
#split the vcf line into its descriptive components and the genotypes of males and females
(@vcf) = split(" ", $line);
$chrom = shift @vcf;
$POS = shift @vcf;
$ID = shift @vcf;
$REF = shift @vcf;
$ALT = shift @vcf;
$QUAL = shift @vcf;
$FILTER = shift @vcf;
$INFO = shift @vcf;
$FORMAT = shift @vcf;
my @females = @vcf[ @female_columns ];
my @males = @vcf[ @male_columns ];
my $granny = $vcf[$grannycolumn];
my $granpa = $vcf[$granpacolumn];
#create score value that needs to stay 0 for site to be kept
$score = 0;
###Filter bad sites or assign as Z-specific
#Kaz allele in grandmother (1 or 2) and sinica in grandfather (0, since he should be homozygous)
if ($granny eq "0" || $granny eq "NA") {$score=1;}
if ($granpa eq "1" || $granpa eq "2" || $granpa eq "NA") {$score=1;}
#F2 should not have NA or homozygous-kaz alleles
for (@females) {
if ($_ eq "2" || $_ eq "NA") { $score=1; }
}
#F2 should not have NA or homozygous-kaz alleles
for (@males) {
if ($_ eq "2" || $_ eq "NA") { $score=1; }
}
#F2 should have some 0s and some 1s, unless they are Z-specific
my $fcount = 0;
foreach (@females){
$fcount += $_;
}
my $mcount = 0;
foreach (@males){
$mcount += $_;
}
$totalcount = $fcount + $mcount;
#if the SNP is homozygous in grandmother, but not inherited by anybody, it is a Z-specific SNP (class 3)
if ($fcount == 0 && $mcount == 0 && $granny eq "2")
{
$assign = 3;
}
if ($totalcount<4 or $totalcount>15) {$score=1;}
#if the site passed filters, keep processing
if ($score == 0)
{
#assign site to ZW, Z, or Autosome
$assign = 0;
#if the SNP is in all F2 females but not males, and heterozygous in granmother then it is a W-SNP (class 1)
if ($fcount == 10 && $mcount == 0 && $granny eq "1")
{
$assign = 1;
}
#if the SNP is in all F2 females but not males, and homozygous in granmother then it is a ZW-SNP (class 2)
elsif ($fcount == 10 && $mcount == 0 && $granny eq "2")
{
$assign = 2;
}
print RESULTS "$chrom $POS $granny $fcount $mcount $assign\n";
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment