Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
IBD-Analysis
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Harald RINGBAUER
IBD-Analysis
Commits
631f551b
Commit
631f551b
authored
Sep 19, 2016
by
Harald RINGBAUER
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload new file
parent
994bd098
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
181 additions
and
0 deletions
+181
-0
POPRES-Analysis/loaddata.py
POPRES-Analysis/loaddata.py
+181
-0
No files found.
POPRES-Analysis/loaddata.py
0 → 100644
View file @
631f551b
'''
Created on Feb 4, 2016
@author: Harald
'''
import
numpy
as
np
import
simplekml
# To extract Google map files
import
matplotlib.pyplot
as
plt
from
geopy.distance
import
vincenty
# To compute great circle distance from coordinates
from
mpl_toolkits.basemap
import
Basemap
# my_proj = Proj(proj='utm',zone="31T",ellps='WGS84',units='m') # Prepare the Longitude/Latidude to Easting/Northing transformation
style
=
simplekml
.
StyleMap
()
# Create a style map for highlight style and normal style FOR UNCORRECTED
style
.
normalstyle
.
labelstyle
.
scale
=
0
style
.
normalstyle
.
iconstyle
.
icon
.
href
=
"http://maps.google.com/mapfiles/kml/pushpin/red-pushpin.png"
# grn
style
.
highlightstyle
.
labelstyle
.
scale
=
1
style
.
highlightstyle
.
iconstyle
.
icon
.
href
=
'http://maps.google.com/mapfiles/kml/pushpin/blue-pushpin.png'
######################################################
class
LoadData
(
object
):
'''
Loads and pre-processes the data. Contains methods for data Cleaning.
'''
populations
=
[]
# Contains table of IDs and populations
blocks
=
[]
# Contains table of all shared blocks
coordinates
=
[]
# Matrix saving the coordinates of a country
countries_oi
=
[]
# countries_oi = [] # List of countries of interest
pw_distances
=
[]
# Pairwise distance Matrix
pw_blocksharing
=
[]
# Matrix of block sharing between countries
nr_individuals
=
[]
def
__init__
(
self
,
pop_path
,
ibd_list_path
,
geo_path
,
min_block_length
,
countries_oi
):
'''Runs all the stuff to bring data in shape '''
self
.
countries_oi
=
countries_oi
print
(
"Loading data..."
)
# First load all the necessary files.
self
.
populations
=
np
.
loadtxt
(
pop_path
,
dtype
=
'string'
,
delimiter
=
','
)[
1
:,
:]
# Load the cross_reference list
self
.
blocks
=
np
.
loadtxt
(
ibd_list_path
,
dtype
=
'string'
,
delimiter
=
','
)[
1
:,
:]
self
.
coordinates
=
np
.
loadtxt
(
geo_path
,
dtype
=
'string'
,
delimiter
=
','
)[
1
:,
:]
self
.
populations
[:,
1
]
=
[
countrie
.
replace
(
"
\"
"
,
""
)
for
countrie
in
self
.
populations
[:,
1
]]
# Replace double "" in populations
# self.countries_oi = list(set(self.populations[:, 1])) # Set countries of interest to everything
# self.extract_kml(self.coordinates[:, 0], self.coordinates[:, 1].astype('float'), self.coordinates[:, 2].astype('float'))
print
(
self
.
countries_oi
)
print
(
"Total number of inds: %.1f"
%
len
(
self
.
populations
))
print
(
"Total number of blocks: %.1f"
%
len
(
self
.
blocks
[:,
0
]))
self
.
calculate_pw_dist
()
# Update important fields
self
.
calc_ind_nr
()
k
=
len
(
self
.
countries_oi
)
block_list
=
self
.
blocks
[(
self
.
blocks
[:,
3
].
astype
(
'float'
)
>
min_block_length
),
:]
# Only keep blocks with minimum length
# Replace individual numbers by their country of origin:
for
i
in
range
(
len
(
block_list
[:,
0
])):
ind1
=
np
.
where
(
self
.
populations
[:,
0
]
==
block_list
[
i
,
0
])[
0
][
0
]
ind2
=
np
.
where
(
self
.
populations
[:,
0
]
==
block_list
[
i
,
1
])[
0
][
0
]
block_list
[
i
,
0
]
=
self
.
populations
[
ind1
,
1
]
block_list
[
i
,
1
]
=
self
.
populations
[
ind2
,
1
]
# Generate the block sharing matrix
filler
=
np
.
frompyfunc
(
lambda
x
:
list
(),
1
,
1
)
a
=
np
.
empty
((
k
,
k
),
dtype
=
np
.
object
)
self
.
pw_blocksharing
=
filler
(
a
,
a
)
# Initialize everything to an empty list.
for
i
in
range
(
len
(
block_list
[:,
0
])):
ind1
=
np
.
where
(
self
.
countries_oi
==
block_list
[
i
,
0
])[
0
]
# First locate block1
ind2
=
np
.
where
(
self
.
countries_oi
==
block_list
[
i
,
1
])[
0
]
# Then the second block
if
not
((
len
(
ind1
)
>
0
)
and
(
len
(
ind2
)
>
0
)):
# Jump to next iteration if countries not in countrie_oi
continue
self
.
pw_blocksharing
[
max
(
ind1
[
0
],
ind2
[
0
]),
min
(
ind1
[
0
],
ind2
[
0
])].
append
(
float
(
block_list
[
i
,
3
]))
# Add to block sharing.
# print("Doing something!!!")
print
(
"Interesting block sharing: %.0f "
%
np
.
sum
([
len
(
a
[
i
,
j
])
for
i
in
range
(
k
)
for
j
in
range
(
k
)]))
# Print interesting block sharing
def
calculate_pw_dist
(
self
):
'''Calculate Pairwise Distances of countries of interest'''
country_found_list
=
[]
# List of countries which were found
lat_list
=
[]
long_list
=
[]
for
country
in
self
.
countries_oi
:
# For every country of interest
print
(
country
)
coord_ind
=
np
.
where
(
self
.
coordinates
[:,
0
]
==
country
)[
0
]
if
not
len
(
coord_ind
)
>
0
:
print
(
"Country %.s not found!"
%
country
)
continue
print
(
"Country %.s has index %i"
%
(
country
,
coord_ind
[
0
]))
country_found_list
.
append
(
country
)
# Append Country to the list of found countries
lat_list
.
append
(
self
.
coordinates
[
coord_ind
,
1
])
# Append Geographic coordinates
long_list
.
append
(
self
.
coordinates
[
coord_ind
,
2
])
print
(
country_found_list
)
self
.
countries_oi
=
np
.
array
(
country_found_list
)
# Only countries which are found are of interest
lat_list
=
np
.
array
(
lat_list
).
astype
(
'float'
)
long_list
=
np
.
array
(
long_list
).
astype
(
'float'
)
lat1_list
=
np
.
array
([
i
[
0
]
for
i
in
lat_list
])
# For kml extraction
long1_list
=
np
.
array
([
i
[
0
]
for
i
in
long_list
])
self
.
extract_kml
(
country_found_list
,
lat1_list
,
long1_list
)
self
.
make_mpl_map
(
lat1_list
,
long1_list
)
# Send data to Matplot Lib card function
l
=
len
(
self
.
countries_oi
)
dist_mat
=
np
.
zeros
((
l
,
l
))
for
i
in
range
(
0
,
l
):
# Iterate over all pairs of
for
j
in
range
(
0
,
i
):
dist_mat
[
i
,
j
]
=
self
.
calc_dist
(
lat_list
[
i
],
long_list
[
i
],
lat_list
[
j
],
long_list
[
j
])
self
.
pw_distances
=
dist_mat
# for i in range(0,l):
# for j in range(0,i):
# print("Distance between %s and %s is: %.1f km" % (self.countries_oi[i],self.countries_oi[j],dist_mat[i,j]/1000))
def
calc_dist
(
self
,
lat1
,
long1
,
lat2
,
long2
):
'''Calculates the pairwise distance between the given coordinates'''
coord1
=
(
lat1
,
long1
)
coord2
=
(
lat2
,
long2
)
return
vincenty
(
coord1
,
coord2
).
meters
/
1000.0
# Return distance of input points (in km)
def
calc_ind_nr
(
self
):
'''Generates the number of individuals per countrie_of_i entry'''
nr_inds
=
[]
for
country
in
self
.
countries_oi
:
nr_ind
=
np
.
sum
(
self
.
populations
[:,
1
]
==
country
)
# print("Country %s has %.0f inds" % (country, nr_ind))
nr_inds
.
append
(
nr_ind
)
self
.
nr_individuals
=
nr_inds
def
extract_kml
(
self
,
index
,
lat
,
lon
):
'''Extract Google maps file from lot long Values with name index'''
kml
=
simplekml
.
Kml
()
# Load the KML Creater
for
i
in
range
(
len
(
lat
)):
pnt
=
kml
.
newpoint
(
name
=
index
[
i
],
coords
=
[(
lon
[
i
],
lat
[
i
])])
# pnt.description = data[i, comm_ind]
pnt
.
stylemap
=
style
# pnt.altitudemode = 'absolute'
pnt
.
extrude
=
1
kml
.
save
(
"output.kml"
)
# Save the kml file
print
(
"GPS saved!"
)
def
make_mpl_map
(
self
,
lats
,
lons
):
'''Method that makes a map within matplotlib with points at lat, lon'''
m
=
Basemap
(
projection
=
'merc'
,
llcrnrlat
=
30
,
urcrnrlat
=
65
,
llcrnrlon
=
5
,
urcrnrlon
=
40
,
resolution
=
'i'
)
# m = Basemap(llcrnrlon=-10.5, llcrnrlat=35, urcrnrlon=4., urcrnrlat=44.,
# resolution='i', projection='merc', lat_0 = 39.5, lon_0 = -3.25)
# m.drawcountries(linewidth=0.1,color='w')
# m.drawmapboundary(fill_color='aqua')
m
.
drawcoastlines
()
m
.
drawcountries
(
linewidth
=
1
,
color
=
'k'
)
# m.drawcountries()
m
.
fillcontinents
(
color
=
'coral'
)
# Make the points
for
i
in
range
(
len
(
lons
)):
print
(
lats
[
i
])
print
(
lons
[
i
])
x
,
y
=
m
(
float
(
lons
[
i
]),
float
(
lats
[
i
]))
m
.
plot
(
x
,
y
,
'bo'
,
markersize
=
8
)
plt
.
show
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment