"""Wasp census analysis (wasp_census.py).

Builds a wasp-to-wasp interaction matrix and a per-wasp summary table from
"wasp_census_2019.csv", saves both as CSV files, and plots the interaction
matrix as a heatmap.
"""
import numpy as np
import pandas as pd
import plotly.offline as pyo
import plotly.graph_objs as go
def unique_wasps(census_df):
    """Return the IDs of every wasp in the census.

    Each non-empty cell of ``census_df`` is read as a comma-separated string of
    wasp IDs (for example ``"YYYY, WWWW, GGGG"``). Spaces are removed before
    the string is split on commas.

    Args:
        census_df: Census data frame whose cells are ID strings or NaN.

    Returns:
        A set containing each distinct wasp ID string found in the census.
    """
    wasps = set()
    # DataFrame.iteritems() was removed in pandas 2.0; items() is the supported
    # way to walk the columns.
    for _, nests in census_df.items():
        for wasps_on_nest in nests:
            # When a nest is empty pandas represents it as NaN
            if pd.isna(wasps_on_nest):
                continue
            ids = wasps_on_nest.replace(" ", "").split(",")
            # Skip the empty tokens left behind by stray or trailing commas so
            # that "" is never reported as a wasp
            wasps.update(wasp for wasp in ids if wasp)
    return wasps
def wasp_interaction_count(census_df, wasp_1, wasp_2):
    """Return the number of times wasp_1 and wasp_2 are on the same nest.

    Every non-empty cell of ``census_df`` is parsed as a comma-separated list of
    wasp IDs, and the cell is counted when both IDs appear in that list.

    Args:
        census_df: Census data frame whose cells are ID strings or NaN.
        wasp_1: ID of the first wasp.
        wasp_2: ID of the second wasp.

    Returns:
        The number of cells listing both wasps; 0 when both IDs are the same.
    """
    # If wasp_1 and wasp_2 are the same wasp then its interaction count is 0
    if wasp_1 == wasp_2:
        return 0

    pair = {wasp_1, wasp_2}
    shared_nests = 0
    for _, nests in census_df.items():
        for wasps_on_nest in nests:
            # When a nest is empty pandas represents it as NaN
            if pd.isna(wasps_on_nest):
                continue
            # Compare against whole IDs rather than the raw string: a substring
            # test would let "YY" match a nest that only holds "YYYY"
            if pair.issubset(wasps_on_nest.replace(" ", "").split(",")):
                shared_nests += 1
    return shared_nests
def create_interaction_df(census_df):
    """Return a wasp interaction data frame using wasp census data.

    The result is a square, symmetric matrix whose row and column labels are
    the wasp IDs in sorted order. Element ``[a, b]`` is the number of census
    cells that list both wasp ``a`` and wasp ``b``; the diagonal is 0.

    Args:
        census_df: Census data frame whose cells are comma-separated wasp ID
            strings or NaN.

    Returns:
        An integer data frame indexed by wasp ID on both axes.
    """
    from itertools import combinations

    # Parse every non-empty cell once into the set of wasp IDs it lists
    sightings = []
    for _, nests in census_df.items():
        for wasps_on_nest in nests:
            # When a nest is empty pandas represents it as NaN
            if pd.isna(wasps_on_nest):
                continue
            ids = {wasp for wasp in wasps_on_nest.replace(" ", "").split(",") if wasp}
            if ids:
                sightings.append(ids)

    # Sort the IDs so the row/column order is the same on every run
    wasps = sorted(set().union(*sightings))
    position = {wasp: i for i, wasp in enumerate(wasps)}

    # Count each pair once per cell instead of rescanning the whole census for
    # every possible pairing; the matrix is symmetric so both halves are filled
    counts = np.zeros((len(wasps), len(wasps)), dtype=int)
    for ids in sightings:
        for wasp_1, wasp_2 in combinations(ids, 2):
            row, col = position[wasp_1], position[wasp_2]
            counts[row, col] += 1
            counts[col, row] += 1

    return pd.DataFrame(counts, index=wasps, columns=wasps)
def save_interaction_df(interaction_df):
    """Write the interaction data frame to "interaction_matrix.csv"."""
    output_path = "interaction_matrix.csv"
    interaction_df.to_csv(output_path)
def plot_interaction_df(interaction_df):
    """Plot the interaction data frame as a heatmap.

    The figure is handed to plotly's offline plotter with the output file name
    "interaction-matrix-heatmap.html".
    """
    row_labels = interaction_df.index
    column_labels = interaction_df.columns

    # One array of cell values per row, in index order
    cell_rows = []
    for label in row_labels:
        cell_rows.append(interaction_df.loc[label, :].values)

    heatmap = go.Heatmap(z=cell_rows, x=column_labels, y=row_labels)
    figure = go.Figure(data=heatmap)
    pyo.plot(figure, filename="interaction-matrix-heatmap.html")
def days_seen_count(census_df, wasp):
    """Return the number of days the wasp was seen.

    A day (one column of ``census_df``) counts once if the wasp's ID is listed
    in at least one non-empty cell of that column.

    Args:
        census_df: Census data frame whose cells are comma-separated wasp ID
            strings or NaN.
        wasp: ID of the wasp to look for.

    Returns:
        The number of columns in which the wasp appears.
    """
    days_seen = 0
    # Iterate over the dates, then over the nests recorded on each date
    for _, nests in census_df.items():
        for wasps_on_nest in nests:
            # When a nest is empty pandas represents it as NaN
            if pd.isna(wasps_on_nest):
                continue
            # Match whole IDs only: a substring test would count "YY" as
            # present on a nest that only holds "YYYY"
            if wasp in wasps_on_nest.replace(" ", "").split(","):
                days_seen += 1
                # One sighting is enough; don't count the same day twice
                break
    return days_seen
def nests_visited_count(census_df, wasp):
    """Return the number of nests the wasp visited.

    A nest (one row of ``census_df``) counts once if the wasp's ID is listed in
    at least one non-empty cell of that row.

    Args:
        census_df: Census data frame whose cells are comma-separated wasp ID
            strings or NaN.
        wasp: ID of the wasp to look for.

    Returns:
        The number of rows in which the wasp appears.
    """
    nests_visited = 0
    # Iterate over the nests, then over the dates recorded for each nest
    for sightings in census_df.itertuples(index=False, name=None):
        for wasps_on_nest in sightings:
            # When a nest is empty pandas represents it as NaN
            if pd.isna(wasps_on_nest):
                continue
            # Match whole IDs only: a substring test would count "YY" as
            # present on a nest that only holds "YYYY"
            if wasp in wasps_on_nest.replace(" ", "").split(","):
                nests_visited += 1
                # After we've found the wasp on this nest, stop scanning its
                # dates so the nest is not counted more than once
                break
    return nests_visited
def wasp_partner_count(census_df, wasp):
    """Return the number of other wasps the wasp has shared a nest with.

    Args:
        census_df: Census data frame whose cells are comma-separated wasp ID
            strings or NaN.
        wasp: ID of the wasp whose partners are counted.

    Returns:
        The number of distinct other wasp IDs listed in any cell that also
        lists ``wasp``.
    """
    partners = set()
    for _, nests in census_df.items():
        for wasps_on_nest in nests:
            # When a nest is empty pandas represents it as NaN
            if pd.isna(wasps_on_nest):
                continue
            # Split the string of wasps on the nest "YYYY, WWWW, GGGG" into the
            # set of IDs {'YYYY', 'WWWW', 'GGGG'}, ignoring empty tokens left by
            # stray commas
            ids = {w for w in wasps_on_nest.replace(" ", "").split(",") if w}
            # Match whole IDs only: a substring test would treat everyone on a
            # "YYYY" nest as a partner of "YY"
            if wasp in ids:
                partners |= ids
    # The wasp is not its own partner
    partners.discard(wasp)
    return len(partners)
def create_wasp_summary_df(census_df):
    """Return a wasp summary data frame using wasp census data.

    The result has one row per wasp ID (in sorted order) and the columns
    "Days Seen", "Nests Visited", and "Partners".

    Args:
        census_df: Census data frame whose cells are comma-separated wasp ID
            strings or NaN.

    Returns:
        A data frame indexed by wasp ID holding the three counts for each wasp.
    """
    # Sort the IDs so the row order is the same on every run instead of
    # following set iteration order
    wasps = sorted(unique_wasps(census_df))

    # Build each column in one go rather than filling an empty frame cell by
    # cell, so the counts are stored as integers
    return pd.DataFrame(
        {
            "Days Seen": [days_seen_count(census_df, wasp) for wasp in wasps],
            "Nests Visited": [nests_visited_count(census_df, wasp) for wasp in wasps],
            "Partners": [wasp_partner_count(census_df, wasp) for wasp in wasps],
        },
        index=wasps,
        columns=["Days Seen", "Nests Visited", "Partners"],
    )
def save_summary_df(summary_df):
    """Write the wasp summary data frame to "wasp_summary_matrix.csv"."""
    output_path = "wasp_summary_matrix.csv"
    summary_df.to_csv(output_path)
if __name__ == "__main__":
    # Load the census, using the first CSV column as the row index
    census = pd.read_csv("wasp_census_2019.csv", index_col=0)

    # Pairwise interaction matrix: saved as CSV, then plotted as a heatmap
    # Note: building the matrix can take a long time
    interactions = create_interaction_df(census)
    save_interaction_df(interactions)
    plot_interaction_df(interactions)

    # Per-wasp summary table, saved as CSV
    summary = create_wasp_summary_df(census)
    save_summary_df(summary)