# health_facilities.py
import logging
from geopandas import read_file
from glob import glob
from os.path import join
from pandas import concat, DataFrame, merge, read_csv
from zipfile import ZipFile, BadZipFile
from hdx.data.dataset import Dataset
from hdx.utilities.downloader import DownloadError
from hdx.utilities.uuid import get_uuid
# Logger used by every method in this module. getLogger() is called without a
# name, which returns the root logger.
logger = logging.getLogger()
class HealthFacilities:
    """Count health facility points per administrative boundary.

    Facility points are read from per-country HDX datasets (named
    ``hotosm_<iso>_health_facilities`` unless overridden in the configuration),
    spatially joined to the boundaries, counted per ``ADM_PCODE`` and merged
    into an existing CSV resource.
    """

    def __init__(self, configuration, subnational_json, temp_folder):
        """Store the boundaries, working folder and dataset name overrides.

        Args:
            configuration: Mapping with an ``"hdx_inputs"`` entry. Its optional
                ``"dataset_exceptions"`` mapping (country code -> dataset name)
                replaces the default dataset name for that country.
            subnational_json: Boundary table. The methods read its
                ``alpha_3``, ``ADM_LEVEL``, ``ADM_PCODE`` and ``geometry``
                columns (presumably a GeoDataFrame - confirm against caller).
            temp_folder: Folder that receives downloads and extracted archives.
        """
        self.boundaries = subnational_json
        self.temp_folder = temp_folder
        self.exceptions = configuration["hdx_inputs"].get("dataset_exceptions", {})

    def find_read_resource(self, iso, dataset_name):
        """Download a dataset's zipped point shapefile and read it.

        Args:
            iso: Country code; only used to prefix log messages.
            dataset_name: Name of the HDX dataset to read.

        Returns:
            The result of ``read_file`` on the first ``.shp`` found in the
            archive, or ``None`` if the dataset, resource, download, archive
            or shapefile is missing or invalid (each failure is logged).
        """
        dataset = Dataset.read_from_hdx(dataset_name)
        if not dataset:
            logger.error(f"{iso}: Could not find dataset")
            return None

        # Only shapefile resources whose name mentions "points" are candidates.
        point_resources = [
            resource
            for resource in dataset.get_resources()
            if resource.get_file_type() == "shp" and "points" in resource["name"]
        ]
        if not point_resources:
            logger.error(f"{iso}: Could not find resource")
            return None

        try:
            _, resource_file = point_resources[0].download(folder=self.temp_folder)
        except DownloadError:
            logger.error(f"{iso}: Could not download resource")
            return None

        # Each archive is extracted into its own sub-folder of temp_folder
        # (get_uuid() presumably yields a unique name - confirm).
        extract_dir = join(self.temp_folder, get_uuid())
        try:
            with ZipFile(resource_file, "r") as archive:
                archive.extractall(extract_dir)
        except BadZipFile:
            logger.error(f"{iso}: Could not unzip file")
            return None

        shapefiles = glob(join(extract_dir, "**", "*.shp"), recursive=True)
        if not shapefiles:
            logger.error(f"{iso}: Did not find a shapefile in the zip")
            return None

        return read_file(shapefiles[0])

    def summarize_data(self, countries):
        """Count health facilities per boundary for the given countries.

        Countries whose facility layer cannot be read are skipped.

        Args:
            countries: Iterable of country codes matching the ``alpha_3``
                column of the boundaries.

        Returns:
            A tuple ``(summarized_data, updated_countries)``.
            ``summarized_data`` is a DataFrame with the columns ``ADM_PCODE``
            and ``Health_Facilities`` (empty, but with those columns, when
            nothing was processed). ``updated_countries`` maps each admin
            level to the list of country codes processed at that level.
        """
        count_frames = []
        updated_countries = dict()

        for iso in countries:
            dataset_name = self.exceptions.get(iso, f"hotosm_{iso.lower()}_health_facilities")
            health_shp_lyr = self.find_read_resource(iso, dataset_name)
            if health_shp_lyr is None:
                continue

            # Filter the boundaries once per country instead of once per level.
            country_boundaries = self.boundaries.loc[self.boundaries["alpha_3"] == iso]

            # drop_duplicates keeps first-seen order, so levels are processed
            # in a deterministic order.
            for level in country_boundaries["ADM_LEVEL"].drop_duplicates():
                logger.info(f"{iso}: Processing health facilities at adm{level}")
                level_boundaries = country_boundaries.loc[
                    country_boundaries["ADM_LEVEL"] == level
                ]
                joined = health_shp_lyr.sjoin(level_boundaries)
                counts = joined.groupby("ADM_PCODE").size()
                count_frames.append(counts.to_frame(name="Health_Facilities").reset_index())

                level_countries = updated_countries.setdefault(level, [])
                if iso not in level_countries:
                    level_countries.append(iso)

        if not count_frames:
            # Keep the expected columns so a later merge on ADM_PCODE still works.
            return DataFrame(columns=["ADM_PCODE", "Health_Facilities"]), updated_countries

        # A single concat at the end avoids re-copying the data on every iteration.
        return concat(count_frames, ignore_index=True), updated_countries

    def update_hdx_resource(self, dataset_name, summarized_data, updated_countries):
        """Merge fresh counts into the CSV held by the dataset's first resource.

        For every (level, countries) pair in ``updated_countries`` the existing
        rows are removed from the downloaded CSV and replaced by rows built
        from the boundaries plus ``summarized_data``.

        Args:
            dataset_name: Name of the HDX dataset holding the combined CSV.
            summarized_data: DataFrame with ``ADM_PCODE`` and
                ``Health_Facilities`` columns, as returned by
                ``summarize_data``.
            updated_countries: Mapping of admin level to list of country codes,
                as returned by ``summarize_data``.

        Returns:
            A tuple ``(health_data, resource)`` with the updated, sorted
            DataFrame and the resource it was downloaded from, or
            ``(None, None)`` if the dataset or resource is missing or the
            download fails.
        """
        dataset = Dataset.read_from_hdx(dataset_name)
        if not dataset:
            logger.error("Could not find overall health facility dataset")
            return None, None

        resources = dataset.get_resources()
        if not resources:
            logger.error("Could not find overall health facility resource")
            return None, None
        resource = resources[0]

        try:
            _, health_data_path = resource.download(folder=self.temp_folder)
        except DownloadError:
            logger.error("Could not download health facility csv")
            return None, None
        health_data = read_csv(health_data_path)

        # Nothing was processed: skip the merge (summarized_data may be empty).
        if updated_countries:
            updated_data = self.boundaries.drop(columns="geometry")
            # Left join keeps boundaries without any facility so they are
            # reported as 0 instead of disappearing from the output.
            updated_data = merge(updated_data, summarized_data, on="ADM_PCODE", how="left")
            # Fill only the count column; assigning to whole rows would wipe
            # the boundary attributes as well.
            updated_data["Health_Facilities"] = (
                updated_data["Health_Facilities"].fillna(0).astype(int)
            )

            for level, isos in updated_countries.items():
                stale_rows = health_data["alpha_3"].isin(isos) & (
                    health_data["ADM_LEVEL"] == level
                )
                fresh_rows = updated_data["alpha_3"].isin(isos) & (
                    updated_data["ADM_LEVEL"] == level
                )
                # Filter by mask and renumber the index: dropping by index
                # label would also delete freshly added rows that happen to
                # share a label with a stale row.
                health_data = concat(
                    [health_data.loc[~stale_rows], updated_data.loc[fresh_rows]],
                    ignore_index=True,
                )

        health_data.sort_values(by=["alpha_3", "ADM_LEVEL", "ADM_PCODE"], inplace=True)
        return health_data, resource