forked from pjuckem/GIS_utils
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGISio.py
662 lines (564 loc) · 22.9 KB
/
GISio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
# suppress annoying pandas openpyxl warning
import warnings
warnings.filterwarnings('ignore', category=UserWarning)
import os
from collections import OrderedDict
import numpy as np
import fiona
from shapely.geometry import Point, shape, asLineString, mapping
from shapely.wkt import loads
import pandas as pd
import shutil
import numpy as np
def getPRJwkt(epsg):
    """
    from: https://code.google.com/p/pyshp/wiki/CreatePRJfiles
    Grabs a WKT version of an EPSG code
    usage getPRJwkt(4326)
    This makes use of links like http://spatialreference.org/ref/epsg/4326/prettywkt/

    Returns the raw response body (bytes) from spatialreference.org.
    Requires network access.
    """
    import urllib.request, urllib.parse, urllib.error
    from contextlib import closing
    url = "http://spatialreference.org/ref/epsg/{0}/prettywkt/".format(epsg)
    # close the connection instead of leaking the urlopen handle
    with closing(urllib.request.urlopen(url)) as f:
        return f.read()
def get_df_bounds(indf):
    """Return a tuple bounding every geometry in the dataframe.

    :param indf: dataframe that includes a geometry column
    :return: tuple of (minx, miny, maxx, maxy) bounding all the
        geometry column members
    """
    # start from +/- infinity so the first geometry always wins
    xmin = ymin = np.inf
    xmax = ymax = -np.inf
    for geom in indf.geometry:
        x0, y0, x1, y1 = geom.bounds
        xmin = np.min([xmin, x0])
        ymin = np.min([ymin, y0])
        xmax = np.max([xmax, x1])
        ymax = np.max([ymax, y1])
    return (xmin, ymin, xmax, ymax)
def get_proj4(prj):
    """Get proj4 string for a projection file
    Parameters
    ----------
    prj : string
        Shapefile or projection file
    Returns
    -------
    proj4 string (http://trac.osgeo.org/proj/)
    """
    from osgeo import osr
    prjfile = prj[:-4] + '.prj'  # allows shp or prj to be argued
    # use a context manager so the file handle is closed
    # (the original open(prjfile).read() leaked it)
    with open(prjfile) as f:
        prjtext = f.read()
    srs = osr.SpatialReference()
    srs.ImportFromESRI([prjtext])
    proj4 = srs.ExportToProj4()
    return proj4
def get_shapefile_bounds(shapefile):
    """Return (xmin, xmax, ymin, ymax) over all feature coordinates
    in a shapefile.

    Notes
    -----
    The original implementation called ``np.min(x, xmin)``, which passes
    ``xmin`` as the *axis* argument of np.min (wrong result or crash),
    and initialized the bounds to 0, biasing them toward the origin.
    """
    # +/- inf so the first feature always initializes the bounds
    xmin, ymin = np.inf, np.inf
    xmax, ymax = -np.inf, -np.inf
    with fiona.open(shapefile) as src:
        for rec in src:
            for crds in rec['geometry']['coordinates']:
                a = np.array(crds)
                x, y = a[:, 0], a[:, 1]
                xmin, xmax = min(xmin, x.min()), max(xmax, x.max())
                ymin, ymax = min(ymin, y.min()), max(ymax, y.max())
    return xmin, xmax, ymin, ymax
def get_photo_location(photos):
    """Get locations for georeferenced photos using python image library.
    Parameters
    ----------
    photos : list of strings
    Returns
    -------
    locations : lon, lat tuple or list of lon, lat tuples
        (returns None if no photos were supplied)
    """
    import PIL.Image
    from .get_lat_lon_exif_pil import get_exif_data, get_lat_lon
    # accept a single filename as well as a list of filenames
    photo_list = [photos] if isinstance(photos, str) else photos
    locations = [get_lat_lon(get_exif_data(PIL.Image.open(p)))
                 for p in photo_list]
    if len(locations) == 1:
        return locations[0]
    elif len(locations) > 1:
        return locations
def shp2df(shplist, index=None, index_dtype=None, clipto=None, filter=None,
           true_values=None, false_values=None, layer=None,
           skip_empty_geom=True):
    """Read shapefile/DBF, list of shapefiles/DBFs, or File geodatabase (GDB)
    into pandas DataFrame.
    Parameters
    ----------
    shplist : string or list
        of shapefile/DBF name(s) or FileGDB
    index : string
        Column to use as index for dataframe
    index_dtype : dtype
        Enforces a datatype for the index column (for example, if the index field is supposed to be integer
        but pandas reads it as strings, converts to integer)
    clipto : list
        limit what is brought in to items in index of clipto (requires index)
    filter : tuple (xmin, ymin, xmax, ymax)
        bounding box to filter which records are read from the shapefile.
    true_values : list
        same as argument for pandas read_csv
    false_values : list
        same as argument for pandas read_csv
    layer : str
        Layer name to read (if opening FileGDB)
    skip_empty_geom : True/False, default True
        Drops shapefile entries with null geometries.
        DBF files (which specify null geometries in their schema) will still be read.
    Returns
    -------
    df : DataFrame
        with attribute fields as columns; feature geometries are stored as
        shapely geometry objects in the 'geometry' column.
    """
    if isinstance(shplist, str):
        shplist = [shplist]
    if not isinstance(true_values, list) and true_values is not None:
        true_values = [true_values]
    if not isinstance(false_values, list) and false_values is not None:
        false_values = [false_values]
    # avoid a mutable default argument; an empty list means "no clipping"
    if clipto is None:
        clipto = []
    clip = len(clipto) > 0 and index is not None
    df = pd.DataFrame()
    for shp in shplist:
        print("\nreading {}...".format(shp))
        shp_obj = fiona.open(shp, 'r', layer=layer)
        if index is not None:
            # handle capitalization issues with index field name
            fields = list(shp_obj.schema['properties'].keys())
            index = [f for f in fields if index.lower() == f.lower()][0]
        attributes = []
        # for reading in shapefiles
        meta = shp_obj.meta
        if meta['schema']['geometry'] != 'None':
            if filter is not None:
                print('filtering on bounding box {}, {}, {}, {}...'.format(*filter))
            if clip:  # limit what is brought in to items in index of clipto
                for line in shp_obj.filter(bbox=filter):
                    props = line['properties']
                    if not props[index] in clipto:
                        continue
                    props['geometry'] = line.get('geometry', None)
                    attributes.append(props)
            else:
                for line in shp_obj.filter(bbox=filter):
                    props = line['properties']
                    props['geometry'] = line.get('geometry', None)
                    attributes.append(props)
            print('--> building dataframe... (may take a while for large shapefiles)')
            shp_df = pd.DataFrame(attributes)
            # reorder fields in the DataFrame to match the input shapefile
            if len(attributes) > 0:
                shp_df = shp_df[list(attributes[0].keys())]
            # handle empty reads before touching the geometry column
            # (an empty DataFrame has no .geometry attribute)
            if len(shp_df) == 0:
                print('Empty dataframe! No features were read.')
                if filter is not None:
                    print('Check filter {} for consistency '
                          'with shapefile coordinate system'.format(filter))
                shp_obj.close()
                continue
            # handle null geometries
            geoms = shp_df.geometry.tolist()
            if geoms.count(None) == 0:
                shp_df['geometry'] = [shape(g) for g in geoms]
            elif skip_empty_geom:
                null_geoms = [i for i, g in enumerate(geoms) if g is None]
                shp_df.drop(null_geoms, axis=0, inplace=True)
                shp_df['geometry'] = [shape(g) for g in shp_df.geometry.tolist()]
            else:
                shp_df['geometry'] = [shape(g) if g is not None else None
                                      for g in geoms]
        # for reading in DBF files (just like shps, but without geometry)
        else:
            if clip:  # limit what is brought in to items in index of clipto
                for line in shp_obj:
                    props = line['properties']
                    if not props[index] in clipto:
                        continue
                    attributes.append(props)
            else:
                for line in shp_obj:
                    attributes.append(line['properties'])
            print('--> building dataframe... (may take a while for large shapefiles)')
            shp_df = pd.DataFrame(attributes)
            # reorder fields in the DataFrame to match the input shapefile
            if len(attributes) > 0:
                shp_df = shp_df[list(attributes[0].keys())]
        shp_obj.close()
        if len(shp_df) == 0:
            continue
        # set the dataframe index from the index column
        if index is not None:
            if index_dtype is not None:
                shp_df[index] = shp_df[index].astype(index_dtype)
            shp_df.index = shp_df[index].values
        # DataFrame.append was removed in pandas 2.0; concat is equivalent here
        df = pd.concat([df, shp_df])
    # convert any t/f columns to numpy boolean data
    if true_values is not None or false_values is not None:
        replace_boolean = {}
        # guard each list separately: only one of the two may be supplied
        if true_values is not None:
            for t in true_values:
                replace_boolean[t] = True
        if false_values is not None:
            for f in false_values:
                replace_boolean[f] = False
        # only remap columns that have values to be replaced
        cols = [c for c in df.columns if c != 'geometry']
        for c in cols:
            if len(set(replace_boolean.keys()).intersection(set(df[c]))) > 0:
                df[c] = df[c].map(replace_boolean)
    return df
def shp_properties(df):
    """Build a fiona schema 'properties' dict for a DataFrame.

    NOTE: converts bool/object/datetime columns of ``df`` to strings
    *in place*; the shapefile writers in this module rely on that
    mutation so the written values match the declared schema.

    Returns an OrderedDict of {column name: 'int'/'float'/'str'/...}.
    """
    _remap = {'bool': 'str',
              'object': 'str',
              'datetime64[ns]': 'str'}
    # fiona/OGR doesn't like numpy ints
    # shapefile doesn't support 64 bit ints,
    # but apparently leaving the ints alone is more reliable
    # than intentionally downcasting them to 32 bit
    # pandas is smart enough to figure it out on .to_dict()?
    for col in df.columns:
        if col == 'geometry':
            continue
        dname = df.dtypes[col].name
        df[col] = df[col].astype(_remap.get(dname, dname))
        # ints too wide for a shapefile field fall back to text
        if 'int' in df.dtypes[col].name:
            if np.max(np.abs(df[col])) > 2**31 - 1:
                df[col] = df[col].astype(str)

    def _simplify(name):
        # strip dtype names down to just 'float', 'int' or 'str'
        stripped = ''.join(ch for ch in name if not ch.isdigit())
        return stripped.replace('object', 'str')

    dtypes = [df[col].dtype.name if col == 'geometry'
              else _simplify(df[col].dtype.name)
              for col in df.columns]
    return OrderedDict(zip(df.columns, dtypes))
def shpfromdf(df, shpname, Xname, Yname, prj):
    '''
    creates point shape file from pandas dataframe
    shpname: name of shapefile to write
    Xname: name of column containing Xcoordinates
    Yname: name of column containing Ycoordinates
    prj: <file>.prj to copy alongside the output shapefile
    '''
    # shp_properties converts problematic dtypes in df in place
    properties = shp_properties(df)
    schema = {'geometry': 'Point', 'properties': properties}
    with fiona.collection(shpname, "w", "ESRI Shapefile", schema) as output:
        # iterate by position: the original used df.iloc with index *labels*,
        # which is wrong whenever the index is not the default RangeIndex
        for i in range(len(df)):
            row = df.iloc[i]
            point = Point(row[Xname], row[Yname])
            output.write({'properties': dict(zip(df.columns, row)),
                          'geometry': mapping(point)})
    shutil.copyfile(prj, "{}.prj".format(shpname[:-4]))
def csv2points(csv, X='POINT_X', Y='POINT_Y', shpname=None, prj='EPSG:4326', **kwargs):
    '''
    convert csv with point information to shapefile
    X, Y: names of the coordinate columns
    shpname: output name; defaults to the csv name with a .shp extension
    ``**kwargs``: keyword arguments to pandas read_csv()
    '''
    outname = shpname if shpname else csv[:-4] + '.shp'
    data = pd.read_csv(csv, **kwargs)
    data['geometry'] = [Point(xy) for xy in zip(data[X], data[Y])]
    df2shp(data, outname, geo_column='geometry', prj=prj)
def xlsx2points(xlsx, sheetname='Sheet1', X='X', Y='Y', shpname=None, prj='EPSG:4326'):
    '''
    convert Excel file with point information to shapefile
    sheetname: worksheet to read
    X, Y: names of the coordinate columns
    shpname: output name; defaults to the Excel filename with a .shp extension
    '''
    if not shpname:
        # os.path.splitext handles dots in directory names,
        # unlike the original xlsx.split('.')[0]
        shpname = os.path.splitext(xlsx)[0] + '.shp'
    # sheet_name keyword replaces the long-deprecated 'sheetname' usage
    df = pd.read_excel(xlsx, sheet_name=sheetname)
    df['geometry'] = [Point(p) for p in zip(df[X], df[Y])]
    df2shp(df, shpname, geo_column='geometry', prj=prj)
def pointsdf2shp(dataframe, shpname, X='X', Y='Y', index=False, prj=None, epsg=None, proj4=None, crs=None):
    '''
    convert dataframe with point information to shapefile
    follows the same logic as xlsx2points above but no need for Excel file --> assumes point data already
    in the dataframe. Note that prj, epsg, etc. get passed along using the same logic as df2shp
    '''
    if 'geometry' not in [k.lower() for k in list(dataframe.keys())]:
        dataframe['geometry'] = [Point(p) for p in zip(dataframe[X], dataframe[Y])]
    # pass projection arguments by keyword: df2shp's 5th positional
    # parameter is retain_order, so the original positional call shifted
    # prj into retain_order, epsg into prj, etc.
    df2shp(dataframe, shpname, geo_column='geometry', index=index,
           prj=prj, epsg=epsg, proj4=proj4, crs=crs)
def df2shp(dataframe, shpname, geo_column='geometry', index=False,
           retain_order=False,
           prj=None, epsg=None, proj4=None, crs=None):
    '''
    Write a DataFrame to a shapefile
    dataframe: dataframe to write to shapefile
    geo_column: optional column containing geometry to write - default is 'geometry'
    index: If true, write out the dataframe index as a column
    retain_order : boolean
        Retain column order in dataframe, using an OrderedDict. Shapefile will
        take about twice as long to write, since OrderedDict output is not
        supported by the pandas DataFrame object.
    --->there are four ways to specify the projection....choose one
    prj: <file>.prj filename (string)
    epsg: EPSG identifier (integer)
    proj4: pyproj style projection string definition
    crs: crs attribute (dictionary) as read by fiona
    '''
    # first check if output path exists
    if os.path.split(shpname)[0] != '' and not os.path.isdir(os.path.split(shpname)[0]):
        raise IOError("Output folder doesn't exist")
    # check for empty dataframe
    if len(dataframe) == 0:
        raise IndexError("DataFrame is empty!")
    df = dataframe.copy()  # make a copy so the supplied dataframe isn't edited
    # reassign geometry column if geo_column is special (e.g. something other than "geometry")
    if geo_column != 'geometry':
        df['geometry'] = df[geo_column]
        df.drop(geo_column, axis=1, inplace=True)
    # assign none for geometry, to write a dbf file from dataframe
    Type = None
    if 'geometry' not in df.columns:
        df['geometry'] = None
        Type = 'None'
        mapped = [None] * len(df)
    # reset the index to integer index to enforce ordering
    # retain index as attribute field if index=True
    df.reset_index(inplace=True, drop=not index)
    # enforce character limit for names! (otherwise fiona marks it zero)
    # somewhat kludgey, but should work for duplicates up to 99
    df.columns = list(map(str, df.columns))  # convert columns to strings in case some are ints
    overtheline = [(i, '{}{}'.format(c[:8], i)) for i, c in enumerate(df.columns) if len(c) > 10]
    newcolumns = list(df.columns)
    for i, c in overtheline:
        newcolumns[i] = c
    df.columns = newcolumns
    # NOTE: shp_properties converts problematic dtypes in df in place
    properties = shp_properties(df)
    del properties['geometry']
    # set the crs, in order of precedence: epsg code > proj4 string >
    # crs dict supplied by the caller (as read by fiona) > None.
    # A prj file, if given instead, is copied after the shp is written.
    if epsg is not None:
        from fiona.crs import from_epsg
        crs = from_epsg(int(epsg))
    elif proj4 is not None:
        from fiona.crs import from_string
        crs = from_string(proj4)
    if Type != 'None':
        # take the geometry type from the last non-null geometry
        for g in df.geometry:
            try:
                Type = g.type
            except AttributeError:  # null geometry has no .type
                continue
        mapped = [mapping(g) for g in df.geometry]
    schema = {'geometry': Type, 'properties': properties}
    length = len(df)
    if not retain_order:
        props = df.drop('geometry', axis=1).astype(object).to_dict(orient='records')
    else:
        props = [OrderedDict(r) for i, r in df.drop('geometry', axis=1).astype(object).iterrows()]
    print('writing {}...'.format(shpname))
    with fiona.collection(shpname, "w", driver="ESRI Shapefile", crs=crs, schema=schema) as output:
        for i in range(length):
            output.write({'properties': props[i],
                          'geometry': mapped[i]})
    if prj is not None:
        try:
            print('copying {} --> {}...'.format(prj, "{}.prj".format(shpname[:-4])))
            shutil.copyfile(prj, "{}.prj".format(shpname[:-4]))
        except IOError:
            print('Warning: could not find specified prj file. shp will not be projected.')
def linestring_shpfromdf(df, shpname, IDname, Xname, Yname, Zname, prj, aggregate=None):
    '''
    creates a 3D linestring shapefile from a pandas dataframe of vertices
    shpname: name of shapefile to write
    Xname: name of column containing Xcoordinates
    Yname: name of column containing Ycoordinates
    Zname: name of column containing Zcoordinates
    IDname: column with unique integers for each line
    prj: <file>.prj to copy alongside the output shapefile
    aggregate = dict of column names (keys) and operations (entries)
    '''
    # setup properties for schema
    # if including other properties besides line identifier,
    # aggregate those to single value for line, using supplied aggregate dictionary
    if aggregate:
        cols = [IDname] + list(aggregate.keys())
        aggregated = df[cols].groupby(IDname).agg(aggregate)
        aggregated[IDname] = aggregated.index
        properties = shp_properties(aggregated)
    # otherwise setup properties to just include line identifier
    else:
        properties = {IDname: 'int'}
        aggregated = pd.DataFrame(df[IDname].astype('int32'))
    schema = {'geometry': '3D LineString', 'properties': properties}
    lines = list(np.unique(df[IDname].astype('int32')))
    with fiona.collection(shpname, "w", "ESRI Shapefile", schema) as output:
        for line in lines:
            lineinfo = df[df[IDname] == line]
            vertices = lineinfo[[Xname, Yname, Zname]].values
            linestring = asLineString(vertices)
            # .loc replaces DataFrame.ix, which was removed in pandas 1.0
            props = dict(zip(aggregated.columns, aggregated.loc[line, :]))
            output.write({'properties': props,
                          'geometry': mapping(linestring)})
    shutil.copyfile(prj, "{}.prj".format(shpname[:-4]))
def get_values_at_points(rasterfile, x=None, y=None, points=None):
    """Get raster values at a single point or list of points.
    Points must be in same coordinate system as raster.
    Parameters
    ----------
    rasterfile : str
        Filename of raster.
    x : 1D array
        X coordinate locations
    y : 1D array
        Y coordinate locations
    points : list of tuples or 2D numpy array (npoints, (x, y))
        Points at which to sample raster.
    Returns
    -------
    1D array of floats; np.nan where a point falls outside the raster
    Notes
    -----
    requires gdal
    """
    import time
    try:
        import gdal
    except ImportError:
        # newer GDAL installs expose the bindings under osgeo
        from osgeo import gdal
    if x is not None and isinstance(x[0], tuple):
        x, y = np.squeeze(np.array(x))
        warnings.warn(
            "new argument input for get_values_at_points is x, y, or points",
            PendingDeprecationWarning
        )
    elif x is not None:
        if not isinstance(x, np.ndarray):
            x = np.array(x)
        if not isinstance(y, np.ndarray):
            # was np.ndarray(y), which constructs an uninitialized array
            # of *shape* y instead of converting y
            y = np.array(y)
    elif points is not None:
        # convert first, then slice columns; the original unpacked
        # np.array(points) along axis 0, which only worked for 2 points
        if not isinstance(points, np.ndarray):
            points = np.array(points)
        x, y = points[:, 0], points[:, 1]
    else:
        # 'points' was previously referenced without being a parameter,
        # raising NameError here
        raise ValueError('Must supply x, y or list/array of points.')
    t0 = time.time()
    # open the raster
    gdata = gdal.Open(rasterfile)
    # get the location info
    xul, dx, rx, yul, ry, dy = gdata.GetGeoTransform()
    # read the array
    print("reading data from {}...".format(rasterfile))
    # np.float was removed in numpy 1.24; builtin float is equivalent
    data = gdata.ReadAsArray().astype(float)
    nrow, ncol = data.shape
    print("sampling...")
    # find the closest row, col location for each point
    i = ((y - yul) / dy).astype(int)
    j = ((x - xul) / dx).astype(int)
    # mask row, col locations outside the raster
    # (>= 0 so points in the first row/column are not dropped)
    within = (i >= 0) & (i < nrow) & (j >= 0) & (j < ncol)
    # get values at valid point locations
    results = np.ones(len(i), dtype=float) * np.nan
    results[within] = data[i[within], j[within]]
    print("finished in {:.2f}s".format(time.time() - t0))
    return results
def read_raster(rasterfile):
    '''
    reads a GDAL raster into numpy array for plotting
    also returns meshgrid of x and y coordinates of each cell for plotting
    based on code stolen from:
    http://stackoverflow.com/questions/20488765/plot-gdal-raster-using-matplotlib-basemap

    Returns
    -------
    data : numpy array of raster values (large negative fill values zeroed)
    gt : GDAL geotransform tuple
    proj : projection as a WKT string
    xy : np.mgrid of cell-center coordinates
    '''
    try:
        import gdal
    except ImportError:
        # newer GDAL installs expose the bindings under osgeo
        from osgeo import gdal
    ds = gdal.Open(rasterfile)
    # gdal.Open returns None on failure rather than raising,
    # so the original try/except never fired
    if ds is None:
        raise IOError("problem reading raster file {}".format(rasterfile))
    print('\nreading in {} into numpy array...'.format(rasterfile))
    data = ds.ReadAsArray()
    gt = ds.GetGeoTransform()
    proj = ds.GetProjection()
    xres = gt[1]
    yres = gt[5]
    # get the edge coordinates and add half the resolution
    # to go to center coordinates
    xmin = gt[0] + xres * 0.5
    xmax = gt[0] + (xres * ds.RasterXSize) - xres * 0.5
    ymin = gt[3] + (yres * ds.RasterYSize) + yres * 0.5
    ymax = gt[3] + yres * 0.5
    del ds
    print('creating a grid of xy coordinates in the original projection...')
    xy = np.mgrid[xmin:xmax + xres:xres, ymax + yres:ymin:yres]
    # zero out no-data values (very large negative fill values)
    data[data < -1.0e+20] = 0
    return data, gt, proj, xy
def flatten_3Dshp(shp, outshape=None):
    """Strip Z values from a 3D shapefile, writing a 2D copy.

    shp : input shapefile
    outshape : output name; defaults to <shp>_2D.shp
    """
    if not outshape:
        outshape = '{}_2D.shp'.format(shp[:-4])
    # shp2df has no 'geometry' keyword (the original call raised TypeError);
    # geometries are always read into the 'geometry' column
    df = shp2df(shp)
    # round-tripping through WKT removes the 3D (Z) coordinates
    df['2D'] = df['geometry'].map(lambda g: loads(g.wkt))
    # drop the original geometry column
    df = df.drop('geometry', axis=1)
    # pass arguments by keyword: positionally, the prj filename landed
    # in df2shp's 'index' parameter
    df2shp(df, outshape, geo_column='2D', prj=shp[:-4] + '.prj')
def _is_None(value):
if isinstance(value, str) and value.lower() == 'none':
return True
elif value is None:
return True
else:
return False
def arc_ascii(array, filename, xll=0, yll=0, cellsize=1.,
              nodata=-9999, **kwargs):
    """Write numpy array to Arc Ascii grid.
    Parameters
    ----------
    array : 2D numpy array
    filename : str
        Output filename; the extension is replaced with '.asc'.
    xll, yll : numeric
        Coordinates of the lower-left corner of the grid.
    cellsize : numeric
        Grid cell size.
    nodata : numeric
        Value substituted for NaNs in the output.
    kwargs: keyword arguments to np.savetxt
    """
    array = array.copy()  # don't modify the caller's array
    array[np.isnan(array)] = nodata
    # enforce .asc ending; os.path.splitext is robust to dotted directory
    # names and extensionless filenames, unlike '.'.join(split('.')[:-1])
    filename = os.path.splitext(filename)[0] + '.asc'
    nrow, ncol = array.shape
    txt = 'ncols {:d}\n'.format(ncol)
    txt += 'nrows {:d}\n'.format(nrow)
    txt += 'xllcorner {:f}\n'.format(xll)
    txt += 'yllcorner {:f}\n'.format(yll)
    txt += 'cellsize {}\n'.format(cellsize)
    txt += 'NODATA_value {:.0f}\n'.format(nodata)
    # single handle for header and data (the original reopened the file)
    with open(filename, 'w') as output:
        output.write(txt)
        np.savetxt(output, array, **kwargs)
    print('wrote {}'.format(filename))