Skip to content
This repository was archived by the owner on Mar 18, 2024. It is now read-only.

ML training data generator #5

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
301 changes: 301 additions & 0 deletions examples/gen_landsat_training_set.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import rasterio\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"print(\"rasterio: %s\"%rasterio.__version__)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define the Landsat bands so that we can use a high level \n",
"# description to interface.\n",
"band_def5 = {'blue':1,'green':2,'red':3,'nir':4,'swir1':5,'thermal':6,'swir2':7}\n",
"band_def8 = {'ultra_blue':1,'blue':2,'green':3,'red':4,'nir':5,'swir1':6,'swir2':7,\n",
" 'pan':8,'cirus':9,'thermal1':10,'thermal2':11}\n",
"# generate the names of all images by \n",
"images5 = [\"LT05_L1TP_042033_19881022_20161001_01_T1_sr_band%d.tif\"%b for b in band_def5.values()]\n",
"images8 = [\"LC08_L1TP_042033_20171022_20171107_01_T1_sr_band%d.tif\"%b for b in band_def8.values()]\n",
"\n",
"# what bands do we care about?\n",
"bands = [\"red\",\"green\",\"blue\",\"nir\"]\n",
"\n",
"# What bands are we going t ogenerate\n",
"calculated = ['ndvi','bn','bnn']\n",
"\n",
"# how shal we subsample the ordered computed data?\n",
"num_pts = 20\n",
"step = 5\n",
"_end = num_pts * step"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# find some of the brightest NDVI and BN locations. \n",
"# This is rather arbitrary but provides some useful data.\n",
"with rasterio.open(images5[band_def5[\"red\"]-1]) as red_img:\n",
" red = red_img.read()\n",
"with rasterio.open(images5[band_def5[\"nir\"]-1]) as nir_img:\n",
" nir = nir_img.read()\n",
" affine = nir_img.affine\n",
"\n",
"blue_img = rasterio.open(images5[band_def5[\"blue\"]-1])\n",
"blue = blue_img.read()\n",
"\n",
"# because we are using Landsat reflectance images, the pixel vause are not\n",
"# the raw sensor info, and the numbers can be out of the theretical range,\n",
"# so mask out the nonsensical values that might cause problems.\n",
"bnmask = (nir>0) & (red>0) & (blue>0) & (nir<10000) & (red<10000) & (blue<10000)\n",
"\n",
"ndvi = (nir-red)/(nir+red)\n",
"sndvi = sorted(ndvi.flatten()[bnmask.flatten()],reverse=True)[0:_end:step]\n",
"bn = blue/nir\n",
"bnn = (nir-blue)/(nir+blue)\n",
"sbn = sorted(bn.flatten()[bnmask.flatten()],reverse=True)[0:_end:step]\n",
"sbnn = sorted(bnn.flatten()[bnmask.flatten()],reverse=True)[0:_end:step]\n",
" \n",
"def find_loc(band,values,img):\n",
" import numpy as np\n",
"\n",
" npts = []\n",
" for v in values:\n",
" x,y = [a[0] for a in np.where(band==v)[1:]]\n",
" #print(pos)\n",
" npts.append(affine*[y+0.25,x+0.25])\n",
" \n",
" return list(set(npts))\n",
"\n",
"ndvi_pts = find_loc(ndvi,sndvi,nir_img)\n",
"bn_pts = find_loc(bn,sbn,nir_img)\n",
"bnn_pts = find_loc(bnn,sbnn,nir_img)\n",
"\n",
"# hand coded water\n",
"water_pts=[(348586,4286269),(338690,4323890)]\n",
"\n",
"del red, nir, blue, ndvi\n",
"del bnn, bn, blue_img"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"num NDVI points =\",len(ndvi_pts))\n",
"print(\"num BN points =\",len(bn_pts))\n",
"print(\"num BNN points =\",len(bnn_pts))\n",
"print(\"num Water points =\",len(water_pts))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# extract the basic band info, as well as \n",
"def fill_pt(images,band_def,bands,name,\n",
" north=None,east=None,row=None,col=None,\n",
" ptype=None):\n",
" ndf = pd.DataFrame(data=[name],columns=[\"image\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# extract the basic band info, as well as \n",
"def fill_pt(images,band_def,bands,name,\n",
" north=None,east=None,row=None,col=None,\n",
" ptype=None):\n",
" ndf = pd.DataFrame(data=[name],columns=[\"image\"])\n",
" ndf['type'] = ptype\n",
" \n",
" for band in bands:\n",
" image = images[band_def[band]-1]\n",
" \n",
" img = rasterio.open(image)\n",
" affine = img.affine\n",
" \n",
" if (north==None or east==None) and (row==None or col==None):\n",
" print(\"Error: need either north+east or row+col\")\n",
" return {}\n",
" if (north!=None and east!=None):\n",
" row,col = img.index(y=north,x=east)\n",
" elif (row!=None and col!=None):\n",
" east,north = affine * (col,row)\n",
" else:\n",
" print(\"Error: north+east / row+col pairs not found\")\n",
" return {}\n",
" \n",
" ndf['east'],ndf['north'] = east,north\n",
" ndf['row'],ndf['col'] = row,col\n",
"\n",
" val = img.read(window=((row,row+1),(col,col+1)))[0][0][0]\n",
" ndf[band] = val\n",
" if (val<0) or (val>=10000):\n",
" ndf['row'],ndf['col'] = np.nan,np.nan\n",
" for band in bands:\n",
" ndf[band] = np.nan\n",
" return ndf\n",
" \n",
" del img\n",
" return ndf\n",
"\n",
"# extract the basic band info, as well as \n",
"def fill_computed(df,**kwargs):\n",
" for key in kwargs:\n",
" kwargs[key](df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# process the Landsat-5 images\n",
"df = pd.DataFrame()\n",
"for east,north in water_pts:\n",
" df = df.append(fill_pt(images5,band_def5,bands,\n",
" images5[0].split(\"_sr_band\")[0],\n",
" north=north,east=east,ptype='water'), ignore_index=True)\n",
"for east,north in ndvi_pts:\n",
" df = df.append(fill_pt(images5,band_def5,bands,\n",
" images5[0].split(\"_sr_band\")[0],\n",
" north=north,east=east,ptype='veg'), ignore_index=True)\n",
"for east,north in bn_pts:\n",
" df = df.append(fill_pt(images5,band_def5,bands,\n",
" images5[0].split(\"_sr_band\")[0],\n",
" north=north,east=east,ptype='back'), ignore_index=True)\n",
"for east,north in bnn_pts:\n",
" df = df.append(fill_pt(images5,band_def5,bands,\n",
" images5[0].split(\"_sr_band\")[0],\n",
" north=north,east=east,ptype='norm_back'), ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# process the Landsat-8 images\n",
"for east,north in water_pts:\n",
" df = df.append(fill_pt(images8,band_def8,bands,\n",
" images8[0].split(\"_sr_band\")[0],\n",
" north=north,east=east,ptype='water'), ignore_index=True)\n",
"for east,north in ndvi_pts:\n",
" df = df.append(fill_pt(images8,band_def8,bands,\n",
" images8[0].split(\"_sr_band\")[0],\n",
" north=north,east=east,ptype='veg'), ignore_index=True)\n",
"for east,north in bn_pts:\n",
" df = df.append(fill_pt(images8,band_def8,bands,\n",
" images8[0].split(\"_sr_band\")[0],\n",
" north=north,east=east,ptype='back'), ignore_index=True)\n",
"for east,north in bnn_pts:\n",
" df = df.append(fill_pt(images8,band_def8,bands,\n",
" images8[0].split(\"_sr_band\")[0],\n",
" north=north,east=east,ptype='norm_back'), ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# define the user defined caculations.\n",
"def ndvi(df):\n",
" df['ndvi'] = (df['nir']-df['red'])/(df['nir']+df['red'])\n",
"def bn(df):\n",
" df['bn'] = df['blue']/df['nir']\n",
"def bnn(df):\n",
" df['bnn'] = (df['nir']-df['blue'])/(df['nir']+df['blue'])\n",
"\n",
"fill_computed(df,ndvi=ndvi,bn=bn,bnn=bnn)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# find the locations with NaN's. We scrubbed the row/col variables above\n",
"# to make this easy.\n",
"rf = df[df['row'].isna()]\n",
"\n",
"# drop both the locations with NaN's and its pairs in other images as well\n",
"#df = df.dropna(axis=0, how='all')\n",
"df = df[~(df['east'].isin(rf['east'].values) & df['north'].isin(rf['north'].values))]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# show a few for inspection\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# write it out some place\n",
"df.to_csv(\"landsat_training.csv\",index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
23 changes: 0 additions & 23 deletions examples/landsat5_training.csv

This file was deleted.

29 changes: 0 additions & 29 deletions examples/landsat8_training.csv

This file was deleted.

Loading