holoviz-topics · ebo · May 11, 2018 · May 11, 2018
diff --git a/examples/gen_landsat_training_set.ipynb b/examples/gen_landsat_training_set.ipynb
@@ -0,0 +1,301 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import rasterio\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "print(\"rasterio: %s\"%rasterio.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the Landsat bands so that we can use a high level \n",
+    "# description to interface.\n",
+    "band_def5 = {'blue':1,'green':2,'red':3,'nir':4,'swir1':5,'thermal':6,'swir2':7}\n",
+    "band_def8 = {'ultra_blue':1,'blue':2,'green':3,'red':4,'nir':5,'swir1':6,'swir2':7,\n",
+    "             'pan':8,'cirus':9,'thermal1':10,'thermal2':11}\n",
+    "# generate the names of all images by \n",
+    "images5 = [\"LT05_L1TP_042033_19881022_20161001_01_T1_sr_band%d.tif\"%b for b in band_def5.values()]\n",
+    "images8 = [\"LC08_L1TP_042033_20171022_20171107_01_T1_sr_band%d.tif\"%b for b in band_def8.values()]\n",
+    "\n",
+    "# what bands do we care about?\n",
+    "bands = [\"red\",\"green\",\"blue\",\"nir\"]\n",
+    "\n",
+    "# What bands are we going t ogenerate\n",
+    "calculated = ['ndvi','bn','bnn']\n",
+    "\n",
+    "# how shal we subsample the ordered computed data?\n",
+    "num_pts = 20\n",
+    "step = 5\n",
+    "_end = num_pts * step"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# find some of the brightest NDVI and BN locations.  \n",
+    "# This is rather arbitrary but provides some useful data.\n",
+    "with rasterio.open(images5[band_def5[\"red\"]-1]) as red_img:\n",
+    "    red = red_img.read()\n",
+    "with rasterio.open(images5[band_def5[\"nir\"]-1]) as nir_img:\n",
+    "    nir = nir_img.read()\n",
+    "    affine = nir_img.affine\n",
+    "\n",
+    "blue_img = rasterio.open(images5[band_def5[\"blue\"]-1])\n",
+    "blue = blue_img.read()\n",
+    "\n",
+    "# because we are using Landsat reflectance images, the pixel vause are not\n",
+    "# the raw sensor info, and the numbers can be out of the theretical range,\n",
+    "# so mask out the nonsensical values that might cause problems.\n",
+    "bnmask = (nir>0) & (red>0) & (blue>0) & (nir<10000) & (red<10000) & (blue<10000)\n",
+    "\n",
+    "ndvi = (nir-red)/(nir+red)\n",
+    "sndvi = sorted(ndvi.flatten()[bnmask.flatten()],reverse=True)[0:_end:step]\n",
+    "bn = blue/nir\n",
+    "bnn = (nir-blue)/(nir+blue)\n",
+    "sbn = sorted(bn.flatten()[bnmask.flatten()],reverse=True)[0:_end:step]\n",
+    "sbnn = sorted(bnn.flatten()[bnmask.flatten()],reverse=True)[0:_end:step]\n",
+    "    \n",
+    "def find_loc(band,values,img):\n",
+    "    import numpy as np\n",
+    "\n",
+    "    npts = []\n",
+    "    for v in values:\n",
+    "        x,y = [a[0] for a in np.where(band==v)[1:]]\n",
+    "        #print(pos)\n",
+    "        npts.append(affine*[y+0.25,x+0.25])\n",
+    "    \n",
+    "    return list(set(npts))\n",
+    "\n",
+    "ndvi_pts = find_loc(ndvi,sndvi,nir_img)\n",
+    "bn_pts = find_loc(bn,sbn,nir_img)\n",
+    "bnn_pts = find_loc(bnn,sbnn,nir_img)\n",
+    "\n",
+    "# hand coded water\n",
+    "water_pts=[(348586,4286269),(338690,4323890)]\n",
+    "\n",
+    "del red, nir, blue, ndvi\n",
+    "del bnn, bn, blue_img"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"num NDVI points =\",len(ndvi_pts))\n",
+    "print(\"num BN points =\",len(bn_pts))\n",
+    "print(\"num BNN points =\",len(bnn_pts))\n",
+    "print(\"num Water points =\",len(water_pts))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# extract the basic band info, as well as \n",
+    "def fill_pt(images,band_def,bands,name,\n",
+    "            north=None,east=None,row=None,col=None,\n",
+    "            ptype=None):\n",
+    "    ndf = pd.DataFrame(data=[name],columns=[\"image\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# extract the basic band info, as well as \n",
+    "def fill_pt(images,band_def,bands,name,\n",
+    "            north=None,east=None,row=None,col=None,\n",
+    "            ptype=None):\n",
+    "    ndf = pd.DataFrame(data=[name],columns=[\"image\"])\n",
+    "    ndf['type'] = ptype\n",
+    "    \n",
+    "    for band in bands:\n",
+    "        image = images[band_def[band]-1]\n",
+    "    \n",
+    "        img = rasterio.open(image)\n",
+    "        affine = img.affine\n",
+    "    \n",
+    "        if (north==None or east==None) and (row==None or col==None):\n",
+    "            print(\"Error: need either north+east or row+col\")\n",
+    "            return {}\n",
+    "        if (north!=None and east!=None):\n",
+    "            row,col = img.index(y=north,x=east)\n",
+    "        elif (row!=None and col!=None):\n",
+    "            east,north = affine * (col,row)\n",
+    "        else:\n",
+    "            print(\"Error: north+east / row+col pairs not found\")\n",
+    "            return {}\n",
+    "        \n",
+    "        ndf['east'],ndf['north'] = east,north\n",
+    "        ndf['row'],ndf['col'] = row,col\n",
+    "\n",
+    "        val = img.read(window=((row,row+1),(col,col+1)))[0][0][0]\n",
+    "        ndf[band] = val\n",
+    "        if (val<0) or (val>=10000):\n",
+    "            ndf['row'],ndf['col'] = np.nan,np.nan\n",
+    "            for band in bands:\n",
+    "                ndf[band] = np.nan\n",
+    "            return ndf\n",
+    "        \n",
+    "    del img\n",
+    "    return ndf\n",
+    "\n",
+    "# extract the basic band info, as well as \n",
+    "def fill_computed(df,**kwargs):\n",
+    "    for key in kwargs:\n",
+    "        kwargs[key](df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# process the Landsat-5 images\n",
+    "df = pd.DataFrame()\n",
+    "for east,north in water_pts:\n",
+    "    df = df.append(fill_pt(images5,band_def5,bands,\n",
+    "                            images5[0].split(\"_sr_band\")[0],\n",
+    "                            north=north,east=east,ptype='water'), ignore_index=True)\n",
+    "for east,north in ndvi_pts:\n",
+    "    df = df.append(fill_pt(images5,band_def5,bands,\n",
+    "                            images5[0].split(\"_sr_band\")[0],\n",
+    "                            north=north,east=east,ptype='veg'), ignore_index=True)\n",
+    "for east,north in bn_pts:\n",
+    "    df = df.append(fill_pt(images5,band_def5,bands,\n",
+    "                            images5[0].split(\"_sr_band\")[0],\n",
+    "                            north=north,east=east,ptype='back'), ignore_index=True)\n",
+    "for east,north in bnn_pts:\n",
+    "    df = df.append(fill_pt(images5,band_def5,bands,\n",
+    "                            images5[0].split(\"_sr_band\")[0],\n",
+    "                            north=north,east=east,ptype='norm_back'), ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# process the Landsat-8 images\n",
+    "for east,north in water_pts:\n",
+    "    df = df.append(fill_pt(images8,band_def8,bands,\n",
+    "                            images8[0].split(\"_sr_band\")[0],\n",
+    "                            north=north,east=east,ptype='water'), ignore_index=True)\n",
+    "for east,north in ndvi_pts:\n",
+    "    df = df.append(fill_pt(images8,band_def8,bands,\n",
+    "                            images8[0].split(\"_sr_band\")[0],\n",
+    "                            north=north,east=east,ptype='veg'), ignore_index=True)\n",
+    "for east,north in bn_pts:\n",
+    "    df = df.append(fill_pt(images8,band_def8,bands,\n",
+    "                            images8[0].split(\"_sr_band\")[0],\n",
+    "                            north=north,east=east,ptype='back'), ignore_index=True)\n",
+    "for east,north in bnn_pts:\n",
+    "    df = df.append(fill_pt(images8,band_def8,bands,\n",
+    "                            images8[0].split(\"_sr_band\")[0],\n",
+    "                            north=north,east=east,ptype='norm_back'), ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define the user defined caculations.\n",
+    "def ndvi(df):\n",
+    "    df['ndvi'] = (df['nir']-df['red'])/(df['nir']+df['red'])\n",
+    "def bn(df):\n",
+    "    df['bn'] = df['blue']/df['nir']\n",
+    "def bnn(df):\n",
+    "    df['bnn'] = (df['nir']-df['blue'])/(df['nir']+df['blue'])\n",
+    "\n",
+    "fill_computed(df,ndvi=ndvi,bn=bn,bnn=bnn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# find the locations with NaN's.  We scrubbed the row/col variables above\n",
+    "# to make this easy.\n",
+    "rf = df[df['row'].isna()]\n",
+    "\n",
+    "# drop both the locations with NaN's and its pairs in other images as well\n",
+    "#df = df.dropna(axis=0, how='all')\n",
+    "df = df[~(df['east'].isin(rf['east'].values) & df['north'].isin(rf['north'].values))]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# show a few for inspection\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# write it out some place\n",
+    "df.to_csv(\"landsat_training.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/landsat5_training.csv b/examples/landsat5_training.csv
diff --git a/examples/landsat8_training.csv b/examples/landsat8_training.csv