xarray-contrib
diff --git a/‎examples/user_guide/25_GLCM_Texture.ipynb‎
Lines changed: 296 additions & 6 deletions b/‎examples/user_guide/25_GLCM_Texture.ipynb‎
Lines changed: 296 additions & 6 deletions
@@ -25,7 +25,14 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "3a06tk9sxuh",
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-03-09T15:30:49.123715Z",
+     "iopub.status.busy": "2026-03-09T15:30:49.123629Z",
+     "iopub.status.idle": "2026-03-09T15:30:50.272830Z",
+     "shell.execute_reply": "2026-03-09T15:30:50.272259Z"
+    }
+   },
    "outputs": [],
    "source": [
     "import numpy as np\n",
@@ -48,7 +55,14 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "mqtgqmsnvlp",
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-03-09T15:30:50.274712Z",
+     "iopub.status.busy": "2026-03-09T15:30:50.274450Z",
+     "iopub.status.idle": "2026-03-09T15:30:50.338586Z",
+     "shell.execute_reply": "2026-03-09T15:30:50.338001Z"
+    }
+   },
    "outputs": [],
    "source": [
     "# Build a 100x100 raster with four texture quadrants\n",
@@ -93,7 +107,14 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "2fgm4xz3uwj",
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-03-09T15:30:50.339809Z",
+     "iopub.status.busy": "2026-03-09T15:30:50.339694Z",
+     "iopub.status.idle": "2026-03-09T15:30:51.046768Z",
+     "shell.execute_reply": "2026-03-09T15:30:51.046259Z"
+    }
+   },
    "outputs": [],
    "source": [
     "contrast = glcm_texture(agg, metric='contrast', window_size=7, levels=64)\n",
@@ -122,7 +143,14 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "7ah4bm6v5ut",
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-03-09T15:30:51.047924Z",
+     "iopub.status.busy": "2026-03-09T15:30:51.047834Z",
+     "iopub.status.idle": "2026-03-09T15:30:51.568614Z",
+     "shell.execute_reply": "2026-03-09T15:30:51.567915Z"
+    }
+   },
    "outputs": [],
    "source": [
     "metrics = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'entropy']\n",
@@ -160,7 +188,14 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "nkzw9wmyxio",
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-03-09T15:30:51.570036Z",
+     "iopub.status.busy": "2026-03-09T15:30:51.569932Z",
+     "iopub.status.idle": "2026-03-09T15:30:51.920501Z",
+     "shell.execute_reply": "2026-03-09T15:30:51.919825Z"
+    }
+   },
    "outputs": [],
    "source": [
     "fig, axes = plt.subplots(1, 4, figsize=(16, 4))\n",
@@ -188,7 +223,14 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "keurc3dmugs",
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-03-09T15:30:51.921620Z",
+     "iopub.status.busy": "2026-03-09T15:30:51.921522Z",
+     "iopub.status.idle": "2026-03-09T15:30:52.045363Z",
+     "shell.execute_reply": "2026-03-09T15:30:52.044567Z"
+    }
+   },
    "outputs": [],
    "source": [
     "import dask.array as da\n",
@@ -228,6 +270,254 @@
     "\n",
     "Lower `levels` values run faster but lose gray-level resolution. For most remote sensing work, 32-64 levels is a good balance."
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "r8ezsmezbt9",
+   "source": [
+    "## Ok, how do we apply this to GIS?\n",
+    "\n",
+    "The synthetic quadrants above make the math obvious, but the real payoff is on actual imagery. Water and land have very different spatial textures in satellite data: water is smooth and uniform, land is rough and varied. GLCM features capture that difference even when raw pixel brightness alone is ambiguous (shadows, mudflats, dark pavement).\n",
+    "\n",
+    "Below we'll grab a Sentinel-2 NIR band crop of the San Francisco Bay coastline, compute GLCM texture features, and use KMeans clustering to classify water vs. land \u2014 no training labels required."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ec79xdunce9",
+   "source": [
+    "### Step 1 \u2014 Download a Sentinel-2 NIR band\n",
+    "\n",
+    "We read a 500 x 500 pixel window (5 km x 5 km at 10 m resolution) straight from a\n",
+    "Cloud-Optimized GeoTIFF hosted on AWS. The scene is\n",
+    "[S2B_10SEG_20230921](https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/10/S/EG/2023/9/S2B_10SEG_20230921_0_L2A/),\n",
+    "a near-cloudless capture of the Bay Area from September 2023.\n",
+    "\n",
+    "If the remote file is unreachable, a synthetic coastal raster is used instead\n",
+    "so the rest of the notebook still runs."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "id": "2hogpjtllw7",
+   "source": [
+    "import os\n",
+    "import rioxarray\n",
+    "\n",
+    "# GDAL configuration for remote reads; must be set before the file is opened\n",
+    "os.environ['AWS_NO_SIGN_REQUEST'] = 'YES'\n",
+    "os.environ['GDAL_DISABLE_READDIR_ON_OPEN'] = 'EMPTY_DIR'\n",
+    "\n",
+    "COG_URL = (\n",
+    "    'https://sentinel-cogs.s3.us-west-2.amazonaws.com/'\n",
+    "    'sentinel-s2-l2a-cogs/10/S/EG/2023/9/'\n",
+    "    'S2B_10SEG_20230921_0_L2A/B08.tif'\n",
+    ")\n",
+    "\n",
+    "try:\n",
+    "    # Read only the 500 x 500 window, then release the remote dataset\n",
+    "    with rioxarray.open_rasterio(COG_URL) as cog:\n",
+    "        nir_da = cog.isel(band=0, y=slice(2100, 2600), x=slice(5300, 5800))\n",
+    "        nir = nir_da.load().values.astype(np.float64)\n",
+    "    nir_title = 'Sentinel-2 NIR \u2014 SF Bay coastline'\n",
+    "    print(f'Downloaded NIR band: {nir.shape}, range {nir.min():.0f}\u2013{nir.max():.0f}')\n",
+    "except Exception as exc:\n",
+    "    # Deliberate best-effort: any read failure falls back to synthetic data\n",
+    "    # so the rest of the notebook still runs.\n",
+    "    print(f'Remote read failed ({exc}), using synthetic fallback')\n",
+    "    rng_sat = np.random.default_rng(99)\n",
+    "    nir = np.zeros((500, 500), dtype=np.float64)\n",
+    "    # Water: low reflectance, smooth\n",
+    "    nir[:, 250:] = rng_sat.normal(80, 10, (500, 250)).clip(20, 200)\n",
+    "    # Land: high reflectance, rough\n",
+    "    nir[:, :250] = rng_sat.normal(1800, 400, (500, 250)).clip(300, 4000)\n",
+    "    # Label the figure honestly when the real scene was not available\n",
+    "    nir_title = 'Synthetic NIR fallback \u2014 land (left) / water (right)'\n",
+    "\n",
+    "satellite = xr.DataArray(nir, dims=['y', 'x'])\n",
+    "\n",
+    "fig, ax = plt.subplots(figsize=(6, 6))\n",
+    "# Stretch to the 98th percentile so a few bright pixels do not wash out the image\n",
+    "ax.imshow(nir, cmap='gray', vmin=0, vmax=np.percentile(nir, 98))\n",
+    "ax.set_title(nir_title)\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ],
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-03-09T15:30:52.046807Z",
+     "iopub.status.busy": "2026-03-09T15:30:52.046700Z",
+     "iopub.status.idle": "2026-03-09T15:30:53.261434Z",
+     "shell.execute_reply": "2026-03-09T15:30:53.260895Z"
+    }
+   },
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "joxz7n8olpc",
+   "source": [
+    "### Step 2 \u2014 Compute GLCM texture features\n",
+    "\n",
+    "We pick four metrics that tend to separate water (uniform, high energy, high homogeneity) from land (rough, high contrast):\n",
+    "\n",
+    "| Metric | Water (smooth) | Land (rough) |\n",
+    "|---|---|---|\n",
+    "| contrast | low | high |\n",
+    "| homogeneity | high | low |\n",
+    "| energy | high | low |\n",
+    "| correlation | high (uniform) | lower (varied) |\n",
+    "\n",
+    "An 11 x 11 window gives enough spatial context at 10 m resolution (110 m footprint)."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "id": "ytdru4ssilp",
+   "source": [
+    "# Texture metrics computed on the satellite raster (11 x 11 window, 64 gray levels)\n",
+    "sat_metrics = ['contrast', 'homogeneity', 'energy', 'correlation']\n",
+    "sat_textures = glcm_texture(\n",
+    "    satellite, metric=sat_metrics, window_size=11, levels=64\n",
+    ")\n",
+    "\n",
+    "# One panel per metric, each with its own colorbar\n",
+    "fig, axes = plt.subplots(1, 4, figsize=(18, 4))\n",
+    "for idx, metric_name in enumerate(sat_metrics):\n",
+    "    panel = axes[idx]\n",
+    "    layer = sat_textures.sel(metric=metric_name).values\n",
+    "    im = panel.imshow(layer, cmap='viridis')\n",
+    "    panel.set_title(metric_name.capitalize())\n",
+    "    plt.colorbar(im, ax=panel, shrink=0.7)\n",
+    "plt.suptitle('GLCM features on satellite NIR', fontsize=13, y=1.02)\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ],
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-03-09T15:30:53.262766Z",
+     "iopub.status.busy": "2026-03-09T15:30:53.262453Z",
+     "iopub.status.idle": "2026-03-09T15:30:58.515207Z",
+     "shell.execute_reply": "2026-03-09T15:30:58.514628Z"
+    }
+   },
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "xya2v71uhb",
+   "source": [
+    "### Step 3 \u2014 Classify water vs. land with KMeans\n",
+    "\n",
+    "We stack the four texture layers into a feature vector per pixel, normalize them,\n",
+    "and let KMeans find two clusters. No training labels needed \u2014 the texture\n",
+    "difference between water and land is large enough that unsupervised clustering\n",
+    "picks it up cleanly.\n",
+    "\n",
+    "After clustering, we check which cluster has lower mean NIR reflectance and\n",
+    "label that one \"water.\""
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "id": "keramnxr509",
+   "source": [
+    "from sklearn.cluster import KMeans\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "\n",
+    "# Number of KMeans clusters. 2 splits water from land; raise it to split out\n",
+    "# more surface types. The darkest-NIR cluster is always the one labelled water.\n",
+    "N_CLUSTERS = 2\n",
+    "\n",
+    "# Stack texture features: (H, W, 4)\n",
+    "feature_stack = np.stack(\n",
+    "    [sat_textures.sel(metric=m).values for m in sat_metrics], axis=-1\n",
+    ")\n",
+    "h, w, n_feat = feature_stack.shape\n",
+    "\n",
+    "# Keep only pixels where every texture feature is defined\n",
+    "# (e.g. drops the NaN edges left by the GLCM window)\n",
+    "valid = ~np.any(np.isnan(feature_stack), axis=-1)\n",
+    "X = feature_stack[valid]\n",
+    "\n",
+    "# Normalize so no single metric dominates the distance, then cluster\n",
+    "X_scaled = StandardScaler().fit_transform(X)\n",
+    "labels = KMeans(n_clusters=N_CLUSTERS, random_state=42, n_init=10).fit_predict(X_scaled)\n",
+    "\n",
+    "# Build the classification raster (NaN where features were invalid)\n",
+    "class_map = np.full((h, w), np.nan)\n",
+    "class_map[valid] = labels\n",
+    "\n",
+    "# Assign \"water\" to the cluster with lower NIR reflectance\n",
+    "cluster_nir = [np.nanmean(nir[class_map == c]) for c in range(N_CLUSTERS)]\n",
+    "water_id = int(np.argmin(cluster_nir))\n",
+    "\n",
+    "# 1.0 = water, 0.0 = any other cluster, NaN = invalid pixel\n",
+    "water_mask = np.where(valid, class_map == water_id, np.nan)\n",
+    "print(f'Water cluster: {water_id} (mean NIR {cluster_nir[water_id]:.0f})')\n",
+    "# Report every non-water cluster; with N_CLUSTERS = 2 this is a single line\n",
+    "for land_id in range(N_CLUSTERS):\n",
+    "    if land_id != water_id:\n",
+    "        print(f'Land  cluster: {land_id} (mean NIR {cluster_nir[land_id]:.0f})')"
+   ],
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-03-09T15:30:58.517250Z",
+     "iopub.status.busy": "2026-03-09T15:30:58.517125Z",
+     "iopub.status.idle": "2026-03-09T15:30:59.860739Z",
+     "shell.execute_reply": "2026-03-09T15:30:59.860156Z"
+    }
+   },
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "pbro14h2x4",
+   "source": [
+    "### Result\n",
+    "\n",
+    "Left: the NIR image (bright = land, dark = water). Right: the GLCM-based classification. The shoreline comes through cleanly without any hand-drawn training polygons."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "id": "q1zl6bioi7e",
+   "source": [
+    "from matplotlib.colors import ListedColormap\n",
+    "from matplotlib.patches import Patch\n",
+    "\n",
+    "LAND_COLOR = '#a6611a'   # brown\n",
+    "WATER_COLOR = '#2166ac'  # blue\n",
+    "\n",
+    "fig, axes = plt.subplots(1, 2, figsize=(13, 6))\n",
+    "nir_ax, class_ax = axes\n",
+    "\n",
+    "# Left panel: NIR image, stretched to the 98th percentile\n",
+    "nir_ax.imshow(nir, cmap='gray', vmin=0, vmax=np.percentile(nir, 98))\n",
+    "nir_ax.set_title('Sentinel-2 NIR band')\n",
+    "\n",
+    "# Right panel: water_mask (from the clustering cell) is 1 for water,\n",
+    "# 0 for land and NaN where texture features were invalid\n",
+    "water_cmap = ListedColormap([LAND_COLOR, WATER_COLOR])  # 0=land, 1=water\n",
+    "im = class_ax.imshow(\n",
+    "    water_mask, cmap=water_cmap, vmin=0, vmax=1, interpolation='nearest',\n",
+    ")\n",
+    "class_ax.set_title('GLCM water / land classification')\n",
+    "\n",
+    "# Legend patches reuse the colormap colours so the two cannot drift apart\n",
+    "legend_handles = [\n",
+    "    Patch(color=WATER_COLOR, label='Water'),\n",
+    "    Patch(color=LAND_COLOR, label='Land'),\n",
+    "]\n",
+    "class_ax.legend(handles=legend_handles, loc='lower right', framealpha=0.9)\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ],
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-03-09T15:30:59.862223Z",
+     "iopub.status.busy": "2026-03-09T15:30:59.861896Z",
+     "iopub.status.idle": "2026-03-09T15:31:00.062349Z",
+     "shell.execute_reply": "2026-03-09T15:31:00.061769Z"
+    }
+   },
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "yqyikmy8is",
+   "source": [
+    "### Where to go from here\n",
+    "\n",
+    "- **More clusters.** Bump `n_clusters` to 3 or 4 to split out mudflats, urban areas, or vegetation.\n",
+    "- **Supervised classification.** Replace KMeans with a Random Forest trained on labeled polygons for higher accuracy.\n",
+    "- **Larger areas.** Back the satellite DataArray with a Dask array (for example, wrap the underlying NumPy array with `dask.array.from_array`) and `glcm_texture` will process it chunk-by-chunk, the same way we showed in the Dask section above.\n",
+    "- **Multi-band features.** Compute GLCM on several spectral bands and stack the results for a richer feature space."
+   ],
+   "metadata": {}
   }
  ],
  "metadata": {