diff --git a/docs/examples/convert_to_raster.ipynb b/docs/examples/convert_to_raster.ipynb index f896f011..ae3e0713 100644 --- a/docs/examples/convert_to_raster.ipynb +++ b/docs/examples/convert_to_raster.ipynb @@ -20,12 +20,17 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2026-03-24T16:00:08.693735600Z", + "start_time": "2026-03-24T16:00:07.451084900Z" + } + }, "source": [ "import rioxarray" - ] + ], + "outputs": [], + "execution_count": 1 }, { "cell_type": "markdown", @@ -36,426 +41,49 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:      (time: 2, x: 10, y: 10)\n",
-       "Coordinates:\n",
-       "  * time         (time) object 2016-12-19 10:27:29.687763 2016-12-29 12:52:42...\n",
-       "  * x            (x) float64 4.663e+05 4.663e+05 ... 4.663e+05 4.663e+05\n",
-       "  * y            (y) float64 8.085e+06 8.085e+06 ... 8.085e+06 8.085e+06\n",
-       "    spatial_ref  int32 0\n",
-       "Data variables:\n",
-       "    blue         (time, y, x) float64 ...\n",
-       "    green        (time, y, x) float64 ...\n",
-       "Attributes:\n",
-       "    coordinates:  spatial_ref
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 2, x: 10, y: 10)\n", - "Coordinates:\n", - " * time (time) object 2016-12-19 10:27:29.687763 2016-12-29 12:52:42...\n", - " * x (x) float64 4.663e+05 4.663e+05 ... 4.663e+05 4.663e+05\n", - " * y (y) float64 8.085e+06 8.085e+06 ... 8.085e+06 8.085e+06\n", - " spatial_ref int32 0\n", - "Data variables:\n", - " blue (time, y, x) float64 ...\n", - " green (time, y, x) float64 ...\n", - "Attributes:\n", - " coordinates: spatial_ref" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" + "metadata": { + "ExecuteTime": { + "end_time": "2026-03-24T16:00:09.538359600Z", + "start_time": "2026-03-24T16:00:08.696735900Z" } - ], + }, "source": [ "rds = rioxarray.open_rasterio(\n", " \"../../test/test_data/input/PLANET_SCOPE_3D.nc\",\n", ")\n", "rds" - ] + ], + "outputs": [ + { + "ename": "RasterioIOError", + "evalue": "'../../test/test_data/input/PLANET_SCOPE_3D.nc' not recognized as being in a supported file format. It could have been recognized by driver HDF5, but plugin gdal_HDF5.dll is not available in your installation. You may install it with 'conda install -c conda-forge libgdal-hdf5'. The GDAL_DRIVER_PATH configuration option is not set.", + "output_type": "error", + "traceback": [ + "\u001B[31m---------------------------------------------------------------------------\u001B[39m", + "\u001B[31mKeyError\u001B[39m Traceback (most recent call last)", + "\u001B[36mFile \u001B[39m\u001B[32m~\\Anaconda3\\envs\\rioxarray\\Lib\\site-packages\\xarray\\backends\\file_manager.py:219\u001B[39m, in \u001B[36mCachingFileManager._acquire_with_cache_info\u001B[39m\u001B[34m(self, needs_lock)\u001B[39m\n\u001B[32m 218\u001B[39m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[32m--> \u001B[39m\u001B[32m219\u001B[39m file = \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_cache\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_key\u001B[49m\u001B[43m]\u001B[49m\n\u001B[32m 220\u001B[39m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m:\n", + "\u001B[36mFile \u001B[39m\u001B[32m~\\Anaconda3\\envs\\rioxarray\\Lib\\site-packages\\xarray\\backends\\lru_cache.py:56\u001B[39m, in \u001B[36mLRUCache.__getitem__\u001B[39m\u001B[34m(self, key)\u001B[39m\n\u001B[32m 55\u001B[39m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[38;5;28mself\u001B[39m._lock:\n\u001B[32m---> \u001B[39m\u001B[32m56\u001B[39m value = \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_cache\u001B[49m\u001B[43m[\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m]\u001B[49m\n\u001B[32m 57\u001B[39m \u001B[38;5;28mself\u001B[39m._cache.move_to_end(key)\n", + "\u001B[31mKeyError\u001B[39m: [, ('../../test/test_data/input/PLANET_SCOPE_3D.nc',), 'r', (('sharing', False),), '906ef6ee-6579-470e-868f-9eeef4b364d6']", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001B[31mCPLE_OpenFailedError\u001B[39m Traceback (most recent call last)", + "\u001B[36mFile \u001B[39m\u001B[32m~\\Anaconda3\\envs\\rioxarray\\Lib\\site-packages\\rasterio\\_base.pyx:320\u001B[39m, in \u001B[36mrasterio._base.DatasetBase.__init__\u001B[39m\u001B[34m()\u001B[39m\n", + "\u001B[36mFile \u001B[39m\u001B[32m~\\Anaconda3\\envs\\rioxarray\\Lib\\site-packages\\rasterio\\_base.pyx:232\u001B[39m, in \u001B[36mrasterio._base.open_dataset\u001B[39m\u001B[34m()\u001B[39m\n", + "\u001B[36mFile \u001B[39m\u001B[32m~\\Anaconda3\\envs\\rioxarray\\Lib\\site-packages\\rasterio\\_err.pyx:359\u001B[39m, in \u001B[36mrasterio._err.exc_wrap_pointer\u001B[39m\u001B[34m()\u001B[39m\n", + "\u001B[31mCPLE_OpenFailedError\u001B[39m: '../../test/test_data/input/PLANET_SCOPE_3D.nc' not recognized as being in a supported file format. It could have been recognized by driver HDF5, but plugin gdal_HDF5.dll is not available in your installation. You may install it with 'conda install -c conda-forge libgdal-hdf5'. The GDAL_DRIVER_PATH configuration option is not set.", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001B[31mRasterioIOError\u001B[39m Traceback (most recent call last)", + "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[2]\u001B[39m\u001B[32m, line 1\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m1\u001B[39m rds = \u001B[43mrioxarray\u001B[49m\u001B[43m.\u001B[49m\u001B[43mopen_rasterio\u001B[49m\u001B[43m(\u001B[49m\n\u001B[32m 2\u001B[39m \u001B[43m \u001B[49m\u001B[33;43m\"\u001B[39;49m\u001B[33;43m../../test/test_data/input/PLANET_SCOPE_3D.nc\u001B[39;49m\u001B[33;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[32m 3\u001B[39m \u001B[43m)\u001B[49m\n\u001B[32m 4\u001B[39m rds\n", + "\u001B[36mFile \u001B[39m\u001B[32mD:\\_FORKS\\rioxarray\\rioxarray\\_io.py:1140\u001B[39m, in \u001B[36mopen_rasterio\u001B[39m\u001B[34m(filename, parse_coordinates, chunks, cache, lock, masked, mask_and_scale, variable, group, default_name, decode_times, decode_timedelta, band_as_variable, **open_kwargs)\u001B[39m\n\u001B[32m 1138\u001B[39m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[32m 1139\u001B[39m manager = URIManager(file_opener, filename, mode=\u001B[33m\"\u001B[39m\u001B[33mr\u001B[39m\u001B[33m\"\u001B[39m, kwargs=open_kwargs)\n\u001B[32m-> \u001B[39m\u001B[32m1140\u001B[39m riods = \u001B[43mmanager\u001B[49m\u001B[43m.\u001B[49m\u001B[43macquire\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 1141\u001B[39m captured_warnings = rio_warnings.copy()\n\u001B[32m 1143\u001B[39m \u001B[38;5;66;03m# raise the NotGeoreferencedWarning if applicable\u001B[39;00m\n", + "\u001B[36mFile \u001B[39m\u001B[32m~\\Anaconda3\\envs\\rioxarray\\Lib\\site-packages\\xarray\\backends\\file_manager.py:201\u001B[39m, in \u001B[36mCachingFileManager.acquire\u001B[39m\u001B[34m(self, needs_lock)\u001B[39m\n\u001B[32m 186\u001B[39m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34macquire\u001B[39m(\u001B[38;5;28mself\u001B[39m, needs_lock: \u001B[38;5;28mbool\u001B[39m = \u001B[38;5;28;01mTrue\u001B[39;00m) -> T_File:\n\u001B[32m 187\u001B[39m \u001B[38;5;250m \u001B[39m\u001B[33;03m\"\"\"Acquire a file object from the manager.\u001B[39;00m\n\u001B[32m 188\u001B[39m \n\u001B[32m 189\u001B[39m \u001B[33;03m A new file is only opened if it has expired from the\u001B[39;00m\n\u001B[32m (...)\u001B[39m\u001B[32m 199\u001B[39m \u001B[33;03m An open file object, as returned by ``opener(*args, **kwargs)``.\u001B[39;00m\n\u001B[32m 200\u001B[39m \u001B[33;03m \"\"\"\u001B[39;00m\n\u001B[32m--> \u001B[39m\u001B[32m201\u001B[39m file, _ = \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_acquire_with_cache_info\u001B[49m\u001B[43m(\u001B[49m\u001B[43mneeds_lock\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 202\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m file\n", + "\u001B[36mFile \u001B[39m\u001B[32m~\\Anaconda3\\envs\\rioxarray\\Lib\\site-packages\\xarray\\backends\\file_manager.py:225\u001B[39m, in \u001B[36mCachingFileManager._acquire_with_cache_info\u001B[39m\u001B[34m(self, needs_lock)\u001B[39m\n\u001B[32m 223\u001B[39m kwargs = kwargs.copy()\n\u001B[32m 224\u001B[39m kwargs[\u001B[33m\"\u001B[39m\u001B[33mmode\u001B[39m\u001B[33m\"\u001B[39m] = \u001B[38;5;28mself\u001B[39m._mode\n\u001B[32m--> \u001B[39m\u001B[32m225\u001B[39m file = \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_opener\u001B[49m\u001B[43m(\u001B[49m\u001B[43m*\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_args\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43m*\u001B[49m\u001B[43m*\u001B[49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 226\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m._mode == \u001B[33m\"\u001B[39m\u001B[33mw\u001B[39m\u001B[33m\"\u001B[39m:\n\u001B[32m 227\u001B[39m \u001B[38;5;66;03m# ensure file doesn't get overridden when opened again\u001B[39;00m\n\u001B[32m 228\u001B[39m \u001B[38;5;28mself\u001B[39m._mode = \u001B[33m\"\u001B[39m\u001B[33ma\u001B[39m\u001B[33m\"\u001B[39m\n", + "\u001B[36mFile \u001B[39m\u001B[32m~\\Anaconda3\\envs\\rioxarray\\Lib\\site-packages\\rasterio\\env.py:464\u001B[39m, in \u001B[36mensure_env_with_credentials..wrapper\u001B[39m\u001B[34m(*args, **kwds)\u001B[39m\n\u001B[32m 461\u001B[39m session = DummySession()\n\u001B[32m 463\u001B[39m \u001B[38;5;28;01mwith\u001B[39;00m env_ctor(session=session):\n\u001B[32m--> \u001B[39m\u001B[32m464\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mf\u001B[49m\u001B[43m(\u001B[49m\u001B[43m*\u001B[49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43m*\u001B[49m\u001B[43m*\u001B[49m\u001B[43mkwds\u001B[49m\u001B[43m)\u001B[49m\n", + "\u001B[36mFile \u001B[39m\u001B[32m~\\Anaconda3\\envs\\rioxarray\\Lib\\site-packages\\rasterio\\__init__.py:367\u001B[39m, in \u001B[36mopen\u001B[39m\u001B[34m(fp, mode, driver, width, height, count, crs, transform, dtype, nodata, sharing, thread_safe, opener, **kwargs)\u001B[39m\n\u001B[32m 364\u001B[39m path = _parse_path(raw_dataset_path)\n\u001B[32m 366\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m mode == \u001B[33m\"\u001B[39m\u001B[33mr\u001B[39m\u001B[33m\"\u001B[39m:\n\u001B[32m--> \u001B[39m\u001B[32m367\u001B[39m dataset = \u001B[43mDatasetReader\u001B[49m\u001B[43m(\u001B[49m\u001B[43mpath\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdriver\u001B[49m\u001B[43m=\u001B[49m\u001B[43mdriver\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43msharing\u001B[49m\u001B[43m=\u001B[49m\u001B[43msharing\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mthread_safe\u001B[49m\u001B[43m=\u001B[49m\u001B[43mthread_safe\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43m*\u001B[49m\u001B[43m*\u001B[49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 368\u001B[39m \u001B[38;5;28;01melif\u001B[39;00m mode == \u001B[33m\"\u001B[39m\u001B[33mr+\u001B[39m\u001B[33m\"\u001B[39m:\n\u001B[32m 369\u001B[39m dataset = get_writer_for_path(path, driver=driver)(\n\u001B[32m 370\u001B[39m path, mode, driver=driver, sharing=sharing, **kwargs\n\u001B[32m 371\u001B[39m )\n", + "\u001B[36mFile \u001B[39m\u001B[32m~\\Anaconda3\\envs\\rioxarray\\Lib\\site-packages\\rasterio\\_base.pyx:329\u001B[39m, in \u001B[36mrasterio._base.DatasetBase.__init__\u001B[39m\u001B[34m()\u001B[39m\n", + "\u001B[31mRasterioIOError\u001B[39m: '../../test/test_data/input/PLANET_SCOPE_3D.nc' not recognized as being in a supported file format. It could have been recognized by driver HDF5, but plugin gdal_HDF5.dll is not available in your installation. You may install it with 'conda install -c conda-forge libgdal-hdf5'. The GDAL_DRIVER_PATH configuration option is not set." + ] + } + ], + "execution_count": 2 }, { "cell_type": "markdown", @@ -468,30 +96,22 @@ }, { "cell_type": "code", - "execution_count": 3, "metadata": {}, - "outputs": [], "source": [ "# note how one time slice was selected on export to make the dataset 2D\n", "rds.isel(time=0).rio.to_raster(\"planet_scope.tif\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 4, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"bounds\": [466266.0, 8084670.0, 466296.0, 8084700.0], \"colorinterp\": [\"gray\", \"undefined\"], \"count\": 2, \"crs\": \"EPSG:32722\", \"descriptions\": [\"blue\", \"green\"], \"driver\": \"GTiff\", \"dtype\": \"float64\", \"height\": 10, \"indexes\": [1, 2], \"interleave\": \"pixel\", \"lnglat\": [-51.31732641226951, -17.322997474192466], \"mask_flags\": [[\"nodata\"], [\"nodata\"]], \"nodata\": NaN, \"res\": [3.0, 3.0], \"shape\": [10, 10], \"tiled\": false, \"transform\": [3.0, 0.0, 466266.0, 0.0, -3.0, 8084700.0, 0.0, 0.0, 1.0], \"units\": [null, null], \"width\": 10}\n" - ] - } - ], "source": [ "!rio info planet_scope.tif" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -504,30 +124,22 @@ }, { "cell_type": "code", - "execution_count": 5, "metadata": {}, - "outputs": [], "source": [ "# note how selecting one variable allowed for multiple time steps in a single raster\n", "rds.green.rio.to_raster(\"planet_scope_green.tif\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 6, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"bounds\": [466266.0, 8084670.0, 466296.0, 8084700.0], \"colorinterp\": [\"gray\", \"undefined\"], \"count\": 2, \"crs\": \"EPSG:32722\", \"descriptions\": [\"green\", \"green\"], \"driver\": \"GTiff\", \"dtype\": \"float64\", \"height\": 10, \"indexes\": [1, 2], \"interleave\": \"pixel\", \"lnglat\": [-51.31732641226951, -17.322997474192466], \"mask_flags\": [[\"nodata\"], [\"nodata\"]], \"nodata\": NaN, \"res\": [3.0, 3.0], \"shape\": [10, 10], \"tiled\": false, \"transform\": [3.0, 0.0, 466266.0, 0.0, -3.0, 8084700.0, 0.0, 0.0, 1.0], \"units\": [null, null], \"width\": 10}\n" - ] - } - ], "source": [ "!rio info planet_scope_green.tif" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -540,13 +152,13 @@ }, { "cell_type": "code", - "execution_count": 4, "metadata": {}, - "outputs": [], "source": [ "# you will get a two file raster: the .ers file with the metdata and the data with no extension\n", "rds.blue.rio.to_raster(\"planet_scope_green.ers\", driver=\"ERS\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -557,12 +169,12 @@ }, { "cell_type": "code", - "execution_count": 5, "metadata": {}, - "outputs": [], "source": [ "rds.blue.rio.to_raster(\"planet_scope_green_LZW_compression.tif\", driver=\"GTiff\", compress=\"LZW\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -573,33 +185,50 @@ }, { "cell_type": "code", - "execution_count": 7, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dtype('float64')" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ "rds.blue.dtype" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "rds.blue.astype('float32').rio.to_raster(\"planet_scope_green_LZW_compression.tif\", driver=\"GTiff\", compress=\"LZW\")" + ], + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Add band tags\n", + "\n", + "Band tags are tags referring specifically to the bands. You need to specify the `'band_tags'` keyword and assign a list (one per band)" ] }, + { + "metadata": {}, + "cell_type": "code", + "source": "rds.rio.to_raster(\"planet_scope_green_tags.tif\", tags={\"band_tags\": [{\"my_tag\": 1}]})", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Note that tags can be added with the `write_band_tags` method:" + }, + { + "metadata": {}, + "cell_type": "code", + "source": "rds.rio.write_band_tags([{\"my_tag\": 1}], inplace=False).rio.to_raster(\"planet_scope_green_tags.tif\")", + "outputs": [], + "execution_count": null + }, { "cell_type": "markdown", "metadata": {}, @@ -618,9 +247,7 @@ }, { "cell_type": "code", - "execution_count": 7, "metadata": {}, - "outputs": [], "source": [ "rds = rioxarray.open_rasterio(\n", " \"../../test/test_data/input/PLANET_SCOPE_3D.nc\",\n", @@ -633,24 +260,18 @@ " tiled=True, # GDAL: By default striped TIFF files are created. This option can be used to force creation of tiled TIFF files.\n", " windowed=True, # rioxarray: read & write one window at a time\n", ")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 8, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"blockxsize\": 256, \"blockysize\": 256, \"bounds\": [466266.0, 8084670.0, 466296.0, 8084700.0], \"colorinterp\": [\"gray\", \"undefined\"], \"count\": 2, \"crs\": \"EPSG:32722\", \"descriptions\": [\"green\", \"green\"], \"driver\": \"GTiff\", \"dtype\": \"float64\", \"height\": 10, \"indexes\": [1, 2], \"interleave\": \"pixel\", \"lnglat\": [-51.31732641226951, -17.322997474192466], \"mask_flags\": [[\"nodata\"], [\"nodata\"]], \"nodata\": NaN, \"res\": [3.0, 3.0], \"shape\": [10, 10], \"tiled\": true, \"transform\": [3.0, 0.0, 466266.0, 0.0, -3.0, 8084700.0, 0.0, 0.0, 1.0], \"units\": [null, null], \"width\": 10}\n" - ] - } - ], "source": [ "!rio info planet_scope_tiled.tif" - ] + ], + "outputs": [], + "execution_count": null }, { "metadata": {}, diff --git a/rioxarray/_io.py b/rioxarray/_io.py index 2ac62f07..9230f77b 100644 --- a/rioxarray/_io.py +++ b/rioxarray/_io.py @@ -34,8 +34,10 @@ from xarray.core.variable import as_variable from rioxarray._spatial_utils import ( + DEFAULT_GRID_MAP, FILL_VALUE_NAMES, UNWANTED_RIO_ATTRS, + UNWANTED_TAGS, _generate_spatial_coords, ) from rioxarray.exceptions import RioXarrayError @@ -654,17 +656,56 @@ def build_subdataset_filter( ) +def _parse_and_sanitize_tags( + tags: dict, other_unwanted_attrs: Optional[tuple] = None +) -> dict: + attrs = _parse_tags(tags) + + if other_unwanted_attrs is None: + other_unwanted_attrs = FILL_VALUE_NAMES + UNWANTED_RIO_ATTRS + else: + other_unwanted_attrs += FILL_VALUE_NAMES + other_unwanted_attrs += UNWANTED_RIO_ATTRS + + # that should be added by GDAL/rasterio + for unwanted_attr in other_unwanted_attrs: + if unwanted_attr.endswith("*"): + for attr in attrs: + if attr.startswith(unwanted_attr[:-1]): + attrs.pop(unwanted_attr, None) + else: + attrs.pop(unwanted_attr, None) + + return attrs + + +def _sanitize_netcdf_dims_in_attrs(riods, attrs, coords): + for coord in coords: + if f"NETCDF_DIM_{coord}" in attrs: + coord_name = coord + attrs.pop(f"NETCDF_DIM_{coord}") + break + if f"NETCDF_DIM_{coord}_VALUES" in attrs: + coord_name = coord + attrs.pop(f"NETCDF_DIM_{coord}_VALUES") + attrs.pop(f"NETCDF_DIM_{coord}_DEF", None) + attrs.pop("NETCDF_DIM_EXTRA", None) + break + else: + coord_name = "band" + coords[coord_name] = numpy.asarray(riods.indexes) + + return coord_name + + def _get_rasterio_attrs(riods: RasterioReader): """ Get rasterio specific attributes """ # pylint: disable=too-many-branches - # Add rasterio attributes - attrs = _parse_tags({**riods.tags(), **riods.tags(1)}) - # remove attributes with informaiton - # that should be added by GDAL/rasterio - for unwanted_attr in FILL_VALUE_NAMES + UNWANTED_RIO_ATTRS: - attrs.pop(unwanted_attr, None) + # Add rasterio attributes (add the first band attributes into the raster's attribute) + attrs = _parse_and_sanitize_tags({**riods.tags(), **riods.tags(1)}) + if riods.nodata is not None: # The nodata values for the raster bands attrs["_FillValue"] = riods.nodata @@ -997,6 +1038,11 @@ def _single_band_open(*args, bidx=0, **kwargs): ) +def _remove_from_band_tags(band_tags: list[dict], to_be_removed: str): + for tags in band_tags: + tags.pop(to_be_removed, None) + + def open_rasterio( filename: Union[ str, @@ -1196,20 +1242,18 @@ def open_rasterio( attrs = _get_rasterio_attrs(riods=riods) coords = _load_netcdf_1d_coords(attrs) _parse_driver_tags(riods=riods, attrs=attrs, coords=coords) - for coord in coords: - if f"NETCDF_DIM_{coord}" in attrs: - coord_name = coord - attrs.pop(f"NETCDF_DIM_{coord}") - break - if f"NETCDF_DIM_{coord}_VALUES" in attrs: - coord_name = coord - attrs.pop(f"NETCDF_DIM_{coord}_VALUES") - attrs.pop(f"NETCDF_DIM_{coord}_DEF", None) - attrs.pop("NETCDF_DIM_EXTRA", None) - break - else: - coord_name = "band" - coords[coord_name] = numpy.asarray(riods.indexes) + coord_name = _sanitize_netcdf_dims_in_attrs(riods=riods, attrs=attrs, coords=coords) + + # Add band tags in band_tags as a list: one dict per band + band_tags = [] + for i in range(riods.count): + tags = _parse_and_sanitize_tags( + riods.tags(i + 1), + other_unwanted_attrs=UNWANTED_TAGS + + (DEFAULT_GRID_MAP, "AREA_OR_POINT", "NETCDF_*"), + ) + if len(tags) > 0: + band_tags.append(tags) # Handle GCPs and RPCs has_gcps = riods.gcps[0] @@ -1231,11 +1275,14 @@ def open_rasterio( encoding: dict[Hashable, Any] = {} if mask_and_scale and "_Unsigned" in attrs: unsigned = variables.pop_to(attrs, encoding, "_Unsigned") == "true" + _remove_from_band_tags(band_tags, "_Unsigned") if masked: encoding["dtype"] = str(_rasterio_to_numpy_dtype(riods.dtypes)) da_name = attrs.pop("NETCDF_VARNAME", default_name) + _remove_from_band_tags(band_tags, "NETCDF_VARNAME") + data: Any = indexing.LazilyOuterIndexedArray( RasterioArrayWrapper( manager=manager, @@ -1267,6 +1314,7 @@ def open_rasterio( # make sure the _FillValue is correct dtype if "_FillValue" in result.attrs: result.attrs["_FillValue"] = result.dtype.type(result.attrs["_FillValue"]) + _remove_from_band_tags(band_tags, "_FillValue") # handle encoding _handle_encoding( @@ -1290,6 +1338,10 @@ def open_rasterio( if has_rpcs: result.rio.write_rpcs(riods.rpcs, inplace=True) + # Add band tags + if band_tags and band_tags[0]: + result.rio.write_band_tags(band_tags, inplace=True) + if chunks is not None: result = _prepare_dask( result=result, @@ -1322,6 +1374,7 @@ def open_rasterio( for attr, value in result.attrs.items() if not attr.startswith(f"{result.name}#") } + # Make the file closeable result.set_close(manager.close) result.rio._manager = manager diff --git a/rioxarray/_spatial_utils.py b/rioxarray/_spatial_utils.py index c651f012..243afa7d 100644 --- a/rioxarray/_spatial_utils.py +++ b/rioxarray/_spatial_utils.py @@ -21,6 +21,15 @@ FILL_VALUE_NAMES = ("_FillValue", "missing_value", "fill_value", "nodata") UNWANTED_RIO_ATTRS = ("nodatavals", "is_tiled", "res") DEFAULT_GRID_MAP = "spatial_ref" +UNWANTED_TAGS = ( + "crs", + "transform", + "scales", + "scale_factor", + "add_offset", + "offsets", + "grid_mapping", +) # DTYPE TO NODATA MAP # Based on: https://github.com/OSGeo/gdal/blob/v3.12.1/swig/python/gdal-utils/osgeo_utils/gdal_calc.py#L49-L66 diff --git a/rioxarray/raster_array.py b/rioxarray/raster_array.py index fb17371d..0ace3e4b 100644 --- a/rioxarray/raster_array.py +++ b/rioxarray/raster_array.py @@ -971,7 +971,7 @@ def to_raster( *, driver: Optional[str] = None, dtype: Optional[Union[str, numpy.dtype]] = None, - tags: Optional[dict[str, str]] = None, + tags: Optional[dict[str, str | list]] = None, windowed: bool = False, recalc_transform: bool = True, lock: Optional[bool] = None, @@ -1058,6 +1058,15 @@ def to_raster( self.encoded_nodata if self.encoded_nodata is not None else self.nodata ) + # Add band tags + band_tags = self.get_band_tags() + + if band_tags: + if tags is None: + tags = {"band_tags": band_tags} + else: + tags["band_tags"] = band_tags + return RasterioWriter(raster_path=raster_path).to_raster( xarray_dataarray=self._obj, tags=tags, @@ -1091,7 +1100,7 @@ def to_rasterio_dataset(self) -> Generator[DatasetReader, None, None]: Example ------- - >>> with xds.to_rasterio_dataset() as rio_ds: + >>> with xda.rio.to_rasterio_dataset() as rio_ds: >>> rio_ds.count """ @@ -1099,3 +1108,42 @@ def to_rasterio_dataset(self) -> Generator[DatasetReader, None, None]: self.to_raster(memfile.name) with memfile.open() as src_ds: yield src_ds + + def write_band_tags( + self, band_tags: list[dict], inplace: bool = False + ) -> xarray.DataArray: + """ + Write band tags to the :obj:`xarray.DataArray`'s attributes, ensuring one tag per band. + + The tags are stored in the array's attributes under the key :code:`"band_tags"`, ensuring they'll be written on disk with :func:`to_raster`. + + Parameters + ---------- + band_tags: list[dict] + A list of dictionnaries, one per band, containing the bands' metadata. + + Returns + ------- + :obj:`xarray.DataArray`: + Modified DataArray with band tags + + Raises + ------ + AssertionError: + If the length of `band_tags` does not match the number of bands. + + Example + ------- + + >>> band_tags = [ + >>> {"year": "yesterday", "where": "here"}, + >>> {"year": "now", "where": "here"} + >>> ], # Raster has two bands + >>> xda.rio.write_band_tags(band_tags, inplace=True) + """ + assert len(band_tags) == self.count, "You should give one band tag per band." + + data_obj: xarray.DataArray = self._get_obj(inplace=inplace) # type: ignore + data_obj.rio._band_tags = band_tags + data_obj.encoding["band_tags"] = band_tags + return data_obj diff --git a/rioxarray/raster_dataset.py b/rioxarray/raster_dataset.py index 560340c2..37746ebd 100644 --- a/rioxarray/raster_dataset.py +++ b/rioxarray/raster_dataset.py @@ -453,6 +453,48 @@ def interpolate_na( x_dim=self.x_dim, y_dim=self.y_dim, inplace=True ) + def _retrieve_da_mtd( + self, + data_var: xarray.DataArray, + *, + attrs_img: dict, + encodings_img: dict, + band_tags: list, + long_name: list, + ): + # Scale + try: + encodings_img["scales"].append(self._obj[data_var].encoding["scale_factor"]) + except KeyError: + attrs_img["scales"].append( + self._obj[data_var].attrs.get("scale_factor", 1.0) + ) + + # Offset + try: + encodings_img["offsets"].append(self._obj[data_var].encoding["add_offset"]) + except KeyError: + attrs_img["offsets"].append( + self._obj[data_var].attrs.get("add_offset", 0.0) + ) + + # Nodata + if self._obj[data_var].rio.encoded_nodata is not None: + encodings_img["nodatavals"].append(self._obj[data_var].rio.encoded_nodata) + else: + attrs_img["nodatavals"].append(self._obj[data_var].rio.nodata) + + # Name + long_name.append(self._obj[data_var].attrs.get("long_name", data_var)) + + # Band tags + curr_band_tags = self._obj[data_var].rio.get_band_tags() + var_attrs = self._obj[data_var].attrs.copy() + if curr_band_tags is not None: + for cbt in curr_band_tags: + var_attrs.update(cbt) + band_tags.append(var_attrs) + def to_raster( self, raster_path: Union[str, os.PathLike], @@ -515,43 +557,52 @@ def to_raster( # pylint: disable=too-many-locals variable_dim = f"band_{uuid4()}" data_array = self._obj.to_array(dim=variable_dim) + # ensure raster metadata preserved - attr_scales = [] - attr_offsets = [] - attr_nodatavals = [] - encoded_scales = [] - encoded_offsets = [] - encoded_nodatavals = [] - band_tags = [] - long_name = [] + attrs_img: dict[str, list] = { + "scales": [], + "offsets": [], + "nodatavals": [], + } + + encodings_img: dict[str, list] = { + "scales": [], + "offsets": [], + "nodatavals": [], + } + band_tags: list = [] + long_name: list = [] + for data_var in data_array[variable_dim].values: - try: - encoded_scales.append(self._obj[data_var].encoding["scale_factor"]) - except KeyError: - attr_scales.append(self._obj[data_var].attrs.get("scale_factor", 1.0)) - try: - encoded_offsets.append(self._obj[data_var].encoding["add_offset"]) - except KeyError: - attr_offsets.append(self._obj[data_var].attrs.get("add_offset", 0.0)) - long_name.append(self._obj[data_var].attrs.get("long_name", data_var)) - if self._obj[data_var].rio.encoded_nodata is not None: - encoded_nodatavals.append(self._obj[data_var].rio.encoded_nodata) - else: - attr_nodatavals.append(self._obj[data_var].rio.nodata) - band_tags.append(self._obj[data_var].attrs.copy()) - if encoded_scales: - data_array.encoding["scales"] = encoded_scales + self._retrieve_da_mtd( + data_var=data_var, + attrs_img=attrs_img, + encodings_img=encodings_img, + band_tags=band_tags, + long_name=long_name, + ) + + # Scale and offsets + if encodings_img["scales"]: + data_array.encoding["scales"] = encodings_img["scales"] else: - data_array.attrs["scales"] = attr_scales - if encoded_offsets: - data_array.encoding["offsets"] = encoded_offsets + data_array.attrs["scales"] = attrs_img["scales"] + if encodings_img["offsets"]: + data_array.encoding["offsets"] = encodings_img["offsets"] else: - data_array.attrs["offsets"] = attr_offsets + data_array.attrs["offsets"] = attrs_img["offsets"] + + # Band name and tags data_array.attrs["band_tags"] = band_tags data_array.attrs["long_name"] = long_name - use_encoded_nodatavals = bool(encoded_nodatavals) - nodatavals = encoded_nodatavals if use_encoded_nodatavals else attr_nodatavals + # Nodata + use_encoded_nodatavals = bool(encodings_img["nodatavals"]) + nodatavals = ( + encodings_img["nodatavals"] + if use_encoded_nodatavals + else attrs_img["nodatavals"] + ) nodata = nodatavals[0] if ( all(nodataval == nodata for nodataval in nodatavals) @@ -563,10 +614,13 @@ def to_raster( else: raise RioXarrayError( "All nodata values must be the same when exporting to raster. " - f"Current values: {attr_nodatavals}" + f"Current values: {attrs_img['nodatavals']}" ) + + # CRS if self.crs is not None: data_array.rio.write_crs(self.crs, inplace=True) + # write it to a raster return data_array.rio.set_spatial_dims( x_dim=self.x_dim, @@ -583,3 +637,53 @@ def to_raster( compute=compute, **profile_kwargs, ) + + def write_band_tags( + self, band_tags: dict[str, list[dict]], inplace: bool = False + ) -> xarray.Dataset: + """ + Write band tags to the Dataset's attributes, ensuring one tag per band and per variable. + + The tags are stored in the variables's attributes under the key :code:`"band_tags"`, ensuring they'll be written on disk with :func:`to_raster`. + + Parameters + ---------- + band_tags: dict[str, list[dict]] + A dictionary mapping the variable name to the band tags list of dictionnaries, one per variable's band, containing the bands' metadata. + + Returns + -------- + :class:`xarray.Dataset`: + Modified Dataset with band tags + + Raises + ------ + AssertionError: + If the length of `band_tags` does not match the number of variables. + + Example + ------- + + >>> band_tags = { + >>> "blue": [ + >>> {"year": "yesterday", "where": "here"}, + >>> {"year": "now", "where": "here"} + >>> ], # Blue has two bands + >>> "green": [ + >>> {"year": "yesterday", "where": "there"}, + >>> {"year": "now", "where": "there"} + >>> ], # Green has two bands + >>> } + >>> xds.rio.write_band_tags(band_tags, inplace=True) + """ + assert ( + list(band_tags.keys()) == self.vars + ), "You should give one band tag per Dataset variable." + + data_obj: xarray.Dataset = self._get_obj(inplace=inplace) # type: ignore + + data_obj.rio._band_tags = band_tags + for var in self.vars: + data_obj[var].rio.write_band_tags(band_tags=band_tags[var], inplace=True) + + return data_obj diff --git a/rioxarray/raster_writer.py b/rioxarray/raster_writer.py index bea306c9..26da5602 100644 --- a/rioxarray/raster_writer.py +++ b/rioxarray/raster_writer.py @@ -16,6 +16,7 @@ from xarray.conventions import encode_cf_variable from rioxarray._io import FILL_VALUE_NAMES, UNWANTED_RIO_ATTRS, _get_unsigned_dtype +from rioxarray._spatial_utils import UNWANTED_TAGS from rioxarray.exceptions import RioXarrayError try: @@ -39,19 +40,7 @@ def _write_tags(*, raster_handle, tags): Write tags to raster dataset """ # filter out attributes that should be written in a different location - skip_tags = ( - UNWANTED_RIO_ATTRS - + FILL_VALUE_NAMES - + ( - "crs", - "transform", - "scales", - "scale_factor", - "add_offset", - "offsets", - "grid_mapping", - ) - ) + skip_tags = UNWANTED_RIO_ATTRS + FILL_VALUE_NAMES + UNWANTED_TAGS # this is for when multiple values are used # in this case, it will be stored in the raster description if not isinstance(tags.get("long_name"), str): diff --git a/rioxarray/rioxarray.py b/rioxarray/rioxarray.py index 368d26a1..1df3919d 100644 --- a/rioxarray/rioxarray.py +++ b/rioxarray/rioxarray.py @@ -75,6 +75,7 @@ def __init__(self, xarray_obj: Union[xarray.DataArray, xarray.Dataset]): self._crs: Union[rasterio.crs.CRS, None, Literal[False]] = None self._gcps: Optional[list[GroundControlPoint]] = None self._rpcs: Optional[RPC] = None + self._band_tags: Optional[list[dict[str, Any]]] = None @property def crs(self) -> Optional[rasterio.crs.CRS]: @@ -119,6 +120,7 @@ def _get_obj(self, inplace: bool) -> Union[xarray.Dataset, xarray.DataArray]: obj_copy.rio._crs = self._crs obj_copy.rio._gcps = self._gcps obj_copy.rio._rpcs = self._rpcs + obj_copy.rio._band_tags = self._band_tags return obj_copy def set_crs( @@ -1132,3 +1134,19 @@ def get_rpcs(self) -> Optional[RPC]: self._rpcs = RPC(**json_rpcs) return self._rpcs + + def get_band_tags(self) -> list[dict[str, Any]] | None: + """ + Get the band tags + + Returns + ------- + :obj:`list[dict]` or None + The band tags as a list of tags dict, one per band + """ + if self._band_tags is not None: + band_tags = self._band_tags + else: + band_tags = self._obj.encoding.get("band_tags") # type: ignore + + return band_tags diff --git a/test/integration/test_integration__io.py b/test/integration/test_integration__io.py index 60fe9a44..d52ade96 100644 --- a/test/integration/test_integration__io.py +++ b/test/integration/test_integration__io.py @@ -1138,6 +1138,7 @@ def test_mask_and_scale(open_rasterio): assert numpy.nanmax(rds.air_temperature.values) == numpy.float32(302.1) test_encoding = dict(rds.air_temperature.encoding) _assert_tmmx_source(test_encoding.pop("source")) + assert len(test_encoding.pop("band_tags")) == 1 assert test_encoding == { "_Unsigned": "true", "add_offset": 220.0, @@ -1167,6 +1168,7 @@ def test_no_mask_and_scale(open_rasterio): test_encoding = dict(rds.air_temperature.encoding) source = test_encoding.pop("source") _assert_tmmx_source(source) + assert len(test_encoding.pop("band_tags")) == 1 assert test_encoding == { "_FillValue": 32767.0, "grid_mapping": "crs", @@ -1612,3 +1614,77 @@ def test_reading_writing_rpcs(tmp_path): assert ( dst.rpcs is not None ), "Existing RPCs in dst raster (through rpc attribute)" + + +@pytest.mark.parametrize("inplace", [True, False]) +def test_write_band_tags_da(tmp_path, inplace): + # Open 2D data array + raster_2d_file = os.path.join(TEST_INPUT_DATA_DIR, "cog.tif") + raster_2d = rioxarray.open_rasterio(raster_2d_file) + + # Write the tags + with pytest.raises(AssertionError): + raster_2d.rio.write_band_tags([{"first_tag": 1}, {"first_tag": 2}]) + + # Raster has one band + band_tag = [{"first_tag": 1}] + with_tags_2d = raster_2d.rio.write_band_tags(band_tag, inplace=inplace) + + # Replace raster to check in case of inplace + with_tags_2d = raster_2d if inplace else with_tags_2d + + # Check the tags + assert with_tags_2d.rio.get_band_tags() == band_tag, "Missing band tag" + + if not inplace: + with pytest.raises(TypeError): + raster_2d.rio.get_band_tags()["first_tag"] + + # Check and write the raster + out_path = tmp_path / "test.tif" + with_tags_2d.rio.to_raster(out_path) + + # Check what is obtained after writing and reloading it + out = rioxarray.open_rasterio(out_path) + assert out.rio.get_band_tags() == band_tag, "Missing band tag" + + +@pytest.mark.parametrize("inplace", [True, False]) +def test_write_band_tags_ds(tmp_path, inplace): + # Open 3D dataset + raster_3d_file = os.path.join(TEST_INPUT_DATA_DIR, "PLANET_SCOPE_3D.nc") + raster_3d = rioxarray.open_rasterio(raster_3d_file) + + # Create tags + band_tags = { + "blue": [ + {"year": "yesterday", "where": "here"}, + {"year": "now", "where": "here"}, + ], # Blue as two bands + "green": [ + {"year": "yesterday", "where": "there"}, + {"year": "now", "where": "there"}, + ], # Green as two bands + } + with_tags_3d = raster_3d.rio.write_band_tags(band_tags, inplace=inplace) + + # Replace raster to check in case of inplace + with_tags_3d = raster_3d if inplace else with_tags_3d + + # Check the bands + for var in with_tags_3d.rio.vars: + assert ( + with_tags_3d[var].rio.get_band_tags() == band_tags[var] + ), "Missing band tag" + + # Check that the tags doesn't exist + if not inplace: + with pytest.raises(KeyError): + raster_3d["green"].rio.get_band_tags()[0]["year"] + + # Write one 3D variable + out_path = tmp_path / "test_green.tif" + with_tags_3d["green"].rio.to_raster(out_path) + + out = rioxarray.open_rasterio(out_path) + assert out.rio.get_band_tags() == band_tags["green"], "Missing band tag"