From b77af1d231f2b45f8178bb0ece9adb511a7a614e Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Thu, 25 Jul 2024 08:52:46 +0200 Subject: [PATCH 01/16] lazy interpolation using map_complete_blocks --- lib/iris/analysis/_interpolation.py | 134 +++++++++++++++++----------- 1 file changed, 81 insertions(+), 53 deletions(-) diff --git a/lib/iris/analysis/_interpolation.py b/lib/iris/analysis/_interpolation.py index 6904c5ae4f..392296f4fc 100644 --- a/lib/iris/analysis/_interpolation.py +++ b/lib/iris/analysis/_interpolation.py @@ -12,6 +12,8 @@ from numpy.lib.stride_tricks import as_strided import numpy.ma as ma +from iris._lazy_data import map_complete_blocks +from iris.analysis._scipy_interpolate import _RegularGridInterpolator from iris.coords import AuxCoord, DimCoord import iris.util @@ -163,6 +165,15 @@ def snapshot_grid(cube): return x.copy(), y.copy() +def _interpolated_dtype(dtype, method): + """Determine the minimum base dtype required by the underlying interpolator.""" + if method == "nearest": + result = dtype + else: + result = np.result_type(_DEFAULT_DTYPE, dtype) + return result + + class RectilinearInterpolator: """Provide support for performing nearest-neighbour or linear interpolation. @@ -200,13 +211,8 @@ def __init__(self, src_cube, coords, method, extrapolation_mode): set to NaN. """ - # Trigger any deferred loading of the source cube's data and snapshot - # its state to ensure that the interpolator is impervious to external - # changes to the original source cube. The data is loaded to prevent - # the snapshot having lazy data, avoiding the potential for the - # same data to be loaded again and again. - if src_cube.has_lazy_data(): - src_cube.data + # Snapshot the cube state to ensure that the interpolator is impervious + # to external changes to the original source cube. self._src_cube = src_cube.copy() # Coordinates defining the dimensions to be interpolated. self._src_coords = [self._src_cube.coord(coord) for coord in coords] @@ -277,17 +283,27 @@ def _account_for_inverted(self, data): data = data[tuple(dim_slices)] return data - def _interpolate(self, data, interp_points): + @staticmethod + def _interpolate( + data, + src_points, + interp_points, + interp_shape, + method="linear", + extrapolation_mode="nanmask", + ): """Interpolate a data array over N dimensions. - Create and cache the underlying interpolator instance before invoking - it to perform interpolation over the data at the given coordinate point - values. + Create the interpolator instance before invoking it to perform + interpolation over the data at the given coordinate point values. Parameters ---------- data : ndarray A data array, to be interpolated in its first 'N' dimensions. + src_points : + The point values defining the dimensions to be interpolated. + (len(src_points) should be N). interp_points : ndarray An array of interpolation coordinate values. Its shape is (..., N) where N is the number of interpolation @@ -296,44 +312,51 @@ def _interpolate(self, data, interp_points): coordinate, which is mapped to the i'th data dimension. The other (leading) dimensions index over the different required sample points. + interp_shape : + The shape of the interpolated array in its first 'N' dimensions + (len(interp_shape) should be N). + method: str + Interpolation method (see :class:`iris.analysis._interpolation.RectilinearInterpolator`) + extrapolation_mode: str + Extrapolation mode (see :class:`iris.analysis._interpolation.RectilinearInterpolator`) Returns ------- :class:`np.ndarray`. 
-            Its shape is "points_shape + extra_shape",
+            Its shape is "interp_shape + extra_shape",
             where "extra_shape" is the remaining non-interpolated dimensions of
-            the data array (i.e. 'data.shape[N:]'), and "points_shape" is the
-            leading dimensions of interp_points,
-            (i.e. 'interp_points.shape[:-1]').
-
+            the data array (i.e. 'data.shape[N:]').
         """
-        from iris.analysis._scipy_interpolate import _RegularGridInterpolator
-
-        dtype = self._interpolated_dtype(data.dtype)
+        dtype = _interpolated_dtype(data.dtype, method)
         if data.dtype != dtype:
             # Perform dtype promotion.
             data = data.astype(dtype)
 
-        mode = EXTRAPOLATION_MODES[self._mode]
-        if self._interpolator is None:
-            # Cache the interpolator instance.
-            # NB. The constructor of the _RegularGridInterpolator class does
-            # some unnecessary checks on the fill_value parameter,
-            # so we set it afterwards instead. Sneaky. ;-)
-            self._interpolator = _RegularGridInterpolator(
-                self._src_points,
-                data,
-                method=self.method,
-                bounds_error=mode.bounds_error,
-                fill_value=None,
-            )
-        else:
-            self._interpolator.values = data
+        # Determine the shape of the interpolated result.
+        ndims_interp = len(interp_shape)
+        extra_shape = data.shape[ndims_interp:]
+        final_shape = [*interp_shape, *extra_shape]
+
+        mode = EXTRAPOLATION_MODES[extrapolation_mode]
+        _data = np.ma.getdata(data)
+        # NB. The constructor of the _RegularGridInterpolator class does
+        # some unnecessary checks on the fill_value parameter,
+        # so we set it afterwards instead. Sneaky. ;-)
+        interpolator = _RegularGridInterpolator(
+            src_points,
+            _data,
+            method=method,
+            bounds_error=mode.bounds_error,
+            fill_value=None,
+        )
+        interpolator.fill_value = mode.fill_value
+        result = interpolator(interp_points)
 
-        # We may be re-using a cached interpolator, so ensure the fill
-        # value is set appropriately for extrapolating data values.
-        self._interpolator.fill_value = mode.fill_value
-        result = self._interpolator(interp_points)
+        # The interpolated result now has the shape "points_shape + extra_shape",
+        # where "points_shape" is the leading dimensions of "interp_points"
+        # (i.e. 'interp_points.shape[:-1]'). We reshape it to match the shape
+        # of the interpolated dimensions.
+        result = result.reshape(final_shape)
 
         if result.dtype != data.dtype:
             # Cast the data dtype to be as expected. Note that, the dtype
@@ -346,13 +369,11 @@ def _interpolate(self, data, interp_points):
             # `data` is not a masked array.
             src_mask = np.ma.getmaskarray(data)
             # Switch the extrapolation to work with mask values.
-            self._interpolator.fill_value = mode.mask_fill_value
-            self._interpolator.values = src_mask
-            mask_fraction = self._interpolator(interp_points)
+            interpolator.fill_value = mode.mask_fill_value
+            interpolator.values = src_mask
+            mask_fraction = interpolator(interp_points)
             new_mask = mask_fraction > 0
-            if ma.isMaskedArray(data) or np.any(new_mask):
-                result = np.ma.MaskedArray(result, new_mask)
-
+            result = np.ma.MaskedArray(result, new_mask)
         return result
 
     def _resample_coord(self, sample_points, coord, coord_dims):
@@ -530,7 +551,7 @@ def _points(self, sample_points, data, data_dims=None):
         _, src_order = zip(*sorted(dmap.items(), key=operator.itemgetter(0)))
 
         # Prepare the sample points for interpolation and calculate the
-        # shape of the interpolated result.
+        # shape of the interpolated dimensions.
interp_points = [] interp_shape = [] for index, points in enumerate(sample_points): @@ -539,10 +560,6 @@ def _points(self, sample_points, data, data_dims=None): interp_points.append(points) interp_shape.append(points.size) - interp_shape.extend( - length for dim, length in enumerate(data.shape) if dim not in di - ) - # Convert the interpolation points into a cross-product array # with shape (n_cross_points, n_dims) interp_points = np.asarray([pts for pts in product(*interp_points)]) @@ -554,9 +571,20 @@ def _points(self, sample_points, data, data_dims=None): # Transpose data in preparation for interpolation. data = np.transpose(data, interp_order) - # Interpolate and reshape the data ... - result = self._interpolate(data, interp_points) - result = result.reshape(interp_shape) + # Interpolate the data, meging the chunks in the interpolated + # dimensions. + dims_merge_chunks = [dmap[d] for d in di] + result = map_complete_blocks( + data, + self._interpolate, + dims=dims_merge_chunks, + out_sizes=interp_shape, + src_points=self._src_points, + interp_points=interp_points, + interp_shape=interp_shape, + method=self._method, + extrapolation_mode=self._mode, + ) if src_order != dims: # Restore the interpolated result to the original @@ -592,7 +620,7 @@ def __call__(self, sample_points, collapse_scalar=True): sample_points = _canonical_sample_points(self._src_coords, sample_points) - data = self._src_cube.data + data = self._src_cube.core_data() # Interpolate the cube payload. interpolated_data = self._points(sample_points, data) From 0021cb0b28c52a516c66664d75c0e87bfd698dba Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Thu, 25 Jul 2024 11:42:33 +0200 Subject: [PATCH 02/16] pre-commit fixes --- lib/iris/analysis/_interpolation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/iris/analysis/_interpolation.py b/lib/iris/analysis/_interpolation.py index 392296f4fc..f3e1870427 100644 --- a/lib/iris/analysis/_interpolation.py +++ b/lib/iris/analysis/_interpolation.py @@ -315,10 +315,10 @@ def _interpolate( interp_shape : The shape of the interpolated array in its first 'N' dimensions (len(interp_shape) should be N). - method: str - Interpolation method (see :class:`iris.analysis._interpolation.RectilinearInterpolator`) - extrapolation_mode: str - Extrapolation mode (see :class:`iris.analysis._interpolation.RectilinearInterpolator`) + method : str + Interpolation method (see :class:`iris.analysis._interpolation.RectilinearInterpolator`). + extrapolation_mode : str + Extrapolation mode (see :class:`iris.analysis._interpolation.RectilinearInterpolator`). Returns ------- @@ -571,7 +571,7 @@ def _points(self, sample_points, data, data_dims=None): # Transpose data in preparation for interpolation. data = np.transpose(data, interp_order) - # Interpolate the data, meging the chunks in the interpolated + # Interpolate the data, merging the chunks in the interpolated # dimensions. 
dims_merge_chunks = [dmap[d] for d in di]
         result = map_complete_blocks(

From 18908ae17fe45b5a93e2639c695f40b7e7bebeaa Mon Sep 17 00:00:00 2001
From: Francesco Nattino
Date: Tue, 27 Aug 2024 22:30:36 +0200
Subject: [PATCH 03/16] replace test on interpolation with lazy data

---
 .../test_RectilinearInterpolator.py           | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/lib/iris/tests/unit/analysis/interpolation/test_RectilinearInterpolator.py b/lib/iris/tests/unit/analysis/interpolation/test_RectilinearInterpolator.py
index ed6e230840..1513738b7d 100644
--- a/lib/iris/tests/unit/analysis/interpolation/test_RectilinearInterpolator.py
+++ b/lib/iris/tests/unit/analysis/interpolation/test_RectilinearInterpolator.py
@@ -499,24 +499,37 @@ def test_orthogonal_cube_squash(self):
         self.assertEqual(result_cube, non_collapsed_cube[0, ...])
 
 
+class Test___call___real_data(ThreeDimCube):
+    def test_src_cube_data_loaded(self):
+        # If the source cube has real data when the interpolator is
+        # instantiated, then the interpolated result should also have
+        # real data.
+        self.assertFalse(self.cube.has_lazy_data())
+
+        # Perform interpolation and check the data is real.
+        interpolator = RectilinearInterpolator(
+            self.cube, ["latitude"], LINEAR, EXTRAPOLATE
+        )
+        res = interpolator([[1.5]])
+        self.assertFalse(res.has_lazy_data())
+
+
 class Test___call___lazy_data(ThreeDimCube):
     def test_src_cube_data_loaded(self):
-        # RectilinearInterpolator operates using a snapshot of the source cube.
         # If the source cube has lazy data when the interpolator is
-        # instantiated we want to make sure the source cube's data is
-        # loaded as a consequence of interpolation to avoid the risk
-        # of loading it again and again.
+        # instantiated, then the interpolated result should also have
+        # lazy data.
 
         # Modify self.cube to have lazy data.
         self.cube.data = as_lazy_data(self.data)
         self.assertTrue(self.cube.has_lazy_data())
 
-        # Perform interpolation and check the data has been loaded.
+        # Perform interpolation and check the data is lazy.
         interpolator = RectilinearInterpolator(
             self.cube, ["latitude"], LINEAR, EXTRAPOLATE
         )
-        interpolator([[1.5]])
-        self.assertFalse(self.cube.has_lazy_data())
+        res = interpolator([[1.5]])
+        self.assertTrue(res.has_lazy_data())
 
 
 class Test___call___time(tests.IrisTest):

From 555f3c72a45bd99d27ae655b41627aac3fae8a1c Mon Sep 17 00:00:00 2001
From: Francesco Nattino <49899980+fnattino@users.noreply.github.com>
Date: Fri, 13 Sep 2024 11:47:16 +0200
Subject: [PATCH 04/16] Update lib/iris/analysis/_interpolation.py

Co-authored-by: Martin Yeo <40734014+trexfeathers@users.noreply.github.com>
---
 lib/iris/analysis/_interpolation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/iris/analysis/_interpolation.py b/lib/iris/analysis/_interpolation.py
index f3e1870427..cd0c1f9735 100644
--- a/lib/iris/analysis/_interpolation.py
+++ b/lib/iris/analysis/_interpolation.py
@@ -571,9 +571,9 @@ def _points(self, sample_points, data, data_dims=None):
         # Transpose data in preparation for interpolation.
         data = np.transpose(data, interp_order)
 
-        # Interpolate the data, merging the chunks in the interpolated
-        # dimensions.
-        dims_merge_chunks = [dmap[d] for d in di]
+        # Interpolate the data, ensuring the interpolated dimensions
+        # are not chunked.
+ dims_not_chunked = [dmap[d] for d in di] result = map_complete_blocks( data, self._interpolate, From 7a081085308e8ef816664b8e5cdffe5cc05210a5 Mon Sep 17 00:00:00 2001 From: Francesco Nattino <49899980+fnattino@users.noreply.github.com> Date: Fri, 13 Sep 2024 11:47:16 +0200 Subject: [PATCH 05/16] Update lib/iris/analysis/_interpolation.py Co-authored-by: Martin Yeo <40734014+trexfeathers@users.noreply.github.com> --- lib/iris/analysis/_interpolation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/iris/analysis/_interpolation.py b/lib/iris/analysis/_interpolation.py index f3e1870427..7e255bd87e 100644 --- a/lib/iris/analysis/_interpolation.py +++ b/lib/iris/analysis/_interpolation.py @@ -571,13 +571,13 @@ def _points(self, sample_points, data, data_dims=None): # Transpose data in preparation for interpolation. data = np.transpose(data, interp_order) - # Interpolate the data, merging the chunks in the interpolated - # dimensions. - dims_merge_chunks = [dmap[d] for d in di] + # Interpolate the data, ensuring the interpolated dimensions + # are not chunked. + dims_not_chunked = [dmap[d] for d in di] result = map_complete_blocks( data, self._interpolate, - dims=dims_merge_chunks, + dims=dims_not_chunked, out_sizes=interp_shape, src_points=self._src_points, interp_points=interp_points, From 3814383d5a71b9cc12c2b6ef1b8662937c58372d Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Tue, 17 Sep 2024 11:05:39 +0200 Subject: [PATCH 06/16] resume local import --- lib/iris/analysis/_interpolation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/iris/analysis/_interpolation.py b/lib/iris/analysis/_interpolation.py index 7e255bd87e..e2419b6c22 100644 --- a/lib/iris/analysis/_interpolation.py +++ b/lib/iris/analysis/_interpolation.py @@ -13,7 +13,6 @@ import numpy.ma as ma from iris._lazy_data import map_complete_blocks -from iris.analysis._scipy_interpolate import _RegularGridInterpolator from iris.coords import AuxCoord, DimCoord import iris.util @@ -327,6 +326,8 @@ def _interpolate( where "extra_shape" is the remaining non-interpolated dimensions of the data array (i.e. 'data.shape[N:]'). """ + from iris.analysis._scipy_interpolate import _RegularGridInterpolator + dtype = _interpolated_dtype(data.dtype, method) if data.dtype != dtype: # Perform dtype promotion. From 0c5dc9a9405eb0725975378b0b6f51ecf7407e1d Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Fri, 20 Sep 2024 14:47:19 +0200 Subject: [PATCH 07/16] add entry to latest.rst --- docs/src/whatsnew/latest.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index bc821511be..d7415849ae 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -71,6 +71,10 @@ This document explains the changes made to Iris for this release the concatenation axis. This issue can be avoided by disabling the problematic check. (:pull:`5926`) +#. `@fnattino`_ enabled lazy cube interpolation using the linear and + nearest-neighbour interpolators (:class:`iris.analysis.Linear` and + :class:`iris.analysis.Nearest`). 
(:pull:`6084`) + 🔥 Deprecations =============== From 3481e46daa8de5b63cc0845299fc36fc31f2c374 Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Fri, 20 Sep 2024 14:59:01 +0200 Subject: [PATCH 08/16] add author name to list --- docs/src/whatsnew/latest.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index d7415849ae..5d512be6c2 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -105,6 +105,7 @@ This document explains the changes made to Iris for this release Whatsnew author names (@github name) in alphabetical order. Note that, core dev names are automatically included by the common_links.inc: +.. _@fnattino: https://github.com/fnattino .. _@jrackham-mo: https://github.com/jrackham-mo From 63714f034f24948c0a3b65e2871ce77b4ae16da7 Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Thu, 28 Nov 2024 11:01:01 +0100 Subject: [PATCH 09/16] drop duplicated method --- lib/iris/analysis/_interpolation.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/lib/iris/analysis/_interpolation.py b/lib/iris/analysis/_interpolation.py index e2419b6c22..e346a3c45f 100644 --- a/lib/iris/analysis/_interpolation.py +++ b/lib/iris/analysis/_interpolation.py @@ -480,14 +480,6 @@ def _validate(self): msg = "Cannot interpolate over the non-monotonic coordinate {}." raise ValueError(msg.format(coord.name())) - def _interpolated_dtype(self, dtype): - """Determine the minimum base dtype required by the underlying interpolator.""" - if self._method == "nearest": - result = dtype - else: - result = np.result_type(_DEFAULT_DTYPE, dtype) - return result - def _points(self, sample_points, data, data_dims=None): """Interpolate at the specified points. @@ -556,7 +548,7 @@ def _points(self, sample_points, data, data_dims=None): interp_points = [] interp_shape = [] for index, points in enumerate(sample_points): - dtype = self._interpolated_dtype(self._src_points[index].dtype) + dtype = _interpolated_dtype(self._src_points[index].dtype, self._method) points = np.array(points, dtype=dtype, ndmin=1) interp_points.append(points) interp_shape.append(points.size) From 98143f2937d59ae6e8109c9603fc8806719beeae Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Thu, 28 Nov 2024 12:00:02 +0100 Subject: [PATCH 10/16] new signature of map_complete_blocks --- lib/iris/analysis/_interpolation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/iris/analysis/_interpolation.py b/lib/iris/analysis/_interpolation.py index e346a3c45f..55a97808d1 100644 --- a/lib/iris/analysis/_interpolation.py +++ b/lib/iris/analysis/_interpolation.py @@ -572,6 +572,7 @@ def _points(self, sample_points, data, data_dims=None): self._interpolate, dims=dims_not_chunked, out_sizes=interp_shape, + dtype=_interpolated_dtype(data.dtype, self._method), src_points=self._src_points, interp_points=interp_points, interp_shape=interp_shape, From 948c75e094681863af2658d58c977c9107aa5f94 Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Thu, 28 Nov 2024 17:17:44 +0100 Subject: [PATCH 11/16] update docstrings on lazy data --- lib/iris/analysis/_interpolation.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/iris/analysis/_interpolation.py b/lib/iris/analysis/_interpolation.py index 55a97808d1..67b10727ec 100644 --- a/lib/iris/analysis/_interpolation.py +++ b/lib/iris/analysis/_interpolation.py @@ -504,9 +504,8 @@ def _points(self, sample_points, data, data_dims=None): Returns ------- - 
:class:`~numpy.ndarray` or :class:`~numpy.ma.MaskedArray`
-            An :class:`~numpy.ndarray` or :class:`~numpy.ma.MaskedArray`
-            instance of the interpolated data.
+        ndarray
+            The interpolated data array.
 
         """
         dims = list(range(self._src_cube.ndim))
@@ -589,6 +589,9 @@ def _points(self, sample_points, data, data_dims=None):
     def __call__(self, sample_points, collapse_scalar=True):
         """Construct a cube from the specified orthogonal interpolation points.
 
+        If the source cube has lazy data, the returned cube will also
+        have lazy data.
+
         Parameters
         ----------
         sample_points :
@@ -607,6 +610,14 @@ def __call__(self, sample_points, collapse_scalar=True):
             of the cube will be the number of original cube dimensions minus
             the number of scalar coordinates, if collapse_scalar is True.
 
+        Notes
+        -----
+        .. note::
+
+            If the source cube has lazy data,
+            `chunks <https://docs.dask.org/en/latest/array-chunks.html>`__
+            in the interpolated dimensions will be combined before interpolation.
+
         """
         if len(sample_points) != len(self._src_coords):
             msg = "Expected sample points for {} coordinates, got {}."

From d190a8b25d6715cefdb59c3f878ab520223b1e78 Mon Sep 17 00:00:00 2001
From: Francesco Nattino
Date: Fri, 6 Dec 2024 11:18:34 +0100
Subject: [PATCH 12/16] update userguide with lazy interpolator

---
 .../interpolation_and_regridding.rst          | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/docs/src/userguide/interpolation_and_regridding.rst b/docs/src/userguide/interpolation_and_regridding.rst
index 571c43bf0e..7678bfe8cd 100644
--- a/docs/src/userguide/interpolation_and_regridding.rst
+++ b/docs/src/userguide/interpolation_and_regridding.rst
@@ -29,9 +29,9 @@ The following are the regridding schemes that are currently available in Iris:
 * point in cell regridding (:class:`iris.analysis.PointInCell`) and
 * area-weighted regridding (:class:`iris.analysis.AreaWeighted`, first-order conservative).
 
-The linear, nearest-neighbor, and area-weighted regridding schemes support
-lazy regridding, i.e. if the source cube has lazy data, the resulting cube
-will also have lazy data.
+The linear and nearest-neighbour interpolation schemes, and the linear, nearest-neighbour,
+and area-weighted regridding schemes support lazy evaluation, i.e. if the source cube has lazy data,
+the resulting cube will also have lazy data.
 
 See :doc:`real_and_lazy_data` for an introduction to lazy data.
 See :doc:`../further_topics/which_regridder_to_use` for a more in depth overview of the different regridders.
@@ -417,12 +417,12 @@ In each case ``result`` will be the input cube regridded to the grid defined
 by the target grid cube (in this case ``rotated_psl``) that we used to define
 the cached regridder.
 
-Regridding Lazy Data
-^^^^^^^^^^^^^^^^^^^^
+Interpolating and Regridding Lazy Data
+--------------------------------------
 
-If you are working with large cubes, especially when you are regridding to a
-high resolution target grid, you may run out of memory when trying to
-regrid a cube. When this happens, make sure the input cube has lazy data
+If you are working with large cubes, you may run out of memory when trying to
+interpolate or regrid a cube. For instance, this might happen when regridding to a
+high resolution target grid. When this happens, make sure the input cube has lazy data
 
 >>> air_temp = iris.load_cube(iris.sample_data_path('A1B_north_america.nc'))
 >>> air_temp
 <iris 'Cube' of air_temperature / (K) (time: 240; latitude: 37; longitude: 49)>
 >>> air_temp.has_lazy_data()
 True
 
-and the regridding scheme supports lazy data.
All regridding schemes described
-here support lazy data. If you still run out of memory even while using lazy
-data, inspect the
+and the interpolation or regridding scheme supports lazy data. All interpolation and
+regridding schemes described here with the exception of the point-in-cell regridder
+support lazy data. If you still run out of memory even while using lazy data, inspect the
 `chunks <https://docs.dask.org/en/latest/array-chunks.html>`__
 :
 
 >>> air_temp.lazy_data().chunks
 ((240,), (37,), (49,))
 
@@ -455,6 +455,6 @@ dimension, to regrid it in 8 chunks of 30 timesteps at a time:
 Assuming that Dask is configured such that it processes only a few chunks of
 the data array at a time, this will further reduce memory use.
 
-Note that chunking in the horizontal dimensions is not supported by the
-regridding schemes. Chunks in these dimensions will automatically be combined
+Note that chunking in the horizontal dimensions is not supported by the interpolation
+and regridding schemes. Chunks in these dimensions will automatically be combined
 before regridding.

From 47bce0ec36942af62bcbd4a1634e8f37fb2fdc5c Mon Sep 17 00:00:00 2001
From: Francesco Nattino
Date: Sun, 22 Dec 2024 14:30:18 +0100
Subject: [PATCH 13/16] the unstructured NN regridder does not support lazy
 data

---
 docs/src/userguide/interpolation_and_regridding.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/src/userguide/interpolation_and_regridding.rst b/docs/src/userguide/interpolation_and_regridding.rst
index 7678bfe8cd..76c68e0477 100644
--- a/docs/src/userguide/interpolation_and_regridding.rst
+++ b/docs/src/userguide/interpolation_and_regridding.rst
@@ -431,10 +431,10 @@ high resolution target grid. When this happens, make sure the input cube has laz
 True
 
 and the interpolation or regridding scheme supports lazy data. All interpolation and
-regridding schemes described here with the exception of the point-in-cell regridder
-support lazy data. If you still run out of memory even while using lazy data, inspect the
-`chunks <https://docs.dask.org/en/latest/array-chunks.html>`__
-:
+regridding schemes described here with the exception of :class:`iris.analysis.PointInCell`
+(point-in-cell regridder) and :class:`iris.analysis.UnstructuredNearest` (nearest-neighbour
+regridder) support lazy data. If you still run out of memory even while using lazy data,
+inspect the `chunks <https://docs.dask.org/en/latest/array-chunks.html>`__ :
 
 >>> air_temp.lazy_data().chunks
 ((240,), (37,), (49,))

From 609c75aa49fdfdb57c8c71d77184467125b7d733 Mon Sep 17 00:00:00 2001
From: Francesco Nattino
Date: Sun, 22 Dec 2024 14:36:33 +0100
Subject: [PATCH 14/16] remove caching an interpolator

---
 .../interpolation_and_regridding.rst          | 40 -------------------
 1 file changed, 40 deletions(-)

diff --git a/docs/src/userguide/interpolation_and_regridding.rst b/docs/src/userguide/interpolation_and_regridding.rst
index 76c68e0477..4a95276ab2 100644
--- a/docs/src/userguide/interpolation_and_regridding.rst
+++ b/docs/src/userguide/interpolation_and_regridding.rst
@@ -194,46 +194,6 @@ For example, to mask values that lie beyond the range of the original data:
    [-- 494.44451904296875 588.888916015625 683.333251953125 777.77783203125
    872.2222290039062 966.666748046875 1061.111083984375 1155.555419921875 --]
 
-
-.. _caching_an_interpolator:
-
-Caching an Interpolator
-^^^^^^^^^^^^^^^^^^^^^^^
-
-If you need to interpolate a cube on multiple sets of sample points you can
-'cache' an interpolator to be used for each of these interpolations. This can
-shorten the execution time of your code as the most computationally
-intensive part of an interpolation is setting up the interpolator.
- -To cache an interpolator you must set up an interpolator scheme and call the -scheme's interpolator method. The interpolator method takes as arguments: - -#. a cube to be interpolated, and -#. an iterable of coordinate names or coordinate instances of the coordinates that are to be interpolated over. - -For example: - - >>> air_temp = iris.load_cube(iris.sample_data_path('air_temp.pp')) - >>> interpolator = iris.analysis.Nearest().interpolator(air_temp, ['latitude', 'longitude']) - -When this cached interpolator is called you must pass it an iterable of sample points -that have the same form as the iterable of coordinates passed to the constructor. -So, to use the cached interpolator defined above: - - >>> latitudes = np.linspace(48, 60, 13) - >>> longitudes = np.linspace(-11, 2, 14) - >>> for lat, lon in zip(latitudes, longitudes): - ... result = interpolator([lat, lon]) - -In each case ``result`` will be a cube interpolated from the ``air_temp`` cube we -passed to interpolator. - -Note that you must specify the required extrapolation mode when setting up the cached interpolator. -For example:: - - >>> interpolator = iris.analysis.Nearest(extrapolation_mode='nan').interpolator(cube, coords) - - .. _regridding: Regridding From dd14caaef1fdfaf94d3d52ccd740ca726a9a9933 Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Sun, 22 Dec 2024 14:59:55 +0100 Subject: [PATCH 15/16] update what's new entry --- docs/src/whatsnew/latest.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index a61553a2ab..abf7edd822 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -63,7 +63,10 @@ This document explains the changes made to Iris for this release #. `@fnattino`_ enabled lazy cube interpolation using the linear and nearest-neighbour interpolators (:class:`iris.analysis.Linear` and - :class:`iris.analysis.Nearest`). (:pull:`6084`) + :class:`iris.analysis.Nearest`). Note that this implementation removes + performance benefits linked to caching an interpolator object. While this does + not break previously suggested code (instantiating and re-using an interpolator + object remains possible), this is no longer an advertised feature. (:pull:`6084`) 🔥 Deprecations From 2fb8fc37ed45ab4aea6b408aae8546b49bd4503b Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Sun, 22 Dec 2024 15:10:36 +0100 Subject: [PATCH 16/16] remove links to docs section about caching interpolators --- lib/iris/analysis/__init__.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py index 2c890ef8cc..0b8443273d 100644 --- a/lib/iris/analysis/__init__.py +++ b/lib/iris/analysis/__init__.py @@ -2673,9 +2673,7 @@ def interpolator(self, cube, coords): the given coordinates. Typically you should use :meth:`iris.cube.Cube.interpolate` for - interpolating a cube. There are, however, some situations when - constructing your own interpolator is preferable. These are detailed - in the :ref:`user guide `. + interpolating a cube. Parameters ---------- @@ -2876,9 +2874,7 @@ def interpolator(self, cube, coords): by the dimensions of the specified coordinates. Typically you should use :meth:`iris.cube.Cube.interpolate` for - interpolating a cube. There are, however, some situations when - constructing your own interpolator is preferable. These are detailed - in the :ref:`user guide `. + interpolating a cube. Parameters ----------
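
The core mechanism of the series, introduced in PATCH 01, is to hand the NumPy-level `_interpolate` function to `iris._lazy_data.map_complete_blocks`, which merges the chunks of the interpolated dimensions into complete blocks and maps the function over the remaining chunks. The sketch below is a rough, plain-dask approximation of that helper, written for illustration only: `map_complete_blocks_sketch` is an invented name, and the real helper additionally accepts real (non-lazy) arrays and carries the output-dtype bookkeeping added in PATCH 10.

import dask.array as da
import numpy as np


def map_complete_blocks_sketch(lazy, func, dims, out_sizes, dtype):
    # Merge chunks so each of the listed dimensions forms one complete
    # block; func then always sees whole slices of those dimensions.
    rechunked = lazy.rechunk({dim: -1 for dim in dims})
    # Declare the output chunk layout: the mapped dimensions change to
    # their new sizes, all other dimensions keep the source chunking.
    out_chunks = list(rechunked.chunks)
    for dim, size in zip(dims, out_sizes):
        out_chunks[dim] = (size,)
    return rechunked.map_blocks(func, chunks=tuple(out_chunks), dtype=dtype)


# Toy stand-in for interpolation: double the resolution of axis 0.
lazy = da.random.random((8, 16), chunks=(2, 4))
result = map_complete_blocks_sketch(
    lazy,
    lambda block: np.repeat(block, 2, axis=0),
    dims=[0],
    out_sizes=[16],
    dtype=np.float64,
)
print(result.chunks)  # ((16,), (4, 4, 4, 4)): axis 0 merged, axis 1 chunking kept
print(result.shape)  # (16, 16)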
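
The user-facing effect, end to end, in a self-contained sketch (again not taken from the patches: the cube construction, coordinate names, units, sample points and chunk sizes are arbitrary choices for the example).

import dask.array as da
import numpy as np

import iris.analysis
from iris.coords import DimCoord
from iris.cube import Cube

# A cube backed by a chunked dask array: four chunks along time, one
# chunk along each horizontal dimension.
lazy = da.arange(24 * 5 * 6, dtype=np.float64).reshape(24, 5, 6).rechunk((6, 5, 6))
cube = Cube(lazy, long_name="air_temperature", units="K")
cube.add_dim_coord(DimCoord(np.arange(24.0), long_name="time"), 0)
cube.add_dim_coord(
    DimCoord(np.linspace(-60.0, 60.0, 5), standard_name="latitude", units="degrees"), 1
)
cube.add_dim_coord(
    DimCoord(np.linspace(0.0, 100.0, 6), standard_name="longitude", units="degrees"), 2
)
assert cube.has_lazy_data()

# Before this series the call below realised the data up front; with the
# series applied the result is lazy and the source cube stays lazy too.
result = cube.interpolate([("latitude", [-10.0, 15.5])], iris.analysis.Linear())
assert cube.has_lazy_data()
assert result.has_lazy_data()

# Chunks are combined only along the interpolated (latitude) dimension,
# so the time chunking of the source carries over to the result.
print(result.lazy_data().chunks)

# Computation happens only when the data is finally requested.
print(result.data.shape)  # (24, 2, 6)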