Source code for dep_tools.processors

"""Definition of base class and implementations of :class:`Processor` objects.

:class:`Processor` objects process input data to produce output data. As such,
they are the most likely to be written for custom processing.
"""

from abc import ABC, abstractmethod

from xarray import DataArray, Dataset

from .landsat_utils import mask_clouds as mask_clouds_landsat
from .s2_utils import mask_clouds as mask_clouds_s2
from .utils import scale_and_offset, scale_to_int16


class Processor(ABC):
    """Abstract base class for objects that turn input data into output data.

    Concrete subclasses must implement :meth:`process`.

    Args:
        send_area_to_processor: Whether to send the input area (typically
            used by a loader to load appropriate data) to the processor.
    """

    def __init__(self, send_area_to_processor: bool = False):
        # Only stored here; nothing in this class reads the flag.
        self.send_area_to_processor = send_area_to_processor

    @abstractmethod
    def process(self, input_data):
        """Process the data.

        Args:
            input_data (Any): Any data.
        """
        ...
class LandsatProcessor(Processor):
    """A :class:`Processor` that optionally cloud-masks and rescales Landsat data."""

    def __init__(
        self,
        send_area_to_processor: bool = False,
        scale_and_offset: bool = True,
        mask_clouds: bool = True,
        mask_clouds_kwargs: dict | None = None,
    ) -> None:
        """A :class:`Processor` for use with Landsat data.

        Typically this Processor will be subclassed when working with
        Landsat data.

        Args:
            send_area_to_processor: Passed unchanged to :class:`Processor`.
            scale_and_offset: Whether to scale and offset the input data.
                Landsat data is typically stored in 16-bit integers, this
                applies the standard scale and offset values to each band
                for surface reflectance data and (as a side effect)
                converts the data type to floating point.
            mask_clouds: Whether to mask clouds, using
                :func:`dep_tools.landsat_utils.mask_clouds`.
            mask_clouds_kwargs: Additional keyword arguments to
                :func:`dep_tools.landsat_utils.mask_clouds`. ``None`` (the
                default) means no additional arguments.
        """
        super().__init__(send_area_to_processor)
        self.scale_and_offset = scale_and_offset
        self.mask_clouds = mask_clouds
        # Build a fresh dict per instance instead of using a mutable default
        # argument, which would be shared by every instance created without
        # explicit kwargs. The attribute keeps its existing name
        # (``mask_kwargs``) so subclasses that read it keep working.
        self.mask_kwargs = {} if mask_clouds_kwargs is None else mask_clouds_kwargs

    def process(self, xr: DataArray | Dataset) -> DataArray | Dataset:
        """Process the data.

        Cloud masking (if enabled) runs before scale and offset (if enabled).

        Args:
            xr: Any input data, but to benefit from the functionality of
                this class, input should be Landsat surface reflectance
                data, typically with the `"QA_PIXEL"` band as well.

        Returns:
            The input data, optionally with clouds masked and/or scale and
            offset applied.
        """
        if self.mask_clouds:
            xr = mask_clouds_landsat(xr, **self.mask_kwargs)

        if self.scale_and_offset:
            # These values only work for SR bands of landsat. Ideally we could
            # read from metadata. _Really_ ideally we could just pass "scale"
            # to rioxarray/stack/odc.stac.load but apparently that doesn't work.
            scale = 0.0000275
            offset = -0.2
            # The bare name resolves to the module-level helper, not to the
            # boolean instance attribute of the same name.
            xr = scale_and_offset(xr, scale=[scale], offset=offset)

        return xr
class S2Processor(Processor):
    """A :class:`Processor` that optionally cloud-masks and rescales Sentinel-2 data."""

    def __init__(
        self,
        send_area_to_processor: bool = False,
        scale_and_offset: bool = False,
        mask_clouds: bool = True,
        mask_clouds_kwargs: dict | None = None,
    ) -> None:
        """A :class:`Processor` for use with Sentinel-2 data.

        Typically this Processor will be subclassed when working with
        Sentinel-2 data.

        Args:
            send_area_to_processor: Passed unchanged to :class:`Processor`.
            scale_and_offset: Whether to scale and offset the input data.
                When enabled, a scale of 1/10000 and an offset of 0 are
                applied through :func:`dep_tools.utils.scale_and_offset`.
                A warning is printed unless the instance has a truthy
                ``harmonize_to_old`` attribute.
            mask_clouds: Whether to mask clouds, using
                :func:`dep_tools.s2_utils.mask_clouds`.
            mask_clouds_kwargs: Additional keyword arguments to
                :func:`dep_tools.s2_utils.mask_clouds`. ``None`` (the
                default) means no additional arguments.
        """
        super().__init__(send_area_to_processor)
        self.scale_and_offset = scale_and_offset
        self.mask_clouds = mask_clouds
        # Build a fresh dict per instance instead of using a mutable default
        # argument, which would be shared by every instance created without
        # explicit kwargs.
        self.mask_clouds_kwargs = (
            {} if mask_clouds_kwargs is None else mask_clouds_kwargs
        )

    def process(self, xr: DataArray) -> DataArray:
        """Process the data.

        Cloud masking (if enabled) runs before scale and offset (if enabled).

        Args:
            xr: Any input data, but to benefit from the functionality of
                this class, input should be Sentinel-2 data, typically
                including the `"SCL"` band.

        Returns:
            The input data, optionally with clouds masked and/or scale and
            offset applied.
        """
        if self.mask_clouds:
            xr = mask_clouds_s2(xr, **self.mask_clouds_kwargs)

        if self.scale_and_offset:
            # ``harmonize_to_old`` is never assigned by this class, so reading
            # it directly raised AttributeError whenever scale_and_offset was
            # enabled. Treat a missing attribute as False; a subclass that
            # sets it is still honoured.
            if not getattr(self, "harmonize_to_old", False):
                print(
                    "Warning: scale and offset is dangerous when used without harmonize_to_old"
                )
            scale = 1 / 10000
            offset = 0
            # The bare name resolves to the module-level helper, not to the
            # boolean instance attribute of the same name.
            xr = scale_and_offset(xr, scale=[scale], offset=offset)

        return xr
class XrPostProcessor(Processor):
    """A :class:`Processor` that prepares output data for writing."""

    def __init__(
        self,
        convert_to_int16: bool = True,
        output_value_multiplier: int = 10000,
        scale_int16s: bool = False,
        output_nodata: int = -32767,
        extra_attrs: dict | None = None,
    ):
        """A Processor with typical things to do to output data.

        Some :class:`Task` objects allow for use of a Processor to prep data
        for writing after the actual processing. This is mostly a wrapper
        around :func:`scale_to_int16`.

        Args:
            convert_to_int16: Whether to convert output data to 16-bit
                (signed) integer.
            output_value_multiplier: A multiplier to apply to the input data.
            scale_int16s: Whether data which is already 16-bit signed integer
                should be scaled using `output_value_multiplier`.
            output_nodata: The `nodata` value to be declared in the output.
            extra_attrs: Extra attributes to add to the output data. ``None``
                (the default) means no extra attributes.
        """
        # Initialise the base class so this processor exposes
        # ``send_area_to_processor`` (default False) like every other
        # Processor; previously the attribute was missing entirely.
        super().__init__()
        self._convert_to_int16 = convert_to_int16
        self._output_value_multiplier = output_value_multiplier
        self._scale_int16s = scale_int16s
        self._output_nodata = output_nodata
        # Build a fresh dict per instance instead of using a mutable default
        # argument, which would be shared by every instance created without
        # explicit attrs.
        self._extra_attrs = {} if extra_attrs is None else extra_attrs

    def process(self, xr: DataArray | Dataset):
        """Process the data.

        Note that the extra attributes are merged into ``xr.attrs`` in place,
        i.e. on the object that was passed in, before any conversion.

        Args:
            xr: Any input data.

        Returns:
            The input data, with scaling, type-conversion and other
            adjustments applied.
        """
        xr.attrs.update(self._extra_attrs)

        if self._convert_to_int16:
            xr = scale_to_int16(
                xr,
                output_multiplier=self._output_value_multiplier,
                output_nodata=self._output_nodata,
                scale_int16s=self._scale_int16s,
            )

        return xr