# Source code for dep_tools.searchers

"""This module contains the definition and implementation of :class:`Searcher` objects."""

import warnings
from abc import ABC, abstractmethod

from geopandas import GeoDataFrame
from odc.geo.geobox import GeoBox
from pystac import ItemCollection
from pystac_client import Client

from dep_tools.exceptions import EmptyCollectionError
from dep_tools.landsat_utils import items_in_pathrows, pathrows_in_area
from dep_tools.utils import fix_bad_epsgs, remove_bad_items, search_across_180


class Searcher(ABC):
    """Abstract base for objects that search for something, given an area.

    Concrete subclasses must provide their own :meth:`search`.
    """

    def __init__(self):
        """Create the searcher. The base class stores no state."""

    @abstractmethod
    def search(self, area):
        """Search for something based on ``area``.

        Args:
            area: The area to search. The base class places no constraint on
                its type.

        Returns:
            ``None`` in this base implementation; subclasses define the
            actual result.
        """
        return None
class PystacSearcher(Searcher):
    def __init__(
        self,
        catalog: str | None = None,
        client: Client | None = None,
        raise_empty_collection_error: bool = True,
        **kwargs,
    ):
        """A Searcher which finds stac items using pystac_client.Client.search.

        On top of a plain client search this handles areas which cross the
        antimeridian (the bounding box of the target area is split on either
        side) and drops known "bad" stac items (using
        dep_tools.utils.remove_bad_items).

        This is written to be used with the :class:`Task` framework. If you
        just want to search for stac items and handle the antimeridian
        correctly, use :func:`dep_tools.utils.search_across_180`.

        Args:
            catalog: The URL of a stac catalog. Ignored (with a warning) when
                client is also given.
            client: A search client. Either this or catalog must be specified.
            raise_empty_collection_error: Whether an EmptyCollectionError
                should be raised if no stac items are found.
            **kwargs: Additional arguments passed to client.search(). For
                example, passing `collections=["sentinel-2-l2a"]` will
                restrict results to Sentinel 2 stac items.

        Raises:
            ValueError: If neither client nor catalog is given.
        """
        if not client and not catalog:
            raise ValueError("Must specify either client or catalog")

        if client and catalog:
            warnings.warn(
                "Arguments for both 'client' and 'catalog' passed to PystacSearcher, ignoring catalog"
            )

        # An explicit client always wins; the catalog is only opened without one.
        self._client = client or Client.open(catalog)
        self._raise_errors = raise_empty_collection_error
        self._kwargs = kwargs

    def search(self, area: GeoDataFrame | GeoBox) -> ItemCollection:
        """Search for stac items within the bounds of the given area.

        Args:
            area: An area in any projection as defined by the crs.

        Returns:
            An ItemCollection.

        Raises:
            EmptyCollectionError: If nothing is left after bad items are
                removed and this searcher was configured to raise.
        """
        found = search_across_180(region=area, client=self._client, **self._kwargs)

        # The epsg fix is only applied to results from the "microsoft-pc" client.
        needs_epsg_fix = self._client.id == "microsoft-pc"
        if needs_epsg_fix:
            fix_bad_epsgs(found)

        found = remove_bad_items(found)

        nothing_found = len(found) == 0
        if nothing_found and self._raise_errors:
            raise EmptyCollectionError()
        return found
class LandsatPystacSearcher(PystacSearcher):
    # Query key used to restrict a search to a single landsat collection tier.
    _TIER_QUERY_KEY = "landsat:collection_category"

    def __init__(
        self,
        catalog: str | None = None,
        client: Client | None = None,
        collections: list[str] | None = ["landsat-c2-l2"],
        raise_empty_collection_error: bool = True,
        search_intersecting_pathrows: bool = False,
        exclude_platforms: list[str] | None = None,
        only_tier_one: bool = False,
        fall_back_to_tier_two: bool = False,
        **kwargs,
    ):
        """A PystacSearcher with special functionality for landsat data on the MSPC.

        Currently it overwrites parts of any `query` kwarg, so if you want a
        direct query, just use :class:`PystacSearcher`.

        Args:
            catalog: The URL of a stac catalog. If client is specified, this is
                ignored.
            client: A search client. Either this or catalog must be specified.
            collections: The collections to search; passed to client.search()
                as its `collections` argument.
            raise_empty_collection_error: Whether an EmptyCollectionError
                should be raised if no stac items are found.
            search_intersecting_pathrows: Whether to use landsat pathrows which
                intersect the area passed to :func:`search` rather than the
                area itself. This is a workaround for bad geometry in some
                stac items which cross the antimeridian.
            exclude_platforms: A list of platforms (e.g. ["landsat-7"]) to
                exclude from searching.
            only_tier_one: Whether to only search for tier one landsat data.
            fall_back_to_tier_two: If `only_tier_one` is set to True and no
                items are returned from the search, search again with tier two
                data included.
            **kwargs: Additional arguments passed to client.search().
                `collections` and `query` arguments will be overwritten.
        """
        super().__init__(
            catalog=catalog,
            client=client,
            raise_empty_collection_error=raise_empty_collection_error,
            **kwargs,
        )
        # Store a copy so instances never share the list object that serves as
        # the signature default (or one the caller may mutate later).
        self._kwargs["collections"] = (
            list(collections) if isinstance(collections, list) else collections
        )
        self._search_intersecting_pathrows = search_intersecting_pathrows
        self._exclude_platforms = exclude_platforms
        self._only_tier_one = only_tier_one
        self._fall_back_to_tier_two = fall_back_to_tier_two

        # For now, just warn that we're overwriting the query. In future,
        # we might look to combine.
        if "query" in self._kwargs:
            warnings.warn(
                "Portions of `query` argument may be replaced. To specify the full query directly, use `PystacSearcher`."
            )
            supplied_query = self._kwargs["query"]
            # Work on a copy so the caller's dict is not modified below.
            query = (
                dict(supplied_query)
                if isinstance(supplied_query, dict)
                else supplied_query
            )
        else:
            query = {}

        if self._exclude_platforms is not None:
            landsat_platforms = ["landsat-5", "landsat-7", "landsat-8", "landsat-9"]
            query["platform"] = {
                "in": [p for p in landsat_platforms if p not in self._exclude_platforms]
            }

        if self._only_tier_one:
            query[self._TIER_QUERY_KEY] = {"eq": "T1"}

        self._kwargs["query"] = query

    def _search_with_query(self, search_area, query) -> ItemCollection:
        """Run the parent search using `query`, returning an empty collection
        instead of raising when nothing is found."""
        configured_query = self._kwargs["query"]
        self._kwargs["query"] = query
        try:
            return super().search(search_area)
        except EmptyCollectionError:
            return ItemCollection([])
        finally:
            # Always restore the configured query so the searcher is reusable.
            self._kwargs["query"] = configured_query

    def search(self, area: GeoDataFrame) -> ItemCollection:
        """Perform the search.

        If the searcher was created with both `only_tier_one` and
        `fall_back_to_tier_two` and the tier one search finds nothing, the
        search is repeated once without the tier filter. The searcher's
        configuration is left unchanged by the fallback.

        Args:
            area: Any area.

        Returns:
            An ItemCollection.

        Raises:
            EmptyCollectionError: If the search finds no items and the
                searcher was configured to raise.
        """
        search_area = (
            pathrows_in_area(area) if self._search_intersecting_pathrows else area
        )

        items = self._search_with_query(search_area, self._kwargs["query"])

        if len(items) == 0 and self._only_tier_one and self._fall_back_to_tier_two:
            # The tier filter lives in the stored query, so it has to be
            # removed from the query itself for the retry to include tier two.
            query_without_tier = {
                key: value
                for key, value in self._kwargs["query"].items()
                if key != self._TIER_QUERY_KEY
            }
            items = self._search_with_query(search_area, query_without_tier)

        if self._search_intersecting_pathrows and len(items) > 0:
            items = items_in_pathrows(items, search_area)

        if len(items) == 0 and self._raise_errors:
            raise EmptyCollectionError()
        return items