Source code for pystac.cache

from __future__ import annotations

from collections import ChainMap
from copy import copy
from typing import TYPE_CHECKING, Any, cast

import pystac

if TYPE_CHECKING:
    from pystac.collection import Collection
    from pystac.stac_object import STACObject


[docs] def get_cache_key(stac_object: STACObject) -> tuple[str, bool]: """Produce a cache key for the given STAC object. If a self href is set, use that as the cache key. If not, use a key that combines this object's ID with it's parents' IDs. Returns: Tuple[str, bool]: A tuple with the cache key as the first element and a boolean that is true if the cache key is the object's HREF as the second element. """ href = stac_object.get_self_href() if href is not None: return href, True else: ids: list[str] = [] obj: pystac.STACObject | None = stac_object while obj is not None: ids.append(obj.id) obj = obj.get_parent() return "/".join(ids), False
[docs] class ResolvedObjectCache: """This class tracks resolved objects tied to root catalogs. A STAC object is 'resolved' when it is a Python Object; a link to a STAC object such as a Catalog or Item is considered "unresolved" if it's target is pointed at an HREF of the object. Tracking resolved objects allows us to tie together the same instances when there are loops in the Graph of the STAC catalog (e.g. a LabelItem can link to a rel:source, and if that STAC Item exists in the same root catalog they should refer to the same Python object). Resolution tracking is important when copying STACs in-memory: In order for object links to refer to the copy of STAC Objects rather than their originals, we have to keep track of the resolved STAC Objects and replace them with their copies. Args: id_keys_to_objects : Existing cache of a key made up of the STACObject and it's parents IDs mapped to the cached STACObject. hrefs_to_objects : STAC Object HREFs matched to their cached object. ids_to_collections : Map of collection IDs to collections. """ id_keys_to_objects: dict[str, STACObject] """Existing cache of a key made up of the STACObject and it's parents IDs mapped to the cached STACObject.""" hrefs_to_objects: dict[str, STACObject] """STAC Object HREFs matched to their cached object.""" ids_to_collections: dict[str, Collection] """Map of collection IDs to collections.""" _collection_cache: ResolvedObjectCollectionCache | None def __init__( self, id_keys_to_objects: dict[str, STACObject] | None = None, hrefs_to_objects: dict[str, STACObject] | None = None, ids_to_collections: dict[str, Collection] | None = None, ): self.id_keys_to_objects = id_keys_to_objects or {} self.hrefs_to_objects = hrefs_to_objects or {} self.ids_to_collections = ids_to_collections or {} self._collection_cache = None
[docs] def get_or_cache(self, obj: STACObject) -> STACObject: """Gets the STACObject that is the cached version of the given STACObject; or, if none exists, sets the cached object to the given object. Args: obj : The given object who's cache key will be checked against the cache. Returns: STACObject: Either the cached object that has the same cache key as the given object, or the given object. """ key, is_href = get_cache_key(obj) if is_href: if key in self.hrefs_to_objects: return self.hrefs_to_objects[key] else: self.cache(obj) return obj else: if key in self.id_keys_to_objects: return self.id_keys_to_objects[key] else: self.cache(obj) return obj
[docs] def get(self, obj: STACObject) -> STACObject | None: """Get the cached object that has the same cache key as the given object. Args: obj : The given object who's cache key will be checked against the cache. Returns: STACObject or None: Either the cached object that has the same cache key as the given object, or None """ key, is_href = get_cache_key(obj) if is_href: return self.get_by_href(key) else: return self.id_keys_to_objects.get(key)
[docs] def get_by_href(self, href: str) -> STACObject | None: """Gets the cached object at href. Args: href : The href to use as the key for the cached object. Returns: STACObject or None: Returns the STACObject if cached, otherwise None. """ return self.hrefs_to_objects.get(href)
[docs] def get_collection_by_id(self, id: str) -> Collection | None: """Retrieved a cached Collection by its ID. Args: id : The ID of the collection. Returns: Collection or None: Returns the collection if there is one cached with the given ID, otherwise None. """ return self.ids_to_collections.get(id)
[docs] def cache(self, obj: STACObject) -> None: """Set the given object into the cache. Args: obj : The object to cache """ key, is_href = get_cache_key(obj) if is_href: self.hrefs_to_objects[key] = obj else: self.id_keys_to_objects[key] = obj if isinstance(obj, pystac.Collection): self.ids_to_collections[obj.id] = obj
[docs] def remove(self, obj: STACObject) -> None: """Removes any cached object that matches the given object's cache key. Args: obj : The object to remove """ key, is_href = get_cache_key(obj) if is_href: self.hrefs_to_objects.pop(key, None) else: self.id_keys_to_objects.pop(key, None) if obj.STAC_OBJECT_TYPE == pystac.STACObjectType.COLLECTION: self.id_keys_to_objects.pop(obj.id, None)
def __contains__(self, obj: STACObject) -> bool: key, is_href = get_cache_key(obj) return ( key in self.hrefs_to_objects if is_href else key in self.id_keys_to_objects )
[docs] def contains_collection_id(self, collection_id: str) -> bool: """Returns True if there is a collection with given collection ID is cached.""" return collection_id in self.ids_to_collections
[docs] def as_collection_cache(self) -> CollectionCache: if self._collection_cache is None: self._collection_cache = ResolvedObjectCollectionCache(self) return self._collection_cache
[docs] @staticmethod def merge( first: ResolvedObjectCache, second: ResolvedObjectCache ) -> ResolvedObjectCache: """Merges two ResolvedObjectCache. The merged cache will give preference to the first argument; that is, if there are cached keys that exist in both the first and second cache, the object cached in the first will be cached in the resulting merged ResolvedObjectCache. Args: first : The first cache to merge. This cache will be the preferred cache for objects in the case of ID conflicts. second : The second cache to merge. Returns: ResolvedObjectCache: The resulting merged cache. """ merged = ResolvedObjectCache( id_keys_to_objects=dict( ChainMap( copy(first.id_keys_to_objects), copy(second.id_keys_to_objects) ) ), hrefs_to_objects=dict( ChainMap(copy(first.hrefs_to_objects), copy(second.hrefs_to_objects)) ), ids_to_collections=dict( ChainMap( copy(first.ids_to_collections), copy(second.ids_to_collections) ) ), ) merged._collection_cache = ResolvedObjectCollectionCache.merge( merged, first._collection_cache, second._collection_cache ) return merged
[docs] class CollectionCache: """Cache of collections that can be used to avoid re-reading Collection JSON in :func:`pystac.serialization.merge_common_properties <pystac.serialization.common_properties.merge_common_properties>`. The CollectionCache will contain collections as either as dicts or PySTAC Collections, and will set Collection JSON that it reads in order to merge in common properties. """ cached_ids: dict[str, Collection | dict[str, Any]] cached_hrefs: dict[str, Collection | dict[str, Any]] def __init__( self, cached_ids: dict[str, Collection | dict[str, Any]] | None = None, cached_hrefs: dict[str, Collection | dict[str, Any]] | None = None, ): self.cached_ids = cached_ids or {} self.cached_hrefs = cached_hrefs or {}
[docs] def get_by_id(self, collection_id: str) -> Collection | dict[str, Any] | None: return self.cached_ids.get(collection_id)
[docs] def get_by_href(self, href: str) -> Collection | dict[str, Any] | None: return self.cached_hrefs.get(href)
[docs] def contains_id(self, collection_id: str) -> bool: return collection_id in self.cached_ids
[docs] def cache( self, collection: Collection | dict[str, Any], href: str | None = None, ) -> None: """Caches a collection JSON.""" if isinstance(collection, pystac.Collection): self.cached_ids[collection.id] = collection else: self.cached_ids[collection["id"]] = collection if href is not None: self.cached_hrefs[href] = collection
[docs] class ResolvedObjectCollectionCache(CollectionCache): resolved_object_cache: ResolvedObjectCache def __init__( self, resolved_object_cache: ResolvedObjectCache, cached_ids: dict[str, Collection | dict[str, Any]] | None = None, cached_hrefs: dict[str, Collection | dict[str, Any]] | None = None, ): super().__init__(cached_ids, cached_hrefs) self.resolved_object_cache = resolved_object_cache
[docs] def get_by_id(self, collection_id: str) -> Collection | dict[str, Any] | None: result = self.resolved_object_cache.get_collection_by_id(collection_id) if result is None: return super().get_by_id(collection_id) else: return result
[docs] def get_by_href(self, href: str) -> Collection | dict[str, Any] | None: result = self.resolved_object_cache.get_by_href(href) if result is None: return super().get_by_href(href) else: return cast(pystac.Collection, result)
[docs] def contains_id(self, collection_id: str) -> bool: return self.resolved_object_cache.contains_collection_id( collection_id ) or super().contains_id(collection_id)
[docs] def cache( self, collection: Collection | dict[str, Any], href: str | None = None, ) -> None: super().cache(collection, href)
[docs] @staticmethod def merge( resolved_object_cache: ResolvedObjectCache, first: ResolvedObjectCollectionCache | None, second: ResolvedObjectCollectionCache | None, ) -> ResolvedObjectCollectionCache: first_cached_ids = {} if first is not None: first_cached_ids = copy(first.cached_ids) second_cached_ids = {} if second is not None: second_cached_ids = copy(second.cached_ids) first_cached_hrefs = {} if first is not None: first_cached_hrefs = copy(first.cached_hrefs) second_cached_hrefs = {} if second is not None: second_cached_hrefs = copy(second.cached_hrefs) return ResolvedObjectCollectionCache( resolved_object_cache, cached_ids=dict(ChainMap(first_cached_ids, second_cached_ids)), cached_hrefs=dict(ChainMap(first_cached_hrefs, second_cached_hrefs)), )