from __future__ import annotations
import warnings
from collections.abc import Iterable
from copy import deepcopy
from datetime import datetime, timezone
from typing import (
TYPE_CHECKING,
Any,
Optional,
TypeVar,
cast,
)
from dateutil import tz
import pystac
from pystac import CatalogType, STACObjectType
from pystac.asset import Asset, Assets
from pystac.catalog import Catalog
from pystac.errors import DeprecatedWarning, ExtensionNotImplemented, STACTypeError
from pystac.item_assets import ItemAssetDefinition, _ItemAssets
from pystac.layout import HrefLayoutStrategy
from pystac.link import Link
from pystac.provider import Provider
from pystac.serialization import (
identify_stac_object,
identify_stac_object_type,
migrate_to_latest,
)
from pystac.summaries import Summaries
from pystac.utils import (
datetime_to_str,
str_to_datetime,
)
if TYPE_CHECKING:
from pystac.extensions.ext import CollectionExt
from pystac.item import Item
#: Generalized version of :class:`Collection`
C = TypeVar("C", bound="Collection")
Bboxes = list[list[float | int]]
TemporalIntervals = list[list[datetime]] | list[list[Optional[datetime]]]
TemporalIntervalsLike = TemporalIntervals | list[datetime] | list[Optional[datetime]]
[docs]
class SpatialExtent:
"""Describes the spatial extent of a Collection.
Args:
bboxes : A list of bboxes that represent the spatial
extent of the collection. Each bbox can be 2D or 3D. The length of the bbox
array must be 2*n where n is the number of dimensions. For example, a
2D Collection with only one bbox would be [[xmin, ymin, xmax, ymax]]
extra_fields : Dictionary containing additional top-level fields defined on the
Spatial Extent object.
"""
bboxes: Bboxes
"""A list of bboxes that represent the spatial
extent of the collection. Each bbox can be 2D or 3D. The length of the bbox
array must be 2*n where n is the number of dimensions. For example, a
2D Collection with only one bbox would be [[xmin, ymin, xmax, ymax]]"""
extra_fields: dict[str, Any]
"""Dictionary containing additional top-level fields defined on the Spatial
Extent object."""
def __init__(
self,
bboxes: Bboxes | list[float | int],
extra_fields: dict[str, Any] | None = None,
) -> None:
if not isinstance(bboxes, list):
raise TypeError("bboxes must be a list")
# A common mistake is to pass in a single bbox instead of a list of bboxes.
# Account for this by transforming the input in that case.
if isinstance(bboxes[0], (float, int)):
self.bboxes = [cast(list[float | int], bboxes)]
else:
self.bboxes = cast(Bboxes, bboxes)
self.extra_fields = extra_fields or {}
[docs]
def to_dict(self) -> dict[str, Any]:
"""Returns this spatial extent as a dictionary.
Returns:
dict: A serialization of the SpatialExtent.
"""
d = {"bbox": self.bboxes, **self.extra_fields}
return d
[docs]
def clone(self) -> SpatialExtent:
"""Clones this object.
Returns:
SpatialExtent: The clone of this object.
"""
cls = self.__class__
return cls(
bboxes=deepcopy(self.bboxes), extra_fields=deepcopy(self.extra_fields)
)
[docs]
@staticmethod
def from_dict(d: dict[str, Any]) -> SpatialExtent:
"""Constructs a SpatialExtent from a dict.
Returns:
SpatialExtent: The SpatialExtent deserialized from the JSON dict.
"""
return SpatialExtent(
bboxes=d["bbox"], extra_fields={k: v for k, v in d.items() if k != "bbox"}
)
[docs]
@staticmethod
def from_coordinates(
coordinates: list[Any], extra_fields: dict[str, Any] | None = None
) -> SpatialExtent:
"""Constructs a SpatialExtent from a set of coordinates.
This method will only produce a single bbox that covers all points
in the coordinate set.
Args:
coordinates : Coordinates to derive the bbox from.
extra_fields : Dictionary containing additional top-level fields defined on
the SpatialExtent object.
Returns:
SpatialExtent: A SpatialExtent with a single bbox that covers the
given coordinates.
"""
def process_coords(
coord_lists: list[Any],
xmin: float | None = None,
ymin: float | None = None,
xmax: float | None = None,
ymax: float | None = None,
) -> tuple[float | None, float | None, float | None, float | None]:
for coord in coord_lists:
if isinstance(coord[0], list):
xmin, ymin, xmax, ymax = process_coords(
coord, xmin, ymin, xmax, ymax
)
else:
x, y = coord
if xmin is None or x < xmin:
xmin = x
elif xmax is None or xmax < x:
xmax = x
if ymin is None or y < ymin:
ymin = y
elif ymax is None or ymax < y:
ymax = y
return xmin, ymin, xmax, ymax
xmin, ymin, xmax, ymax = process_coords(coordinates)
if xmin is None or ymin is None or xmax is None or ymax is None:
raise ValueError(
f"Could not determine bounds from coordinate sequence {coordinates}"
)
return SpatialExtent(
bboxes=[[xmin, ymin, xmax, ymax]], extra_fields=extra_fields
)
[docs]
class TemporalExtent:
"""Describes the temporal extent of a Collection.
Args:
intervals : A list of two datetimes wrapped in a list,
representing the temporal extent of a Collection. Open date ranges are
supported by setting either the start (the first element of the interval)
or the end (the second element of the interval) to None.
extra_fields : Dictionary containing additional top-level fields defined on the
Temporal Extent object.
Note:
Datetimes are required to be in UTC.
"""
intervals: TemporalIntervals
"""A list of two datetimes wrapped in a list,
representing the temporal extent of a Collection. Open date ranges are
represented by either the start (the first element of the interval) or the
end (the second element of the interval) being None."""
extra_fields: dict[str, Any]
"""Dictionary containing additional top-level fields defined on the Temporal
Extent object."""
def __init__(
self,
intervals: TemporalIntervals | list[datetime | None],
extra_fields: dict[str, Any] | None = None,
):
if not isinstance(intervals, list):
raise TypeError("intervals must be a list")
# A common mistake is to pass in a single interval instead of a
# list of intervals. Account for this by transforming the input
# in that case.
if isinstance(intervals[0], datetime) or intervals[0] is None:
self.intervals = [cast(list[Optional[datetime]], intervals)]
else:
self.intervals = cast(TemporalIntervals, intervals)
self.extra_fields = extra_fields or {}
[docs]
def to_dict(self) -> dict[str, Any]:
"""Returns this temporal extent as a dictionary.
Returns:
dict: A serialization of the TemporalExtent.
"""
encoded_intervals: list[list[str | None]] = []
for i in self.intervals:
start = None
end = None
if i[0] is not None:
start = datetime_to_str(i[0])
if i[1] is not None:
end = datetime_to_str(i[1])
encoded_intervals.append([start, end])
d = {"interval": encoded_intervals, **self.extra_fields}
return d
[docs]
def clone(self) -> TemporalExtent:
"""Clones this object.
Returns:
TemporalExtent: The clone of this object.
"""
cls = self.__class__
return cls(
intervals=deepcopy(self.intervals), extra_fields=deepcopy(self.extra_fields)
)
[docs]
@staticmethod
def from_dict(d: dict[str, Any]) -> TemporalExtent:
"""Constructs an TemporalExtent from a dict.
Returns:
TemporalExtent: The TemporalExtent deserialized from the JSON dict.
"""
parsed_intervals: list[list[datetime | None]] = []
for i in d["interval"]:
if isinstance(i, str):
# d["interval"] is a list of strings, so we correct the list and
# try again
# https://github.com/stac-utils/pystac/issues/1221
warnings.warn(
"A collection's temporal extent should be a list of lists, but "
"is instead a "
"list of strings. pystac is fixing this issue and continuing "
"deserialization, but note that the source "
"collection is invalid STAC.",
UserWarning,
)
d["interval"] = [d["interval"]]
return TemporalExtent.from_dict(d)
start = None
end = None
if i[0]:
start = str_to_datetime(i[0])
if i[1]:
end = str_to_datetime(i[1])
parsed_intervals.append([start, end])
return TemporalExtent(
intervals=parsed_intervals,
extra_fields={k: v for k, v in d.items() if k != "interval"},
)
[docs]
@staticmethod
def from_now() -> TemporalExtent:
"""Constructs an TemporalExtent with a single open interval that has
the start time as the current time.
Returns:
TemporalExtent: The resulting TemporalExtent.
"""
return TemporalExtent(
intervals=[[datetime.now(timezone.utc).replace(microsecond=0), None]]
)
[docs]
class Extent:
"""Describes the spatiotemporal extents of a Collection.
Args:
spatial : Potential spatial extent covered by the collection.
temporal : Potential temporal extent covered by the collection.
extra_fields : Dictionary containing additional top-level fields defined on the
Extent object.
"""
spatial: SpatialExtent
"""Potential spatial extent covered by the collection."""
temporal: TemporalExtent
"""Potential temporal extent covered by the collection."""
extra_fields: dict[str, Any]
"""Dictionary containing additional top-level fields defined on the Extent
object."""
def __init__(
self,
spatial: SpatialExtent,
temporal: TemporalExtent,
extra_fields: dict[str, Any] | None = None,
):
self.spatial = spatial
self.temporal = temporal
self.extra_fields = extra_fields or {}
[docs]
def to_dict(self) -> dict[str, Any]:
"""Returns this extent as a dictionary.
Returns:
dict: A serialization of the Extent.
"""
d = {
"spatial": self.spatial.to_dict(),
"temporal": self.temporal.to_dict(),
**self.extra_fields,
}
return d
[docs]
def clone(self) -> Extent:
"""Clones this object.
Returns:
Extent: The clone of this extent.
"""
cls = self.__class__
return cls(
spatial=self.spatial.clone(),
temporal=self.temporal.clone(),
extra_fields=deepcopy(self.extra_fields),
)
[docs]
@staticmethod
def from_dict(d: dict[str, Any]) -> Extent:
"""Constructs an Extent from a dict.
Returns:
Extent: The Extent deserialized from the JSON dict.
"""
return Extent(
spatial=SpatialExtent.from_dict(d["spatial"]),
temporal=TemporalExtent.from_dict(d["temporal"]),
extra_fields={
k: v for k, v in d.items() if k not in {"spatial", "temporal"}
},
)
[docs]
@staticmethod
def from_items(
items: Iterable[Item], extra_fields: dict[str, Any] | None = None
) -> Extent:
"""Create an Extent based on the datetimes and bboxes of a list of items.
Args:
items : A list of items to derive the extent from.
extra_fields : Optional dictionary containing additional top-level fields
defined on the Extent object.
Returns:
Extent: An Extent that spatially and temporally covers all of the
given items.
"""
bounds_values: list[list[float]] = [
[float("inf")],
[float("inf")],
[float("-inf")],
[float("-inf")],
]
datetimes: list[datetime] = []
starts: list[datetime] = []
ends: list[datetime] = []
for item in items:
if item.bbox is not None:
for i in range(0, 4):
bounds_values[i].append(item.bbox[i])
if item.datetime is not None:
datetimes.append(item.datetime)
if item.common_metadata.start_datetime is not None:
starts.append(item.common_metadata.start_datetime)
if item.common_metadata.end_datetime is not None:
ends.append(item.common_metadata.end_datetime)
if not any(datetimes + starts):
start_timestamp = None
else:
start_timestamp = min(
[
dt if dt.tzinfo else dt.replace(tzinfo=tz.UTC)
for dt in datetimes + starts
]
)
if not any(datetimes + ends):
end_timestamp = None
else:
end_timestamp = max(
[
dt if dt.tzinfo else dt.replace(tzinfo=tz.UTC)
for dt in datetimes + ends
]
)
spatial = SpatialExtent(
[
[
min(bounds_values[0]),
min(bounds_values[1]),
max(bounds_values[2]),
max(bounds_values[3]),
]
]
)
temporal = TemporalExtent([[start_timestamp, end_timestamp]])
return Extent(spatial=spatial, temporal=temporal, extra_fields=extra_fields)
[docs]
class Collection(Catalog, Assets):
"""A Collection extends the Catalog spec with additional metadata that helps
enable discovery.
Args:
id : Identifier for the collection. Must be unique within the STAC.
description : Detailed multi-line description to fully explain the
collection. `CommonMark 0.29 syntax <https://commonmark.org/>`_ MAY
be used for rich text representation.
extent : Spatial and temporal extents that describe the bounds of
all items contained within this Collection.
title : Optional short descriptive one-line title for the
collection.
stac_extensions : Optional list of extensions the Collection
implements.
href : Optional HREF for this collection, which be set as the
collection's self link's HREF.
catalog_type : Optional catalog type for this catalog. Must
be one of the values in :class`~pystac.CatalogType`.
license : Collection's license(s) as a
`SPDX License identifier <https://spdx.org/licenses/>`_,
or `other`. If collection includes data with multiple
different licenses, use `other` and add a link for
each. The licenses `various` and `proprietary` are deprecated.
Defaults to 'other'.
keywords : Optional list of keywords describing the collection.
providers : Optional list of providers of this Collection.
summaries : An optional map of property summaries,
either a set of values or statistics such as a range.
extra_fields : Extra fields that are part of the top-level
JSON properties of the Collection.
assets : A dictionary mapping string keys to :class:`~pystac.Asset` objects. All
:class:`~pystac.Asset` values in the dictionary will have their
:attr:`~pystac.Asset.owner` attribute set to the created Collection.
strategy : The layout strategy to use for setting the
HREFs of the catalog child objects and items.
If not provided, it will default to strategy of the parent and fallback to
:class:`~pystac.layout.BestPracticesLayoutStrategy`.
"""
description: str
"""Detailed multi-line description to fully explain the collection."""
extent: Extent
"""Spatial and temporal extents that describe the bounds of all items contained
within this Collection."""
id: str
"""Identifier for the collection."""
stac_extensions: list[str]
"""List of extensions the Collection implements."""
title: str | None
"""Optional short descriptive one-line title for the collection."""
keywords: list[str] | None
"""Optional list of keywords describing the collection."""
providers: list[Provider] | None
"""Optional list of providers of this Collection."""
summaries: Summaries
"""A map of property summaries, either a set of values or statistics such as a
range."""
links: list[Link]
"""A list of :class:`~pystac.Link` objects representing all links associated with
this Collection."""
extra_fields: dict[str, Any]
"""Extra fields that are part of the top-level JSON properties of the Collection."""
STAC_OBJECT_TYPE = STACObjectType.COLLECTION
DEFAULT_FILE_NAME = "collection.json"
"""Default file name that will be given to this STAC object
in a canonical format."""
def __init__(
self,
id: str,
description: str,
extent: Extent,
title: str | None = None,
stac_extensions: list[str] | None = None,
href: str | None = None,
extra_fields: dict[str, Any] | None = None,
catalog_type: CatalogType | None = None,
license: str = "other",
keywords: list[str] | None = None,
providers: list[Provider] | None = None,
summaries: Summaries | None = None,
assets: dict[str, Asset] | None = None,
strategy: HrefLayoutStrategy | None = None,
):
super().__init__(
id,
description,
title,
stac_extensions,
extra_fields,
href,
catalog_type or CatalogType.ABSOLUTE_PUBLISHED,
strategy,
)
self.extent = extent
self.license = license
self.stac_extensions: list[str] = stac_extensions or []
self.keywords = keywords
self.providers = providers
self.summaries = summaries or Summaries.empty()
self._item_assets: _ItemAssets | None = None
self.assets = {}
if assets is not None:
for k, asset in assets.items():
self.add_asset(k, asset)
def __repr__(self) -> str:
return f"<Collection id={self.id}>"
[docs]
def add_item(
self,
item: Item,
title: str | None = None,
strategy: HrefLayoutStrategy | None = None,
set_parent: bool = True,
) -> Link:
link = super().add_item(item, title, strategy, set_parent)
item.set_collection(self)
return link
[docs]
def to_dict(
self, include_self_link: bool = True, transform_hrefs: bool = True
) -> dict[str, Any]:
d = super().to_dict(
include_self_link=include_self_link, transform_hrefs=transform_hrefs
)
d["extent"] = self.extent.to_dict()
d["license"] = self.license
if self.stac_extensions:
d["stac_extensions"] = self.stac_extensions
if self.keywords:
d["keywords"] = self.keywords
if self.providers:
d["providers"] = list(map(lambda x: x.to_dict(), self.providers))
if not self.summaries.is_empty():
d["summaries"] = self.summaries.to_dict()
if any(self.assets):
d["assets"] = {k: v.to_dict() for k, v in self.assets.items()}
return d
[docs]
def clone(self) -> Collection:
cls = self.__class__
clone = cls(
id=self.id,
description=self.description,
extent=self.extent.clone(),
title=self.title,
stac_extensions=self.stac_extensions.copy(),
extra_fields=deepcopy(self.extra_fields),
catalog_type=self.catalog_type,
license=self.license,
keywords=self.keywords.copy() if self.keywords is not None else None,
providers=deepcopy(self.providers),
summaries=self.summaries.clone(),
assets={k: asset.clone() for k, asset in self.assets.items()},
)
clone._resolved_objects.cache(clone)
for link in self.links:
if link.rel == pystac.RelType.ROOT:
# Collection __init__ sets correct root to clone; don't reset
# if the root link points to self
root_is_self = link.is_resolved() and link.target is self
if not root_is_self:
clone.set_root(None)
clone.add_link(link.clone())
else:
clone.add_link(link.clone())
return clone
[docs]
@classmethod
def from_dict(
cls: type[C],
d: dict[str, Any],
href: str | None = None,
root: Catalog | None = None,
migrate: bool = True,
preserve_dict: bool = True,
) -> C:
from pystac.extensions.version import CollectionVersionExtension
if migrate:
info = identify_stac_object(d)
d = migrate_to_latest(d, info)
if not cls.matches_object_type(d):
raise STACTypeError(d, cls)
catalog_type = CatalogType.determine_type(d)
if preserve_dict:
d = deepcopy(d)
id = d.pop("id")
description = d.pop("description")
license = d.pop("license")
extent = Extent.from_dict(d.pop("extent"))
title = d.pop("title", None)
stac_extensions = d.pop("stac_extensions", None)
keywords = d.pop("keywords", None)
providers = d.pop("providers", None)
if providers is not None:
providers = list(map(lambda x: pystac.Provider.from_dict(x), providers))
summaries = d.pop("summaries", None)
if summaries is not None:
summaries = Summaries(summaries)
assets = d.pop("assets", None)
if assets:
assets = {k: Asset.from_dict(v) for k, v in assets.items()}
links = d.pop("links")
d.pop("stac_version")
collection = cls(
id=id,
description=description,
extent=extent,
title=title,
stac_extensions=stac_extensions,
extra_fields=d,
license=license,
keywords=keywords,
providers=providers,
summaries=summaries,
href=href,
catalog_type=catalog_type,
assets=assets,
)
for link in links:
if link["rel"] == pystac.RelType.ROOT:
# Remove the link that's generated in Catalog's constructor.
collection.remove_links(pystac.RelType.ROOT)
if link["rel"] != pystac.RelType.SELF or href is None:
collection.add_link(Link.from_dict(link))
if root:
collection.set_root(root)
try:
version = CollectionVersionExtension.ext(collection)
if version.deprecated:
warnings.warn(
f"The collection '{collection.id}' is deprecated.",
DeprecatedWarning,
)
# Collection asset deprecation checks pending version extension support
except ExtensionNotImplemented:
pass
return collection
[docs]
@classmethod
def from_items(
cls: type[Collection],
items: Iterable[Item] | pystac.ItemCollection,
*,
id: str | None = None,
strategy: HrefLayoutStrategy | None = None,
) -> Collection:
"""Create a :class:`Collection` from iterable of items or an
:class:`~pystac.ItemCollection`.
Will try to pull collection attributes from
:attr:`~pystac.ItemCollection.extra_fields` and items when possible.
Args:
items : Iterable of :class:`~pystac.Item` instances to include in the
:class:`Collection`. This can be a :class:`~pystac.ItemCollection`.
id : Identifier for the collection. If not set, must be available on the
items and they must all match.
strategy : The layout strategy to use for setting the
HREFs of the catalog child objects and items.
If not provided, it will default to strategy of the parent and fallback
to :class:`~pystac.layout.BestPracticesLayoutStrategy`.
"""
def extract(attr: str) -> Any:
"""Extract attrs from items or item.properties as long as they all match"""
value = None
values = {getattr(item, attr, None) for item in items}
if len(values) == 1:
value = next(iter(values))
if value is None:
values = {item.properties.get(attr, None) for item in items}
if len(values) == 1:
value = next(iter(values))
return value
if isinstance(items, pystac.ItemCollection):
extra_fields = deepcopy(items.extra_fields)
links = extra_fields.pop("links", {})
providers = extra_fields.pop("providers", None)
if providers is not None:
providers = [pystac.Provider.from_dict(p) for p in providers]
else:
extra_fields = {}
links = {}
providers = []
id = id or extract("collection_id")
if id is None:
raise ValueError(
"Collection id must be defined. Either by specifying collection_id "
"on every item, or as a keyword argument to this function."
)
collection = cls(
id=id,
description=extract("description"),
extent=Extent.from_items(items),
title=extract("title"),
providers=providers,
extra_fields=extra_fields,
strategy=strategy,
)
collection.add_items(items)
for link in links:
collection.add_link(Link.from_dict(link))
return collection
[docs]
def get_item(self, id: str, recursive: bool = False) -> Item | None:
"""Returns an item with a given ID.
Args:
id : The ID of the item to find.
recursive : If True, search this collection and all children for the
item; otherwise, only search the items of this collection. Defaults
to False.
Return:
Item or None: The item with the given ID, or None if not found.
"""
try:
return next(self.get_items(id, recursive=recursive), None)
except TypeError as e:
if any("recursive" in arg for arg in e.args):
# For inherited classes that do not yet support recursive
# See https://github.com/stac-utils/pystac-client/issues/485
return super().get_item(id, recursive=recursive)
raise e
@property
def item_assets(self) -> dict[str, ItemAssetDefinition]:
"""Accessor for `item_assets
<https://github.com/radiantearth/stac-spec/blob/v1.1.0/collection-spec/collection-spec.md#item_assets>`__
on this collection.
Example::
.. code-block:: python
>>> print(collection.item_assets)
{'thumbnail': <pystac.item_assets.ItemAssetDefinition at 0x72aea0420750>,
'metadata': <pystac.item_assets.ItemAssetDefinition at 0x72aea017dc90>,
'B5': <pystac.item_assets.ItemAssetDefinition at 0x72aea017efd0>,
'B6': <pystac.item_assets.ItemAssetDefinition at 0x72aea016d5d0>,
'B7': <pystac.item_assets.ItemAssetDefinition at 0x72aea016e050>,
'B8': <pystac.item_assets.ItemAssetDefinition at 0x72aea016da90>}
>>> collection.item_assets["thumbnail"].title
'Thumbnail'
Set attributes on :class:`~pystac.ItemAssetDefinition` objects
.. code-block:: python
>>> collection.item_assets["thumbnail"].title = "New Title"
Add to the ``item_assets`` dict:
.. code-block:: python
>>> collection.item_assets["B4"] = {
'type': 'image/tiff; application=geotiff; profile=cloud-optimized',
'eo:bands': [{'name': 'B4', 'common_name': 'red'}]
}
>>> collection.item_assets["B4"].owner == collection
True
"""
if self._item_assets is None:
self._item_assets = _ItemAssets(self)
return self._item_assets
@item_assets.setter
def item_assets(
self, item_assets: dict[str, ItemAssetDefinition | dict[str, Any]] | None
) -> None:
# clear out the cached value
self._item_assets = None
if item_assets is None:
self.extra_fields.pop("item_assets")
else:
self.extra_fields["item_assets"] = {
k: v if isinstance(v, dict) else v.to_dict()
for k, v in item_assets.items()
}
[docs]
def update_extent_from_items(self) -> None:
"""
Update datetime and bbox based on all items to a single bbox and time window.
"""
self.extent = Extent.from_items(self.get_items(recursive=True))
[docs]
def full_copy(
self, root: Catalog | None = None, parent: Catalog | None = None
) -> Collection:
return cast(Collection, super().full_copy(root, parent))
[docs]
@classmethod
def matches_object_type(cls, d: dict[str, Any]) -> bool:
return identify_stac_object_type(d) == STACObjectType.COLLECTION
@property
def ext(self) -> CollectionExt:
"""Accessor for extension classes on this collection
Example::
print(collection.ext.xarray)
"""
from pystac.extensions.ext import CollectionExt
return CollectionExt(stac_object=self)