Source code for pystac.validation.stac_validator

import json
import logging
import warnings
from abc import ABC, abstractmethod
from typing import Any, Optional

import pystac
import pystac.utils
from pystac.errors import STACValidationError
from pystac.stac_object import STACObjectType
from pystac.validation.schema_uri_map import DefaultSchemaUriMap, SchemaUriMap

# ``jsonschema`` and ``referencing`` are imported defensively: if any import in
# this block fails with ImportError, the module still loads and HAS_JSONSCHEMA
# records the failure. JsonSchemaSTACValidator.__init__ checks this flag and
# raises ImportError when it is False.
try:
    import jsonschema
    import jsonschema.exceptions
    import jsonschema.validators
    from referencing import Registry, Resource

    # Kept inside the guard, so an ImportError raised while importing the
    # local validator module also results in HAS_JSONSCHEMA = False.
    from pystac.validation.local_validator import get_local_schema_cache

    HAS_JSONSCHEMA = True
except ImportError:
    HAS_JSONSCHEMA = False

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class GetSchemaError(Exception):
    """Signals that a schema could not be fetched.

    Args:
        href : Location of the schema that was being fetched.
        error : The underlying exception; its string form is embedded in
            this exception's message.
    """

    def __init__(self, href: str, error: Exception) -> None:
        message = f"Error when fetching schema {href}: {error}"
        super().__init__(message)
class STACValidator(ABC):
    """Abstract interface for validating STAC JSON.

    Concrete validators implement :meth:`validate_core` and
    :meth:`validate_extension`; :meth:`validate` drives both.

    By default the JsonSchemaSTACValidator is used by PySTAC; users can
    define their own STACValidator implementation and set that validator to
    be used by pystac by using the :func:`~pystac.validation.set_validator`
    method.
    """

    @abstractmethod
    def validate_core(
        self,
        stac_dict: dict[str, Any],
        stac_object_type: STACObjectType,
        stac_version: str,
        href: Optional[str] = None,
    ) -> Any:
        """Validate a core STAC object.

        The return value may be None or anything implementation specific.

        Args:
            stac_dict : Dictionary that is the STAC json of the object.
            stac_object_type : The stac object type of the object encoded in
                stac_dict. One of :class:`~pystac.STACObjectType`.
            stac_version : The version of STAC to validate the object
                against.
            href : Optional HREF of the STAC object being validated.
        """
        raise NotImplementedError

    @abstractmethod
    def validate_extension(
        self,
        stac_dict: dict[str, Any],
        stac_object_type: STACObjectType,
        stac_version: str,
        extension_id: str,
        href: Optional[str] = None,
    ) -> Any:
        """Validate a STAC object against a single extension.

        The return value may be None or anything implementation specific.

        Args:
            stac_dict : Dictionary that is the STAC json of the object.
            stac_object_type : The stac object type of the object encoded in
                stac_dict. One of :class:`~pystac.STACObjectType`.
            stac_version : The version of STAC to validate the object
                against.
            extension_id : The extension ID of the extension to validate
                against.
            href : Optional HREF of the STAC object being validated.
        """
        raise NotImplementedError

    def validate(
        self,
        stac_dict: dict[str, Any],
        stac_object_type: STACObjectType,
        stac_version: str,
        extensions: list[str],
        href: Optional[str] = None,
    ) -> list[Any]:
        """Validate a STAC object JSON: the core object, then each extension.

        Args:
            stac_dict : Dictionary that is the STAC json of the object.
            stac_object_type : The stac object type of the object encoded in
                stac_dict. One of :class:`~pystac.STACObjectType`.
            stac_version : The version of STAC to validate the object
                against.
            extensions : Extension IDs for this stac object.
            href : Optional href of the STAC object being validated.

        Returns:
            List[Any]: The non-None return values of the core validation
            call followed by those of the extension validation calls, in
            the order of ``extensions``. Element type is specific to the
            STACValidator implementation.
        """
        # Round-trip through JSON first; otherwise some valid properties can
        # be marked as invalid (e.g. tuples in coordinate sequences for
        # geometries).
        normalized = json.loads(json.dumps(stac_dict))

        # Core validation runs first, then every extension in the given order.
        outcomes: list[Any] = [
            self.validate_core(normalized, stac_object_type, stac_version, href)
        ]
        outcomes.extend(
            self.validate_extension(
                normalized, stac_object_type, stac_version, ext_id, href
            )
            for ext_id in extensions
        )

        # A None outcome is not reported to the caller.
        return [outcome for outcome in outcomes if outcome is not None]
class JsonSchemaSTACValidator(STACValidator):
    """Validate STAC based on JSON Schemas.

    This validator uses JSON schemas, read from URIs provided by a
    :class:`~pystac.validation.SchemaUriMap`, to validate STAC core
    objects and extensions.

    Args:
        schema_uri_map : The SchemaUriMap that defines where
            the validator will retrieve the JSON schemas for validation.
            Defaults to an instance of
            :class:`~pystac.validation.schema_uri_map.DefaultSchemaUriMap`

    Note:
        This class requires the ``jsonschema`` library to be installed.
    """

    schema_uri_map: SchemaUriMap
    schema_cache: dict[str, dict[str, Any]]

    def __init__(self, schema_uri_map: Optional[SchemaUriMap] = None) -> None:
        if not HAS_JSONSCHEMA:
            raise ImportError("Cannot instantiate, requires jsonschema package")

        if schema_uri_map is not None:
            self.schema_uri_map = schema_uri_map
        else:
            self.schema_uri_map = DefaultSchemaUriMap()

        self.schema_cache = get_local_schema_cache()

    def _get_schema(self, schema_uri: str) -> dict[str, Any]:
        """Return the schema for ``schema_uri``, reading it on a cache miss.

        On a miss the schema text is read with the default
        :class:`pystac.StacIO` and parsed as JSON. If the schema's ``$id``
        (or ``id`` when ``$id`` is absent) is a string that does not start
        with ``http``, it is replaced by ``schema_uri``. A schema that has no
        id, or a non-string id, is left untouched.

        Args:
            schema_uri : URI of the schema to return.

        Returns:
            The (possibly id-normalised) schema dictionary.

        Raises:
            GetSchemaError: If the schema cannot be read or parsed.
        """
        if schema_uri not in self.schema_cache:
            try:
                s = json.loads(pystac.StacIO.default().read_text(schema_uri))
            except Exception as error:
                raise GetSchemaError(schema_uri, error) from error

            id_field = "$id" if "$id" in s else "id"
            schema_id = s.get(id_field)
            # Guarded lookup: a schema without an id (or with a non-string
            # id) must not raise here.
            if isinstance(schema_id, str) and not schema_id.startswith("http"):
                s[id_field] = schema_uri

            # Cache only after normalisation, so the first and every later
            # lookup of the same URI behave the same way.
            self.schema_cache[schema_uri] = s
        return self.schema_cache[schema_uri]

    @property
    def registry(self) -> Any:
        """A ``referencing`` registry built from the current schema cache.

        A new registry is constructed on every access. It is seeded with
        every cached schema, and URIs it does not know are retrieved through
        :meth:`_get_schema`.
        """

        def retrieve(schema_uri: str) -> Resource[dict[str, Any]]:
            return Resource.from_contents(self._get_schema(schema_uri))

        return Registry(retrieve=retrieve).with_resources(  # type: ignore
            [
                (k, Resource.from_contents(v)) for k, v in self.schema_cache.items()
            ]  # type: ignore
        )

    def get_schema_from_uri(self, schema_uri: str) -> tuple[dict[str, Any], Any]:
        """DEPRECATED

        Returns the schema for ``schema_uri`` together with :attr:`registry`.
        """
        warnings.warn(
            "get_schema_from_uri is deprecated and will be removed in v2.",
            DeprecationWarning,
            # Attribute the warning to the caller, not to this line.
            stacklevel=2,
        )
        return self._get_schema(schema_uri), self.registry

    def _validate_from_uri(
        self,
        stac_dict: dict[str, Any],
        stac_object_type: STACObjectType,
        schema_uri: str,
        href: Optional[str] = None,
    ) -> None:
        """Validate ``stac_dict`` against the schema found at ``schema_uri``.

        Args:
            stac_dict : Dictionary that is the STAC json of the object.
            stac_object_type : The stac object type of the object; used in
                log and error messages.
            schema_uri : URI of the schema to validate against.
            href : Optional HREF of the object, used in log and error
                messages.

        Raises:
            STACValidationError: If any validation error is found. It is
                raised from the "best" error as determined by jsonschema;
                all errors are passed as ``source``.
            Exception: Any exception raised while fetching the schema,
                checking it, or running the validator is logged and
                re-raised unchanged.
        """
        try:
            schema = self._get_schema(schema_uri)
            # This block is cribbed (w/ change in error handling) from
            # jsonschema.validate
            cls = jsonschema.validators.validator_for(schema)
            cls.check_schema(schema)
            validator = cls(schema, registry=self.registry)

            errors = list(validator.iter_errors(stac_dict))
        except Exception as e:
            logger.error(f"Exception while validating {stac_object_type} href: {href}")
            logger.exception(e)
            raise

        if errors:
            stac_id = stac_dict.get("id", None)
            msg = f"Validation failed for {stac_object_type} "
            if href is not None:
                msg += f"at {href} "
            if stac_id is not None:
                msg += f"with ID {stac_id} "
            msg += f"against schema at {schema_uri}"

            best = jsonschema.exceptions.best_match(errors)
            if best:
                msg += "\n" + str(best)
            raise STACValidationError(msg, source=errors) from best

    def validate_core(
        self,
        stac_dict: dict[str, Any],
        stac_object_type: STACObjectType,
        stac_version: str,
        href: Optional[str] = None,
    ) -> Optional[str]:
        """Validate a core stac object.

        Return value can be None or specific to the implementation.

        Args:
            stac_dict : Dictionary that is the STAC json of the object.
            stac_object_type : The stac object type of the object encoded in
                stac_dict. One of :class:`~pystac.STACObjectType`.
            stac_version : The version of STAC to validate the object against.
            href : Optional HREF of the STAC object being validated.

        Returns:
            str: URI for the JSON schema that was validated against, or None if
                no validation occurred.

        Raises:
            STACValidationError if stac_dict is not valid. The exception is raised
            from the "best" error, as determined by the jsonschema library. To
            access all jsonschema validation errors, use
            ``STACValidationError.source``.
        """
        schema_uri = self.schema_uri_map.get_object_schema_uri(
            stac_object_type, stac_version
        )

        # No schema is mapped for this object type / version: nothing to do.
        if schema_uri is None:
            return None

        self._validate_from_uri(stac_dict, stac_object_type, schema_uri, href=href)
        return schema_uri

    def validate_extension(
        self,
        stac_dict: dict[str, Any],
        stac_object_type: STACObjectType,
        stac_version: str,
        extension_id: str,
        href: Optional[str] = None,
    ) -> Optional[str]:
        """Validate an extension stac object.

        Return value can be None or specific to the implementation.

        Args:
            stac_dict : Dictionary that is the STAC json of the object.
            stac_object_type : The stac object type of the object encoded in
                stac_dict. One of :class:`~pystac.STACObjectType`.
            stac_version : The version of STAC to validate the object against.
            extension_id : The extension ID to validate against.
            href : Optional HREF of the STAC object being validated.

        Returns:
            str: URI for the JSON schema that was validated against, or None if
                no validation occurred.

        Raises:
            STACValidationError if stac_dict is not valid. The exception is raised
            from the "best" error, as determined by the jsonschema library. To
            access all jsonschema validation errors, use
            ``STACValidationError.source``.
        """
        # The extension ID is itself used as the schema URI.
        schema_uri = extension_id

        if schema_uri is None:
            return None

        # Make the schema URI absolute, using the object's href as the start.
        schema_uri = pystac.utils.make_absolute_href(schema_uri, href)
        self._validate_from_uri(stac_dict, stac_object_type, schema_uri, href)
        return schema_uri