STAC Catalogs¶
GeoAgent includes a registry of pre-configured STAC catalog endpoints.
Built-in Catalogs¶
| Catalog | URL | Auth Required |
|---|---|---|
| Earth Search (AWS) | earth-search.aws.element84.com/v1 |
No |
| Planetary Computer | planetarycomputer.microsoft.com/api/stac/v1 |
No (signing optional) |
| USGS Landsat | landsatlook.usgs.gov/stac-server |
No |
| NASA CMR-STAC | cmr.earthdata.nasa.gov/stac |
Yes (NASA_EARTHDATA_TOKEN) |
Catalog Registry¶
geoagent.catalogs.registry
¶
STAC catalog registry with pre-configured endpoints and extensibility.
CatalogInfo
dataclass
¶
Information about a STAC catalog.
Source code in geoagent/catalogs/registry.py
@dataclass
class CatalogInfo:
"""Information about a STAC catalog."""
name: str
url: str
description: str
requires_auth: bool = False
auth_env_var: Optional[str] = None
CatalogRegistry
¶
Registry for STAC catalogs with built-in and custom endpoints.
Source code in geoagent/catalogs/registry.py
class CatalogRegistry:
"""Registry for STAC catalogs with built-in and custom endpoints."""
def __init__(self):
"""Initialize the catalog registry with built-in catalogs."""
self._catalogs: Dict[str, CatalogInfo] = BUILTIN_CATALOGS.copy()
# Cache for catalog collections: {catalog_name: [{"id": str, "title": str}, ...]}
# Cached per session to avoid repeated network calls
self.__class__._collection_cache = getattr(
self.__class__, "_collection_cache", {}
)
def list_catalogs(self) -> List[CatalogInfo]:
"""
List all available catalogs.
Returns:
List of CatalogInfo objects
"""
return list(self._catalogs.values())
def get_catalog(self, name: str) -> Optional[CatalogInfo]:
"""
Get catalog information by name.
Args:
name: Catalog name
Returns:
CatalogInfo object or None if not found
"""
return self._catalogs.get(name)
def add_catalog(
self,
name: str,
url: str,
description: str,
requires_auth: bool = False,
auth_env_var: Optional[str] = None,
) -> None:
"""
Add a custom catalog to the registry.
Args:
name: Unique catalog name
url: STAC API endpoint URL
description: Human-readable description
requires_auth: Whether authentication is required
auth_env_var: Environment variable name for auth token
"""
self._catalogs[name] = CatalogInfo(
name=name,
url=url,
description=description,
requires_auth=requires_auth,
auth_env_var=auth_env_var,
)
logger.info(f"Added catalog '{name}' to registry")
def get_client(self, name: Optional[str] = None) -> pystac_client.Client:
"""
Get a pystac_client.Client for the specified catalog.
Args:
name: Catalog name. Uses default if None.
Returns:
Configured pystac_client.Client instance
Raises:
ValueError: If catalog not found
RuntimeError: If authentication required but credentials missing
"""
if name is None:
name = DEFAULT_CATALOG
catalog = self.get_catalog(name)
if not catalog:
available = ", ".join(self._catalogs.keys())
raise ValueError(f"Catalog '{name}' not found. Available: {available}")
# Check authentication
if catalog.requires_auth and catalog.auth_env_var:
if not os.getenv(catalog.auth_env_var):
raise RuntimeError(
f"Catalog '{name}' requires authentication. "
f"Set environment variable: {catalog.auth_env_var}"
)
client_kwargs = {}
# Handle authentication for specific catalogs
if name == "nasa_cmr" and catalog.auth_env_var:
token = os.getenv(catalog.auth_env_var)
if token:
client_kwargs["headers"] = {"Authorization": f"Bearer {token}"}
# Create the client
try:
client = pystac_client.Client.open(catalog.url, **client_kwargs)
# Special handling for Planetary Computer signing
if name == "planetary_computer":
try:
import planetary_computer
# Wrap the client with signing capability
original_search = client.search
def signed_search(*args, **kwargs):
"""Search wrapper that signs items for access."""
search_result = original_search(*args, **kwargs)
# Sign the items in the search result for direct access
search_result.sign = lambda: planetary_computer.sign(
search_result
)
return search_result
client.search = signed_search
logger.info("Planetary Computer signing enabled")
except ImportError:
logger.warning(
"planetary-computer package not available. "
"Some data access may be slower. "
"Install with: pip install planetary-computer"
)
return client
except Exception as e:
raise RuntimeError(f"Failed to connect to catalog '{name}': {str(e)}")
def get_collection_index(
self, catalog_name: str = "planetary_computer"
) -> List[Dict[str, str]]:
"""Fetch and cache a simple index of collections for a catalog.
The index contains dicts with collection id and title, e.g.:
[{"id": "sentinel-2-l2a", "title": "Sentinel-2 Level-2A"}, ...]
Results are cached in-memory for the duration of the session.
Args:
catalog_name: Name of the catalog as registered in this registry
Returns:
List of dicts with keys "id" and "title"
"""
# Return cached if available
cache = self.__class__._collection_cache
if catalog_name in cache:
return cache[catalog_name]
client = self.get_client(catalog_name)
collections: List[Dict[str, str]] = []
try:
for col in client.get_collections():
col_id = getattr(col, "id", None) or col.to_dict().get("id")
title = (
getattr(col, "title", None) or col.to_dict().get("title") or col_id
)
if col_id:
collections.append({"id": col_id, "title": title})
except Exception as e:
raise RuntimeError(f"Failed to list collections for '{catalog_name}': {e}")
# Cache and return
cache[catalog_name] = collections
return collections
def remove_catalog(self, name: str) -> bool:
"""
Remove a catalog from the registry.
Args:
name: Catalog name to remove
Returns:
True if catalog was removed, False if not found
Note:
Built-in catalogs cannot be removed, only custom ones
"""
if name in BUILTIN_CATALOGS:
logger.warning(f"Cannot remove built-in catalog '{name}'")
return False
if name in self._catalogs:
del self._catalogs[name]
logger.info(f"Removed catalog '{name}' from registry")
return True
return False
__init__(self)
special
¶
Initialize the catalog registry with built-in catalogs.
Source code in geoagent/catalogs/registry.py
def __init__(self):
"""Initialize the catalog registry with built-in catalogs."""
self._catalogs: Dict[str, CatalogInfo] = BUILTIN_CATALOGS.copy()
# Cache for catalog collections: {catalog_name: [{"id": str, "title": str}, ...]}
# Cached per session to avoid repeated network calls
self.__class__._collection_cache = getattr(
self.__class__, "_collection_cache", {}
)
add_catalog(self, name, url, description, requires_auth=False, auth_env_var=None)
¶
Add a custom catalog to the registry.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
name |
str |
Unique catalog name |
required |
url |
str |
STAC API endpoint URL |
required |
description |
str |
Human-readable description |
required |
requires_auth |
bool |
Whether authentication is required |
False |
auth_env_var |
Optional[str] |
Environment variable name for auth token |
None |
Source code in geoagent/catalogs/registry.py
def add_catalog(
self,
name: str,
url: str,
description: str,
requires_auth: bool = False,
auth_env_var: Optional[str] = None,
) -> None:
"""
Add a custom catalog to the registry.
Args:
name: Unique catalog name
url: STAC API endpoint URL
description: Human-readable description
requires_auth: Whether authentication is required
auth_env_var: Environment variable name for auth token
"""
self._catalogs[name] = CatalogInfo(
name=name,
url=url,
description=description,
requires_auth=requires_auth,
auth_env_var=auth_env_var,
)
logger.info(f"Added catalog '{name}' to registry")
get_catalog(self, name)
¶
Get catalog information by name.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
name |
str |
Catalog name |
required |
Returns:
| Type | Description |
|---|---|
Optional[geoagent.catalogs.registry.CatalogInfo] |
CatalogInfo object or None if not found |
Source code in geoagent/catalogs/registry.py
def get_catalog(self, name: str) -> Optional[CatalogInfo]:
"""
Get catalog information by name.
Args:
name: Catalog name
Returns:
CatalogInfo object or None if not found
"""
return self._catalogs.get(name)
get_client(self, name=None)
¶
Get a pystac_client.Client for the specified catalog.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
name |
Optional[str] |
Catalog name. Uses default if None. |
None |
Returns:
| Type | Description |
|---|---|
Client |
Configured pystac_client.Client instance |
Exceptions:
| Type | Description |
|---|---|
ValueError |
If catalog not found |
RuntimeError |
If authentication required but credentials missing |
Source code in geoagent/catalogs/registry.py
def get_client(self, name: Optional[str] = None) -> pystac_client.Client:
"""
Get a pystac_client.Client for the specified catalog.
Args:
name: Catalog name. Uses default if None.
Returns:
Configured pystac_client.Client instance
Raises:
ValueError: If catalog not found
RuntimeError: If authentication required but credentials missing
"""
if name is None:
name = DEFAULT_CATALOG
catalog = self.get_catalog(name)
if not catalog:
available = ", ".join(self._catalogs.keys())
raise ValueError(f"Catalog '{name}' not found. Available: {available}")
# Check authentication
if catalog.requires_auth and catalog.auth_env_var:
if not os.getenv(catalog.auth_env_var):
raise RuntimeError(
f"Catalog '{name}' requires authentication. "
f"Set environment variable: {catalog.auth_env_var}"
)
client_kwargs = {}
# Handle authentication for specific catalogs
if name == "nasa_cmr" and catalog.auth_env_var:
token = os.getenv(catalog.auth_env_var)
if token:
client_kwargs["headers"] = {"Authorization": f"Bearer {token}"}
# Create the client
try:
client = pystac_client.Client.open(catalog.url, **client_kwargs)
# Special handling for Planetary Computer signing
if name == "planetary_computer":
try:
import planetary_computer
# Wrap the client with signing capability
original_search = client.search
def signed_search(*args, **kwargs):
"""Search wrapper that signs items for access."""
search_result = original_search(*args, **kwargs)
# Sign the items in the search result for direct access
search_result.sign = lambda: planetary_computer.sign(
search_result
)
return search_result
client.search = signed_search
logger.info("Planetary Computer signing enabled")
except ImportError:
logger.warning(
"planetary-computer package not available. "
"Some data access may be slower. "
"Install with: pip install planetary-computer"
)
return client
except Exception as e:
raise RuntimeError(f"Failed to connect to catalog '{name}': {str(e)}")
get_collection_index(self, catalog_name='planetary_computer')
¶
Fetch and cache a simple index of collections for a catalog.
The index contains dicts with collection id and title, e.g.: [{"id": "sentinel-2-l2a", "title": "Sentinel-2 Level-2A"}, ...]
Results are cached in-memory for the duration of the session.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
catalog_name |
str |
Name of the catalog as registered in this registry |
'planetary_computer' |
Returns:
| Type | Description |
|---|---|
List[Dict[str, str]] |
List of dicts with keys "id" and "title" |
Source code in geoagent/catalogs/registry.py
def get_collection_index(
self, catalog_name: str = "planetary_computer"
) -> List[Dict[str, str]]:
"""Fetch and cache a simple index of collections for a catalog.
The index contains dicts with collection id and title, e.g.:
[{"id": "sentinel-2-l2a", "title": "Sentinel-2 Level-2A"}, ...]
Results are cached in-memory for the duration of the session.
Args:
catalog_name: Name of the catalog as registered in this registry
Returns:
List of dicts with keys "id" and "title"
"""
# Return cached if available
cache = self.__class__._collection_cache
if catalog_name in cache:
return cache[catalog_name]
client = self.get_client(catalog_name)
collections: List[Dict[str, str]] = []
try:
for col in client.get_collections():
col_id = getattr(col, "id", None) or col.to_dict().get("id")
title = (
getattr(col, "title", None) or col.to_dict().get("title") or col_id
)
if col_id:
collections.append({"id": col_id, "title": title})
except Exception as e:
raise RuntimeError(f"Failed to list collections for '{catalog_name}': {e}")
# Cache and return
cache[catalog_name] = collections
return collections
list_catalogs(self)
¶
List all available catalogs.
Returns:
| Type | Description |
|---|---|
List[geoagent.catalogs.registry.CatalogInfo] |
List of CatalogInfo objects |
Source code in geoagent/catalogs/registry.py
def list_catalogs(self) -> List[CatalogInfo]:
"""
List all available catalogs.
Returns:
List of CatalogInfo objects
"""
return list(self._catalogs.values())
remove_catalog(self, name)
¶
Remove a catalog from the registry.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
name |
str |
Catalog name to remove |
required |
Returns:
| Type | Description |
|---|---|
bool |
True if catalog was removed, False if not found |
Note
Built-in catalogs cannot be removed, only custom ones
Source code in geoagent/catalogs/registry.py
def remove_catalog(self, name: str) -> bool:
"""
Remove a catalog from the registry.
Args:
name: Catalog name to remove
Returns:
True if catalog was removed, False if not found
Note:
Built-in catalogs cannot be removed, only custom ones
"""
if name in BUILTIN_CATALOGS:
logger.warning(f"Cannot remove built-in catalog '{name}'")
return False
if name in self._catalogs:
del self._catalogs[name]
logger.info(f"Removed catalog '{name}' from registry")
return True
return False
get_catalog_client(name=None)
¶
Get a STAC client for the specified catalog.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
name |
Optional[str] |
Catalog name. Uses default if None. |
None |
Returns:
| Type | Description |
|---|---|
Client |
Configured pystac_client.Client instance |
Source code in geoagent/catalogs/registry.py
def get_catalog_client(name: Optional[str] = None) -> pystac_client.Client:
"""
Get a STAC client for the specified catalog.
Args:
name: Catalog name. Uses default if None.
Returns:
Configured pystac_client.Client instance
"""
return _global_registry.get_client(name)
get_collection_index(catalog_name='planetary_computer')
¶
Convenience accessor for a catalog's collection index.
Source code in geoagent/catalogs/registry.py
def get_collection_index(catalog_name: str = "planetary_computer") -> list:
"""Convenience accessor for a catalog's collection index."""
return _global_registry.get_collection_index(catalog_name)
get_registry()
¶
Get the global catalog registry instance.
Source code in geoagent/catalogs/registry.py
def get_registry() -> CatalogRegistry:
"""Get the global catalog registry instance."""
return _global_registry
list_catalogs()
¶
List all available catalogs from the global registry.
Source code in geoagent/catalogs/registry.py
def list_catalogs() -> List[CatalogInfo]:
"""List all available catalogs from the global registry."""
return _global_registry.list_catalogs()