Skip to main content

Documentation Index

Fetch the complete documentation index at: https://mintlify.com/terrafloww/rasteret/llms.txt

Use this file to discover all available pages before exploring further.

create_backend()

from rasteret import create_backend

backend = create_backend(
    credential_provider=None,
    cloud_config=None,
    region=None,
    default_s3_config=None
)
Create an I/O backend for authenticated cloud reads. Pass the result as backend= to data access methods.

Parameters

credential_provider
object
An obstore credential provider, e.g. PlanetaryComputerCredentialProvider, NasaEarthdataCredentialProvider.
cloud_config
CloudConfig
Cloud configuration for S3 URL rewriting and per-bucket overrides.
region
str
Convenience alias for default_s3_config={"region": region}.
default_s3_config
dict[str, str]
Default S3Store config applied to all buckets that don’t have per-bucket overrides (e.g. {"region": "us-west-2"}).

Returns

backend
StorageBackend
A storage backend instance for use with data access methods.

StorageBackend Protocol

class StorageBackend(Protocol):
    async def get_range(self, url: str, start: int, length: int) -> bytes: ...
    
    async def get_ranges(
        self, url: str, ranges: list[tuple[int, int]]
    ) -> list[bytes]: ...
Minimal protocol for range-based reads from cloud storage. Implement this to plug in a custom I/O backend (e.g. obstore, fsspec, or a mocked reader for tests).

Methods

get_range
async method
Fetch a single byte range from a URL. Parameters:
  • url (str): Resource URL
  • start (int): Byte offset
  • length (int): Number of bytes to read
Returns: bytes
get_ranges
async method
Fetch multiple byte ranges from a URL in a single request. Parameters:
  • url (str): Resource URL
  • ranges (list[tuple[int, int]]): List of (start, length) tuples
Returns: list[bytes]

ObstoreBackend

from rasteret.cloud import ObstoreBackend
import obstore as obs

store = obs.store.HTTPStore.from_url("https://example.com/data/")
backend = ObstoreBackend(
    store=store,
    url_prefix="",
    client_options=None
)
StorageBackend backed by the obstore library. Wraps obstore.get_range_async / obstore.get_ranges_async.

Parameters

store
object
required
Any obstore store (S3Store, HTTPStore, etc.).
url_prefix
str
default:""
URL prefix to strip before passing paths to obstore. obstore expects paths relative to the store root, but rasteret works with full URLs. For example, if COG URLs look like https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/... and the store is HTTPStore.from_url("https://sentinel-cogs.s3.us-west-2.amazonaws.com/"), set url_prefix="https://sentinel-cogs.s3.us-west-2.amazonaws.com/".
client_options
dict
ClientConfig options forwarded to HTTPStore.from_url when constructing the store. Ignored if a pre-built store is provided.

Examples

Planetary Computer

import rasteret
from obstore.auth.planetary_computer import PlanetaryComputerCredentialProvider

# Create backend with PC credentials
pc_asset_url = "https://naipeuwest.blob.core.windows.net/naip/v002/"
backend = rasteret.create_backend(
    credential_provider=PlanetaryComputerCredentialProvider(pc_asset_url)
)

# Use with data access
ds = collection.get_xarray(
    geometries=bbox,
    bands=["B04"],
    backend=backend
)

NASA Earthdata

from obstore.auth.earthdata import NasaEarthdataCredentialProvider

# Create backend for NASA LP DAAC
backend = rasteret.create_backend(
    credential_provider=NasaEarthdataCredentialProvider(
        credentials_url="https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials"
    ),
    region="us-west-2"
)

collection = rasteret.build(
    "lpdaac/hlsl30",
    name="my_area",
    bbox=bbox,
    date_range=date_range,
    backend=backend
)

Custom S3 Region

# Access data in a specific region
backend = rasteret.create_backend(
    region="eu-west-1"
)

data = collection.get_numpy(
    geometries=bbox,
    bands=["B04"],
    backend=backend
)

With CloudConfig

from rasteret import CloudConfig

config = CloudConfig(
    provider="aws",
    requester_pays=True,
    region="us-west-2",
    url_patterns={
        "https://landsatlook.usgs.gov/data/": "s3://usgs-landsat/"
    }
)

backend = rasteret.create_backend(
    cloud_config=config
)

collection = rasteret.build(
    "landsat-c2-l2",
    name="landsat_test",
    bbox=bbox,
    date_range=date_range,
    backend=backend
)

Custom ObstoreBackend

import obstore as obs
from rasteret.cloud import ObstoreBackend

# Create custom S3 store
store = obs.store.S3Store(
    bucket="my-bucket",
    config={
        "region": "us-east-1",
        "aws_access_key_id": "...",
        "aws_secret_access_key": "..."
    }
)

backend = ObstoreBackend(
    store=store,
    url_prefix="s3://my-bucket/"
)

data = collection.get_xarray(
    geometries=bbox,
    bands=["B04"],
    backend=backend
)

Custom Backend Implementation

import aiohttp
from typing import Protocol

class MyCustomBackend:
    """Custom backend using aiohttp."""
    
    def __init__(self, session: aiohttp.ClientSession):
        self.session = session
    
    async def get_range(self, url: str, start: int, length: int) -> bytes:
        headers = {"Range": f"bytes={start}-{start + length - 1}"}
        async with self.session.get(url, headers=headers) as resp:
            return await resp.read()
    
    async def get_ranges(
        self, url: str, ranges: list[tuple[int, int]]
    ) -> list[bytes]:
        # Fetch ranges concurrently
        import asyncio
        tasks = [
            self.get_range(url, start, length)
            for start, length in ranges
        ]
        return await asyncio.gather(*tasks)

# Use custom backend
session = aiohttp.ClientSession()
backend = MyCustomBackend(session)

data = collection.get_numpy(
    geometries=bbox,
    bands=["B04"],
    backend=backend
)

Notes

  • Backends enable authenticated access to cloud storage
  • create_backend() returns an auto-configured obstore backend
  • Implement StorageBackend protocol for custom I/O layers
  • Credential providers must be from the obstore.auth package
  • Most built-in datasets work without explicit backends
  • Requester-pays datasets require valid cloud credentials