"""
Raster clipping module with OpenEO integration.
Provides functionality to clip raster data by vector geometries using OpenEO backend.
"""
from typing import Optional
import openeo
from satorbis_kit.constants import Constants
def _require_s3_uri(value: str, param_name: str) -> None:
    """Raise ``ValueError`` unless *value* has the form ``s3://bucket/key``."""
    if not isinstance(value, str):
        raise ValueError(
            f"{param_name} must be an S3 URI string, got {type(value).__name__}"
        )
    scheme, separator, remainder = value.partition("://")
    bucket, _, key = remainder.partition("/")
    # A usable object URI needs the s3 scheme, a bucket, and a non-empty key.
    if scheme.lower() != "s3" or not separator or not bucket or not key.strip("/"):
        raise ValueError(
            f"{param_name} must be an S3 URI of the form 's3://bucket/key', "
            f"got {value!r}"
        )


def _default_job_title(raster_s3_path: str) -> str:
    """Build the fallback job title from the last path component of the raster."""
    # Strip trailing slashes first so "s3://b/dir/" yields "dir", not "".
    filename = raster_s3_path.rstrip("/").rsplit("/", 1)[-1]
    return f"Clip Raster Job - {filename}"


def clip_raster(
    raster_s3_path: str,
    vector_s3_path: str,
    output_s3_path: str,
    compress: str = "LZW",
    all_touched: bool = True,
    crop: bool = True,
    job_title: Optional[str] = None,
    wait_for_completion: bool = False,
) -> "openeo.rest.job.BatchJob":
    """
    Clip raster data using vector geometries with the OpenEO backend.

    This function clips (masks) a raster file using the boundaries of a vector
    file (shapefile, GeoJSON, etc.). It submits the backend's ``clip_raster``
    process as an OpenEO batch job; inputs and output are addressed by S3 URI.

    Args:
        raster_s3_path (str): S3 URI to input raster file to clip.
            Example: "s3://bucket/input.tif"
        vector_s3_path (str): S3 URI to vector file containing clip boundaries.
            Example: "s3://bucket/boundary.shp"
        output_s3_path (str): S3 URI for clipped output raster.
            Example: "s3://bucket/clipped_output.tif"
        compress (str, optional): Compression method for output
            (LZW, DEFLATE, NONE). Passed to the backend unchanged.
            Defaults to "LZW".
        all_touched (bool, optional): If True, all pixels touched by geometries
            are included. If False, only pixels whose center is within the
            geometry. Defaults to True.
        crop (bool, optional): If True, crop output to the extent of the
            geometry. If False, keep original raster extent but mask values.
            Defaults to True.
        job_title (str, optional): Custom job title. If None, auto-generated
            from the input raster filename.
        wait_for_completion (bool, optional): If True, blocks until the job
            completes. If False, returns immediately after starting.
            Defaults to False.

    Returns:
        openeo.rest.job.BatchJob: OpenEO batch job object that can be monitored
        and managed. Use ``job.status()`` to check status.

    Raises:
        ValueError: If any of the three paths is not a string of the form
            ``s3://bucket/key``. Raised before contacting the backend.

        Errors raised by the ``openeo`` client (connection failures, and
        presumably a job-failure exception from ``start_and_wait()`` when
        ``wait_for_completion=True``) propagate unchanged.

    Examples:
        Basic raster clipping:

        >>> from satorbis_kit.raster import clip_raster
        >>> job = clip_raster(
        ...     raster_s3_path="s3://satimg-to-be-deleted/Sentinel2/TCX8021/20251201_091002001000000_TCX8021.tif",
        ...     vector_s3_path="s3://satsure-immutables/ToDB/SOIShp/district/districtSFS/091002001000000.shp",
        ...     output_s3_path="s3://satsure-products-testing/clipped/20251201_091002001000000_TCX8021.tif"
        ... )
        >>> print(job.status())

        Clip without cropping extent:

        >>> job = clip_raster(
        ...     raster_s3_path="s3://bucket/input.tif",
        ...     vector_s3_path="s3://bucket/mask.shp",
        ...     output_s3_path="s3://bucket/masked_output.tif",
        ...     crop=False,
        ...     all_touched=False,
        ...     wait_for_completion=True
        ... )
        >>> print(f"Clipping complete: {job.status()}")

        Custom compression and wait for completion:

        >>> job = clip_raster(
        ...     raster_s3_path="s3://bucket/large_raster.tif",
        ...     vector_s3_path="s3://bucket/region.geojson",
        ...     output_s3_path="s3://bucket/clipped_region.tif",
        ...     compress="DEFLATE",
        ...     job_title="Agricultural Area Extraction",
        ...     wait_for_completion=True
        ... )

        Using OpenEO directly:

        >>> import openeo
        >>> from satorbis_kit.constants import Constants
        >>> con = openeo.connect(Constants.DEV_URL)
        >>> cube = con.datacube_from_process(
        ...     process_id="clip_raster",
        ...     raster_s3_path="s3://bucket/input.tif",
        ...     vector_s3_path="s3://bucket/boundary.shp",
        ...     output_s3_path="s3://bucket/clipped.tif",
        ...     compress="LZW",
        ...     all_touched=True,
        ...     crop=True
        ... )
        >>> job = cube.create_job(title="Clip Raster Job")
        >>> job.start_and_wait()

    Note:
        - Requires AWS credentials configured for S3 access
        - OpenEO backend accessible via Constants.DEV_URL
        - CRS handling between raster and vector (including any reprojection)
          is done by the backend process, not by this function
        - Vector file can be shapefile, GeoJSON, GeoPackage, etc.
        - all_touched=True is useful for extracting complete features
        - crop=True reduces output file size by removing areas outside
          boundaries
    """
    # Fail fast on malformed paths: otherwise the error only shows up later,
    # on the backend, after a batch job has already been created and started.
    _require_s3_uri(raster_s3_path, "raster_s3_path")
    _require_s3_uri(vector_s3_path, "vector_s3_path")
    _require_s3_uri(output_s3_path, "output_s3_path")

    # Connect to OpenEO
    con = openeo.connect(Constants.DEV_URL)

    # Create datacube from the backend's clip_raster process
    cube = con.datacube_from_process(
        process_id="clip_raster",
        raster_s3_path=raster_s3_path,
        vector_s3_path=vector_s3_path,
        output_s3_path=output_s3_path,
        compress=compress,
        all_touched=all_touched,
        crop=crop,
    )

    # Create job with a descriptive title
    if job_title is None:
        job_title = _default_job_title(raster_s3_path)
    job = cube.create_job(title=job_title)

    # Start job and optionally block until it finishes
    if wait_for_completion:
        job.start_and_wait()
    else:
        job.start()
    return job