Skip to main content

Search and Download Data Through Catalog API

The following notebook provides usage examples for searching, visualizing and downloading data from the STAC catalog. If you would like a deeper understanding of the STAC catalog, collections and products, you can refer to the official ESA STAC API guide. The following examples are written in Python.

Catalog data visualization

Here you will learn how to search the STAC catalog through the STAC API interface. The catalog can also be browsed here in a web browser. 

import requests
import pandas as pd
import json
from typing import Any, Dict
from pystac_client import Client
from IPython.display import Markdown as md

# Landing page (root URL) of the catalogue queried throughout this notebook.
URL_LANDING_PAGE = "https://catalog.maap.eo.esa.int/catalogue/"

# Open a pystac-client connection to the catalogue.
api = Client.open(url=URL_LANDING_PAGE)

There are different ways you can search for a specific collection within the catalog. Some usage examples are reported below.

Search by free text
from pystac_client import Client, ConformanceClasses 
import urllib.parse  # Import the submodule explicitly: a bare `import urllib` does not guarantee `urllib.parse` is loaded

value = 'Biomass'  # Free-text term to look for among the catalogue's collections
params = {'q': value}
URL = f'{URL_LANDING_PAGE}collections?{urllib.parse.urlencode(params)}'

response = requests.get(URL)
response.raise_for_status()  # Stop on an HTTP error instead of failing later with a confusing parsing error
data = json.loads(response.text)
df = pd.json_normalize(data, record_path=['collections'])
df[['id']] # Display the id of the collection

image.png

Search by title
value = 'Biomass Simulated data'  # Collection title to match
params = {'filter': f"title='{value}'"}
URL = f'{URL_LANDING_PAGE}collections?{urllib.parse.urlencode(params)}'

response = requests.get(URL)
response.raise_for_status()  # Stop on an HTTP error instead of failing later with a confusing parsing error
data = json.loads(response.text)
df = pd.json_normalize(data, record_path=['collections'])

df[['id', 'title']]  # Display id and title of the matching collections

image.png

Search by platform
# Keep only the collections whose platform is BIOMASS
URL = URL_LANDING_PAGE + "collections" + "?filter=platform='BIOMASS'"

response = requests.get(URL)
response.raise_for_status()  # Stop on an HTTP error instead of failing later with a confusing parsing error
data = json.loads(response.text)

df = pd.json_normalize(data, record_path=['collections'])
df[['title', 'summaries.platform']]  # Display title and platform of the matching collections

image.png

Search by organization
# Keep only the collections provided by the organisation ESA/ESRIN
URL = URL_LANDING_PAGE + "collections" + "?filter=organisationName='ESA/ESRIN'"

# Send the query and refresh `data`; without this step the table below
# would be built from the results of the previous search.
response = requests.get(URL)
response.raise_for_status()  # Stop on an HTTP error instead of failing later with a confusing parsing error
data = json.loads(response.text)

df = pd.json_normalize(data, record_path=['collections'])
df[['title', 'providers']]  # Display title and providers of the matching collections

image.png

Search by bounding box
URL = URL_LANDING_PAGE + "collections" + "?bbox=14.90,37.700,14.99,37.780" # Longitude, Latitude

response = requests.get(URL)
response.raise_for_status()  # Stop on an HTTP error instead of failing later with a confusing parsing error
data = json.loads(response.text)
df = pd.json_normalize(data, record_path=['collections'])
df[['id', 'extent.spatial.bbox']]  # Display id and spatial extent of the matching collections

image.png

Search by temporal extent
# Time interval written as "<start>/<end>"
URL = URL_LANDING_PAGE + "collections" + "?datetime=" + '2020-01-01T00:00:00.000Z/2024-12-31T23:59:59.999Z'

response = requests.get(URL)
response.raise_for_status()  # Stop on an HTTP error instead of failing later with a confusing parsing error
data = json.loads(response.text)

df = pd.json_normalize(data, record_path=['collections'])
df[['id', 'extent.temporal.interval']]  # Display id and temporal extent of the matching collections

image.png

Search by geometry
from pystac_client import Client 

URL_LANDING_PAGE = "https://catalog.maap.eo.esa.int/catalogue/"
api = Client.open(URL_LANDING_PAGE) 

# Area of interest: a polygon given as a closed ring of [longitude, latitude]
# pairs (the last vertex repeats the first one).
aoi_as_dict: Dict[str, Any] = {
    "type": "Polygon",
    "coordinates": [
        [
            [112.82476, -2.66676],
            [112.291824, -2.778783],
            [112.409676, -3.33663],
            [112.94324, -3.224744],
            [112.82476, -2.66676],
        ]
    ],
}

results = api.search(
    method = 'GET',         
    max_items = 5, # Maximum number of granules to take
    collections = 'BiomassSimulated', # Search for granules belonging to collection ID (e.g. BiomassLevel1cIOC, BiomassAux...)
    intersects = aoi_as_dict,
    datetime = ['2015-01-01T00:00:00Z', '2020-01-02T00:00:00Z'] # Search for granules in date range
)

# Double quotes outside, single quotes inside: reusing the same quote type
# inside an f-string is a SyntaxError before Python 3.12.
print(f"{len(results.item_collection_as_dict()['features'])} granules found")
Search by bounding box
results = api.search(
    method = 'GET',   
    max_items = 10, # Maximum number of granules to take
    collections = 'BiomassSimulated', # Search for granules belonging to collection ID (e.g. BiomassLevel1cIOC, BiomassAux...)
    bbox = [112.291824, -3.33663, 112.94324, -2.66676], 
    # datetime = ['2015-01-01T00:00:00Z', '2020-01-02T00:00:00Z'] # Search for granules in date range
)

# Double quotes outside, single quotes inside: reusing the same quote type
# inside an f-string is a SyntaxError before Python 3.12.
print(f"{len(results.item_collection_as_dict()['features'])} granules found")
Search by temporal extent
results = api.search(
    method = 'GET',   
    max_items = 50, # Maximum number of granules to take
    collections = 'BiomassSimulated', # Search for granules belonging to collection ID (e.g. BiomassLevel1cIOC, BiomassAux...)
    datetime = ['2017-01-01T00:00:00Z', '2017-12-02T00:00:00Z'] # Search for granules in date range
)

# Double quotes outside, single quotes inside: reusing the same quote type
# inside an f-string is a SyntaxError before Python 3.12.
print(f"{len(results.item_collection_as_dict()['features'])} granules found")
Search by identifier
# Insert one or more product ids to search
product_id = [
    'BIO_S1_DGM__1S_20170101T222105_20170101T222114_I_G01_M01_C01_T010_F308_01_D6K4O3',
    'BIO_S1_DGM__1S_20170101T222150_20170101T222211_I_G01_M01_C01_T011_F001_01_D5U6F7',
]

results = api.search(
    method = 'GET',   
    collections = 'BiomassSimulated',
    ids = product_id
)

# Double quotes outside, single quotes inside: reusing the same quote type
# inside an f-string is a SyntaxError before Python 3.12.
print(f"{len(results.item_collection_as_dict()['features'])} granules found")
Search with filter
results = api.search(
    method = 'GET',   
    max_items = 10, # Maximum number of granules to take
    collections = 'BiomassSimulated',
    # bbox = [112.291824, -3.33663, 112.94324, -2.66676], 
    # datetime = ['2015-01-01T00:00:00Z', '2020-01-02T00:00:00Z'] # Search for granules in date range
    filter="productType='S1_DGM__1S' and instrument='P-SAR'" # Many other filters can be applied 
)

# Double quotes outside, single quotes inside: reusing the same quote type
# inside an f-string is a SyntaxError before Python 3.12.
print(f"{len(results.item_collection_as_dict()['features'])} granules found")

Catalog Data Download

Now that you have seen how to search for collections and products, let's see how you can download data from catalog directly via code.

Token to access the catalog

To download data from the STAC catalog directly via code, you need to generate an access token to pass to the client. This token can be generated by following this page and is valid for 90 days. After that, you will be asked to log in again with your credentials to generate a new token.

It is highly recommended to create a .txt file (e.g. credentials.txt) containing all the authentication information that you need. In Coding, you can create a new .txt file by clicking on "Text File" from the Launcher:

bjAuntitled-design-6.png

The credentials.txt file should be located in the same directory as the script.

Now you can populate your file with your keys and tokens in this format:

CLIENT_ID=offline-token
CLIENT_SECRET=p1eL7uonXs6MDxtGbgKdPVRAmnGxHpVE
OFFLINE_TOKEN=your_esamaap_longlasting_token_here

You only have to change OFFLINE_TOKEN, pasting your long lasting token generated before. The other two entries should be kept as they are in the above example.

Be careful not to put quotes around the values, and make sure there are no trailing spaces or extra characters.

Example Code

Below you can find a sample of code that shows you how to use your token. 

Let's consider the case in which (for example) you want to download all the products that satisfy your search requirements:

  • Belong to the collection "BiomassLevel1aIOC"
  • Acquisition date between 2025-01-01 and 2025-12-31 (YYYY-MM-dd)
  • With bounding box [-57.099045, -20.350586, -56.448875, -19.722197]
import os
import requests
from pathlib import Path
from pystac_client import Client

# --- Path to credentials.txt --- 
# Use the path exactly as given, so a credentials file with any name can be used.
CREDENTIALS_FILE = Path('path/to/credentials.txt').resolve()   # Insert the .txt path


def load_credentials(file_path=CREDENTIALS_FILE):
    """Read KEY=VALUE pairs from a credentials file into a dictionary.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Every other line is split on its first ``=``; the key and the
    value are stripped of surrounding whitespace.

    Args:
        file_path: Location of the credentials file, as a ``str`` or a
            ``pathlib.Path``. Defaults to ``CREDENTIALS_FILE``.

    Returns:
        A dict mapping each key found in the file to its value (both ``str``).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    file_path = Path(file_path)  # Accept plain strings as well as Path objects
    if not file_path.exists():
        raise FileNotFoundError(f"Credentials file not found: {file_path}")

    creds = {}
    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise become part of the first key.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            # Split on the first "=" only, so values may contain "=" themselves
            key, value = line.split("=", 1)
            creds[key.strip()] = value.strip()
    return creds


# --- ESA MAAP API ---

def get_token():
    """Use OFFLINE_TOKEN to fetch a short-lived access token.

    Reads OFFLINE_TOKEN, CLIENT_ID and CLIENT_SECRET through
    ``load_credentials()`` and posts them to the IAM token endpoint using
    the ``refresh_token`` grant.

    Returns:
        The ``access_token`` string contained in the IAM response.

    Raises:
        FileNotFoundError: If the credentials file does not exist.
        ValueError: If any of the three credentials is missing or empty.
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an HTTP error status.
        RuntimeError: If the response carries no ``access_token``.
    """
    creds = load_credentials()

    offline_token = creds.get("OFFLINE_TOKEN")
    client_id = creds.get("CLIENT_ID")
    client_secret = creds.get("CLIENT_SECRET")
    # Credentials are deliberately never printed or logged.

    if not all([offline_token, client_id, client_secret]):
        raise ValueError("Missing OFFLINE_TOKEN, CLIENT_ID, or CLIENT_SECRET in credentials file")

    url = "https://iam.maap.eo.esa.int/realms/esa-maap/protocol/openid-connect/token"
    payload = {
        "client_id": client_id,
        "client_secret": client_secret,
        "grant_type": "refresh_token",
        "refresh_token": offline_token,
        "scope": "offline_access openid",
    }

    # A timeout keeps the call from hanging forever if the server does not answer.
    response = requests.post(url, data=payload, timeout=30)
    response.raise_for_status()

    access_token = response.json().get('access_token')
    if not access_token:
        raise RuntimeError("Failed to retrieve access token from IAM response")

    return access_token




if __name__ == "__main__":

    URL_LANDING_PAGE = "https://catalog.maap.eo.esa.int/catalogue/"
    api = Client.open(URL_LANDING_PAGE) # Connection to the catalog

    results = api.search(
        method = 'GET',
        max_items = 20, # Maximum number of granules to take
        collections = 'BiomassLevel1aIOC',
        bbox = [-57.099045, -20.350586, -56.448875, -19.722197],
        datetime = ['2025-01-01T00:00:00Z', '2025-12-31T00:00:00Z'] # Search for granules in date range
    )

    # Retrieve the result set once and reuse it for the count and the downloads.
    data = results.item_collection_as_dict()
    features = data['features']
    print(f"{len(features)} granules found")

    # get_token() raises on failure, so a returned value is always a usable token.
    access_token = get_token()
    print("Access token verified!")
    headers = {"Authorization": f"Bearer {access_token}"}

    n_products = 2 # Number of products to download

    # Slicing caps the loop at the number of granules actually found.
    for feature in features[:n_products]:
        product = feature['assets']['product']
        file_url = product['href']
        file_path = "./my_products/" + product['file:local_path'] # Change ./my_products/ with your desired path to download data

        # Make sure the destination folder exists before writing into it.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)

        try:
            # The context manager releases the connection even if the download fails.
            with requests.get(file_url, headers=headers, stream=True, timeout=60) as response:
                response.raise_for_status()  # Raise an exception for bad status codes

                with open(file_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
        except requests.exceptions.RequestException as e:
            print(f"Error downloading file: {e}")
            print('')
        else:
            print(f"File downloaded successfully to {file_path}")
            print('')