Skip to content

Querying with SQL

Querying Beacon with SQL is done through a api call to the /api/query endpoint. Every programming language has a different way of making API calls. Below are some examples of how to make an API call using SQL in different programming languages.

Querying a single dataset

python
import requests
import json
import pandas as pd
import io
# Define the API endpoint
url = "http://beacon-host/api/query"

# Define the SQL query and output format
query = {
    "sql": "SELECT TEMP, \"TEMP.units\",PSAL,LONGITUDE,LATITUDE FROM read_netcdf('dataset.nc')",
    "output": {
        "format": "parquet"
    }
}

# Make the API call
response = requests.post(url, json=query)

# Check the response status code
if response.status_code == 200:
    # Parse the response parquet data
    parquet_data = response.content
    # Convert the response content to a BytesIO object
    io_buffer = io.BytesIO(parquet_data)
    # Read the Parquet data into a DataFrame
    df = pd.read_parquet(io_buffer)
    print(df)
else:
    print(f"Error: {response.status_code} - {response.text}")
python
import requests
import json
import pandas as pd
import io
# Define the API endpoint
url = "http://beacon-host/api/query"

# Define the SQL query and output format
query = {
    "sql": "SELECT TEMP,PSAL,LONGITUDE,LATITUDE FROM read_odv_ascii('dataset.txt')",
    "output": {
        "format": "parquet"
    }
}

# Make the API call
response = requests.post(url, json=query)

# Check the response status code
if response.status_code == 200:
    # Parse the response parquet data
    parquet_data = response.content
    # Convert the response content to a BytesIO object
    io_buffer = io.BytesIO(parquet_data)
    # Read the Parquet data into a DataFrame
    df = pd.read_parquet(io_buffer)
    print(df)
else:
    print(f"Error: {response.status_code} - {response.text}")
python
import requests
import json
import pandas as pd
import io
# Define the API endpoint
url = "http://beacon-host/api/query"

# Define the SQL query and output format
query = {
    "sql": "SELECT TEMP,PSAL,LONGITUDE,LATITUDE FROM read_parquet('dataset.parquet')",
    "output": {
        "format": "parquet"
    }
}

# Make the API call
response = requests.post(url, json=query)

# Check the response status code
if response.status_code == 200:
    # Parse the response parquet data
    parquet_data = response.content
    # Convert the response content to a BytesIO object
    io_buffer = io.BytesIO(parquet_data)
    # Read the Parquet data into a DataFrame
    df = pd.read_parquet(io_buffer)
    print(df)
else:
    print(f"Error: {response.status_code} - {response.text}")

Querying multiple datasets

python
import requests
import json
import pandas as pd
import io
# Define the API endpoint
url = "http://beacon-host/api/query"

# Define the SQL query and output format
query = {
    "sql": "SELECT TEMP, \"TEMP.units\",PSAL,LONGITUDE,LATITUDE FROM read_netcdf('*.nc')",
    "output": {
        "format": "parquet"
    }
}

# Make the API call
response = requests.post(url, json=query)

# Check the response status code
if response.status_code == 200:
    # Parse the response parquet data
    parquet_data = response.content
    # Convert the response content to a BytesIO object
    io_buffer = io.BytesIO(parquet_data)
    # Read the Parquet data into a DataFrame
    df = pd.read_parquet(io_buffer)
    print(df)
else:
    print(f"Error: {response.status_code} - {response.text}")
python
import requests
import json
import pandas as pd
import io
# Define the API endpoint
url = "http://beacon-host/api/query"

# Define the SQL query and output format
query = {
    "sql": "SELECT TEMP, \"TEMP.units\",PSAL,LONGITUDE,LATITUDE FROM read_odv_ascii('*.txt')",
    "output": {
        "format": "parquet"
    }
}

# Make the API call
response = requests.post(url, json=query)

# Check the response status code
if response.status_code == 200:
    # Parse the response parquet data
    parquet_data = response.content
    # Convert the response content to a BytesIO object
    io_buffer = io.BytesIO(parquet_data)
    # Read the Parquet data into a DataFrame
    df = pd.read_parquet(io_buffer)
    print(df)
else:
    print(f"Error: {response.status_code} - {response.text}")
python
import requests
import json
import pandas as pd
import io
# Define the API endpoint
url = "http://beacon-host/api/query"

# Define the SQL query and output format
query = {
    "sql": "SELECT TEMP,PSAL,LONGITUDE,LATITUDE FROM read_parquet('*.parquet')",
    "output": {
        "format": "parquet"
    }
}

# Make the API call
response = requests.post(url, json=query)

# Check the response status code
if response.status_code == 200:
    # Parse the response parquet data
    parquet_data = response.content
    # Convert the response content to a BytesIO object
    io_buffer = io.BytesIO(parquet_data)
    # Read the Parquet data into a DataFrame
    df = pd.read_parquet(io_buffer)
    print(df)
else:
    print(f"Error: {response.status_code} - {response.text}")