Query Reference

Source code in beacon_api/query.py

class Query:
    """Fluent builder that assembles, compiles and runs Beacon JSON queries.

    Selects, filters and the output format are accumulated through chained
    method calls. The ``selects``, ``filters`` and ``output`` attributes are
    only created once a corresponding builder method has been called;
    ``compile_query`` relies on that to detect a missing output or select list.
    """

    def __init__(self, http_session: BaseBeaconSession, from_table: Optional[str] = None, from_file_path: Optional[str] = None):
        """
        A class to build and run Beacon JSON Queries. Best to construct this object using the Client object or Table object.

        Args:
            http_session (BaseBeaconSession): The session used to post the query to the server.
            from_table (str | None, optional): Value used for the query's ``from`` field. Defaults to None.
            from_file_path (str | None, optional): Alternative value for the ``from`` field;
                must not be combined with ``from_table``. Defaults to None.
        """
        self.http_session = http_session
        self.from_table = from_table
        self.from_file_path = from_file_path

    def _ensure_selects(self) -> list:
        """Return the select list, creating the ``selects`` attribute on first use."""
        if not hasattr(self, "selects"):
            self.selects = []
        return self.selects

    def _ensure_filters(self) -> list:
        """Return the filter list, creating the ``filters`` attribute on first use."""
        if not hasattr(self, "filters"):
            self.filters = []
        return self.filters

    @staticmethod
    def _write_response(response: Response, filename: str, chunk_size: int) -> None:
        """Write the body of ``response`` to ``filename`` in chunks of ``chunk_size`` bytes."""
        with open(filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)

    def select(self, selects: List[Select]) -> Self:
        """Sets the select statements of the query, replacing any existing ones.

        Args:
            selects (list[Select]): The select statements to use.

        Returns:
            Self: The query builder instance.
        """
        self.selects = selects
        return self

    def add_select(self, select: Select) -> Self:
        """Adds a single select statement to the query.

        Args:
            select (Select): The select statement to add.

        Returns:
            Self: The query builder instance.
        """
        self._ensure_selects().append(select)
        return self

    def add_selects(self, selects: List[Select]) -> Self:
        """Adds multiple select statements to the query.

        Args:
            selects (list[Select]): The select statements to add.

        Returns:
            Self: The query builder instance.
        """
        self._ensure_selects().extend(selects)
        return self

    def add_select_column(self, column: str, alias: Optional[str] = None) -> Self:
        """Adds a select column to the query.

        Args:
            column (str): The name of the column to select.
            alias (str | None, optional): An optional alias for the column. Defaults to None.

        Returns:
            Self: The query builder instance.
        """
        self._ensure_selects().append(SelectColumn(column=column, alias=alias))
        return self

    def add_select_columns(self, columns: List[Tuple[str, Optional[str]]]) -> Self:
        """Adds multiple select columns to the query.

        Args:
            columns (List[Tuple[str, Optional[str]]]): A list of tuples containing column names and their aliases.

        Returns:
            Self: The query builder instance.
        """
        selects = self._ensure_selects()
        for column, alias in columns:
            selects.append(SelectColumn(column=column, alias=alias))
        return self

    def add_select_coalesced(self, mergeable_columns: List[str], alias: str) -> Self:
        """Adds a coalesced select to the query.

        The select is a ``coalesce`` function call whose arguments are the
        given columns, in the given order.

        Args:
            mergeable_columns (list[str]): The columns to merge.
            alias (str): The alias for the merged column.

        Returns:
            Self: The query builder instance.
        """
        selects = self._ensure_selects()
        function_call = SelectFunction("coalesce", args=[SelectColumn(column=col) for col in mergeable_columns], alias=alias)
        selects.append(function_call)
        return self

    def filter(self, filters: List[Filter]) -> Self:
        """Sets the filters of the query, replacing any existing ones.

        Args:
            filters (list[Filter]): The filters to use.

        Returns:
            Self: The query builder instance.
        """
        self.filters = filters
        return self

    def add_filter(self, filter: Filter) -> Self:
        """Adds a filter to the query.

        Args:
            filter (Filter): The filter to add.

        Returns:
            Self: The query builder instance.
        """
        self._ensure_filters().append(filter)
        return self

    def add_bbox_filter(
        self,
        longitude_column: str,
        latitude_column: str,
        bbox: Tuple[float, float, float, float],
    ) -> Self:
        """Adds a bounding box filter to the query.

        The box is appended as one AND filter combining a lower (``gt_eq``)
        and an upper (``lt_eq``) range filter for each coordinate column.

        Args:
            longitude_column (str): The name of the column for longitude.
            latitude_column (str): The name of the column for latitude.
            bbox (tuple[float, float, float, float]): The bounding box coordinates
                in the order (min_lon, min_lat, max_lon, max_lat).

        Returns:
            Self: The query builder instance.
        """
        filters = self._ensure_filters()
        # Name the corners so the positional order of ``bbox`` is explicit.
        min_lon, min_lat, max_lon, max_lat = bbox[0], bbox[1], bbox[2], bbox[3]
        filters.append(
            AndFilter(
                filters=[
                    RangeFilter(column=longitude_column, gt_eq=min_lon),
                    RangeFilter(column=longitude_column, lt_eq=max_lon),
                    RangeFilter(column=latitude_column, gt_eq=min_lat),
                    RangeFilter(column=latitude_column, lt_eq=max_lat),
                ]
            )
        )
        return self

    def add_polygon_filter(self, longitude_column: str, latitude_column: str, polygon: List[Tuple[float, float]]) -> Self:
        """Adds a POLYGON filter to the query.

        Args:
            longitude_column (str): The name of the column for longitude.
            latitude_column (str): The name of the column for latitude.
            polygon (list[tuple[float, float]]): A list of (longitude, latitude) tuples defining the polygon.

        Returns:
            Self: The query builder instance.
        """
        self._ensure_filters().append(
            PolygonFilter(longitude_column=longitude_column, latitude_column=latitude_column, polygon=polygon)
        )
        return self

    def add_range_filter(
        self,
        column: str,
        gt_eq: Union[str, int, float, datetime, None] = None,
        lt_eq: Union[str, int, float, datetime, None] = None,
    ) -> Self:
        """Adds a RANGE filter to the query.

        Args:
            column (str): The name of the column to filter.
            gt_eq (str | int | float | datetime | None, optional): The lower bound for the range filter. Defaults to None.
            lt_eq (str | int | float | datetime | None, optional): The upper bound for the range filter. Defaults to None.

        Returns:
            Self: The query builder instance.
        """
        self._ensure_filters().append(RangeFilter(column=column, gt_eq=gt_eq, lt_eq=lt_eq))
        return self

    def add_equals_filter(
        self, column: str, eq: Union[str, int, float, bool, datetime]
    ) -> Self:
        """Adds an EQUALS filter to the query.

        Args:
            column (str): The name of the column to filter.
            eq (str | int | float | bool | datetime): The value to compare against.

        Returns:
            Self: The query builder instance.
        """
        self._ensure_filters().append(EqualsFilter(column=column, eq=eq))
        return self

    def add_not_equals_filter(
        self, column: str, neq: Union[str, int, float, bool, datetime]
    ) -> Self:
        """Adds a NOT EQUALS filter to the query.

        Args:
            column (str): The name of the column to filter.
            neq (str | int | float | bool | datetime): The value to compare against.

        Returns:
            Self: The query builder instance.
        """
        self._ensure_filters().append(NotEqualsFilter(column=column, neq=neq))
        return self

    def add_is_null_filter(self, column: str) -> Self:
        """Adds an IS NULL filter to the query.

        Args:
            column (str): The name of the column to filter.

        Returns:
            Self: The query builder instance.
        """
        self._ensure_filters().append(FilterIsNull(column=column))
        return self

    def add_is_not_null_filter(self, column: str) -> Self:
        """Adds an IS NOT NULL filter to the query.

        Args:
            column (str): The name of the column to filter.

        Returns:
            Self: The query builder instance.
        """
        self._ensure_filters().append(IsNotNullFilter(column=column))
        return self

    def set_output(self, output: Output) -> Self:
        """Sets the output format for the query.

        Args:
            output (Output): The output format to use.

        Returns:
            Self: The query builder instance.
        """
        self.output = output
        return self

    def compile_query(self) -> str:
        """Compiles the query into a Beacon JSON Query.

        The ``from`` field is the file path when ``from_file_path`` is set, the
        table name when ``from_table`` is set, and ``"default"`` when neither is.

        Raises:
            ValueError: If both ``from_table`` and ``from_file_path`` are set.
            ValueError: If the output or the selects have not been set.
            TypeError: If a value in the query is neither JSON-serializable nor a datetime.

        Returns:
            str: The compiled query as a JSON string.
        """
        if self.from_table and self.from_file_path:
            raise ValueError("Cannot set both from_table and from_file_path")
        # Falls back to the "default" source when neither source is given.
        from_ = self.from_file_path or self.from_table or "default"

        if not hasattr(self, "output"):
            raise ValueError("Output must be set before compiling the query")

        if not hasattr(self, "selects"):
            raise ValueError("Selects must be set before compiling the query")

        query = {
            "from": from_,
            "select": [s.to_dict() for s in self.selects],
            # Filters are optional: a query without any compiles to an empty list.
            "filters": [f.to_dict() for f in getattr(self, "filters", [])],
            "output": self.output.to_dict(),
        }

        # json.dumps cannot serialize datetime objects on its own, so they are
        # rendered as "YYYY-MM-DDTHH:MM:SS.ffffff" strings (no UTC offset is emitted).
        def datetime_converter(o):
            if isinstance(o, datetime):
                return o.strftime("%Y-%m-%dT%H:%M:%S.%f")
            raise TypeError(f"Type {type(o)} not serializable")

        return json.dumps(query, default=datetime_converter)

    def run(self) -> Response:
        """Run the query and return the response.

        Raises:
            Exception: If the server does not answer with status 200, or the
                response body is empty.

        Returns:
            Response: The HTTP response holding the query result.
        """
        query = self.compile_query()
        print(f"Running query: {query}")
        response = self.http_session.post("/api/query", data=query)
        if response.status_code != 200:
            raise Exception(f"Query failed: {response.text}")
        if not response.content:
            raise Exception("Query returned no content")
        return response

    def explain(self) -> dict:
        """Get the query plan.

        Raises:
            Exception: If the server does not answer with status 200.

        Returns:
            The decoded JSON body of the explain response.
        """
        query = self.compile_query()
        response = self.http_session.post("/api/explain-query", data=query)
        if response.status_code != 200:
            raise Exception(f"Explain query failed: {response.text}")
        return response.json()

    def explain_visualize(self):
        """Visualize the query plan using networkx and matplotlib.

        Raises:
            ImportError: If ``networkx`` or ``matplotlib`` is not installed.
        """

        try:
            import networkx as nx
            import matplotlib.pyplot as plt
        except ImportError as e:
            raise ImportError(
                "This function requires `networkx` and `matplotlib`. Install with `pip install beacon-api[profiling]`."
            ) from e

        plan_json = self.explain()
        # Extract the root plan node
        root_plan = plan_json[0]["Plan"]

        # Build a directed graph with one node per plan node
        G = nx.DiGraph()

        def make_label(node):
            """Build a multi-line label from whichever fields are present."""
            parts = [node.get("Node Type", "<unknown>")]
            for field in (
                "File Type",
                "Options",
                "Condition",
                "Output URL",
                "Expressions",
                "Output",
                "Filter",
            ):
                if field in node and node[field]:
                    parts.append(f"{field}: {node[field]}")
            return "\n".join(parts)

        def add_nodes(node, parent_id=None):
            """Recursively add ``node`` and its "Plans" children to the graph."""
            # id() of the plan dict serves as the graph node key.
            nid = id(node)
            G.add_node(nid, label=make_label(node))
            if parent_id is not None:
                G.add_edge(parent_id, nid)
            for child in node.get("Plans", []):
                add_nodes(child, nid)

        add_nodes(root_plan)

        # Prefer the graphviz "dot" layout; fall back to a spring layout when
        # it cannot be computed for any reason.
        try:
            pos = nx.nx_agraph.graphviz_layout(G, prog="dot")
        except Exception:
            pos = nx.spring_layout(G)

        plt.figure(figsize=(8, 6))
        labels = nx.get_node_attributes(G, "label")
        nx.draw(G, pos, labels=labels, with_labels=True, node_size=2000, font_size=8)
        plt.title("Beacon Query Plan Visualization")
        plt.tight_layout()
        plt.show()

    def to_netcdf(self, filename: str, build_nc_local: bool = True, streaming_chunk_size: int = 1024 * 1024):
        """Export the query result to a NetCDF file.

        Either path replaces the output previously set on the query.

        Args:
            filename (str): The name of the output NetCDF file.
            build_nc_local (bool):
                If True, build the NetCDF file locally using pandas and xarray. (This is likely faster in most cases.)
                If False, use the server to build the NetCDF file.
            streaming_chunk_size (int, optional): Chunk size in bytes used when writing the
                server-built file. Only used when ``build_nc_local`` is False. Defaults to 1 MiB.
        """
        # If build_nc_local is True, we will build the NetCDF file locally
        if build_nc_local:
            df = self.to_pandas_dataframe()
            xdf = df.to_xarray()
            xdf.to_netcdf(filename, mode="w")
        # If build_nc_local is False, we will use the server to build the NetCDF
        else:
            self.set_output(NetCDF())
            response = self.run()
            self._write_response(response, filename, streaming_chunk_size)

    def to_arrow(self, filename: str, streaming_chunk_size: int = 1024 * 1024):
        """
        Converts the query result to Apache Arrow format and writes it to a file.

        Args:
            filename (str): The path to the file where the Arrow-formatted data will be saved.
            streaming_chunk_size (int, optional): Chunk size in bytes used when writing the file. Defaults to 1 MiB.

        Returns:
            None

        Side Effects:
            Replaces the query's output with Arrow and writes the response content to the specified file.
        """
        self.set_output(Arrow())
        response = self.run()
        self._write_response(response, filename, streaming_chunk_size)

    def to_parquet(self, filename: str, streaming_chunk_size: int = 1024 * 1024):
        """
        Exports the query results to a Parquet file.

        This method sets the output format to Parquet, executes the query, and writes the resulting data to the specified file.

        Args:
            filename (str): The path to the file where the Parquet data will be saved.
            streaming_chunk_size (int, optional): Chunk size in bytes used when writing the file. Defaults to 1 MiB.

        Returns:
            None
        """
        self.set_output(Parquet())
        response = self.run()
        self._write_response(response, filename, streaming_chunk_size)

    def to_geoparquet(self, filename: str, longitude_column: str, latitude_column: str, streaming_chunk_size: int = 1024 * 1024):
        """
        Exports the query results to a GeoParquet file.

        Args:
            filename (str): The path to the file where the GeoParquet data will be saved.
            longitude_column (str): The name of the column representing longitude.
            latitude_column (str): The name of the column representing latitude.
            streaming_chunk_size (int, optional): Chunk size in bytes used when writing the file. Defaults to 1 MiB.
        """
        self.set_output(GeoParquet(longitude_column=longitude_column, latitude_column=latitude_column))
        response = self.run()
        self._write_response(response, filename, streaming_chunk_size)

    def to_csv(self, filename: str, streaming_chunk_size: int = 1024 * 1024):
        """Exports the query results to a CSV file.

        Args:
            filename (str): The path to the file where the CSV data will be saved.
            streaming_chunk_size (int, optional): Chunk size in bytes used when writing the file. Defaults to 1 MiB.
        """
        self.set_output(CSV())
        response = self.run()
        self._write_response(response, filename, streaming_chunk_size)

    def to_zarr(self, filename: str):
        """Exports the query results to a Zarr file.

        The result is first loaded into a pandas DataFrame, converted to
        xarray and then written with ``to_zarr``.

        Args:
            filename (str): The path to the file where the Zarr data will be saved.

        Raises:
            ImportError: If ``zarr`` is not installed.
        """

        try:
            import zarr  # noqa: F401 -- imported only to check that zarr is installed
        except ImportError as e:
            raise ImportError(
                "This function requires `zarr`. Install with `pip install beacon-api[zarr]`."
            ) from e

        # Read to pandas dataframe first
        df = self.to_pandas_dataframe()
        # Convert to Zarr format
        xdf = df.to_xarray()
        xdf.to_zarr(filename, mode="w")

    def to_pandas_dataframe(self) -> pd.DataFrame:
        """Converts the query results to a pandas DataFrame.

        Replaces the query's output with Parquet, runs the query and parses
        the response body in memory.

        Returns:
            pd.DataFrame: The query results as a pandas DataFrame.
        """
        self.set_output(Parquet())
        response = self.run()
        return pd.read_parquet(BytesIO(response.content))

    def to_geo_pandas_dataframe(self, longitude_column: str, latitude_column: str, crs: str = "EPSG:4326") -> gpd.GeoDataFrame:
        """Converts the query results to a GeoPandas GeoDataFrame.

        Args:
            longitude_column (str): The name of the column representing longitude.
            latitude_column (str): The name of the column representing latitude.
            crs (str, optional): The coordinate reference system to use. Defaults to "EPSG:4326".

        Raises:
            ImportError: If ``geopandas`` is not installed.

        Returns:
            gpd.GeoDataFrame: The query results as a GeoPandas GeoDataFrame.
        """

        try:
            import geopandas as gpd
        except ImportError as e:
            raise ImportError(
                "This function requires `geopandas`. Install with `pip install beacon-api[geopandas]`."
            ) from e

        self.set_output(GeoParquet(longitude_column=longitude_column, latitude_column=latitude_column))
        response = self.run()
        bytes_io = BytesIO(response.content)
        # Read into parquet arrow table
        table = pq.read_table(bytes_io)

        gdf = gpd.GeoDataFrame.from_arrow(table)
        gdf.set_crs(crs, inplace=True)
        return gdf

    def to_odv(self, odv_output: Odv, filename: str):
        """Exports the query results to an ODV file.

        Args:
            odv_output (Odv): The ODV output format to use.
            filename (str): The path to the file where the ODV data will be saved.
        """
        self.set_output(odv_output)
        response = self.run()
        with open(filename, "wb") as f:
            # Write the content of the response to a file
            f.write(response.content)

`__init__(http_session, from_table=None, from_file_path=None)`

A class to build and run Beacon JSON Queries. Best to construct this object using the Client object or Table object.

Source code in beacon_api/query.py

def __init__(self, http_session: BaseBeaconSession, from_table: Optional[str] = None, from_file_path: Optional[str] = None):
    """Create a builder for Beacon JSON Queries.

    Prefer constructing this object through the Client object or Table object.

    Args:
        http_session (BaseBeaconSession): Session stored on the builder.
        from_table (str | None, optional): Table name stored on the builder. Defaults to None.
        from_file_path (str | None, optional): File path stored on the builder. Defaults to None.
    """
    # Bind all three constructor arguments in one statement, left to right.
    self.http_session, self.from_table, self.from_file_path = (
        http_session,
        from_table,
        from_file_path,
    )

`add_bbox_filter(longitude_column, latitude_column, bbox)`

Adds a bounding box filter to the query.

Args: longitude_column (str): The name of the column for longitude. latitude_column (str): The name of the column for latitude. bbox (tuple[float, float, float, float]): The bounding box coordinates in the order (min_lon, min_lat, max_lon, max_lat).

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def add_bbox_filter(
    self,
    longitude_column: str,
    latitude_column: str,
    bbox: Tuple[float, float, float, float],
) -> Self:
    """Adds a bounding box filter to the query.

    The box is appended as one AND filter combining a lower (``gt_eq``) and an
    upper (``lt_eq``) range filter for each coordinate column.

    Args:
        longitude_column (str): The name of the column for longitude.
        latitude_column (str): The name of the column for latitude.
        bbox (tuple[float, float, float, float]): The bounding box coordinates
            in the order (min_lon, min_lat, max_lon, max_lat).

    Returns:
        Self: The query builder instance.
    """
    if not hasattr(self, "filters"):
        self.filters = []
    # Name the corners so the positional order of ``bbox`` is explicit:
    # indices 0 and 2 bound the longitude, indices 1 and 3 bound the latitude.
    min_lon, min_lat, max_lon, max_lat = bbox[0], bbox[1], bbox[2], bbox[3]
    self.filters.append(
        AndFilter(
            filters=[
                RangeFilter(column=longitude_column, gt_eq=min_lon),
                RangeFilter(column=longitude_column, lt_eq=max_lon),
                RangeFilter(column=latitude_column, gt_eq=min_lat),
                RangeFilter(column=latitude_column, lt_eq=max_lat),
            ]
        )
    )
    return self

`add_equals_filter(column, eq)`

Adds an EQUALS filter to the query.

Args: column (str): The name of the column to filter. eq (str | int | float | bool | datetime): The value to compare against.

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def add_equals_filter(
    self, column: str, eq: Union[str, int, float, bool, datetime]
) -> Self:
    """Append an EQUALS filter for ``column`` to the query.

    The filter list is created on the instance if it does not exist yet.

    Args:
        column (str): The name of the column to filter.
        eq (str | int | float | bool | datetime): The value to compare against.

    Returns:
        Self: The query builder instance.
    """
    if hasattr(self, "filters"):
        active_filters = self.filters
    else:
        active_filters = self.filters = []
    equals = EqualsFilter(column=column, eq=eq)
    active_filters.append(equals)
    return self

`add_filter(filter)`

Adds a filter to the query.

Args: filter (Filter): The filter to add.

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def add_filter(self, filter: Filter) -> Self:
    """Append one filter to the query.

    The filter list is created on the instance if it does not exist yet.

    Args:
        filter (Filter): The filter to add.

    Returns:
        Self: The query builder instance.
    """
    try:
        active_filters = self.filters
    except AttributeError:
        # First filter on this builder: start a fresh list.
        active_filters = self.filters = []
    active_filters.append(filter)
    return self

`add_is_not_null_filter(column)`

Adds an IS NOT NULL filter to the query.

Args: column (str): The name of the column to filter.

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def add_is_not_null_filter(self, column: str) -> Self:
    """Append an IS NOT NULL filter for ``column`` to the query.

    The filter list is created on the instance if it does not exist yet.

    Args:
        column (str): The name of the column to filter.

    Returns:
        Self: The query builder instance.
    """
    try:
        active_filters = self.filters
    except AttributeError:
        active_filters = self.filters = []
    not_null = IsNotNullFilter(column=column)
    active_filters.append(not_null)
    return self

`add_is_null_filter(column)`

Adds an IS NULL filter to the query.

Args: column (str): The name of the column to filter.

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def add_is_null_filter(self, column: str) -> Self:
    """Append an IS NULL filter for ``column`` to the query.

    The filter list is created on the instance if it does not exist yet.

    Args:
        column (str): The name of the column to filter.

    Returns:
        Self: The query builder instance.
    """
    if hasattr(self, "filters"):
        active_filters = self.filters
    else:
        active_filters = self.filters = []
    is_null = FilterIsNull(column=column)
    active_filters.append(is_null)
    return self

`add_not_equals_filter(column, neq)`

Adds a NOT EQUALS filter to the query.

Args: column (str): The name of the column to filter. neq (str | int | float | bool | datetime): The value to compare against.

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def add_not_equals_filter(
    self, column: str, neq: Union[str, int, float, bool, datetime]
) -> Self:
    """Append a NOT EQUALS filter for ``column`` to the query.

    The filter list is created on the instance if it does not exist yet.

    Args:
        column (str): The name of the column to filter.
        neq (str | int | float | bool | datetime): The value to compare against.

    Returns:
        Self: The query builder instance.
    """
    try:
        active_filters = self.filters
    except AttributeError:
        active_filters = self.filters = []
    differs = NotEqualsFilter(column=column, neq=neq)
    active_filters.append(differs)
    return self

`add_polygon_filter(longitude_column, latitude_column, polygon)`

Adds a POLYGON filter to the query.

Args: longitude_column (str): The name of the column for longitude. latitude_column (str): The name of the column for latitude. polygon (list[tuple[float, float]]): A list of (longitude, latitude) tuples defining the polygon.

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def add_polygon_filter(self, longitude_column: str, latitude_column: str, polygon: List[Tuple[float, float]]) -> Self:
    """Append a POLYGON filter to the query.

    The filter list is created on the instance if it does not exist yet.

    Args:
        longitude_column (str): The name of the column for longitude.
        latitude_column (str): The name of the column for latitude.
        polygon (list[tuple[float, float]]): A list of (longitude, latitude) tuples defining the polygon.

    Returns:
        Self: The query builder instance.
    """
    if hasattr(self, "filters"):
        active_filters = self.filters
    else:
        active_filters = self.filters = []
    inside_polygon = PolygonFilter(
        longitude_column=longitude_column,
        latitude_column=latitude_column,
        polygon=polygon,
    )
    active_filters.append(inside_polygon)
    return self

`add_range_filter(column, gt_eq=None, lt_eq=None)`

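Adds a RANGE filter to the query.

Args: column (str): The name of the column to filter. gt_eq (str | int | float | datetime | None, optional): The lower bound for the range filter. Defaults to None. lt_eq (str | int | float | datetime | None, optional): The upper bound for the range filter. Defaults to None.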

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def add_range_filter(
    self,
    column: str,
    gt_eq: Union[str, int, float, datetime, None] = None,
    lt_eq: Union[str, int, float, datetime, None] = None,
) -> Self:
    """Append a RANGE filter for ``column`` to the query.

    The filter list is created on the instance if it does not exist yet.

    Args:
        column (str): The name of the column to filter.
        gt_eq (str | int | float | datetime | None, optional): The lower bound for the range filter. Defaults to None.
        lt_eq (str | int | float | datetime | None, optional): The upper bound for the range filter. Defaults to None.

    Returns:
        Self: The query builder instance.
    """
    try:
        active_filters = self.filters
    except AttributeError:
        active_filters = self.filters = []
    bounds = RangeFilter(column=column, gt_eq=gt_eq, lt_eq=lt_eq)
    active_filters.append(bounds)
    return self

`add_select_coalesced(mergeable_columns, alias)`

Adds a coalesced select to the query.

Args: mergeable_columns (list[str]): The columns to merge. alias (str): The alias for the merged column.

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def add_select_coalesced(self, mergeable_columns: List[str], alias: str) -> Self:
    """Append a ``coalesce`` function select over the given columns.

    Each column name is wrapped in a column select and passed, in order, as an
    argument of the function call. The select list is created on the instance
    if it does not exist yet.

    Args:
        mergeable_columns (list[str]): The columns to merge.
        alias (str): The alias for the merged column.

    Returns:
        Self: The query builder instance.
    """
    if hasattr(self, "selects"):
        chosen = self.selects
    else:
        chosen = self.selects = []

    chosen.append(
        SelectFunction(
            "coalesce",
            args=[SelectColumn(column=name) for name in mergeable_columns],
            alias=alias,
        )
    )
    return self

`add_select_column(column, alias=None)`

Adds a select column to the query.

Args: column (str): The name of the column to select. alias (str | None, optional): An optional alias for the column. Defaults to None.

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def add_select_column(self, column: str, alias: Optional[str] = None) -> Self:
    """Append a single column select to the query.

    The select list is created on the instance if it does not exist yet.

    Args:
        column (str): The name of the column to select.
        alias (str | None, optional): An optional alias for the column. Defaults to None.

    Returns:
        Self: The query builder instance.
    """
    try:
        chosen = self.selects
    except AttributeError:
        chosen = self.selects = []
    picked = SelectColumn(column=column, alias=alias)
    chosen.append(picked)
    return self

`add_select_columns(columns)`

Adds multiple select columns to the query.

Args: columns (List[Tuple[str, Optional[str]]]): A list of tuples containing column names and their aliases.

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def add_select_columns(self, columns: List[Tuple[str, Optional[str]]]) -> Self:
    """Append one column select per ``(column, alias)`` pair.

    The select list is created on the instance if it does not exist yet.

    Args:
        columns (List[Tuple[str, Optional[str]]]): A list of tuples containing column names and their aliases.

    Returns:
        Self: The query builder instance.
    """
    if hasattr(self, "selects"):
        chosen = self.selects
    else:
        chosen = self.selects = []
    push = chosen.append
    for name, label in columns:
        push(SelectColumn(column=name, alias=label))
    return self

`add_selects(selects)`

Adds multiple select statements to the query.

Args: selects (list[Select]): The select statements to add.

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def add_selects(self, selects: List[Select]) -> Self:
    """Append several select statements to the query.

    The select list is created on the instance if it does not exist yet.

    Args:
        selects (list[Select]): The select statements to add.

    Returns:
        Self: The query builder instance.
    """
    try:
        chosen = self.selects
    except AttributeError:
        # First select on this builder: start a fresh list.
        chosen = self.selects = []
    chosen.extend(selects)
    return self

`compile_query()`

Compiles the query into a Beacon JSON Query.

Raises: ValueError: If both from_table and from_file_path are set. ValueError: If the output or the selects have not been set. TypeError: If a value in the query cannot be serialized to JSON.

Returns: str: The compiled query as a JSON string.

Source code in beacon_api/query.py

def compile_query(self) -> str:
    """Compiles the query into a Beacon JSON Query.

    The ``from`` field is the file path when ``from_file_path`` is set, the
    table name when ``from_table`` is set, and ``"default"`` when neither is.

    Raises:
        ValueError: If both ``from_table`` and ``from_file_path`` are set.
        ValueError: If the output or the selects have not been set.
        TypeError: If a value in the query is neither JSON-serializable nor a datetime.

    Returns:
        str: The compiled query as a JSON string.
    """
    if self.from_table and self.from_file_path:
        raise ValueError("Cannot set both from_table and from_file_path")
    # Falls back to the "default" source when neither source is given.
    from_ = self.from_file_path or self.from_table or "default"

    if not hasattr(self, "output"):
        raise ValueError("Output must be set before compiling the query")

    if not hasattr(self, "selects"):
        raise ValueError("Selects must be set before compiling the query")

    # The guards above guarantee output and selects exist from here on, so
    # only the optional filters still need a fallback.
    query = {
        "from": from_,
        "select": [s.to_dict() for s in self.selects],
        "filters": [f.to_dict() for f in getattr(self, "filters", [])],
        "output": self.output.to_dict(),
    }

    # json.dumps cannot serialize datetime objects on its own, so they are
    # rendered as "YYYY-MM-DDTHH:MM:SS.ffffff" strings (no UTC offset is emitted).
    def datetime_converter(o):
        if isinstance(o, datetime):
            return o.strftime("%Y-%m-%dT%H:%M:%S.%f")
        raise TypeError(f"Type {type(o)} not serializable")

    return json.dumps(query, default=datetime_converter)

`explain()`

Get the query plan

Source code in beacon_api/query.py

def explain(self) -> dict:
    """Ask the server for the plan of this query.

    The compiled query is posted to ``/api/explain-query``.

    Raises:
        Exception: If the server does not answer with status 200.

    Returns:
        The decoded JSON body of the response.
    """
    compiled = self.compile_query()
    reply = self.http_session.post("/api/explain-query", data=compiled)
    if reply.status_code == 200:
        return reply.json()
    raise Exception(f"Explain query failed: {reply.text}")

`explain_visualize()`

Visualize the query plan using networkx and matplotlib

Source code in beacon_api/query.py

def explain_visualize(self):
    """Draw the query plan returned by ``explain()`` as a directed graph.

    The plan is read from ``explain()[0]["Plan"]``. Every plan node becomes
    a graph node and every entry of a node's ``"Plans"`` list becomes a
    child connected by an edge from its parent. The figure is shown with
    ``matplotlib.pyplot.show()``.

    Raises:
        ImportError: If ``networkx`` or ``matplotlib`` cannot be imported.
    """
    try:
        import networkx as nx
        import matplotlib.pyplot as plt
    except ImportError as e:
        raise ImportError(
            "This function requires `networkx` and `matplotlib`. Install with `pip install beacon-api[profiling]`."
        ) from e

    plan_json = self.explain()
    top_plan = plan_json[0]["Plan"]

    # Optional plan fields that are appended to a node's label when present
    # and truthy, in this order.
    label_fields = (
        "File Type",
        "Options",
        "Condition",
        "Output URL",
        "Expressions",
        "Output",
        "Filter",
    )

    def describe(plan_node):
        """Return a multi-line label: the node type, then the present fields."""
        lines = [plan_node.get("Node Type", "<unknown>")]
        lines += [
            f"{name}: {plan_node[name]}"
            for name in label_fields
            if plan_node.get(name)
        ]
        return "\n".join(lines)

    graph = nx.DiGraph()

    def attach(plan_node, parent_key=None):
        """Add ``plan_node`` and, recursively, its sub-plans to the graph."""
        # The object identity of the plan dict serves as the graph node key.
        key = id(plan_node)
        graph.add_node(key, label=describe(plan_node))
        if parent_key is not None:
            graph.add_edge(parent_key, key)
        for sub_plan in plan_node.get("Plans", []):
            attach(sub_plan, key)

    attach(top_plan)

    # Use the graphviz "dot" layout when it works; any failure falls back to
    # networkx's spring layout.
    try:
        positions = nx.nx_agraph.graphviz_layout(graph, prog="dot")
    except Exception:
        positions = nx.spring_layout(graph)

    plt.figure(figsize=(8, 6))
    node_labels = nx.get_node_attributes(graph, "label")
    nx.draw(
        graph,
        positions,
        labels=node_labels,
        with_labels=True,
        node_size=2000,
        font_size=8,
    )
    plt.title("Beacon Query Plan Visualization")
    plt.tight_layout()
    plt.show()

`filter(filters)`

Sets the filters of the query, replacing any filters that were set previously.

Args: filters (list[Filter]): The filters to add.

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def filter(self, filters: List[Filter]) -> Self:
    """Set the filters of the query, replacing any that were set before.

    Args:
        filters (list[Filter]): The filters the query should use.

    Returns:
        Self: The query builder instance.
    """
    # Keep a copy so the query does not alias the caller's list: changing
    # one of them in place afterwards must not silently change the other.
    self.filters = list(filters)
    return self

`run()`

Run the query and return the response

Source code in beacon_api/query.py

def run(self) -> Response:
    """Compile the query, POST it to ``/api/query`` and return the response.

    The compiled query is emitted as a DEBUG record on this module's logger
    instead of being printed, so using the library does not write to stdout.

    Returns:
        Response: The response returned by ``self.http_session.post``.

    Raises:
        Exception: If the response status code is not 200, or if the
            response body is empty.
    """
    import logging  # local import keeps this block self-contained

    query = self.compile_query()
    logging.getLogger(__name__).debug("Running query: %s", query)
    response = self.http_session.post("/api/query", data=query)
    if response.status_code != 200:
        raise Exception(f"Query failed: {response.text}")
    # Truthiness also covers a missing (None) body, not only b"".
    if not response.content:
        raise Exception("Query returned no content")
    return response

`set_output(output)`

Sets the output format for the query.

Args: output (Output): The output format to use.

Returns: Self: The query builder instance.

Source code in beacon_api/query.py

def set_output(self, output: Output) -> Self:
    """Choose the output format of the query.

    The given object is stored on ``self.output``, replacing any output
    that was set before.

    Args:
        output (Output): The output format to use.

    Returns:
        Self: The query builder instance, to allow call chaining.
    """
    self.output = output
    return self

`to_arrow(filename)`

Converts the query result to Apache Arrow format and writes it to a file.

Args: filename (str): The path to the file where the Arrow-formatted data will be saved.

Returns: None

Side Effects: Writes the Arrow-formatted response content to the specified file.

Source code in beacon_api/query.py

def to_arrow(self, filename: str, streaming_chunk_size: int = 1024 * 1024):
    """
    Runs the query with Arrow output and writes the response body to a file.

    Args:
        filename (str): The path to the file where the Arrow-formatted data will be saved.
        streaming_chunk_size (int): The chunk size passed to ``response.iter_content``
            while writing the file. Defaults to 1024 * 1024, the value that was
            previously hard-coded.

    Returns:
        None

    Side Effects:
        Sets the query output to ``Arrow()`` and writes the response content
        to the specified file (opened in binary write mode).
    """
    self.set_output(Arrow())
    response = self.run()

    with open(filename, "wb") as f:
        # Write the content of the response to the file chunk by chunk
        for chunk in response.iter_content(chunk_size=streaming_chunk_size):
            if chunk:  # skip keep-alive chunks
                f.write(chunk)

`to_csv(filename, streaming_chunk_size=1024 * 1024)`

Exports the query results to a CSV file.

Args: filename (str): The path to the file where the CSV data will be saved.

Source code in beacon_api/query.py

def to_csv(self, filename: str, streaming_chunk_size: int = 1024 * 1024):
    """Run the query with CSV output and save the response body to a file.

    Args:
        filename (str): The path of the file to write; opened in binary write mode.
        streaming_chunk_size (int): The chunk size passed to
            ``response.iter_content``. Defaults to 1024 * 1024.
    """
    self.set_output(CSV())
    reply = self.run()

    with open(filename, "wb") as out_file:
        # Non-empty chunks are written in order; empty (keep-alive) chunks
        # are dropped.
        out_file.writelines(
            piece
            for piece in reply.iter_content(chunk_size=streaming_chunk_size)
            if piece
        )

`to_geo_pandas_dataframe(longitude_column, latitude_column, crs='EPSG:4326')`

Converts the query results to a GeoPandas GeoDataFrame.

Args: longitude_column (str): The name of the column representing longitude. latitude_column (str): The name of the column representing latitude. crs (str, optional): The coordinate reference system to use. Defaults to "EPSG:4326".

Returns: gpd.GeoDataFrame: The query results as a GeoPandas GeoDataFrame.

Source code in beacon_api/query.py

def to_geo_pandas_dataframe(self, longitude_column: str, latitude_column: str, crs: str = "EPSG:4326") -> gpd.GeoDataFrame:
    """Run the query with GeoParquet output and load it as a GeoDataFrame.

    The complete response body is held in memory, read as a parquet table
    and converted with ``GeoDataFrame.from_arrow``; ``crs`` is then set on
    the resulting frame in place.

    Args:
        longitude_column (str): The name of the column representing longitude.
        latitude_column (str): The name of the column representing latitude.
        crs (str, optional): The coordinate reference system to set. Defaults to "EPSG:4326".

    Returns:
        gpd.GeoDataFrame: The query results as a GeoPandas GeoDataFrame.

    Raises:
        ImportError: If ``geopandas`` cannot be imported.
    """

    try:
        import geopandas as gpd
    except ImportError as e:
        raise ImportError(
            "This function requires `geopandas`. Install with `pip install beacon-api[geopandas]`."
        ) from e

    geo_output = GeoParquet(longitude_column=longitude_column, latitude_column=latitude_column)
    self.set_output(geo_output)
    reply = self.run()

    # Parse the in-memory parquet payload into an arrow table.
    arrow_table = pq.read_table(BytesIO(reply.content))

    frame = gpd.GeoDataFrame.from_arrow(arrow_table)
    frame.set_crs(crs, inplace=True)
    return frame

`to_geoparquet(filename, longitude_column, latitude_column, streaming_chunk_size=1024 * 1024)`

Exports the query results to a GeoParquet file.

Args: filename (str): The path to the file where the GeoParquet data will be saved. longitude_column (str): The name of the column representing longitude. latitude_column (str): The name of the column representing latitude.

Source code in beacon_api/query.py

def to_geoparquet(self, filename: str, longitude_column: str, latitude_column: str, streaming_chunk_size: int = 1024 * 1024):
    """
    Run the query with GeoParquet output and save the response body to a file.

    Args:
        filename (str): The path of the file to write; opened in binary write mode.
        longitude_column (str): The name of the column representing longitude.
        latitude_column (str): The name of the column representing latitude.
        streaming_chunk_size (int): The chunk size passed to
            ``response.iter_content``. Defaults to 1024 * 1024.
    """
    geo_output = GeoParquet(longitude_column=longitude_column, latitude_column=latitude_column)
    self.set_output(geo_output)
    reply = self.run()

    with open(filename, "wb") as out_file:
        # Non-empty chunks are written in order; empty (keep-alive) chunks
        # are dropped.
        out_file.writelines(
            piece
            for piece in reply.iter_content(chunk_size=streaming_chunk_size)
            if piece
        )

`to_netcdf(filename, build_nc_local=True)`

Exports the query result to a NetCDF file.

Args: filename (str): The name of the output NetCDF file. build_nc_local (bool): If True, build the NetCDF file locally using pandas and xarray (this is likely faster in most cases). If False, use the server to build the NetCDF file.

Source code in beacon_api/query.py

def to_netcdf(self, filename: str, build_nc_local: bool = True):
    """Write the query result to a NetCDF file.

    Args:
        filename (str): The name of the output NetCDF file.
        build_nc_local (bool):
            If True, fetch the result with ``to_pandas_dataframe()``, convert it
            with ``to_xarray()`` and write it locally with ``to_netcdf``.
            If False, request NetCDF output from the server and save the
            response body to ``filename``.
    """
    if not build_nc_local:
        # Server-side build: stream the NetCDF response body to disk.
        self.set_output(NetCDF())
        reply = self.run()
        with open(filename, "wb") as out_file:
            out_file.writelines(
                piece
                for piece in reply.iter_content(chunk_size=1024 * 1024)
                if piece  # drop empty keep-alive chunks
            )
        return

    # Local build: DataFrame -> xarray object -> NetCDF file.
    frame = self.to_pandas_dataframe()
    dataset = frame.to_xarray()
    dataset.to_netcdf(filename, mode="w")

`to_odv(odv_output, filename)`

Exports the query results to an ODV file.

Args: odv_output (Odv): The ODV output format to use. filename (str): The path to the file where the ODV data will be saved.

Source code in beacon_api/query.py

def to_odv(self, odv_output: Odv, filename: str):
    """Run the query with the given ODV output and save the result to a file.

    Args:
        odv_output (Odv): The ODV output format to use.
        filename (str): The path of the file to write; opened in binary write mode.
    """
    self.set_output(odv_output)
    reply = self.run()
    with open(filename, "wb") as out_file:
        # The complete response body is written in a single call.
        out_file.write(reply.content)

`to_pandas_dataframe()`

Converts the query results to a pandas DataFrame.

Returns: pd.DataFrame: The query results as a pandas DataFrame.

Source code in beacon_api/query.py

def to_pandas_dataframe(self) -> pd.DataFrame:
    """Run the query with Parquet output and load the result into pandas.

    The complete response body is held in memory and parsed with
    ``pd.read_parquet``.

    Returns:
        pd.DataFrame: The query results as a pandas DataFrame.
    """
    self.set_output(Parquet())
    reply = self.run()
    return pd.read_parquet(BytesIO(reply.content))

`to_parquet(filename, streaming_chunk_size=1024 * 1024)`

Exports the query results to a Parquet file.

This method sets the output format to Parquet, executes the query, and writes the resulting data to the specified file.

Args: filename (str): The path to the file where the Parquet data will be saved.

Returns: None

Source code in beacon_api/query.py

def to_parquet(self, filename: str, streaming_chunk_size: int = 1024 * 1024):
    """
    Run the query with Parquet output and save the response body to a file.

    Args:
        filename (str): The path of the file to write; opened in binary write mode.
        streaming_chunk_size (int): The chunk size passed to
            ``response.iter_content``. Defaults to 1024 * 1024.

    Returns:
        None
    """
    self.set_output(Parquet())
    reply = self.run()

    with open(filename, "wb") as out_file:
        # Non-empty chunks are written in order; empty (keep-alive) chunks
        # are dropped.
        out_file.writelines(
            piece
            for piece in reply.iter_content(chunk_size=streaming_chunk_size)
            if piece
        )

`to_zarr(filename)`

Exports the query results to a Zarr file.

Args: filename (str): The path to the file where the Zarr data will be saved.

Source code in beacon_api/query.py

def to_zarr(self, filename: str):
    """Write the query result to a Zarr store.

    The result is fetched with ``to_pandas_dataframe()``, converted with
    ``to_xarray()`` and written with ``to_zarr(filename, mode="w")``.

    Args:
        filename (str): The path where the Zarr data will be saved.

    Raises:
        ImportError: If ``zarr`` cannot be imported.
    """

    # The import only verifies that zarr is installed; the name is not used.
    try:
        import zarr
    except ImportError as e:
        raise ImportError(
            "This function requires `zarr`. Install with `pip install beacon-api[zarr]`."
        ) from e

    frame = self.to_pandas_dataframe()
    dataset = frame.to_xarray()
    dataset.to_zarr(filename, mode="w")

Query Reference

__init__(http_session, from_table=None, from_file_path=None)

add_bbox_filter(longitude_column, latitude_column, bbox)

add_equals_filter(column, eq)

add_filter(filter)

add_is_not_null_filter(column)

add_is_null_filter(column)

add_not_equals_filter(column, neq)

add_polygon_filter(longitude_column, latitude_column, polygon)

add_range_filter(column, gt_eq=None, lt_eq=None)

add_select_coalesced(mergeable_columns, alias)

add_select_column(column, alias=None)

add_select_columns(columns)

add_selects(selects)

compile_query()

explain()

explain_visualize()

filter(filters)

run()

set_output(output)

to_arrow(filename)

to_csv(filename, streaming_chunk_size=1024 * 1024)

to_geo_pandas_dataframe(longitude_column, latitude_column, crs='EPSG:4326')

to_geoparquet(filename, longitude_column, latitude_column, streaming_chunk_size=1024 * 1024)

to_netcdf(filename, build_nc_local=True)

to_odv(odv_output, filename)

to_pandas_dataframe()

to_parquet(filename, streaming_chunk_size=1024 * 1024)

to_zarr(filename)

`__init__(http_session, from_table=None, from_file_path=None)`

`add_bbox_filter(longitude_column, latitude_column, bbox)`

`add_equals_filter(column, eq)`

`add_filter(filter)`

`add_is_not_null_filter(column)`

`add_is_null_filter(column)`

`add_not_equals_filter(column, neq)`

`add_polygon_filter(longitude_column, latitude_column, polygon)`

`add_range_filter(column, gt_eq=None, lt_eq=None)`

`add_select_coalesced(mergeable_columns, alias)`

`add_select_column(column, alias=None)`

`add_select_columns(columns)`

`add_selects(selects)`

`compile_query()`

`explain()`

`explain_visualize()`

`filter(filters)`

`run()`

`set_output(output)`

`to_arrow(filename)`

`to_csv(filename, streaming_chunk_size=1024 * 1024)`

`to_geo_pandas_dataframe(longitude_column, latitude_column, crs='EPSG:4326')`

`to_geoparquet(filename, longitude_column, latitude_column, streaming_chunk_size=1024 * 1024)`

`to_netcdf(filename, build_nc_local=True)`

`to_odv(odv_output, filename)`

`to_pandas_dataframe()`

`to_parquet(filename, streaming_chunk_size=1024 * 1024)`

`to_zarr(filename)`