Source code for pyrfu.mms.get_data

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Built-in imports
import json
import logging
import os
from typing import Mapping, Optional, Tuple, Union

# 3rd party imports
import requests
from botocore.exceptions import ClientError
from requests.sessions import Session
from xarray.core.dataarray import DataArray
from xarray.core.dataset import Dataset

# Local imports
from pyrfu.mms.db_init import MMS_CFG_PATH
from pyrfu.mms.get_dist import get_dist
from pyrfu.mms.get_ts import get_ts
from pyrfu.mms.list_files import list_files
from pyrfu.mms.list_files_aws import list_files_aws
from pyrfu.mms.list_files_sdc import _login_lasp, list_files_sdc
from pyrfu.mms.tokenize import tokenize
from pyrfu.pyrf.dist_append import dist_append
from pyrfu.pyrf.ts_append import ts_append
from pyrfu.pyrf.ttns2datetime64 import ttns2datetime64

# Module metadata
__author__ = "Louis Richard"
__email__ = "louisr@irfu.se"
__copyright__ = "Copyright 2020-2024"
__license__ = "MIT"
__version__ = "2.4.13"
__status__ = "Prototype"

# Redirect messages issued through the ``warnings`` module to the logging system
logging.captureWarnings(True)
# Configure the root logger when this module is imported: timestamped messages
# at INFO level and above. ``basicConfig`` has no effect if the root logger
# already has handlers.
logging.basicConfig(
    format="[%(asctime)s] %(levelname)s: %(message)s",
    datefmt="%d-%b-%y %H:%M:%S",
    level=logging.INFO,
)


def _var_and_cdf_name(
    var_str: str, mms_id: str
) -> Tuple[Mapping[str, Union[str, int]], str]:
    r"""Split a variable key into its tokens and build the matching cdf name.

    Parameters
    ----------
    var_str : str
        Key of the variable, handed unchanged to ``tokenize``.
    mms_id : str
        Spacecraft identifier, inserted right after the ``mms`` prefix.

    Returns
    -------
    tokens : dict
        Output of ``tokenize(var_str)``.
    cdf_name : str
        ``mms{mms_id}_`` followed by the ``"cdf_name"`` entry of ``tokens``.

    """
    tokens = tokenize(var_str)
    suffix = tokens["cdf_name"]
    return tokens, f"mms{mms_id}_{suffix}"


def _check_times(inp: Union[DataArray, Dataset]) -> Union[DataArray, Dataset]:
    r"""Make sure the ``time`` coordinate is not stored as ``int64``.

    Parameters
    ----------
    inp : DataArray or Dataset
        Object with a ``time`` coordinate.

    Returns
    -------
    out : DataArray or Dataset
        ``inp`` itself when the dtype of its ``time`` data is not ``int64``.
        Otherwise a copy whose ``time`` coordinate is replaced by the output
        of ``ttns2datetime64`` applied to the original values.

    """
    times = inp.time.data

    if times.dtype == "int64":
        return inp.assign_coords(time=ttns2datetime64(times))

    return inp


def _list_files_sources(
    source: str,
    tint: list[str],
    mms_id: str,
    var: Mapping[str, Union[str, int]],
    data_path: Optional[str] = "",
):
    r"""List the files holding a variable for the requested source.

    Parameters
    ----------
    source : {"local", "sdc", "aws"}
        Resource to list the files from.
    tint : list of str
        Time interval.
    mms_id : str
        Spacecraft identifier.
    var : dict
        Variable dictionary.
    data_path : str, Optional
        Path forwarded to ``list_files``. Only used when ``source`` is
        ``"local"``. Default is "".

    Returns
    -------
    file_names : list
        Output of ``list_files`` for ``"local"``, the ``"url"`` entries
        returned by ``list_files_sdc`` for ``"sdc"``, or the ``"s3_obj"``
        entries returned by ``list_files_aws`` for ``"aws"``.
    sdc_session : Session or None
        Session returned by ``_login_lasp`` for ``"sdc"``, None otherwise.
    headers : dict
        Headers returned by ``_login_lasp`` for ``"sdc"``, empty otherwise.

    Raises
    ------
    NotImplementedError
        If ``source`` is not one of the supported resources.

    """
    if source not in ("local", "sdc", "aws"):
        raise NotImplementedError(f"Resource {source} is not yet implemented!!")

    if source == "local":
        return list_files(tint, mms_id, var, data_path), None, {}

    if source == "aws":
        s3_objects = [
            entry.get("s3_obj") for entry in list_files_aws(tint, mms_id, var)
        ]
        return s3_objects, None, {}

    # Remaining case is "sdc": list the files first, then log in
    urls = [entry.get("url") for entry in list_files_sdc(tint, mms_id, var)]
    session, sdc_headers, _ = _login_lasp()
    return urls, session, sdc_headers


def _get_file_content_sources(
    source: str,
    file_name: str,
    sdc_session: Optional[Session] = None,
    headers: Optional[dict] = None,
) -> bytes:
    r"""Get file content from different sources.

    Parameters
    ----------
    source : {"local", "sdc", "aws"}
        Source of the data.
    file_name : str
        File to read: a path for ``"local"`` and a URL for ``"sdc"``. For
        ``"aws"`` the value is used as an object exposing ``get()`` whose
        result holds a readable ``"Body"`` entry.
    sdc_session : Session, Optional
        SDC session, required when ``source`` is ``"sdc"``. Default is None.
    headers : dict, Optional
        Headers sent with the SDC request. Default is None.

    Returns
    -------
    file_content : bytes
        File content.

    Raises
    ------
    requests.RequestException
        If the request to the SDC fails. The failure is logged first.
    botocore.exceptions.ClientError
        If the AWS request fails. ``InternalError`` failures are logged first.
    NotImplementedError
        If ``source`` is not one of the supported resources.

    """
    if source == "local":
        file_path = os.path.normpath(file_name)
        with open(file_path, "rb") as file:
            file_content = file.read()
    elif source == "sdc":
        try:
            response = sdc_session.get(file_name, timeout=None, headers=headers)
            response.raise_for_status()  # Raise an HTTPError for bad responses
            file_content = response.content
        except requests.RequestException:
            logging.error("Error retrieving file from %s", file_name)
            # Propagate the real failure: there is no content to return, and
            # falling through would only raise an unrelated UnboundLocalError.
            raise
    elif source == "aws":
        try:
            response = file_name.get()
            file_content = response["Body"].read()
        except ClientError as err:
            if err.response["Error"]["Code"] == "InternalError":  # Generic error
                logging.error("Error Message: %s", err.response["Error"]["Message"])

                # Metadata may be absent; fall back to an empty mapping
                response_meta = err.response.get("ResponseMetadata") or {}
                logging.error("Request ID: %s", response_meta.get("RequestId"))
                logging.error("Http code: %s", response_meta.get("HTTPStatusCode"))

            # Always re-raise, including after logging an InternalError, so
            # the caller sees the AWS error instead of an UnboundLocalError.
            raise
    else:
        raise NotImplementedError(f"Resource {source} is not yet implemented!!")

    return file_content


def get_data(
    var_str: str,
    tint: list,
    mms_id: Union[int, str],
    verbose: bool = True,
    data_path: str = "",
    source: str = "",
) -> Union[DataArray, Dataset]:
    r"""Load a variable `var_str`.

    AFG:
        b_bcs_afg_srvy_l2pre, b_dmpa_afg_srvy_l2pre, b_gse_afg_srvy_l2pre,
        b_gsm_afg_srvy_l2pre

    DFG:
        b_bcs_dfg_srvy_l2pre, b_dmpa_dfg_srvy_l2pre, b_gse_dfg_srvy_l2pre,
        b_gsm_dfg_srvy_l2pre

    EDP:
        e2d_dsl_edp_brst_l2pre, e2d_dsl_edp_brst_ql, e2d_dsl_edp_fast_l2pre,
        e2d_dsl_edp_fast_ql, e_dsl_edp_brst_l2, e_dsl_edp_brst_l2pre,
        e_dsl_edp_brst_ql, e_dsl_edp_fast_l2, e_dsl_edp_fast_l2pre,
        e_dsl_edp_fast_ql, e_dsl_edp_slow_l2, e_dsl_edp_slow_l2pre,
        e_gse_edp_brst_l2, e_gse_edp_fast_l2, e_gse_edp_slow_l2,
        e_ssc_edp_brst_l2a, e_ssc_edp_fast_l2a, e_ssc_edp_slow_l2a,
        hmfe_dsl_edp_brst_l2, phase_edp_fast_l2a, phase_edp_slow_l2a,
        sdev12_edp_fast_l2a, sdev12_edp_slow_l2a, sdev34_edp_fast_l2a,
        sdev34_edp_slow_l2a, v_edp_brst_l2, v_edp_fast_l2, v_edp_fast_sitl,
        v_edp_slow_l2, v_edp_slow_sitl

    FGM:
        b_bcs_fgm_brst_l2, b_bcs_fgm_srvy_l2, b_dmpa_fgm_brst_l2,
        b_dmpa_fgm_srvy_l2, b_gse_fgm_brst_l2, b_gse_fgm_srvy_l2,
        b_gsm_fgm_brst_l2, b_gsm_fgm_srvy_l2

    FPI:
        defbgi_fpi_brst_l2, defbgi_fpi_fast_l2, defe_fpi_brst_l2,
        defe_fpi_fast_l2, defe_fpi_fast_ql, defi_fpi_brst_l2,
        defi_fpi_fast_l2, nbgi_fpi_brst_l2, nbgi_fpi_fast_l2,
        ne_fpi_brst_l2, ne_fpi_fast_l2, ne_fpi_fast_ql, ni_fpi_brst_l2,
        ni_fpi_fast_l2, ni_fpi_fast_ql, partne_fpi_brst_l2,
        partne_fpi_fast_l2, partni_fpi_brst_l2, partni_fpi_fast_l2,
        partpe_gse_fpi_brst_l2, partpe_gse_fpi_fast_l2,
        partpi_gse_fpi_brst_l2, partpi_gse_fpi_fast_l2,
        partte_dbcs_fpi_brst_l2, partte_dbcs_fpi_fast_l2,
        partte_gse_fpi_brst_l2, partte_gse_fpi_fast_l2,
        partti_dbcs_fpi_brst_l2, partti_dbcs_fpi_fast_l2,
        partti_gse_fpi_brst_l2, partti_gse_fpi_fast_l2,
        parttparae_fpi_brst_l2, parttparai_fpi_brst_l2,
        parttparai_fpi_fast_l2, parttperpe_fpi_brst_l2,
        parttperpi_fpi_brst_l2, parttperpi_fpi_fast_l2,
        partve_dbcs_fpi_brst_l2, partve_dbcs_fpi_fast_l2,
        partve_gse_fpi_brst_l2, partve_gse_fpi_fast_l2,
        partvi_dbcs_fpi_brst_l2, partvi_dbcs_fpi_fast_l2,
        partvi_gse_fpi_brst_l2, partvi_gse_fpi_fast_l2, pbgi_fpi_brst_l2,
        pbgi_fpi_fast_l2, pde_fpi_brst_l2, pde_fpi_fast_l2,
        pderre_fpi_brst_l2, pderre_fpi_fast_l2, pderri_fpi_brst_l2,
        pderri_fpi_fast_l2, pdi_fpi_brst_l2, pdi_fpi_fast_l2,
        pe_dbcs_fpi_brst_l2, pe_dbcs_fpi_fast_l2, pe_dbcs_fpi_fast_ql,
        pe_gse_fpi_brst_l2, pe_gse_fpi_fast_l2, pe_gse_fpi_fast_ql,
        pi_dbcs_fpi_brst_l2, pi_dbcs_fpi_fast_l2, pi_dbcs_fpi_fast_ql,
        pi_gse_fpi_brst_l2, pi_gse_fpi_fast_l2, pi_gse_fpi_fast_ql,
        ste_dbcs_fpi_brst_l2, ste_dbcs_fpi_fast_l2, ste_gse_fpi_brst_l2,
        ste_gse_fpi_fast_l2, sti_dbcs_fpi_brst_l2, sti_dbcs_fpi_fast_l2,
        sti_gse_fpi_brst_l2, sti_gse_fpi_fast_l2, te_dbcs_fpi_brst_l2,
        te_dbcs_fpi_fast_l2, te_dbcs_fpi_fast_ql, te_gse_fpi_brst_l2,
        te_gse_fpi_fast_l2, te_gse_fpi_fast_ql, ti_dbcs_fpi_brst_l2,
        ti_dbcs_fpi_fast_l2, ti_gse_fpi_brst_l2, ti_gse_fpi_fast_l2,
        tparae_fpi_brst_l2, tparai_fpi_brst_l2, tparai_fpi_fast_l2,
        tperpe_fpi_brst_l2, tperpi_fpi_brst_l2, tperpi_fpi_fast_l2,
        ve_dbcs_fpi_brst_l2, ve_dbcs_fpi_fast_l2, ve_dbcs_fpi_fast_ql,
        ve_gse_fpi_brst_l2, ve_gse_fpi_fast_l2, ve_gse_fpi_fast_ql,
        vi_dbcs_fpi_brst_l2, vi_dbcs_fpi_fast_l2, vi_dbcs_fpi_fast_ql,
        vi_gse_fpi_brst_l2, vi_gse_fpi_fast_l2, vi_gse_fpi_fast_ql

    FSM:
        b_gse_fsm_brst_l3

    HPCA:
        azimuth_hpca_brst_l2, azimuth_hpca_srvy_l2, dpfheplus_hpca_brst_l2,
        dpfheplus_hpca_srvy_l2, dpfheplusplus_hpca_brst_l2,
        dpfheplusplus_hpca_srvy_l2, dpfhplus_hpca_brst_l2,
        dpfhplus_hpca_srvy_l2, dpfoplus_hpca_brst_l2, dpfoplus_hpca_srvy_l2,
        nheplus_hpca_brst_l2, nheplus_hpca_srvy_l2,
        nheplusplus_hpca_brst_l2, nheplusplus_hpca_srvy_l2,
        nhplus_hpca_brst_l2, nhplus_hpca_srvy_l2, noplus_hpca_brst_l2,
        noplus_hpca_srvy_l2, saz_hpca_brst_l2, saz_hpca_srvy_l2,
        theplus_dbcs_hpca_brst_l2, theplus_dbcs_hpca_srvy_l2,
        theplusplus_dbcs_hpca_brst_l2, theplusplus_dbcs_hpca_srvy_l2,
        thplus_dbcs_hpca_brst_l2, thplus_dbcs_hpca_srvy_l2,
        toplus_dbcs_hpca_brst_l2, toplus_dbcs_hpca_srvy_l2,
        tsheplus_hpca_brst_l2, tsheplus_hpca_srvy_l2,
        tsheplusplus_hpca_brst_l2, tsheplusplus_hpca_srvy_l2,
        tshplus_hpca_brst_l2, tshplus_hpca_srvy_l2, tsoplus_hpca_brst_l2,
        tsoplus_hpca_srvy_l2, vheplus_dbcs_hpca_brst_l2,
        vheplus_dbcs_hpca_srvy_l2, vheplus_gsm_hpca_brst_l2,
        vheplus_gsm_hpca_srvy_l2, vheplusplus_dbcs_hpca_brst_l2,
        vheplusplus_dbcs_hpca_srvy_l2, vheplusplus_gsm_hpca_brst_l2,
        vheplusplus_gsm_hpca_srvy_l2, vhplus_dbcs_hpca_brst_l2,
        vhplus_dbcs_hpca_srvy_l2, vhplus_gsm_hpca_brst_l2,
        vhplus_gsm_hpca_srvy_l2, voplus_dbcs_hpca_brst_l2,
        voplus_dbcs_hpca_srvy_l2, voplus_gsm_hpca_brst_l2,
        voplus_gsm_hpca_srvy_l2

    MEC:
        r_gse_mec_srvy_l2, r_gsm_mec_srvy_l2, r_gse_mec_brst_l2,
        r_gsm_mec_brst_l2, v_gse_mec_srvy_l2, v_gsm_mec_srvy_l2,
        v_gse_mec_brst_l2, v_gsm_mec_brst_l2

    SCM:
        b_gse_scm_brst_l2

    Parameters
    ----------
    var_str : str
        Key of the target variable (use mms.get_data() to see keys.).
    tint : list of str
        Time interval.
    mms_id : str or int
        Index of the target spacecraft.
    verbose : bool, Optional
        Set to True to follow the loading. Default is True.
    data_path : str, Optional
        Local path of MMS data. Default uses that provided in
        `pyrfu/mms/config.json`
    source: {"local", "sdc", "aws"}, Optional
        Resource to fetch data from. Default uses default in
        `pyrfu/mms/config.json`

    Returns
    -------
    out : DataArray or Dataset
        Time series of the target variable measured by the target
        spacecraft over the selected time interval.

    Raises
    ------
    FileNotFoundError
        If no file is found for `var_str` in the selected source.
    NotImplementedError
        If `source` is not one of the supported resources.

    See also
    --------
    pyrfu.mms.get_ts : Read time series.
    pyrfu.mms.get_dist : Read velocity distribution function.

    Examples
    --------
    >>> from pyrfu import mms

    Define time interval

    >>> tint_brst = ["2019-09-14T07:54:00.000", "2019-09-14T08:11:00.000"]

    Index of MMS spacecraft

    >>> ic = 1

    Load magnetic field from FGM

    >>> b_xyz = mms.get_data("b_gse_fgm_brst_l2", tint_brst, ic)

    """
    # Convert mms_id to string
    mms_id = str(mms_id)

    var, cdf_name = _var_and_cdf_name(var_str, mms_id)

    # Read the current version of the MMS configuration file
    with open(MMS_CFG_PATH, "r", encoding="utf-8") as fs:
        config = json.load(fs)

    # An explicit `source` takes precedence over the configured default
    source = source if source else config.get("default")

    file_names, sdc_session, headers = _list_files_sources(
        source, tint, mms_id, var, data_path
    )

    # From here on an SDC session may be open: close it on every exit path,
    # including the "no files" error and failures while downloading/reading.
    try:
        if not file_names:
            raise FileNotFoundError(f"No files found for {var_str} in {source}")

        if verbose:
            logging.info("Loading %s...", cdf_name)

        out = None

        for file_name in file_names:
            file_content = _get_file_content_sources(
                source, file_name, sdc_session, headers
            )

            # Distributions and time series are read and appended differently
            if "-dist" in var["dtype"]:
                out = dist_append(out, get_dist(file_content, cdf_name, tint))
            else:
                out = ts_append(out, get_ts(file_content, cdf_name, tint))

        out = _check_times(out)
    finally:
        if sdc_session:
            sdc_session.close()

    return out