Source code for pyrfu.mms.feeps_remove_bad_data

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import datetime
import json

# Built-in imports
import os

# 3rd party imports
import numpy as np

# Local imports
from ..pyrf.datetime642iso8601 import datetime642iso8601
from ..pyrf.iso86012datetime import iso86012datetime

__author__ = "Louis Richard"
__email__ = "louisr@irfu.se"
__copyright__ = "Copyright 2020-2023"
__license__ = "MIT"
__version__ = "2.4.2"
__status__ = "Prototype"


def _bad_vars(bad_data):
    r"""Build the names of the variables flagged in a bad-data entry.

    Parameters
    ----------
    bad_data : dict
        Entry with a ``"top"`` and a ``"bottom"`` list of sensor ids.

    Returns
    -------
    names : list of str
        Names formatted as ``"top-<id>"`` followed by ``"bottom-<id>"``,
        in the order the ids are listed. Ids 6, 7 and 8 are left out.

    """
    # NOTE(review): ids 6, 7 and 8 are skipped on both sides; the reason is
    # not visible in this file -- confirm against the instrument layout.
    skipped_ids = (6, 7, 8)

    names = []

    for side in ("top", "bottom"):
        for sensor_id in bad_data[side]:
            if sensor_id in skipped_ids:
                continue

            names.append(f"{side}-{sensor_id}")

    return names


def _bad_eyes(inp_dataset, bad_vars):
    r"""Set all the data of the bad eyes to NaN.

    Parameters
    ----------
    inp_dataset : xarray.Dataset
        Dataset with the telescopes data.
    bad_vars : list of str
        Names of the variables (eyes) to blank. Names that are not in the
        dataset are ignored.

    Returns
    -------
    inp_dataset_clean : xarray.Dataset
        Copy of ``inp_dataset`` where every value of the listed variables is
        set to NaN.

    """
    # NOTE(review): copy() is called with its default arguments; if that is a
    # shallow copy the writes below may also be visible through
    # ``inp_dataset`` -- confirm whether this aliasing is intended.
    inp_dataset_clean = inp_dataset.copy()

    # The dataset keys do not change in the loop, so list them only once.
    available = set(inp_dataset.keys())

    for bad_var in bad_vars:
        if bad_var not in available:
            continue

        inp_dataset_clean[bad_var].data[:] = np.nan

    # Return the cleaned copy (as the _bad_ch* helpers do), not the input.
    return inp_dataset_clean


def _bad_ch0(inp_dataset, bad_vars):
    r"""Set the first channel (index 0) of the listed variables to NaN.

    Parameters
    ----------
    inp_dataset : xarray.Dataset
        Dataset with the telescopes data.
    bad_vars : list of str
        Names of the variables to clean. Names that are not in the dataset
        are ignored.

    Returns
    -------
    cleaned : xarray.Dataset
        Copy of ``inp_dataset`` where column 0 of the listed variables is set
        to NaN.

    """
    cleaned = inp_dataset.copy()
    available = list(inp_dataset.keys())

    for name in bad_vars:
        if name in available:
            # The table indexed by the second dimension of the variable
            # (presumably the energy table -- verify against the dataset).
            table = inp_dataset[inp_dataset[name].dims[1]]

            # The sum is NaN as soon as the table holds at least one NaN; in
            # that case the variable is left untouched.
            if not np.isnan(np.sum(table.data)):
                cleaned[name].data[:, 0] = np.nan

    return cleaned


def _bad_ch1(inp_dataset, bad_vars):
    r"""Set the first two channels (indices 0 and 1) of the listed variables
    to NaN.

    Parameters
    ----------
    inp_dataset : xarray.Dataset
        Dataset with the telescopes data.
    bad_vars : list of str
        Names of the variables to clean. Names that are not in the dataset
        are ignored.

    Returns
    -------
    cleaned : xarray.Dataset
        Copy of ``inp_dataset`` where columns 0 and 1 of the listed variables
        are set to NaN.

    """
    cleaned = inp_dataset.copy()
    available = list(inp_dataset.keys())

    for name in bad_vars:
        if name in available:
            # The table indexed by the second dimension of the variable
            # (presumably the energy table -- verify against the dataset).
            table = inp_dataset[inp_dataset[name].dims[1]]

            # The sum is NaN as soon as the table holds at least one NaN; in
            # that case the variable is left untouched.
            if not np.isnan(np.sum(table.data)):
                for channel in (0, 1):
                    cleaned[name].data[:, channel] = np.nan

    return cleaned


def _bad_ch2(inp_dataset, bad_vars):
    r"""Set the first three channels (indices 0, 1 and 2) of the listed
    variables to NaN.

    Parameters
    ----------
    inp_dataset : xarray.Dataset
        Dataset with the telescopes data.
    bad_vars : list of str
        Names of the variables to clean. Names that are not in the dataset
        are ignored.

    Returns
    -------
    cleaned : xarray.Dataset
        Copy of ``inp_dataset`` where columns 0, 1 and 2 of the listed
        variables are set to NaN.

    """
    cleaned = inp_dataset.copy()
    available = list(inp_dataset.keys())

    for name in bad_vars:
        if name in available:
            # The table indexed by the second dimension of the variable
            # (presumably the energy table -- verify against the dataset).
            table = inp_dataset[inp_dataset[name].dims[1]]

            # The sum is NaN as soon as the table holds at least one NaN; in
            # that case the variable is left untouched.
            if not np.isnan(np.sum(table.data)):
                for channel in (0, 1, 2):
                    cleaned[name].data[:, channel] = np.nan

    return cleaned


def feeps_remove_bad_data(inp_dataset):
    r"""Remove bad eyes and bad lowest energy channels.

    The lists of bad eyes and bad channels are based on data from Drew
    Turner and are read from the ``feeps_bad_data.json`` file located next
    to this module.

    Parameters
    ----------
    inp_dataset : xarray.Dataset
        Dataset with all active telescopes data. The spacecraft index is
        read from ``inp_dataset.attrs["mmsId"]`` and the reference time is
        the first sample of ``inp_dataset.time``.

    Returns
    -------
    inp_dataset_clean_all : xarray.Dataset
        Dataset with all active telescopes data where bad eyes and bad
        lowest energy channels are set to NaN.

    """

    # Key of the spacecraft in the bad-data tables (e.g. "mms1"). It is built
    # once so that every lookup below uses the same formatting.
    probe_key = f"mms{int(inp_dataset.attrs['mmsId'])}"

    root_path = os.path.dirname(os.path.abspath(__file__))

    with open(
        os.path.join(root_path, "feeps_bad_data.json"), "r", encoding="utf-8"
    ) as file:
        feeps_bad_data = json.load(file)

    bad_data_table = feeps_bad_data["bad_data_table"]
    bad_ch0 = feeps_bad_data["bad_ch0"]
    bad_ch1 = feeps_bad_data["bad_ch1"]
    bad_ch2 = feeps_bad_data["bad_ch2"]

    # 1. BAD EYES
    # First, here is a list of the EYES that are bad, we need to make sure
    # these data are not usable (i.e., make all of the counts/rate/flux data
    # from these eyes NAN). These are for all modes, burst and survey:
    table_dates = list(bad_data_table.keys())
    table_stamps = np.array(
        [
            datetime.datetime.strptime(t_, "%Y-%m-%d").timestamp()
            for t_ in table_dates
        ]
    )

    t_data = iso86012datetime(datetime642iso8601(inp_dataset.time.data[0]))[0]

    # Pick the table whose date is the closest to the start of the data. The
    # absolute value is required: the arg-minimum of the signed difference
    # would always select the earliest table.
    closest_table_tm = int(
        np.argmin(np.abs(table_stamps - t_data.timestamp())),
    )
    closest_table = table_dates[closest_table_tm]

    bad_data = bad_data_table[closest_table][probe_key]
    bad_vars_eyes = _bad_vars(bad_data)
    inp_dataset_clean_eye = _bad_eyes(inp_dataset, bad_vars_eyes)

    # 2. BAD LOWEST E-CHANNELS
    # Next, these eyes have bad first channels (i.e., lowest energy channel,
    # E-channel 0 in IDL indexing). Again, these data (just the
    # counts/rate/flux from the lowest energy channel ONLY!!!) should be
    # hardwired to be NAN for all modes (burst and both types of survey).
    # The eyes not listed here or above are ok though... so once we do this,
    # we can actually start showing the data down to the lowest levels (~33
    # keV), meaning we'll have to adjust the hard-coded ylim settings in
    # SPEDAS and the SITL software:
    if t_data > iso86012datetime(["2019-05-01T00:00:00.000"])[0]:
        epoch_key = ">2019-05-01"
    else:
        epoch_key = "<2019-05-01"

    bad_data_ch0 = bad_ch0[epoch_key][probe_key]
    bad_data_ch1 = bad_ch1[epoch_key][probe_key]
    bad_data_ch2 = bad_ch2[epoch_key][probe_key]

    # Each step works on the output of the previous one, so the final
    # dataset accumulates all the removals.
    bad_vars_ch0 = _bad_vars(bad_data_ch0)
    inp_dataset_clean_ch0 = _bad_ch0(inp_dataset_clean_eye, bad_vars_ch0)

    bad_vars_ch1 = _bad_vars(bad_data_ch1)
    inp_dataset_clean_ch1 = _bad_ch1(inp_dataset_clean_ch0, bad_vars_ch1)

    bad_vars_ch2 = _bad_vars(bad_data_ch2)
    inp_dataset_clean_all = _bad_ch2(inp_dataset_clean_ch1, bad_vars_ch2)

    return inp_dataset_clean_all