Source code for pyrfu.mms.feeps_remove_bad_data
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Built-in imports
import datetime
import json
import os

# 3rd party imports
import numpy as np

# Local imports
from ..pyrf.datetime642iso8601 import datetime642iso8601
from ..pyrf.iso86012datetime import iso86012datetime
__author__ = "Louis Richard"
__email__ = "louisr@irfu.se"
__copyright__ = "Copyright 2020-2023"
__license__ = "MIT"
__version__ = "2.4.2"
__status__ = "Prototype"


def _bad_vars(bad_data):
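    r"""Build the list of bad ``top-i``/``bottom-i`` eye labels from a
    bad-data table entry, skipping eyes 6-8 (the FEEPS ion telescopes)."""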
bad_vars_top = list(filter(lambda x: x not in [6, 7, 8], bad_data["top"]))
bad_vars_bot = list(
filter(lambda x: x not in [6, 7, 8], bad_data["bottom"]),
)
bad_vars = [
*[f"top-{x}" for x in bad_vars_top],
*[f"bottom-{x}" for x in bad_vars_bot],
]
    return bad_vars


def _bad_eyes(inp_dataset, bad_vars):
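    r"""Set all data (counts/rate/flux) of the listed bad eyes to NaN."""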
inp_dataset_clean = inp_dataset.copy()
for bad_var in bad_vars:
if bad_var not in list(inp_dataset.keys()):
continue
inp_dataset_clean[bad_var].data[:] = np.nan
    return inp_dataset_clean


def _bad_ch0(inp_dataset, bad_vars):
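    r"""Set the lowest energy channel (channel 0) of the listed eyes to
    NaN, skipping eyes whose energy table contains NaNs."""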
inp_dataset_clean = inp_dataset.copy()
for bad_var in bad_vars:
if bad_var not in list(inp_dataset.keys()):
continue
        # Skip eyes whose energy table contains NaNs
energy = inp_dataset[inp_dataset[bad_var].dims[1]]
if np.isnan(np.sum(energy.data)):
continue
inp_dataset_clean[bad_var].data[:, 0] = np.nan
    return inp_dataset_clean


def _bad_ch1(inp_dataset, bad_vars):
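    r"""Set the two lowest energy channels (channels 0 and 1) of the
    listed eyes to NaN, skipping eyes whose energy table contains NaNs."""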
inp_dataset_clean = inp_dataset.copy()
for bad_var in bad_vars:
if bad_var not in list(inp_dataset.keys()):
continue
        # Skip eyes whose energy table contains NaNs
energy = inp_dataset[inp_dataset[bad_var].dims[1]]
if np.isnan(np.sum(energy.data)):
continue
inp_dataset_clean[bad_var].data[:, 0] = np.nan
inp_dataset_clean[bad_var].data[:, 1] = np.nan
    return inp_dataset_clean


def _bad_ch2(inp_dataset, bad_vars):
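    r"""Set the three lowest energy channels (channels 0, 1 and 2) of the
    listed eyes to NaN, skipping eyes whose energy table contains NaNs."""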
inp_dataset_clean = inp_dataset.copy()
for bad_var in bad_vars:
if bad_var not in list(inp_dataset.keys()):
continue
        # Skip eyes whose energy table contains NaNs
energy = inp_dataset[inp_dataset[bad_var].dims[1]]
if np.isnan(np.sum(energy.data)):
continue
inp_dataset_clean[bad_var].data[:, 0] = np.nan
inp_dataset_clean[bad_var].data[:, 1] = np.nan
inp_dataset_clean[bad_var].data[:, 2] = np.nan
    return inp_dataset_clean


def feeps_remove_bad_data(inp_dataset):
r"""This function removes bad eyes, bad lowest energy channels based on
data from Drew Turner
Parameters
----------
inp_dataset : xarray.Dataset
Dataset with all active telescopes data.
Returns
-------
inp_dataaset_clean_all : xarray.Dataset
Dataset with all active telescopes data where bad eyes and lab lowest
energy channels are set to NaN.
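
    Examples
    --------
    A minimal usage sketch; the time interval, data product, and
    spacecraft index below are assumptions for illustration:

    >>> from pyrfu import mms
    >>> tint = ["2017-07-18T13:03:34.000", "2017-07-18T13:07:00.000"]
    >>> flux_feeps = mms.get_feeps_alleyes("fluxe_brst_l2", tint, 2)
    >>> flux_feeps_clean = mms.feeps_remove_bad_data(flux_feeps)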
"""
mms_id = inp_dataset.attrs["mmsId"]
root_path = os.path.dirname(os.path.abspath(__file__))
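    # The bad-eye and bad-channel tables ship with pyrfu as
    # feeps_bad_data.json in this directory. Expected layout, inferred
    # from the accesses below (dates and eye numbers illustrative):
    # {
    #     "bad_data_table": {"2020-06-01": {"mms1": {"top": [1], "bottom": [11]}, ...}, ...},
    #     "bad_ch0": {">2019-05-01": {"mms1": {...}, ...}, "<2019-05-01": {...}},
    #     "bad_ch1": {... same structure as "bad_ch0" ...},
    #     "bad_ch2": {... same structure as "bad_ch0" ...}
    # }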
with open(
os.path.join(root_path, "feeps_bad_data.json"), "r", encoding="utf-8"
) as file:
feeps_bad_data = json.load(file)
bad_data_table = feeps_bad_data["bad_data_table"]
bad_ch0 = feeps_bad_data["bad_ch0"]
bad_ch1 = feeps_bad_data["bad_ch1"]
    bad_ch2 = feeps_bad_data["bad_ch2"]

    # 1. BAD EYES
# First, here is a list of the EYES that are bad, we need to make sure
# these data are not usable (i.e., make all of the counts/rate/flux data
# from these eyes NAN). These are for all modes, burst and survey:
    dates = [
        datetime.datetime.strptime(t_, "%Y-%m-%d").timestamp()
        for t_ in bad_data_table.keys()
    ]
    t_data = iso86012datetime(datetime642iso8601(inp_dataset.time.data[0]))[0]

    # Pick the bad-data table whose date is closest to the start of the
    # data interval; the absolute time difference is needed, otherwise the
    # earliest table would always be selected.
    closest_table_tm = np.argmin([abs(t_ - t_data.timestamp()) for t_ in dates])
    closest_table = list(bad_data_table.keys())[closest_table_tm]
bad_data = bad_data_table[closest_table][f"mms{mms_id:d}"]
bad_vars_eyes = _bad_vars(bad_data)
    inp_dataset_clean_eye = _bad_eyes(inp_dataset, bad_vars_eyes)

    # 2. BAD LOWEST E-CHANNELS
# Next, these eyes have bad first channels (i.e., lowest energy channel,
# E-channel 0 in IDL indexing). Again, these data (just the
# counts/rate/flux from the lowest energy channel ONLY!!!) should be
# hardwired to be NAN for all modes (burst and both types of survey).
# The eyes not listed here or above are ok though... so once we do this,
# we can actually start showing the data down to the lowest levels (~33
# keV), meaning we'll have to adjust the hard-coded ylim settings in
# SPEDAS and the SITL software:
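    # The bad lowest-channel tables are split at 2019-05-01; select the
    # set matching the epoch of the data.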
if t_data > iso86012datetime(["2019-05-01T00:00:00.000"])[0]:
bad_data_ch0 = bad_ch0[">2019-05-01"][f"mms{mms_id}"]
bad_data_ch1 = bad_ch1[">2019-05-01"][f"mms{mms_id}"]
bad_data_ch2 = bad_ch2[">2019-05-01"][f"mms{mms_id}"]
else:
bad_data_ch0 = bad_ch0["<2019-05-01"][f"mms{mms_id}"]
bad_data_ch1 = bad_ch1["<2019-05-01"][f"mms{mms_id}"]
bad_data_ch2 = bad_ch2["<2019-05-01"][f"mms{mms_id}"]
bad_vars_ch0 = _bad_vars(bad_data_ch0)
inp_dataset_clean_ch0 = _bad_ch0(inp_dataset_clean_eye, bad_vars_ch0)
bad_vars_ch1 = _bad_vars(bad_data_ch1)
inp_dataset_clean_ch1 = _bad_ch1(inp_dataset_clean_ch0, bad_vars_ch1)
bad_vars_ch2 = _bad_vars(bad_data_ch2)
inp_dataset_clean_all = _bad_ch2(inp_dataset_clean_ch1, bad_vars_ch2)
return inp_dataset_clean_all