Source code for pyrfu.mms.list_files_ancillary
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Built-in imports
import datetime
import fnmatch
import glob
import json
import os
import re
# 3rd party imports
import numpy as np
import pandas as pd
# Local imports
from ..pyrf.datetime642iso8601 import datetime642iso8601
from ..pyrf.iso86012datetime import iso86012datetime
from ..pyrf.iso86012datetime64 import iso86012datetime64
from .db_init import MMS_CFG_PATH
# Module metadata
__author__ = "Louis Richard"
__email__ = "louisr@irfu.se"
__copyright__ = "Copyright 2020-2023"
__license__ = "MIT"
__version__ = "2.4.11"
__status__ = "Prototype"
def list_files_ancillary(tint, mms_id, product, data_path: str = ""):
    r"""Find available ancillary files in the data directories for the target
    product type.

    Files are searched in ``<data_path>/ancillary/mms<mms_id>/<product>`` and
    are expected to be named
    ``MMS<mms_id>_<PRODUCT>_<YYYYDOY>_<YYYYDOY>.V<nn>`` (start date, end date
    and two-digit version). When several versions of the same file exist, only
    the latest one is returned.

    Parameters
    ----------
    tint : list or numpy.ndarray
        Time interval ``[start, stop]``. Values can be ISO 8601 strings,
        ``numpy.datetime64`` or ``datetime.datetime``.
    mms_id : str or int
        Spacecraft index
    product : {"predatt", "predeph", "defatt", "defeph"}
        Ancillary type.
    data_path : str, Optional
        Path of MMS data. If empty (default), the ``"local"`` entry of the MMS
        configuration file (``MMS_CFG_PATH``) is used.

    Returns
    -------
    file_names : list
        Sorted paths of the ancillary files overlapping the time interval.

    Raises
    ------
    AssertionError
        If the data path does not exist.
    TypeError
        If ``tint`` is not a list or an array, or if its values are not
        datetime, datetime64, or str.

    """

    # Check path
    if not data_path:
        # Read the current version of the MMS configuration file
        with open(MMS_CFG_PATH, "r", encoding="utf-8") as fs:
            config = json.load(fs)

        data_path = os.path.normpath(config["local"])
    else:
        data_path = os.path.normpath(data_path)

    # Make sure that the data path exists. Raised explicitly (instead of using
    # an ``assert`` statement) so that the check survives ``python -O`` while
    # callers still get the same exception type.
    if not os.path.exists(data_path):
        raise AssertionError(f"{data_path} doesn't exist!!")

    if isinstance(mms_id, int):
        mms_id = str(mms_id)

    # Check time interval type
    if isinstance(tint, (np.ndarray, list)):
        if isinstance(tint[0], np.datetime64):
            tint = datetime642iso8601(tint)
            tint = iso86012datetime(tint)
        elif isinstance(tint[0], str):
            # Round trip through datetime64 to make sure it is ISO8601 ok!
            tint = iso86012datetime64(np.array(tint))
            tint = datetime642iso8601(tint)
            tint = iso86012datetime(tint)
        elif isinstance(tint[0], datetime.datetime):
            pass
        else:
            raise TypeError("Values must be in datetime, datetime64, or str!!")
    else:
        raise TypeError("tint must be a DataArray or array_like!!")

    # PAD time interval to handle ancillary file start after midnight
    tint = [tint[0] - datetime.timedelta(days=1), tint[1]]

    # directory and file name search patterns
    # For now
    # -all ancillary data is in one directory:
    #       mms\ancillary
    # -assume file names are of the form:
    #   SPACECRAFT_FILETYPE_startDate_endDate.version
    #   where SPACECRAFT is [MMS1, MMS2, MMS3, MMS4] in uppercase
    #   and FILETYPE is either DEFATT, PREDATT, DEFEPH, PREDEPH in uppercase
    #   and start/endDate is YYYYDOY
    #   and version is Vnn (.V00, .V01, etc..)
    dir_path = os.sep.join([data_path, "ancillary", f"mms{mms_id}", product])
    file_pattern = "_".join([f"MMS{mms_id}", product.upper(), "???????_???????.V??"])

    # Escape the directory so that glob metacharacters in the data path
    # ("[", "]", "*", "?") are taken literally; only the file name is a pattern.
    files = glob.glob(os.sep.join([glob.escape(dir_path), file_pattern]))

    # The glob pattern accepts any character for "?"; the regular expression
    # enforces digits. It is matched against the base name only, so it does
    # not depend on the path separator or on the content of the data path.
    file_regex = re.compile(
        re.escape(f"MMS{mms_id}_{product.upper()}_")
        + r"([0-9]{7})_([0-9]{7})\.V([0-9]{2})"
    )

    # find the files within the time interval, keeping only the latest version
    # of each (start date, end date) pair
    latest = {}

    for file in files:
        time_match = file_regex.fullmatch(os.path.basename(file))

        if time_match is None:
            continue

        start_time = pd.to_datetime(time_match.group(1), format="%Y%j")
        end_time = pd.to_datetime(time_match.group(2), format="%Y%j")

        if not (start_time < tint[1] and end_time >= tint[0]):
            continue

        dates = time_match.group(1, 2)
        # Versions are zero-padded two-digit strings, so comparing the strings
        # orders them like the numbers.
        candidate = (time_match.group(3), file)

        if dates not in latest or candidate > latest[dates]:
            latest[dates] = candidate

    file_names = sorted(path for _, path in latest.values())

    return file_names