Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support reading EO-SIP LAC data #125

Merged
merged 14 commits into from
Mar 19, 2024
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
fail-fast: true
matrix:
os: ["ubuntu-latest"]
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.10", "3.11", "3.12"]
experimental: [false]

env:
Expand Down
45 changes: 30 additions & 15 deletions pygac/pod_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@
# choose the right header depending on the date
with file_opener(fileobj or filename) as fd_:
self.tbm_head, self.head = self.read_header(
filename, fileobj=fd_)
filename, fileobj=fd_, header_date=self.header_date)
if self.tbm_head:
tbm_offset = tbm_header.itemsize
else:
Expand All @@ -302,12 +302,14 @@
return self.head, self.scans

@classmethod
def read_header(cls, filename, fileobj=None):
def read_header(cls, filename, fileobj=None, header_date="auto"):
"""Read the file header.

Args:
filename (str): Path to GAC/LAC file
fileobj: An open file object to read from. (optional)
header_date: date to use to choose the header.
Defaults to "auto" to use the data to pick the header corresponding to the date of the file.

Returns:
archive_header (struct): archive header
Expand All @@ -332,19 +334,7 @@
fd_.seek(0)
tbm_head = None
tbm_offset = 0
# read header
head0, = np.frombuffer(
fd_.read(header0.itemsize),
dtype=header0, count=1)
year, jday, _ = cls.decode_timestamps(head0["start_time"])
start_date = (datetime.date(year, 1, 1) +
datetime.timedelta(days=int(jday) - 1))
if start_date < datetime.date(1992, 9, 8):
header = header1
elif start_date <= datetime.date(1994, 11, 15):
header = header2
else:
header = header3
header = cls.choose_header_based_on_timestamp(header_date, fd_)
fd_.seek(tbm_offset, 0)
# need to copy frombuffer to have write access on head
head, = np.frombuffer(
Expand All @@ -354,6 +344,31 @@
cls._validate_header(head)
return tbm_head, head

@classmethod
def choose_header_based_on_timestamp(cls, header_date, fd_):
    """Choose the header dtype based on the timestamp.

    Args:
        header_date: ``"auto"`` to take the start date from ``fd_`` (via
            ``get_start_date``), or a ``datetime.date`` to use directly.
            A ``datetime.datetime`` is accepted too and reduced to its
            calendar date.
        fd_: Open file object. It is only read from when ``header_date``
            is ``"auto"``.

    Returns:
        ``header1`` for dates before 1992-09-08, ``header2`` for dates up
        to and including 1994-11-15, and ``header3`` for anything later.
    """
    if header_date == "auto":
        header_date = cls.get_start_date(fd_)
    elif isinstance(header_date, datetime.datetime):
        # datetime and date instances cannot be ordered against each other
        # (Python raises TypeError), so compare on the calendar date only.
        header_date = header_date.date()

    # NOTE(review): the coverage report on this change flags the header1
    # and header2 branches as not exercised by the tests.
    if header_date < datetime.date(1992, 9, 8):
        return header1
    if header_date <= datetime.date(1994, 11, 15):
        return header2
    return header3

@classmethod
def get_start_date(cls, fd_):
    """Return the start date found in the header at the stream position.

    Reads ``header0.itemsize`` bytes from ``fd_``, interprets them with the
    ``header0`` dtype and decodes the ``start_time`` field into a year and
    a day of year.

    Args:
        fd_: Open file object positioned where the header begins.

    Returns:
        datetime.date: January 1st of the decoded year, advanced by the
        decoded day of year minus one.
    """
    raw_header = fd_.read(header0.itemsize)
    first_record = np.frombuffer(raw_header, dtype=header0, count=1)[0]
    year, jday, _ = cls.decode_timestamps(first_record["start_time"])
    # The day of year is one-based, hence the "- 1" when used as an offset.
    days_into_year = datetime.timedelta(days=int(jday) - 1)
    return datetime.date(year, 1, 1) + days_into_year

@classmethod
def _validate_header(cls, header):
"""Check if the header belongs to this reader."""
Expand Down
42 changes: 32 additions & 10 deletions pygac/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ class NoTLEData(IndexError):
"""Raised if no TLE data available within time range."""


class DecodingError(ValueError):
    """Raised when decoding of some value fails.

    Subclasses ``ValueError``, so handlers written for ``ValueError`` also
    catch it. It is raised by ``_decode_data_set_name`` when a decoded data
    set name does not match the expected pattern.
    """


class Reader(six.with_metaclass(ABCMeta)):
"""Reader for GAC and LAC format, POD and KLM platforms."""

Expand All @@ -95,7 +99,7 @@ class Reader(six.with_metaclass(ABCMeta)):

def __init__(self, interpolate_coords=True, adjust_clock_drift=True,
tle_dir=None, tle_name=None, tle_thresh=7, creation_site=None,
custom_calibration=None, calibration_file=None):
custom_calibration=None, calibration_file=None, header_date="auto"):
"""Init the reader.

Args:
Expand All @@ -111,6 +115,7 @@ def __init__(self, interpolate_coords=True, adjust_clock_drift=True,
custom_calibration: dictionary with a subset of user defined satellite specific
calibration coefficients
calibration_file: path to json file containing default calibrations
header_date: the date to use for pod header choice. Defaults to "auto".

"""
self.meta_data = {}
Expand All @@ -122,6 +127,7 @@ def __init__(self, interpolate_coords=True, adjust_clock_drift=True,
self.creation_site = (creation_site or 'NSS').encode('utf-8')
self.custom_calibration = custom_calibration
self.calibration_file = calibration_file
self.header_date = header_date
self.head = None
self.scans = None
self.spacecraft_name = None
Expand Down Expand Up @@ -205,22 +211,38 @@ def _correct_data_set_name(cls, header, filename):
filename (str): path to file
"""
filename = str(filename)
data_set_name = header['data_set_name'].decode(errors='ignore')
if not cls.data_set_pattern.match(data_set_name):
LOG.debug('The data_set_name in header %s does not match.'
' Use filename instead.' % header['data_set_name'])
for encoding in "utf-8", "cp500":
sfinkens marked this conversation as resolved.
Show resolved Hide resolved
data_set_name = header['data_set_name']
try:
data_set_name = cls._decode_data_set_name(data_set_name, encoding)
except DecodingError as err:
LOG.debug(str(err))
else:
header["data_set_name"] = data_set_name
break
else:
LOG.debug(f'The data_set_name in header {header["data_set_name"]} does not match.'
' Use filename instead.')
match = cls.data_set_pattern.search(filename)
if match:
data_set_name = match.group()
LOG.debug("Set data_set_name, to filename %s"
% data_set_name)
LOG.debug(f"Set data_set_name, to filename {data_set_name}")
header['data_set_name'] = data_set_name.encode()
else:
LOG.debug("header['data_set_name']=%s; filename='%s'"
% (header['data_set_name'], filename))
LOG.debug(f"header['data_set_name']={header['data_set_name']}; filename='{filename}'")
raise ReaderError('Cannot determine data_set_name!')
return header

@classmethod
def _decode_data_set_name(cls, data_set_name, encoding):
    """Decode the raw data set name and check it against the pattern.

    Args:
        data_set_name (bytes): Raw data set name as stored in the header.
        encoding (str): Codec used to decode ``data_set_name``. Bytes that
            cannot be decoded are dropped (``errors='ignore'``).

    Returns:
        bytes: The decoded name re-encoded with the default codec.

    Raises:
        DecodingError: If the decoded name does not match
            ``cls.data_set_pattern``.
    """
    decoded_name = data_set_name.decode(encoding, errors='ignore')
    if cls.data_set_pattern.match(decoded_name):
        return decoded_name.encode()
    raise DecodingError(f'The data_set_name in header {decoded_name} '
                        f'does not seem correct using encoding {encoding}.')

@classmethod
def _validate_header(cls, header):
"""Check if the header belongs to this reader.
Expand Down Expand Up @@ -274,7 +296,7 @@ def _read_scanlines(self, buffer, count):
"Expected %d scan lines, but found %d!"
% (count, line_count))
warnings.warn("Unexpected number of scanlines!",
category=RuntimeWarning)
category=RuntimeWarning, stacklevel=2)
self.scans = np.frombuffer(
buffer, dtype=self.scanline_type, count=line_count)

Expand Down
6 changes: 3 additions & 3 deletions pygac/tests/test_klm.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
class TestKLM:
"""Test the klm reader."""

def setup(self):
def setup_method(self):
"""Set up the tests."""
self.reader = GACKLMReader()

Expand Down Expand Up @@ -126,7 +126,7 @@ def test_quality_indicators(self):
class TestGACKLM:
"""Tests for gac klm."""

def setup(self):
def setup_method(self):
"""Set up the tests."""
self.reader = GACKLMReader()

Expand All @@ -150,7 +150,7 @@ def test_get_tsm_pixels(self, get_tsm_idx):
class TestLACKLM:
"""Tests for lac klm."""

def setup(self):
def setup_method(self):
"""Set up the tests."""
self.reader = LACKLMReader()
self.reader.scans = np.ones(100, dtype=scanline)
Expand Down
Loading
Loading