From 3993dfc6d8358a5a84782fda6e15642523b79de6 Mon Sep 17 00:00:00 2001 From: Peter Kuma Date: Thu, 18 Apr 2024 15:35:58 +0100 Subject: [PATCH] Add support for HIS L2 format New release v3.5.0 --- LICENSE.md | 2 +- README.md | 39 ++++++++++--- cl2nc.1 | 23 +++++--- cl2nc.py | 165 ++++++++++++++++++++++++++++++++++++++++++----------- setup.py | 4 +- 5 files changed, 182 insertions(+), 51 deletions(-) diff --git a/LICENSE.md b/LICENSE.md index cd69695..4551d15 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2017–2023 Peter Kuma +Copyright (c) 2017–2024 Peter Kuma Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index ee61696..6d234b5 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # cl2nc cl2nc is an open source command line Python program for converting Vaisala -CL51 and CL31 ceilometer dat files to NetCDF. +CL51 and CL31 ceilometer DAT and HIS L2 files to NetCDF. ## Example @@ -11,7 +11,7 @@ On the command-line: cl2nc input.dat output.nc ``` -where `input.dat` is a Vaisala CL51 or CL31 dat file and `output.nc` is the name +where `input.dat` is a Vaisala CL51 or CL31 DAT file and `output.nc` is the name of a NetCDF output file. See [example.zip](example.zip) for an example input and output. @@ -110,13 +110,13 @@ Synopsis: `cl2nc` [`-chq`] [`--debug`] *input* *output* \ `cl2nc` `-h`|`--help` -*input* is an input `.dat` file. *output* is an output `.nc` file. If -directories are supplied for *input* and *output*, all `.dat` and `.DAT` files -in *input* are converted to `.nc` files in *output*. +*input* is an input `.dat` or `.his` (L2) file. *output* is an output `.nc` file. +If directories are supplied for *input* and *output*, all `.dat`, `.DAT`, `.his` +and `.HIS` files in *input* are converted to `.nc` files in *output*. Options: -- `-c`: Enable checksum verification (slow). +- `-c`: Enable DAT checksum verification (slow). - `--debug`: Enable debugging output. - `-h`, `--help`: Show help message and exit. - `-q`: Run quietly (suppress output). @@ -135,12 +135,14 @@ variables. The DAT files can alternatively contain values in feet (instead of meters), in which case all values are converted by cl2nc to meters. -Time in DAT files is assumed to be UTC. +Time in DAT and HIS files is assumed to be UTC. Missing values are encoded as NaN (floating-point variables) or -2147483648 (integer variables). The `_FillValue` attribute contains the missing value used in the given variable. +DAT files produce the following NetCDF output: + | Variable | Description | Units | Dimensions | | --- | --- | --- | --- | | [background_light](#background_light) | Background light | mV | time | @@ -177,6 +179,17 @@ used in the given variable. 
| [vertical_visibility](#vertical_visibility) | Vertical visibility | m | time | | [window_transmission](#window_transmission) | Window transmission estimate | % | time | +HIS L2 files produce the following NetCDF output: + +| Variable | Description | Units | Dimensions | +| --- | --- | --- | --- | +| [backscatter](#backscatter) | Attenuated volume backscatter coefficient | km-1.sr-1 | time, level | +| [ceilometer](#ceilometer) | Ceilometer name | | time | +| [level](#level) | Level number | | level | +| [period](#period) | Period | | time | +| [time](#time) | Time | seconds since 1970-01-01 00:00:00 UTC | time | +| [time_utc](#time_utc) | Time (UTC) | ISO 8601 | time | + ### background_light Background light (mV) @@ -205,6 +218,10 @@ Second lowest cloud base height (m) Highest cloud base height (m) +### ceilometer + +Ceilometer name (HIS L2 variable `CEILOMETER`). + ### detection_status Detection status @@ -268,6 +285,10 @@ Message subclass - 6 – 10 m ⨉ 1540 samples, range 15400 m (msg1_10x1540) - 8 – without a backscatter profile (msg1_base) +### period + +Period (HIS L2 variable `PERIOD`). + ### pulse_energy Pulse energy (% of nominal factory setting) @@ -449,6 +470,10 @@ functions to read the file (you may need to change the file extension to `.h5`). cl2nc follows [semantic versioning](http://semver.org/). +### 3.5.0 (2024-04-18) + +- Added support for the HIS L2 format. + ### 3.4.0 (2023-03-10) - Added support for a CL51 format as in R/V Polarstern data. diff --git a/cl2nc.1 b/cl2nc.1 index 1be0ce6..30a9612 100644 --- a/cl2nc.1 +++ b/cl2nc.1 @@ -1,7 +1,7 @@ -.TH cl2nc "3.4.0" 08/21/2023 +.TH cl2nc "3.5.0" 04/18/2024 .SH NAME -cl2nc \- convert Vaisala CL51 and CL31 dat files to NetCDF +cl2nc \- convert Vaisala CL51 and CL31 DAT and HIS L2 files to NetCDF .SH SYNOPSIS .B cl2nc @@ -16,7 +16,9 @@ cl2nc \- convert Vaisala CL51 and CL31 dat files to NetCDF .IR input is an input .I .dat -file. +or +.I .his +(L2) file. .IR output is an output .I .nc @@ -26,7 +28,11 @@ If directories are supplied for and .IR output , all -.IR ".dat " "and " .DAT +.IR .dat , +.IR .DAT , +.I .his +and +.I .HIS files in .I input are converted to @@ -38,7 +44,7 @@ files in .TP .B -c -Enable checksum verification (slow). +Enable DAT checksum verification (slow). .TP .B --debug Enable debugging output. @@ -68,12 +74,15 @@ to NetCDF files in the directory .SH COPYRIGHT -Copyright (C) 2018-2023 Peter Kuma. +Copyright (C) 2018-2024 Peter Kuma. .PP This program is available under the terms of an MIT license (see LICENSE.md in the distribution). .SH SEE ALSO -See +See +.UR https://github.com/peterkuma/cl2nc +cl2nc GitHub repository +.UE for more information about .BR cl2nc . 
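The output layout documented in the README changes above can be exercised with a short read-back sketch. This is a minimal example, not part of the patch: the file name `output.nc` is illustrative, and it only assumes the `time` and `backscatter` variables, units and missing-value conventions listed in the tables.

```python
# Minimal sketch: inspect a cl2nc output file with netCDF4.
# 'output.nc' is an illustrative name; variables, units and fill values
# follow the README tables (NaN for floats, -2147483648 for integers).
import datetime as dt
import numpy as np
from netCDF4 import Dataset

with Dataset('output.nc') as f:
    time = np.asarray(f.variables['time'][:])   # seconds since 1970-01-01 00:00:00 UTC
    bs = np.ma.filled(f.variables['backscatter'][:], np.nan)  # km^-1 sr^-1, (time, level)
    first = dt.datetime(1970, 1, 1) + dt.timedelta(seconds=float(time[0]))
    print('profiles:', len(time), 'first:', first.isoformat())
    print('mean backscatter:', np.nanmean(bs))
```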
diff --git a/cl2nc.py b/cl2nc.py index 2889c40..26ad4d6 100755 --- a/cl2nc.py +++ b/cl2nc.py @@ -1,6 +1,6 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -__version__ = '3.4.0' +__version__ = '3.5.0' import sys import signal @@ -41,6 +41,8 @@ re_line6 = re.compile(b'^\x03(?P.{4})\x04$') re_none = re.compile(b'^/* *$') +re_his_time = re.compile(b'^(?P\d{4})-(?P\d\d)-(?P\d\d) (?P\d\d):(?P\d\d):(?P\d\d)$') + def fsencode(x): return os.fsencode(x) if sys.version_info[0] > 2 else x @@ -235,13 +237,19 @@ def postprocess(d): 'scale', 'backscatter_sum', ]: - d[var] = int_to_float(d[var]) + if var in d: d[var] = int_to_float(d[var]) + + d['scale'] = d.get('scale', 10) - d['backscatter'] = d['backscatter']/100000.0*(d['scale']/100.0) - d['backscatter_sum'] = d['backscatter_sum']/10000.0*(d['scale']/100.0) + if 'backscatter' in d: + d['backscatter'] = d['backscatter']/100000.0*(d['scale']/100.0) - d['units'] = 'm' if (d['status_internal'] & 0x0080) else 'ft' - layer_height_factor = 100 if d['units'] == 'ft' else 10 + if 'backscatter_sum' in d: + d['backscatter_sum'] = d['backscatter_sum']/10000.0*(d['scale']/100.0) + + if 'status_internal' in d: + d['units'] = 'm' if (d['status_internal'] & 0x0080) else 'ft' + layer_height_factor = 100 if d['units'] == 'ft' else 10 if 'layer1_height' in d: d['layer_height'] = NA_INT32*np.ones(5) @@ -258,7 +266,7 @@ def postprocess(d): for i in range(5): d['layer_cloud_amount'][i] = d['layer%d_cloud_amount' % (i + 1)] - if d['units'] == 'ft': + if 'units' in d and d['units'] == 'ft': for var in [ 'vertical_visibility', 'layer_height', @@ -273,17 +281,19 @@ def postprocess(d): NA_INT32 ) - d['pulse_count'] = np.where( - d['pulse_count'] != NA_INT32, - d['pulse_count']*1024, - NA_INT32 - ) + if 'pulse_count' in d: + d['pulse_count'] = np.where( + d['pulse_count'] != NA_INT32, + d['pulse_count']*1024, + NA_INT32 + ) d['time_utc'] = d.get('time_utc', '') - d['time'] = NA_INT64 if d['time_utc'] == '' else ( - dt.datetime.strptime(d['time_utc'].decode('ascii'), '%Y-%m-%dT%H:%M:%S') - - dt.datetime(1970, 1, 1) - ).total_seconds() + if 'time_utc' in d: + d['time'] = NA_INT64 if d['time_utc'] == '' else ( + dt.datetime.strptime(d['time_utc'].decode('ascii'), '%Y-%m-%dT%H:%M:%S') + - dt.datetime(1970, 1, 1) + ).total_seconds() def crc16(buf): crc = 0xffff @@ -295,7 +305,7 @@ def crc16(buf): crc = (crc^xmask) & 0xffff return crc^0xffff; -def read_input(filename, options={}): +def read_dat(filename, options={}): options = dict({ 'check': False, }, **options) @@ -391,10 +401,79 @@ def finalize(d): break return dd +def read_his_time(s): + m = re_his_time.match(s) + if m is not None: + g = m.groupdict() + return b'%s-%s-%sT%s:%s:%s' % ( + g['year'], + g['month'], + g['day'], + g['hour'], + g['minute'], + g['second'] + ) + else: + raise ValueError('Invalid syntax for CREATEDATE field') + +def read_his_period(s): + try: + return int(s) + except ValueError: + raise ValueError('Invalid syntax for PERIOD field') + +def read_his_backscatter(s): + d2 = {} + d = {'backscatter': s} + read_hex_array(d, d2, 'backscatter', 5) + return d2['backscatter'] + +def read_his(filename, options={}): + with open(filename, 'rb') as f: + dd = [] + line_number = 0 + header = None + for line in f.readlines(): + line_number += 1 + try: + d = {} + items = line.split(b',') + items = [x.strip() for x in items] + if items[0] == b'History file': + continue + if header is None: + header = items + continue + for i, h in enumerate(header): + s = items[i] if i < len(items) else b'' + if h == b'CREATEDATE': + 
d['time_utc'] = read_his_time(s) + elif h == b'CEILOMETER': + d['ceilometer'] = s + elif h == b'PERIOD': + d['period'] = read_his_period(s) + elif h == b'BS_PROFILE': + d['backscatter'] = read_his_backscatter(s) + postprocess(d) + dd += [d] + except Exception as e: + t, v, tb = sys.exc_info() + log.warning('Error on line %d: %s' % ( + line_number, e + )) + log.debug(traceback.format_exc()) + return dd + +def read(filename, options={}): + filename_lower = filename.lower() + if filename_lower.endswith(b'.his'): + return read_his(filename, options) + else: + return read_dat(filename, options) + def write_output(dd, filename): n = len(dd) vars = list(set(itertools.chain(*[list(d.keys()) for d in dd]))) - m = np.max([0] + [len(d['backscatter']) for d in dd]) if os.path.dirname(filename) != b'' and \ not os.path.exists(os.path.dirname(filename)): @@ -402,14 +481,23 @@ def write_output(dd, filename): f = Dataset(fsdecode(filename), 'w', format='NETCDF4') f.createDimension('time', n) - f.createDimension('level', m) - f.createDimension('layer', 5) - level = np.arange(m) - layer = np.arange(5) + + if 'backscatter' in vars: + m = np.max([0] + [len(d['backscatter']) for d in dd]) + f.createDimension('level', m) + level = np.arange(m) + + has_layers = 'layer_height' in vars or 'layer_cloud_amount' in vars + if has_layers: + f.createDimension('layer', 5) + layer = np.arange(5) def write_var(var, dtype, attributes={}): if not var in vars: return fill_value = NA_NETCDF.get(dtype) + if dtype == 'SX': + slen = max([len(d[var]) for d in dd]) + dtype = 'S%d' % slen v = f.createVariable(var, dtype, ('time',), fill_value=fill_value) v[:] = np.array([d[var] for d in dd]) v.setncatts(attributes) @@ -445,12 +533,14 @@ def write_dim(var, dtype, x, attributes={}): 'long_name': 'Time', 'units': 'seconds since 1970-01-01 00:00:00 UTC', }) - write_dim('level', 'i4', level, { - 'long_name': 'Level number', - }) - write_dim('layer', 'i4', layer, { - 'long_name': 'Layer number', - }) + if 'backscatter' in vars: + write_dim('level', 'i4', level, { + 'long_name': 'Level number', + }) + if has_layers: + write_dim('layer', 'i4', layer, { + 'long_name': 'Layer number', + }) write_profile('backscatter', 'f4', { 'long_name': 'Attenuated volume backscatter coefficient', 'units': 'km^-1.sr^-1', @@ -566,6 +656,12 @@ def write_dim(var, dtype, x, attributes={}): 'units': 'sr^-1', 'comment': 'Sum of detected and normalized backscatter', }) + write_var('ceilometer', 'SX', { + 'long_name': 'Ceilometer name', + }) + write_var('period', 'i4', { + 'long_name': 'Period', + }) write_layer('layer_height', 'i4', { 'long_name': 'Layer height', 'units': 'm', @@ -584,11 +680,11 @@ def write_dim(var, dtype, x, attributes={}): f.close() def main(): - parser = argparse.ArgumentParser(description='Convert Vaisala CL51 and CL31 dat files to NetCDF') + parser = argparse.ArgumentParser(description='Convert Vaisala CL51 and CL31 DAT and HIS L2 files to NetCDF') parser.add_argument('-c', dest='check', action='store_true', - help='enable checksum verification (slow)' + help='enable DAT checksum verification (slow)' ) parser.add_argument('-q', dest='quiet', @@ -612,7 +708,8 @@ def main(): if os.path.isdir(input_): for file_ in sorted([fsencode(x) for x in os.listdir(input_)]): - if not (file_.endswith(b'.dat') or file_.endswith(b'.DAT')): + file_lower = file_.lower() + if not (file_lower.endswith(b'.dat') or file_lower.endswith(b'.his')): continue input_filename = os.path.join(input_, file_) output_filename = os.path.join( @@ -622,7 +719,7 @@ def main(): if 
not args.quiet: print(fsdecode(input_filename)) try: - dd = read_input(input_filename, {'check': args.check}) + dd = read(input_filename, {'check': args.check}) if len(dd) > 0: write_output(dd, output_filename) else: @@ -632,7 +729,7 @@ def main(): log.debug(traceback.format_exc()) else: try: - dd = read_input(input_, {'check': args.check}) + dd = read(input_, {'check': args.check}) if len(dd) > 0: write_output(dd, output) else: diff --git a/setup.py b/setup.py index a5fa0e8..046e056 100644 --- a/setup.py +++ b/setup.py @@ -4,8 +4,8 @@ setup( name='cl2nc', - version='3.4.0', - description='Convert Vaisala CL51 and CL31 dat files to NetCDF', + version='3.5.0', + description='Convert Vaisala CL51 and CL31 DAT and HIS L2 files to NetCDF', author='Peter Kuma', author_email='peter@peterkuma.net', license='MIT',
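The HIS L2 time handling added in `read_his_time` and `postprocess` can be illustrated in isolation. The sketch below is not part of the patch: the sample timestamp is made up, and the named groups (`year` … `second`) follow the `g['year']`–`g['second']` lookups used above; `CREATEDATE` values are matched as `YYYY-MM-DD HH:MM:SS` and assumed to be UTC.

```python
# Standalone sketch of the CREATEDATE handling (read_his_time + postprocess).
import re
import datetime as dt

re_his_time = re.compile(
    rb'^(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d) '
    rb'(?P<hour>\d\d):(?P<minute>\d\d):(?P<second>\d\d)$'
)

def his_time_to_epoch(s):
    m = re_his_time.match(s)
    if m is None:
        raise ValueError('Invalid syntax for CREATEDATE field')
    g = m.groupdict()
    # Reformat to ISO 8601 (time_utc), then to seconds since the epoch (time).
    time_utc = b'%s-%s-%sT%s:%s:%s' % (
        g['year'], g['month'], g['day'], g['hour'], g['minute'], g['second'])
    return (
        dt.datetime.strptime(time_utc.decode('ascii'), '%Y-%m-%dT%H:%M:%S')
        - dt.datetime(1970, 1, 1)
    ).total_seconds()

print(his_time_to_epoch(b'2024-04-18 15:35:58'))  # -> 1713454558.0 (illustrative input)
```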
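The backscatter scaling applied in `postprocess` is shared by the DAT and HIS paths, so a quick arithmetic check of the formula is useful. The raw sample value below is made up; `scale` is whatever the record provides, with the new fallback of 10 when no scale field is present.

```python
# Worked example of the scaling in postprocess():
#   backscatter = raw/100000.0*(scale/100.0)   [km^-1 sr^-1]
raw = 12345.0                    # illustrative decoded sample value
for scale in (100, 10):          # 10 is the fallback when no scale field is present
    print(scale, raw/100000.0*(scale/100.0))
# -> approximately 0.12345 and 0.012345 km^-1 sr^-1
```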
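The `'SX'` pseudo-type added to `write_var` stores byte strings such as `ceilometer` as a fixed-width `'S%d'` dtype sized to the longest value. In plain NumPy terms (the ceilometer names below are illustrative):

```python
# Sketch of the 'SX' handling in write_output: choose a fixed-length
# byte-string dtype wide enough for the longest value in the column.
import numpy as np

values = [b'CL51', b'CL31-A']            # illustrative CEILOMETER values
slen = max(len(v) for v in values)
dtype = 'S%d' % slen                     # -> 'S6'
arr = np.array(values, dtype=dtype)
print(arr.dtype, arr)                    # |S6 [b'CL51' b'CL31-A']
```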