From 28b83afc8c1adb976e746e278316a79326d87398 Mon Sep 17 00:00:00 2001 From: ahothan Date: Sat, 6 Jun 2020 10:13:19 -0700 Subject: [PATCH] Add support for tagged logs This patch addresses https://github.com/HdrHistogram/HdrHistogram_py/issues/27 Change-Id: Ib9c133ba8f7ebfa2ff60017f882475f70cf0b2d6 --- hdrh/codec.py | 4 -- hdrh/dump.py | 16 ++++++-- hdrh/histogram.py | 77 +++++++++++++++++++++++++++++--------- hdrh/log.py | 12 ++++++ pylint.rc | 6 +-- test-requirements.txt | 1 + test/tagged-Log.logV2.hlog | 46 +++++++++++++++++++++++ test/test_hdrhistogram.py | 54 ++++++++++++++++++++++++-- tox.ini | 6 ++- 9 files changed, 188 insertions(+), 34 deletions(-) create mode 100644 test/tagged-Log.logV2.hlog diff --git a/hdrh/codec.py b/hdrh/codec.py index e6f2e3b..3c3dd9d 100644 --- a/hdrh/codec.py +++ b/hdrh/codec.py @@ -388,7 +388,3 @@ def dump_payload(counts, max_index): # there is always a last series to dump _dump_series(series_start_index, index, counts[index]) print('[%06d] --END-- total count=%d' % (index + 1, total_count)) - -def hex_dump(label, hstr): - print(label) - print(':'.join(x.encode('hex') for x in hstr)) diff --git a/hdrh/dump.py b/hdrh/dump.py index c535784..4c58e95 100644 --- a/hdrh/dump.py +++ b/hdrh/dump.py @@ -20,17 +20,25 @@ from hdrh.histogram import HdrHistogram - -def main(): - args = sys.argv[1:] +def dump(args=None): + """ + Dump a list of Hdr histograms encodings + + args: list of strings, each string representing an Hdr encoding + """ + if not args: + args = sys.argv[1:] + res = 1 if args: encoded_histograms = args for hdrh in encoded_histograms: print('\nDumping histogram: ' + hdrh + '\n') HdrHistogram.dump(hdrh) + res = 0 else: print('\nUsage: %s []*\n' % (sys.argv[0])) + return res if __name__ == '__main__': - main() + sys.exit(dump()) diff --git a/hdrh/histogram.py b/hdrh/histogram.py index a17a0a6..4d0239c 100644 --- a/hdrh/histogram.py +++ b/hdrh/histogram.py @@ -136,6 +136,8 @@ def __init__(self, self.counts = self.encoder.get_counts() self.start_time_stamp_msec = 0 self.end_time_stamp_msec = 0 + # no tag by default + self.tag = None def _clz(self, value): """calculate the leading zeros, equivalent to C __builtin_clzll() @@ -421,24 +423,24 @@ def encode(self): ''' return self.encoder.encode() - def adjust_internal_tacking_values(self, - min_non_zero_index, - max_index, - total_added): - '''Called during decoding and add to adjust the new min/max value and - total count - - Args: - min_non_zero_index min nonzero index of all added counts (-1 if none) - max_index max index of all added counts (-1 if none) - ''' - if max_index >= 0: - max_value = self.get_highest_equivalent_value(self.get_value_from_index(max_index)) - self.max_value = max(self.max_value, max_value) - if min_non_zero_index >= 0: - min_value = self.get_value_from_index(min_non_zero_index) - self.min_value = min(self.min_value, min_value) - self.total_count += total_added + # def adjust_internal_tacking_values(self, + # min_non_zero_index, + # max_index, + # total_added): + # '''Called during decoding and add to adjust the new min/max value and + # total count + + # Args: + # min_non_zero_index min nonzero index of all added counts (-1 if none) + # max_index max index of all added counts (-1 if none) + # ''' + # if max_index >= 0: + # max_value = self.get_highest_equivalent_value(self.get_value_from_index(max_index)) + # self.max_value = max(self.max_value, max_value) + # if min_non_zero_index >= 0: + # min_value = self.get_value_from_index(min_non_zero_index) + # self.min_value = min(self.min_value, min_value) + # self.total_count += total_added def set_internal_tacking_values(self, min_non_zero_index, @@ -496,6 +498,18 @@ def set_end_time_stamp(self, time_stamp_msec): ''' self.end_time_stamp_msec = time_stamp_msec + def set_tag(self, tag): + """ + Associate a tag to this histogram. + """ + self.tag = tag + + def get_tag(self): + """ + Retrieve the tag for this histogram. + """ + return self.tag + def add(self, other_hist): highest_recordable_value = \ self.get_highest_equivalent_value(self.get_value_from_index(self.counts_len - 1)) @@ -633,3 +647,30 @@ def dump(encoded_histogram, output=None, output = sys.stdout histogram.output_percentile_distribution(output, output_value_unit_scaling_ratio) + + def equals(self, other): + """ + Check if this instance of histogram is equal to the provided histogram. + + other: the other histogram to compare to + return: True if this histogram are equivalent with the other. + """ + if self == other: + return True + if self.lowest_trackable_value != other.lowest_trackable_value or \ + self.int_to_double_conversion_ratio != other.int_to_double_conversion_ratio or \ + self.significant_figures != other.significant_figures: + return False + if self.get_total_count() != other.get_total_count(): + return False + if self.get_max_value() != other.get_max_value(): + return False + # if (getMinNonZeroValue() != that.getMinNonZeroValue()) { + if self.get_min_value() != other.get_min_value(): + return False + if self.counts_len != other.counts_len: + return False + for index in range(self.counts_len): + if self.get_count_at_index(index) != other.get_count_at_index(index): + return False + return True diff --git a/hdrh/log.py b/hdrh/log.py index e1cd273..c84765e 100644 --- a/hdrh/log.py +++ b/hdrh/log.py @@ -138,6 +138,9 @@ def close(self): # "#[BaseTime: %f (seconds since epoch)]\n" re_base_time = re.compile(r'#\[BaseTime: *([\d\.]*) ') + +# 0.127,1.007,2.769,HISTFAAAAEV42pNpmSz... +# Tag=A,0.127,1.007,2.769,HISTFAAAAEV42pNpmSz # "%f,%f,%f,%s\n" re_histogram_interval = re.compile(r'([\d\.]*),([\d\.]*),([\d\.]*),(.*)') @@ -231,6 +234,13 @@ def _decode_next_interval_histogram(self, self.observed_base_time = True continue + # check tag "Tag=<>," + if line.startswith('Tag='): + index = line.find(',') + tag = line[4:index] + line = line[index + 1:] + else: + tag = None match_res = re_histogram_interval.match(line) if not match_res: # probably a legend line that starts with "\"StartTimestamp" @@ -286,6 +296,8 @@ def _decode_next_interval_histogram(self, histogram = HdrHistogram.decode(cpayload) histogram.set_start_time_stamp(absolute_start_time_stamp_sec * 1000.0) histogram.set_end_time_stamp(absolute_end_time_stamp_sec * 1000.0) + if tag: + histogram.set_tag(tag) return histogram def get_next_interval_histogram(self, diff --git a/pylint.rc b/pylint.rc index dc8fafc..bf12130 100644 --- a/pylint.rc +++ b/pylint.rc @@ -373,13 +373,13 @@ max-locals=20 max-parents=7 # Maximum number of public methods for a class (see R0904). -max-public-methods=40 +max-public-methods=50 # Maximum number of return / yield for function / method body -max-returns=6 +max-returns=10 # Maximum number of statements in function / method body -max-statements=50 +max-statements=100 # Minimum number of public methods for a class (see R0903). min-public-methods=0 diff --git a/test-requirements.txt b/test-requirements.txt index 97810e6..c84290a 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,3 +1,4 @@ pytest>=2.7.2 flake8>=2.3.0 pylint>=1.7.1 +pytest-cov>=2.9 diff --git a/test/tagged-Log.logV2.hlog b/test/tagged-Log.logV2.hlog new file mode 100644 index 0000000..520f41d --- /dev/null +++ b/test/tagged-Log.logV2.hlog @@ -0,0 +1,46 @@ +#[Logged with jHiccup version 2.0.7-SNAPSHOT, manually edited to duplicate contents with Tag=A] +#[Histogram log format version 1.2] +#[StartTime: 1441812279.474 (seconds since epoch), Wed Sep 09 08:24:39 PDT 2015] +"StartTimestamp","Interval_Length","Interval_Max","Interval_Compressed_Histogram" +0.127,1.007,2.769,HISTFAAAAEV42pNpmSzMwMCgyAABTBDKT4GBgdnNYMcCBvsPEBEJISEuATEZMQ4uASkhIR4nrxg9v2lMaxhvMekILGZkKmcCAEf2CsI= +Tag=A,0.127,1.007,2.769,HISTFAAAAEV42pNpmSzMwMCgyAABTBDKT4GBgdnNYMcCBvsPEBEJISEuATEZMQ4uASkhIR4nrxg9v2lMaxhvMekILGZkKmcCAEf2CsI= +1.134,0.999,0.442,HISTFAAAAEJ42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBEWLj45FTExAT4pBSEBKa6UkAgBi1uM7xjfMMlwMDABAC0CCjM= +Tag=A,1.134,0.999,0.442,HISTFAAAAEJ42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBEWLj45FTExAT4pBSEBKa6UkAgBi1uM7xjfMMlwMDABAC0CCjM= +2.133,1.001,0.426,HISTFAAAAD942pNpmSzMwMAgwwABTBDKT4GBgdnNYMcCBvsPEBE+Ph4OLgk5OSkeIS4+LgEeswIDo1+MbmdYNASYAA51CSo= +Tag=A,2.133,1.001,0.426,HISTFAAAAD942pNpmSzMwMAgwwABTBDKT4GBgdnNYMcCBvsPEBE+Ph4OLgk5OSkeIS4+LgEeswIDo1+MbmdYNASYAA51CSo= +3.134,1.001,0.426,HISTFAAAAD942pNpmSzMwMAgwwABTBDKT4GBgdnNYMcCBvsPEBExPiEpITEFGTkRKSEeOR6FkCg1hTeMXvNYlHhYABQ5CTo= +Tag=A,3.134,1.001,0.426,HISTFAAAAD942pNpmSzMwMAgwwABTBDKT4GBgdnNYMcCBvsPEBExPiEpITEFGTkRKSEeOR6FkCg1hTeMXvNYlHhYABQ5CTo= +4.135,0.997,0.426,HISTFAAAAD942pNpmSzMwMAgwwABTBDKT4GBgdnNYMcCBvsPEBE2PiERBREpBREhER4+Hj4uvQAdrTlMBldYDDhYAAugCKk= +Tag=A,4.135,0.997,0.426,HISTFAAAAD942pNpmSzMwMAgwwABTBDKT4GBgdnNYMcCBvsPEBE2PiERBREpBREhER4+Hj4uvQAdrTlMBldYDDhYAAugCKk= +5.132,1.002,0.426,HISTFAAAAEF42pNpmSzMwMAgywABTBDKT4GBgdnNYMcCBvsPEBEWPhElOR4pARUpKTkpGQkxq2mMegZnGI0+MZuIcAEAHo8Jvw== +Tag=A,5.132,1.002,0.426,HISTFAAAAEF42pNpmSzMwMAgywABTBDKT4GBgdnNYMcCBvsPEBEWPhElOR4pARUpKTkpGQkxq2mMegZnGI0+MZuIcAEAHo8Jvw== +6.134,0.999,0.442,HISTFAAAAEF42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBEWIS4FITEhDiEJERE+GT6ZkhZGLbl7jEqrWHREmFgAIbAJMw== +Tag=A,6.134,0.999,0.442,HISTFAAAAEF42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBEWIS4FITEhDiEJERE+GT6ZkhZGLbl7jEqrWHREmFgAIbAJMw== +7.133,0.999,0.459,HISTFAAAAEJ42pNpmSzMwMCgwAABTBDKD8hndjPYsYDB/gNEhEtMQEBBTk5MQERCRkBEQEWlh9FJbg9jE+MS5ig1LhYmADkkCcE= +Tag=A,7.133,0.999,0.459,HISTFAAAAEJ42pNpmSzMwMCgwAABTBDKD8hndjPYsYDB/gNEhEtMQEBBTk5MQERCRkBEQEWlh9FJbg9jE+MS5ig1LhYmADkkCcE= +8.132,1.000,0.459,HISTFAAAAEB42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBEWIREgEOIQEuGT4xHg41Oo0pIqu8LYwVImwMfGBAAfkgkw +Tag=A,8.132,1.000,0.459,HISTFAAAAEB42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBEWIREgEOIQEuGT4xHg41Oo0pIqu8LYwVImwMfGBAAfkgkw +9.132,1.751,1551.892,HISTFAAAAJZ42pNpmSzMwMB0nQECmCCUnwIDA7ObwY4FDPYfYDJMXFxsbGwMbBwszDwsDDxsHFw6RWJMLJMZmcqBMJrJmskSiA2ZZJmkgRBCgmheIORGI1H5rEzMQAyDzFhY2EWRWUwMWCBxQtQQhAIWJiyAaEHyFbKwsLHAADYWAWmiFeKS5gACLsIEzdQICAgBIQShEfhFABXDF+M= +Tag=A,9.132,1.751,1551.892,HISTFAAAAJZ42pNpmSzMwMB0nQECmCCUnwIDA7ObwY4FDPYfYDJMXFxsbGwMbBwszDwsDDxsHFw6RWJMLJMZmcqBMJrJmskSiA2ZZJmkgRBCgmheIORGI1H5rEzMQAyDzFhY2EWRWUwMWCBxQtQQhAIWJiyAaEHyFbKwsLHAADYWAWmiFeKS5gACLsIEzdQICAgBIQShEfhFABXDF+M= +10.883,0.250,0.426,HISTFAAAAD142pNpmSzMwMAgxQABTBDKT4GBgdnNYMcCBvsPEBEeFi4mPg4WLhY2BjY2FhYOBSkpASEtoRA+NgDkCQZR +Tag=A,10.883,0.250,0.426,HISTFAAAAD142pNpmSzMwMAgxQABTBDKT4GBgdnNYMcCBvsPEBEeFi4mPg4WLhY2BjY2FhYOBSkpASEtoRA+NgDkCQZR +11.133,1.003,0.524,HISTFAAAAER42pNpmSzMwMCgyAABTBDKT4GBgdnNYMcCBvsPUBk2HgkZKREpEQUeGSEBAQ6xSYxhCnp7GJ02sWgJsbCwMgEAO0AJSQ== +Tag=A,11.133,1.003,0.524,HISTFAAAAER42pNpmSzMwMCgyAABTBDKT4GBgdnNYMcCBvsPUBk2HgkZKREpEQUeGSEBAQ6xSYxhCnp7GJ02sWgJsbCwMgEAO0AJSQ== +12.136,0.997,0.459,HISTFAAAAEB42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPUBk2AT4eCQURHgkuEREOHjERlSQhhWuMSV9Y7ERYWAAa4gko +Tag=A,12.136,0.997,0.459,HISTFAAAAEB42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPUBk2AT4eCQURHgkuEREOHjERlSQhhWuMSV9Y7ERYWAAa4gko +13.133,0.998,0.459,HISTFAAAAD942pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPMBkRIR4RMRk5KQE+PgEhMRmzEjWZJ4whW1hMBNiYAB42CTA= +Tag=A,13.133,0.998,0.459,HISTFAAAAD942pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPMBkRIR4RMRk5KQE+PgEhMRmzEjWZJ4whW1hMBNiYAB42CTA= +14.131,1.000,0.492,HISTFAAAAEN42pNpmSzMwMCgyAABTBDKT4GBgdnNYMcCBvsPUBkWFhE5GT4FKQkRCR4ZCREpqwmMBhpHGG16WHx42JgYmAA6swk+ +Tag=A,14.131,1.000,0.492,HISTFAAAAEN42pNpmSzMwMCgyAABTBDKT4GBgdnNYMcCBvsPUBkWFhE5GT4FKQkRCR4ZCREpqwmMBhpHGG16WHx42JgYmAA6swk+ +15.131,1.001,0.442,HISTFAAAAD542pNpmSzMwMAgywABTBDKT4GBgdnNYMcCBvsPMBkuMTEFHgklFRkRATkJERGdKgudfYwRTSwGalwAF2IJOw== +Tag=A,15.131,1.001,0.442,HISTFAAAAD542pNpmSzMwMAgywABTBDKT4GBgdnNYMcCBvsPMBkuMTEFHgklFRkRATkJERGdKgudfYwRTSwGalwAF2IJOw== +16.132,1.001,0.524,HISTFAAAAEZ42pNpmSzMwMCgxAABTBDKT4GBgdnNYMcCBvsPEBE2IQEFCQkpGREpHj4hKS6NU4z7GDMkuBoYDSYw2wiwMLEyAQBQ3wne +Tag=A,16.132,1.001,0.524,HISTFAAAAEZ42pNpmSzMwMCgxAABTBDKT4GBgdnNYMcCBvsPEBE2IQEFCQkpGREpHj4hKS6NU4z7GDMkuBoYDSYw2wiwMLEyAQBQ3wne +17.133,0.998,0.459,HISTFAAAAEB42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPUBk2DjElIR4RHiExKQE5IT61iCodtXWMdn0sKVJMTAAekAk0 +Tag=A,17.133,0.998,0.459,HISTFAAAAEB42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPUBk2DjElIR4RHiExKQE5IT61iCodtXWMdn0sKVJMTAAekAk0 +18.131,1.000,0.459,HISTFAAAAEF42pNpmSzMwMAgzwABTBDKT4GBgdnNYMcCBvsPUBkWISERJSUJESklHhEJEREhqwZGLakPjDZdLBYCHCwAKOkJPg== +Tag=A,18.131,1.000,0.459,HISTFAAAAEF42pNpmSzMwMAgzwABTBDKT4GBgdnNYMcCBvsPUBkWISERJSUJESklHhEJEREhqwZGLakPjDZdLBYCHCwAKOkJPg== +19.131,1.000,0.475,HISTFAAAAEF42pNpmSzMwMAgzwABTBDKT4GBgdnNYMcCBvsPUAk2HjkJBSk+Pi4BMT4xIQE9pxIluTOMPhtYbITY2JgAKLoJOQ== +Tag=A,19.131,1.000,0.475,HISTFAAAAEF42pNpmSzMwMAgzwABTBDKT4GBgdnNYMcCBvsPUAk2HjkJBSk+Pi4BMT4xIQE9pxIluTOMPhtYbITY2JgAKLoJOQ== +20.131,1.004,0.475,HISTFAAAAEF42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBFmPhEJOSEhDi4+ETEeASEhswIVi1+MFjtYvCRYGJgAIP8JNw== +Tag=A,20.131,1.004,0.475,HISTFAAAAEF42pNpmSzMwMAgxwABTBDKT4GBgdnNYMcCBvsPEBFmPhEJOSEhDi4+ETEeASEhswIVi1+MFjtYvCRYGJgAIP8JNw== diff --git a/test/test_hdrhistogram.py b/test/test_hdrhistogram.py index 469e92f..7236ce6 100644 --- a/test/test_hdrhistogram.py +++ b/test/test_hdrhistogram.py @@ -48,6 +48,7 @@ from hdrh.log import HistogramLogReader from hdrh.codec import HdrPayload from hdrh.codec import HdrCookieException +from hdrh.dump import dump def python_bitness(): "cross-platform way of calculating bitness, returns either 32 or 64" @@ -72,6 +73,8 @@ def test_basic(): assert histogram.counts_len == expected_counts_len assert histogram.unit_magnitude == 0 assert histogram.sub_bucket_half_count_magnitude == 10 + assert histogram.get_count_at_sub_bucket(0, 0) == 0 + assert histogram.equals(histogram) @pytest.mark.basic def test_empty_histogram(): @@ -296,6 +299,13 @@ def test_reset(): assert histogram.get_value_at_percentile(99.99) == 0 assert histogram.get_start_time_stamp() == sys.maxsize assert histogram.get_end_time_stamp() == 0 + print(histogram.get_percentile_to_value_dict([90, 99, 99.9])) + +@pytest.mark.basic +def test_perc_value_list(): + histogram = load_histogram() + res = {90: 1000, 99.999: 100007935} + assert histogram.get_percentile_to_value_dict(res.keys()) == res @pytest.mark.basic def test_invalid_significant_figures(): @@ -366,6 +376,10 @@ def check_hdr_payload(counter_size): # now verify that the counters are identical to the original check_counts(dpayload, HDR_PAYLOAD_COUNTS) + # run dump + payload.dump(label='test') + + @pytest.mark.codec def test_hdr_payload(): # Check the payload work in all 3 supported counter sizes @@ -625,6 +639,39 @@ def test_jHiccup_v2_log(): log_reader.close() + +TAGGED_V2_LOG = 'test/tagged-Log.logV2.hlog' +@pytest.mark.log +def test_tagged_v2_log(): + histogram_count = 0 + total_count = 0 + accumulated_histogram = HdrHistogram(LOWEST, HIGHEST, SIGNIFICANT) + accumulated_histogram_tags = HdrHistogram(LOWEST, HIGHEST, SIGNIFICANT) + log_reader = HistogramLogReader(TAGGED_V2_LOG, accumulated_histogram) + while 1: + decoded_histogram = log_reader.get_next_interval_histogram() + if not decoded_histogram: + break + histogram_count += 1 + total_count += decoded_histogram.get_total_count() + if decoded_histogram.get_tag() == 'A': + accumulated_histogram_tags.add(decoded_histogram) + else: + assert decoded_histogram.get_tag() is None + accumulated_histogram.add(decoded_histogram) + + assert accumulated_histogram.equals(accumulated_histogram_tags) + assert total_count == 32290 + +@pytest.mark.log +def test_tagged_v2_log_add(): + accumulated_histogram = HdrHistogram(LOWEST, HIGHEST, SIGNIFICANT) + log_reader = HistogramLogReader(TAGGED_V2_LOG, accumulated_histogram) + while 1: + decoded_histogram = log_reader.add_next_interval_histogram() + if not decoded_histogram: + break + @pytest.mark.log def test_output_percentile_distribution(): histogram = load_histogram() @@ -818,10 +865,6 @@ def test_zz_decode(): for hdr_len in [0, 8]: check_zz_decode(int_type, hdr_len) -def hex_dump(label, str): - print(label) - print(':'.join(x.encode('hex') for x in str)) - @pytest.mark.basic def test_get_value_at_percentile(): histogram = HdrHistogram(LOWEST, 3600000000, 3) @@ -836,3 +879,6 @@ def test_get_value_at_percentile(): # val = histogram.get_value_at_percentile(25) # assert histogram.get_value_at_percentile(25) == 2 assert histogram.get_value_at_percentile(30) == 2 + +def test_dump(): + dump(['HISTFAAAACl4nJNpmSzMwMBgyAABzFCaEURcm7yEwf4DROA8/4I5jNM7mJgAlWkH9g==']) diff --git a/tox.ini b/tox.ini index bff1fe9..8af41ff 100644 --- a/tox.ini +++ b/tox.ini @@ -8,10 +8,14 @@ commands = flake8 hdrh test commands = pylint --rcfile pylint.rc hdrh test [testenv] +setenv = + LANG=en_US.UTF-8 + LC_ALL=en_US.UTF-8 deps = -r{toxinidir}/requirements.txt -r{toxinidir}/test-requirements.txt commands= - py.test -q --basetemp={envtmpdir} {posargs} + {posargs:pytest --cov=hdrh --cov-report=term-missing -vv test} + [flake8] show-source = True