Skip to content

Commit

Permalink
feat(opl/data.py): Provide and use non-NumPy percentile function
Browse files Browse the repository at this point in the history
  • Loading branch information
pablomh committed Oct 2, 2023
1 parent a7e81f9 commit 084246f
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 10 deletions.
28 changes: 23 additions & 5 deletions core/opl/data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import logging
import math
import statistics
import time
import numpy
Expand Down Expand Up @@ -138,16 +139,33 @@ def process(self):
return found_in_total


def percentile(data, percent):
    """Return the ``percent``-th percentile of ``data`` by linear interpolation.

    The fractional rank is ``(len(data) - 1) * percent / 100``. When the rank
    is a whole number the value at that position of the sorted data is
    returned; otherwise the two neighbouring values are blended according to
    how close the rank is to each of them.

    Args:
        data: Sequence of numbers. It is left untouched; the computation
            runs on a sorted copy.
        percent: Requested percentile (e.g. ``25``, ``99.9``). The value is
            not validated here.

    Returns:
        The percentile value, or ``None`` when ``data`` is empty.
    """
    if not data:
        return None

    # Sort a copy: an in-place sort would reorder the caller's list as a
    # side effect and would fail for sequences without ``.sort()`` (tuples).
    ordered = sorted(data)

    k = (len(ordered) - 1) * percent / 100
    # Python 2.x returns float for floor and ceil, so cast to int
    f = int(math.floor(k))
    c = int(math.ceil(k))
    if f == c:
        # The rank lands exactly on an element; no interpolation needed.
        return ordered[f]

    # c - f == 1 here, so the two weights (c - k) and (k - f) sum to 1.
    d0 = ordered[f] * (c - k)
    d1 = ordered[c] * (k - f)
    return d0 + d1


def data_stats(data):
if len(data) == 0:
return {"samples": 0}
non_zero_data = [i for i in data if i != 0]
if isinstance(data[0], int) or isinstance(data[0], float):
q25 = numpy.percentile(data, 25)
q75 = numpy.percentile(data, 75)
q90 = numpy.percentile(data, 90)
q99 = numpy.percentile(data, 99)
q999 = numpy.percentile(data, 99.9)
q25 = percentile(data, 25)
q75 = percentile(data, 75)
q90 = percentile(data, 90)
q99 = percentile(data, 99)
q999 = percentile(data, 99.9)
return {
"samples": len(data),
"min": min(data),
Expand Down
28 changes: 23 additions & 5 deletions opl/data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import logging
import math
import statistics
import time
import numpy
Expand Down Expand Up @@ -138,16 +139,33 @@ def process(self):
return found_in_total


def percentile(data, percent):
    """Return the ``percent``-th percentile of ``data`` by linear interpolation.

    The fractional rank is ``(len(data) - 1) * percent / 100``. When the rank
    is a whole number the value at that position of the sorted data is
    returned; otherwise the two neighbouring values are blended according to
    how close the rank is to each of them.

    Args:
        data: Sequence of numbers. It is left untouched; the computation
            runs on a sorted copy.
        percent: Requested percentile (e.g. ``25``, ``99.9``). The value is
            not validated here.

    Returns:
        The percentile value, or ``None`` when ``data`` is empty.
    """
    if not data:
        return None

    # Sort a copy: an in-place sort would reorder the caller's list as a
    # side effect and would fail for sequences without ``.sort()`` (tuples).
    ordered = sorted(data)

    k = (len(ordered) - 1) * percent / 100
    # Python 2.x returns float for floor and ceil, so cast to int
    f = int(math.floor(k))
    c = int(math.ceil(k))
    if f == c:
        # The rank lands exactly on an element; no interpolation needed.
        return ordered[f]

    # c - f == 1 here, so the two weights (c - k) and (k - f) sum to 1.
    d0 = ordered[f] * (c - k)
    d1 = ordered[c] * (k - f)
    return d0 + d1


def data_stats(data):
if len(data) == 0:
return {"samples": 0}
non_zero_data = [i for i in data if i != 0]
if isinstance(data[0], int) or isinstance(data[0], float):
q25 = numpy.percentile(data, 25)
q75 = numpy.percentile(data, 75)
q90 = numpy.percentile(data, 90)
q99 = numpy.percentile(data, 99)
q999 = numpy.percentile(data, 99.9)
q25 = percentile(data, 25)
q75 = percentile(data, 75)
q90 = percentile(data, 90)
q99 = percentile(data, 99)
q999 = percentile(data, 99.9)
return {
"samples": len(data),
"min": min(data),
Expand Down

0 comments on commit 084246f

Please sign in to comment.