diff --git a/midas/utility.py b/midas/utility.py index 2cfbfd1..873d5fc 100755 --- a/midas/utility.py +++ b/midas/utility.py @@ -269,6 +269,14 @@ def iopen(inpath, mode='r'): # Python2 if sys.version_info[0] == 2: if ext == 'gz': return gzip.open(inpath, mode) + # I usually recommend lz4 over gz and bz2, but if we must use one + # of these CPU-intensive algorithms, it's best not to have + # it run on the same core as the Python script. This can be achieved + # using the approach of the smarter_open function in this example: + # https://github.com/chanzuckerberg/idseq-bench/blob/master/util.py + # In addition, that approach can stream files from AWS S3 or another + # machine without having to copy them to an attached filesystem, + # which avoids further performance problems. elif ext == 'bz2': return bz2.BZ2File(inpath, mode) else: return open(inpath, mode) # Python3