
Commit

Created test - resolving issue #2
Cleaned up the code.
Still no runner for GitHub workflows.
3nthusia5t committed May 8, 2024
1 parent dccdc30 commit 3f938cd
Showing 3 changed files with 57 additions and 35 deletions.
README.md (7 changes: 6 additions & 1 deletion)

@@ -7,7 +7,7 @@ Around 10% faster than solution for single file and a lot faster for multiple files
 from scipy.stats import entropy
 import numpy as np
 
-def entropy1(labels, base=None):
+def entropy(labels, base=None):
     labels = np.frombuffer(labels, dtype=np.uint8)
 
     value,counts = np.unique(labels, return_counts=True)
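For reference, the diff view truncates the README snippet at this point. A complete version of the renamed function might look like the sketch below; the return line is an assumption based on the imports shown, not part of the diff, and the scipy import is aliased because the renamed def would otherwise shadow it:

    import numpy as np
    from scipy.stats import entropy as scipy_entropy

    def entropy(labels, base=None):
        # Interpret the raw bytes as unsigned 8-bit integers.
        labels = np.frombuffer(labels, dtype=np.uint8)
        # Count how often each distinct byte value occurs.
        value, counts = np.unique(labels, return_counts=True)
        # scipy.stats.entropy normalizes the counts to probabilities;
        # pass base=2 to get the result in bits.
        return scipy_entropy(counts, base=base)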
@@ -27,6 +27,11 @@ It took 10522.091444253922 seconds to complete the processing of all 200k malware
 The malware was stored on network attached storage, which has greatly impacted the I/O performance.
 
 
+## Testing
+
+Currently, tests cannot be performed on GitHub Actions, as there is no Nvidia GPU available.
+If it becomes possible, I will create a self-hosted runner in the future.
+
 ## Remarks
 
 Code is not optimized and cleaned yet.
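The new Testing section notes that GitHub-hosted runners have no Nvidia GPU. Until a self-hosted runner exists, one option (a sketch, not part of this commit) is to skip the GPU tests wherever CUDA is unavailable, so the rest of the suite can still run:

    import unittest
    from numba import cuda

    # Skip GPU-dependent tests on machines without a CUDA device,
    # e.g. GitHub-hosted runners.
    @unittest.skipUnless(cuda.is_available(), "requires an Nvidia GPU")
    class TestEntropyWithCUDA(unittest.TestCase):
        def test_gpu_present(self):
            # Illustrative placeholder test, only reached when a GPU exists.
            self.assertTrue(cuda.is_available())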
entro.py (48 changes: 15 additions & 33 deletions)

@@ -15,8 +15,6 @@ def calculate_histogram(data, hist_out):
     tx = cuda.threadIdx.x
 
     local_hist[tx] = 0
-    cuda.syncthreads()
-
 
     idx = cuda.grid(1)
     stride = cuda.gridsize(1)
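The surviving kernel lines set up a grid-stride loop: each thread starts at its global index (cuda.grid(1)) and advances by the total number of threads in the grid (cuda.gridsize(1)), so a fixed-size grid can walk an input of any length. A minimal standalone sketch of the pattern, not taken from this repo:

    import numpy as np
    from numba import cuda

    @cuda.jit
    def double_all(arr):
        # Each thread handles elements idx, idx + stride, idx + 2*stride, ...
        idx = cuda.grid(1)
        stride = cuda.gridsize(1)
        for i in range(idx, arr.shape[0], stride):
            arr[i] *= 2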
@@ -30,16 +28,19 @@ def calculate_histogram(data, hist_out):


 @cuda.jit
-def calculate_entropy(hist, total_pixels, entropy_out):
+def calculate_entropy(hist, data_size, entropy_out):
     idx = cuda.grid(1)
     stride = cuda.gridsize(1)
     for i in range(idx, hist.shape[0], stride):
-        prob = hist[i] / total_pixels
+        prob = hist[i] / data_size
         if prob != 0:
             entropy_out[i] = -prob * math.log2(prob)
+        else:
+            # Some small, unimportant number
+            entropy_out[i] = -0.000001 * math.log2(0.000001)
 
 
 # TODO: implement it properly.
 @cuda.jit
 def sum_array(arr, result):
     local_mem = cuda.shared.array(256, dtype=np.float32)
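For reference, each calculate_entropy thread computes one term of the Shannon entropy over the 256 byte-value bins, which the host sums afterwards:

    H = -\sum_{i=0}^{255} p_i \log_2 p_i, \qquad p_i = \frac{h_i}{N}

where h_i is the count in histogram bin i and N is the total number of bytes. Mathematically an empty bin contributes 0 (p log2 p tends to 0 as p tends to 0); the else branch instead writes a tiny constant term of about 2e-5, which introduces a small bias per empty bin.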
@@ -65,24 +66,21 @@ def sum_array(arr, result):

     cuda.syncthreads()
 
-def entropy_with_cuda(data):
-
-    total_pixels = len(data)
+def entropy_with_cuda(data):
+
+    data_size = len(data)
     data_gpu = cuda.to_device(np.frombuffer(data, dtype=np.uint8))
 
-    cuda.synchronize()
-    hist_host = np.zeros(256, dtype=np.uint32)
-    #hist_out = cuda.device_array(256, dtype=np.uint32)
-    # Initialize histogram array to zeros
-    #cuda.device_array_like(hist_out, fill_value=0)
 
+    # Initialize hist with 0. For some reason numba.cuda.device_array didn't work as expected.
+    hist_host = np.zeros(256, dtype=np.uint32)
     hist_out = cuda.to_device(hist_host)
     cuda.synchronize()
 
     threadsperblock_hist = 256
     blockspergrid_hist = min((len(data) + (threadsperblock_hist - 1)) // threadsperblock_hist, 1024)
     calculate_histogram[blockspergrid_hist, threadsperblock_hist](data_gpu, hist_out)
 
 
     del data_gpu
     cuda.synchronize()
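The new comment about numba.cuda.device_array is consistent with its documented behavior: it allocates device memory without initializing it, so it cannot be relied on to start at zero. A short sketch of the difference (the variable names are illustrative, and it is an assumption that this is why the commented-out path "didn't work"):

    import numpy as np
    from numba import cuda

    # Uninitialized allocation: contents are whatever was in device memory.
    hist_uninit = cuda.device_array(256, dtype=np.uint32)

    # Zero-filled histogram: build zeros on the host, then copy to the device.
    hist_zeroed = cuda.to_device(np.zeros(256, dtype=np.uint32))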

@@ -91,31 +89,15 @@ def entropy_with_cuda(data):

     threadsperblock_entropy = 256
     blockspergrid_entropy = min((hist_out.size + (threadsperblock_entropy - 1)) // threadsperblock_entropy, 1024)
-    calculate_entropy[blockspergrid_entropy, threadsperblock_entropy](hist_out, total_pixels, entropy_out_gpu)
+    calculate_entropy[blockspergrid_entropy, threadsperblock_entropy](hist_out, data_size, entropy_out_gpu)
 
     cuda.synchronize()
     del hist_out
 
-    result = cuda.device_array(blockspergrid_entropy, dtype=np.float32)
-
-    cuda.synchronize()
-
-    sum_array[blockspergrid_entropy, threadsperblock_entropy](entropy_out_gpu, result)
-
-
-    cuda.synchronize()
-    del entropy_out_gpu
-
-
-    entropy_sum = result.copy_to_host()
-
-    del result
+    local_entropies = entropy_out_gpu.copy_to_host()
 
 
     cuda.synchronize()
 
-    #todo: remove sum() make it parrarel
-    return entropy_sum.sum()
+    return local_entropies.sum()
 
 def is_supported_cuda():
     return cuda.is_available() and cuda.detect()
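The commit retires the half-finished sum_array call (still marked TODO) in favor of copying the 256 per-bin terms back and summing on the host, which is cheap at this size. If the GPU reduction is ever revisited, a standard shared-memory tree reduction might look like the sketch below; block_sum and partial_sums are hypothetical names, not from this repo:

    import numpy as np
    from numba import cuda

    @cuda.jit
    def block_sum(arr, partial_sums):
        # Each block reduces up to blockDim.x elements in shared memory.
        shared = cuda.shared.array(256, dtype=np.float32)
        tx = cuda.threadIdx.x
        i = cuda.grid(1)
        shared[tx] = arr[i] if i < arr.shape[0] else 0.0
        cuda.syncthreads()
        # Pairwise tree reduction: halve the active threads each step.
        step = cuda.blockDim.x // 2
        while step > 0:
            if tx < step:
                shared[tx] += shared[tx + step]
            cuda.syncthreads()
            step //= 2
        if tx == 0:
            partial_sums[cuda.blockIdx.x] = shared[0]

The host then sums the much smaller partial_sums array, or the kernel is applied repeatedly until a single value remains.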
test_entro.py (37 changes: 36 additions & 1 deletion)

@@ -4,9 +4,10 @@
 import math
 from scipy.stats import entropy
 from numba.core.errors import NumbaPerformanceWarning
+import time
 
 # Functions to test
-from entro import calculate_histogram, calculate_entropy
+from entro import calculate_histogram, calculate_entropy, entropy_with_cuda
 
 class TestCalculateHistogram(unittest.TestCase):

@@ -91,5 +92,39 @@ def test_random_histograms(self):
         del entropy_out_gpu
 
 
+class TestEntropyWithCUDA(unittest.TestCase):
+    def test_entropy_calculation(self):
+        # Test case for correctness of entropy calculation
+        for i in range(1, 100):
+            data = np.random.randint(0, 256, size=1000, dtype=np.uint8)
+            entropy_expected = self.calculate_entropy_numpy(data)
+            entropy_actual = entropy_with_cuda(data)
+            np.testing.assert_almost_equal(entropy_actual, entropy_expected, decimal=3)
+
+    def test_performance(self):
+        # Test case for performance
+        for i in range(0, 100):
+            data = np.random.randint(0, 256, size=10**6, dtype=np.uint8)
+            # Measure time for CUDA entropy calculation
+            start_time = time.time()
+            entropy_actual = entropy_with_cuda(data)
+            cuda_time = time.time() - start_time
+
+            # Measure time for NumPy entropy calculation
+            start_time = time.time()
+            entropy_expected = self.calculate_entropy_numpy(data)
+            numpy_time = time.time() - start_time
+
+            # CUDA needs to be at least twice as fast
+            np.testing.assert_almost_equal(entropy_actual, entropy_expected, decimal=3)
+            self.assertTrue(cuda_time * 2 < numpy_time)
+
+    def calculate_entropy_numpy(self, data):
+        hist, _ = np.histogram(data, bins=256, range=[0, 256])
+        hist = hist / len(data)
+        entropy = -np.sum(hist * np.log2(hist + (hist == 0)))
+        return entropy
+
+
 if __name__ == '__main__':
     unittest.main()
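One caveat on test_performance as written (an observation, not a change in this commit): the first entropy_with_cuda call in a process includes Numba's JIT compilation of the kernels, so the first loop iteration can fail the twice-as-fast assertion for reasons unrelated to kernel speed. A warm-up call before timing gives a fairer comparison:

    import time
    import numpy as np
    from entro import entropy_with_cuda

    data = np.random.randint(0, 256, size=10**6, dtype=np.uint8)
    entropy_with_cuda(data)  # warm-up: triggers the CUDA JIT compilation

    start = time.time()
    entropy_with_cuda(data)  # timed run now measures execution only
    print("cuda:", time.time() - start)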
