diff --git a/README.md b/README.md index ec39e29..f004d04 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Around 10% faster than solution for single file and a lot faster for multiple fi from scipy.stats import entropy import numpy as np -def entropy1(labels, base=None): +def entropy(labels, base=None): labels = np.frombuffer(labels, dtype=np.uint8) value,counts = np.unique(labels, return_counts=True) @@ -27,6 +27,11 @@ It took 10522.091444253922 seconds to complete the processing of all 200k malwar The malware was stored on network attached storage, which has greatly impacted the I/O performance. +## Testing + +Currently, tests cannot be performed on GitHub Actions as there is no Nvidia GPU available. +If it becomes possible, I will create a self-hosted runner in the future. + ## Remarks Code is not optimized and cleaned yet. \ No newline at end of file diff --git a/entro.py b/entro.py index 26fefff..14cd120 100644 --- a/entro.py +++ b/entro.py @@ -15,8 +15,6 @@ def calculate_histogram(data, hist_out): tx = cuda.threadIdx.x local_hist[tx] = 0 - cuda.syncthreads() - idx = cuda.grid(1) stride = cuda.gridsize(1) @@ -30,16 +28,19 @@ def calculate_histogram(data, hist_out): @cuda.jit -def calculate_entropy(hist, total_pixels, entropy_out): +def calculate_entropy(hist, data_size, entropy_out): idx = cuda.grid(1) stride = cuda.gridsize(1) for i in range(idx, hist.shape[0], stride): - prob = hist[i] / total_pixels + prob = hist[i] / data_size if prob != 0: entropy_out[i] = -prob * math.log2(prob) else: + # Some small, unimportant number entropy_out[i] = -0.000001 * math.log2(0.000001) + +# TODO: implement it properly. 
@cuda.jit def sum_array(arr, result): local_mem = cuda.shared.array(256, dtype=np.float32) @@ -65,24 +66,21 @@ def sum_array(arr, result): cuda.syncthreads() -def entropy_with_cuda(data): - total_pixels = len(data) +def entropy_with_cuda(data): + + data_size = len(data) data_gpu = cuda.to_device(np.frombuffer(data, dtype=np.uint8)) - - cuda.synchronize() - hist_host = np.zeros(256, dtype=np.uint32) - #hist_out = cuda.device_array(256, dtype=np.uint32) - # Initialize histogram array to zeros - #cuda.device_array_like(hist_out, fill_value=0) + #Initialize hist with 0. For some reason numba.cuda.device_array didn't work as expected. + hist_host = np.zeros(256, dtype=np.uint32) hist_out = cuda.to_device(hist_host) - cuda.synchronize() + threadsperblock_hist = 256 blockspergrid_hist = min((len(data) + (threadsperblock_hist - 1)) // threadsperblock_hist, 1024) calculate_histogram[blockspergrid_hist, threadsperblock_hist](data_gpu, hist_out) - + del data_gpu cuda.synchronize() @@ -91,31 +89,15 @@ def entropy_with_cuda(data): threadsperblock_entropy = 256 blockspergrid_entropy = min((hist_out.size + (threadsperblock_entropy - 1)) // threadsperblock_entropy, 1024) - calculate_entropy[blockspergrid_entropy, threadsperblock_entropy](hist_out, total_pixels, entropy_out_gpu) + calculate_entropy[blockspergrid_entropy, threadsperblock_entropy](hist_out, data_size, entropy_out_gpu) cuda.synchronize() del hist_out - result = cuda.device_array(blockspergrid_entropy, dtype=np.float32) - - cuda.synchronize() - - sum_array[blockspergrid_entropy, threadsperblock_entropy](entropy_out_gpu, result) - - - cuda.synchronize() - del entropy_out_gpu - - - entropy_sum = result.copy_to_host() - - del result + local_entropies = entropy_out_gpu.copy_to_host() - - cuda.synchronize() - #todo: remove sum() make it parrarel - return entropy_sum.sum() + return local_entropies.sum() def is_supported_cuda(): return cuda.is_available() and cuda.detect() \ No newline at end of file diff --git a/test_entro.py 
b/test_entro.py index 25ede29..d906153 100644 --- a/test_entro.py +++ b/test_entro.py @@ -4,9 +4,10 @@ import math from scipy.stats import entropy from numba.core.errors import NumbaPerformanceWarning +import time # Functions to test -from entro import calculate_histogram, calculate_entropy +from entro import calculate_histogram, calculate_entropy, entropy_with_cuda class TestCalculateHistogram(unittest.TestCase): @@ -91,5 +92,39 @@ def test_random_histograms(self): del entropy_out_gpu +class TestEntropyWithCUDA(unittest.TestCase): + def test_entropy_calculation(self): + # Test case for correctness of entropy calculation + for i in range(1, 100): + data = np.random.randint(0, 256, size=1000, dtype=np.uint8) + entropy_expected = self.calculate_entropy_numpy(data) + entropy_actual = entropy_with_cuda(data) + np.testing.assert_almost_equal(entropy_actual, entropy_expected, decimal=3) + + def test_performance(self): + # Test case for performance + for i in range(0, 100): + data = np.random.randint(0, 256, size=10**6, dtype=np.uint8) + # Measure time for CUDA entropy calculation + start_time = time.time() + entropy_actual = entropy_with_cuda(data) + cuda_time = time.time() - start_time + + # Measure time for NumPy entropy calculation + start_time = time.time() + entropy_expected = self.calculate_entropy_numpy(data) + numpy_time = time.time() - start_time + + # CUDA needs to be at least twice as fast + np.testing.assert_almost_equal(entropy_actual, entropy_expected, decimal=3) + self.assertTrue(cuda_time*2 < numpy_time) + + def calculate_entropy_numpy(self, data): + hist, _ = np.histogram(data, bins=256, range=[0, 256]) + hist = hist / len(data) + entropy = -np.sum(hist * np.log2(hist + (hist == 0))) + return entropy + + if __name__ == '__main__': unittest.main() \ No newline at end of file