Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug repair #60

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyebsdindex/opencl/band_detect_cl.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ def find_bands(self, patternsIn, verbose=0, clparams=None, chunksize=528, useCPU
def radon_fasterCL(self,image,padding = np.array([0,0]), fixArtifacts = False, background = None, returnBuff = True, clparams=None ):
# this function executes the radon summations on the GPU
tic = timer()
image = np.asarray(image)

# make sure we have an OpenCL environment
if clparams is not None:
if clparams.queue is None:
Expand Down
10 changes: 8 additions & 2 deletions pyebsdindex/opencl/nlpar_cl.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=Fa


sigmachunk_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, size=sigmachunk.nbytes)

cl.enqueue_barrier(queue)
prg.calcsigma(queue, (np.uint32(ncolchunk), np.uint32(nrowchunk)), None,
datapad_gpu, mask_gpu,sigmachunk_gpu,
Expand Down Expand Up @@ -404,7 +405,7 @@ def calcnlpar_cl(self, searchradius=None, lam = None, dthresh = None, saturation
clvectlen = 16



# print("target mem:", target_mem)
chunks = self._calcchunks( [pwidth, pheight], ncols, nrows, target_bytes=target_mem,
col_overlap=sr, row_overlap=sr)
#print(chunks[2], chunks[3])
Expand All @@ -426,10 +427,14 @@ def calcnlpar_cl(self, searchradius=None, lam = None, dthresh = None, saturation
nchunks = chunksize.size
#return chunks, chunksize
mxchunk = int(chunksize.max())
# print("max chunk:" , mxchunk)

npadmx = clvectlen * int(np.ceil(float(mxchunk)*npat_point/ clvectlen))

datapad_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=int(npadmx) * int(4))
datapadout_gpu = cl.Buffer(ctx, mf.READ_WRITE, size=int(npadmx) * int(4))
# print("data pad", datapad_gpu.size)
# print("data out", datapadout_gpu.size)

nnn = int((2 * sr + 1) ** 2)

Expand Down Expand Up @@ -469,14 +474,15 @@ def calcnlpar_cl(self, searchradius=None, lam = None, dthresh = None, saturation

sigmachunk = np.ascontiguousarray(sigma[rstart:rend, cstart:cend].astype(np.float32))
sigmachunk_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=sigmachunk)
# print("sigma", sigmachunk_gpu.size)
szdata = data.size
npad = clvectlen * int(np.ceil(szdata / clvectlen))

#datapad = np.zeros((npad), dtype=np.float32) + np.float32(mxval + 10)
#datapad[0:szdata] = data.reshape(-1)

data_gpu = cl.Buffer(ctx,mf.READ_ONLY | mf.COPY_HOST_PTR,hostbuf=data)

# print("data", data_gpu.size)
if data.dtype.type is np.float32:
prg.nlloadpat32flt(queue, (np.uint64(data.size),1), None, data_gpu, datapad_gpu, wait_for=[filldatain])
if data.dtype.type is np.ubyte:
Expand Down
13 changes: 9 additions & 4 deletions pyebsdindex/opencl/nlpar_clray.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,10 @@ def calcsigma_clray(self, nn=1, saturation_protect=True, automask=True, normaliz
normalize_d=normalize_d,
gpu_id=gpu_id, **kwargs)

target_mem = clparams.gpu[gpu_id].max_mem_alloc_size // 3
max_mem = clparams.gpu[gpu_id].global_mem_size * 0.75
target_mem = clparams.gpu[gpu_id].max_mem_alloc_size // 2
max_mem = clparams.gpu[gpu_id].global_mem_size * 0.5
if target_mem * ngpuwrker > max_mem:
#print('revisemem:')
target_mem = max_mem / ngpuwrker

patternfile = self.getinfileobj()
Expand Down Expand Up @@ -479,7 +480,7 @@ def calcnlpar_clray(self, searchradius=None, lam = None, dthresh = None, saturat
gpu_id= gpu_id)

target_mem = clparams.gpu[gpu_id].max_mem_alloc_size//3
max_mem = clparams.gpu[gpu_id].global_mem_size*0.75
max_mem = clparams.gpu[gpu_id].global_mem_size*0.4
if target_mem*ngpuwrker > max_mem:
target_mem = max_mem/ngpuwrker
#print(target_mem/1.0e9)
Expand Down Expand Up @@ -545,7 +546,7 @@ def calcnlpar_clray(self, searchradius=None, lam = None, dthresh = None, saturat
if len(jobqueue) > 0:
if len(idlewrker) > 0:
wrker = idlewrker.pop()
job = jobqueue.pop()
job = jobqueue.pop(0)

tasks.append(wrker.runnlpar_chunk.remote(job, nlparobj=nlpar_remote))
busywrker.append(wrker)
Expand All @@ -561,6 +562,10 @@ def calcnlpar_clray(self, searchradius=None, lam = None, dthresh = None, saturat
ndone += 1
if verbose >= 2:
print("tiles complete: ", ndone, "/", njobs, sep='', end='\r')
else: # An error has occurred ... hopefully it just needs a re-process.
jobqueue.append(job)
print(message)

if verbose >= 2:
print('\n', end='')
return str(self.patternfileout.filepath)
Expand Down
2 changes: 1 addition & 1 deletion pyebsdindex/opencl/openclparam.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from os import path
import pyopencl as cl
from os import environ
environ['PYOPENCL_COMPILER_OUTPUT'] = '1'
environ['PYOPENCL_COMPILER_OUTPUT'] = '0'

RADDEG = 180.0/np.pi
DEGRAD = np.pi/180.0
Expand Down
3 changes: 3 additions & 0 deletions pyebsdindex/radon_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ def radon_fast(self, imageIn, padding = np.array([0,0]), fixArtifacts = False,

def radon_faster(self,imageIn,padding = np.array([0,0]), fixArtifacts = False, background = None, normalization=True):
tic = timer()

shapeIm = np.shape(imageIn)
if imageIn.ndim == 2:
nIm = 1
Expand All @@ -244,11 +245,13 @@ def radon_faster(self,imageIn,padding = np.array([0,0]), fixArtifacts = False, b
nIm = shapeIm[0]
# reform = False


if background is None:
image = (imageIn.reshape(-1)).astype(np.float32)
else:
image = imageIn - background
image = (image.reshape(-1)).astype(np.float32)
image = np.asarray(image)

nPx = shapeIm[-1]*shapeIm[-2]
indxDim = np.asarray(self.indexPlan.shape)
Expand Down