From b18b8dce8da2b866e2b3870a47f52fa7f9c0e873 Mon Sep 17 00:00:00 2001 From: Robert Edwards Date: Wed, 10 Dec 2014 13:51:41 -0500 Subject: [PATCH 1/8] Removed autogenerated files. Updated the other_libs (except qio), and have done the same. --- other_libs/filedb | 2 +- other_libs/libintrin | 2 +- other_libs/qio | 2 +- other_libs/xpath_reader | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/other_libs/filedb b/other_libs/filedb index e390a3ed5..f1b2ba0b7 160000 --- a/other_libs/filedb +++ b/other_libs/filedb @@ -1 +1 @@ -Subproject commit e390a3ed5421af273067033d80615790987381d7 +Subproject commit f1b2ba0b7fbf60a03d8bcb5b87859b6df8379db4 diff --git a/other_libs/libintrin b/other_libs/libintrin index 4ea340d86..aabf4c91b 160000 --- a/other_libs/libintrin +++ b/other_libs/libintrin @@ -1 +1 @@ -Subproject commit 4ea340d86d59fa12600fc73f06fcabc2a12cebfc +Subproject commit aabf4c91b691e5309a3e3e60e50ba3ff01563011 diff --git a/other_libs/qio b/other_libs/qio index 3ef54fab1..23094e180 160000 --- a/other_libs/qio +++ b/other_libs/qio @@ -1 +1 @@ -Subproject commit 3ef54fab11403e6eb33b035fe5334eff43be6251 +Subproject commit 23094e180171a0bd70942d01d6fb8697ae0d7b6e diff --git a/other_libs/xpath_reader b/other_libs/xpath_reader index fd1861a8c..2d665c2f3 160000 --- a/other_libs/xpath_reader +++ b/other_libs/xpath_reader @@ -1 +1 @@ -Subproject commit fd1861a8ccb769559ea632ab97872602ac5e1c42 +Subproject commit 2d665c2f33e0772f43078b5a74ae1eeb63109cd5 From afe8f7d359eda646e1830fd72902989ef861c743 Mon Sep 17 00:00:00 2001 From: Robert Edwards Date: Wed, 10 Dec 2014 14:02:44 -0500 Subject: [PATCH 2/8] Switched to new qio. --- other_libs/qio | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/other_libs/qio b/other_libs/qio index 23094e180..0f9075662 160000 --- a/other_libs/qio +++ b/other_libs/qio @@ -1 +1 @@ -Subproject commit 23094e180171a0bd70942d01d6fb8697ae0d7b6e +Subproject commit 0f90756622869d00023a56a41c953886d258c537 From d6414c9c0895b59ca25355cf8db42c8d125ecdcc Mon Sep 17 00:00:00 2001 From: Robert Edwards Date: Wed, 10 Dec 2014 14:04:41 -0500 Subject: [PATCH 3/8] Turned off the "using namespace std" in qdp.h, and did an include of complex.h in qdp_io.h, and also turned on some I/O declarations that use std::complex. This will break old code. I'm turning down the volume on my emails... --- include/qdp.h | 4 ++-- include/qdp_io.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/qdp.h b/include/qdp.h index c498e8820..f77790a10 100644 --- a/include/qdp.h +++ b/include/qdp.h @@ -85,8 +85,8 @@ #include #include -#if 1 -// NOTE: (master) chroma will compile with this turned off, but holding back. +#if 0 +// NOTE: having this off will probably break a bunch of old code. using namespace std; using std::iostream; using std::ostream; diff --git a/include/qdp_io.h b/include/qdp_io.h index 7f8bc1ca9..7126895db 100644 --- a/include/qdp_io.h +++ b/include/qdp_io.h @@ -16,7 +16,7 @@ #include #include -#if 0 +#if 1 #include #endif @@ -496,7 +496,7 @@ namespace QDP BinaryReader& operator>>(BinaryReader& bin, double& input); BinaryReader& operator>>(BinaryReader& bin, bool& input); -#if 0 +#if 1 //! Complex reader void read(BinaryReader& bin, std::complex& param); void read(BinaryReader& bin, std::complex& param); @@ -1139,7 +1139,7 @@ namespace QDP BinaryWriter& operator<<(BinaryWriter& bin, double output); BinaryWriter& operator<<(BinaryWriter& bin, bool output); -#if 0 +#if 1 //! Complex writer void write(BinaryWriter& bin, const std::complex& param); void write(BinaryWriter& bin, const std::complex& param); From 8621f680ec098479a7688b6e57f724415019e869 Mon Sep 17 00:00:00 2001 From: Robert Edwards Date: Fri, 12 Dec 2014 14:29:07 -0500 Subject: [PATCH 4/8] Added a trivial script for auto-isms --- autogen.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100755 autogen.sh diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 000000000..b01fdc449 --- /dev/null +++ b/autogen.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +autoreconf -f From 0aa5ee2a006e8bd4467e3d55a36cd94cccc41636 Mon Sep 17 00:00:00 2001 From: Robert Edwards Date: Fri, 12 Dec 2014 14:57:27 -0500 Subject: [PATCH 5/8] Updated to newest versions of submodules. --- other_libs/filedb | 2 +- other_libs/libintrin | 2 +- other_libs/qio | 2 +- other_libs/xpath_reader | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/other_libs/filedb b/other_libs/filedb index f1b2ba0b7..dc6a280d8 160000 --- a/other_libs/filedb +++ b/other_libs/filedb @@ -1 +1 @@ -Subproject commit f1b2ba0b7fbf60a03d8bcb5b87859b6df8379db4 +Subproject commit dc6a280d82d093b82003f5b235f6004e3a71d4be diff --git a/other_libs/libintrin b/other_libs/libintrin index aabf4c91b..f030fe06e 160000 --- a/other_libs/libintrin +++ b/other_libs/libintrin @@ -1 +1 @@ -Subproject commit aabf4c91b691e5309a3e3e60e50ba3ff01563011 +Subproject commit f030fe06e61608d304e9a0bf2768af5e3784447e diff --git a/other_libs/qio b/other_libs/qio index 0f9075662..3cdb37034 160000 --- a/other_libs/qio +++ b/other_libs/qio @@ -1 +1 @@ -Subproject commit 0f90756622869d00023a56a41c953886d258c537 +Subproject commit 3cdb37034b309ad5246b908679e9147e6b91bcb8 diff --git a/other_libs/xpath_reader b/other_libs/xpath_reader index 2d665c2f3..fd43cae5a 160000 --- a/other_libs/xpath_reader +++ b/other_libs/xpath_reader @@ -1 +1 @@ -Subproject commit 2d665c2f33e0772f43078b5a74ae1eeb63109cd5 +Subproject commit fd43cae5a861b267a88e35b8354ab89bce45238b From d107281d5b8ac1dff23efff4dd37f3572caafcfd Mon Sep 17 00:00:00 2001 From: Thorsten Kurth Date: Thu, 15 Jan 2015 10:59:55 -0800 Subject: [PATCH 6/8] better local reordering --- include/qdp_hdf5.h | 63 ++++++++++++++++++++++++++++++++++++++++------ lib/qdp_hdf5.cc | 25 ++++++++++-------- 2 files changed, 70 insertions(+), 18 deletions(-) diff --git a/include/qdp_hdf5.h b/include/qdp_hdf5.h index 8f1f2de98..c711802a7 100644 --- a/include/qdp_hdf5.h +++ b/include/qdp_hdf5.h @@ -76,9 +76,52 @@ namespace QDP { QDP_error_exit(message.c_str()); } + //*********************************************************************************************************************************** + //*********************************************************************************************************************************** + //LAYOUT HELPERS + //*********************************************************************************************************************************** + //*********************************************************************************************************************************** //prefetch mapping for CB->lexicographical: int prefetchLatticeCoordinates(); + //conversion: LAYOUT<-HOST + template + inline void CvtToLayout(OLattice& field, void* buf, const unsigned int& nodeSites, const unsigned int& elemSize){ +#pragma omp parallel for shared(nodeSites,elemSize,buf,field) + for(unsigned int run=0; run(buf)+run*elemSize,elemSize); + } + } + + template + inline void CvtToLayout(multi1d< OLattice >& fieldarray, void* buf, const unsigned int& nodeSites, const unsigned int& arraySize, const unsigned int& elemSize){ +#pragma omp parallel for shared(nodeSites,arraySize,elemSize,buf,fieldarray) + for(unsigned int run=0; run(buf)+(dd+arraySize*run)*elemSize,elemSize); + } + } + } + + //conversion: HOST<-LAYOUT + template + inline void CvtToHost(void* buf, const OLattice& field, const unsigned int& nodeSites, const unsigned int& elemSize){ +#pragma omp parallel for shared(nodeSites,elemSize,buf,field) + for(unsigned int run=0; run(buf)+run*elemSize,&(field.elem(reordermap[run])),elemSize); + } + } + + template + inline void CvtToHost(void* buf, const multi1d< OLattice >& fieldarray, const unsigned int& nodeSites, const unsigned int& arraySize, const unsigned int& elemSize){ +#pragma omp parallel for shared(nodeSites,arraySize,elemSize,buf,fieldarray) + for(unsigned int run=0; run(buf)+(dd+arraySize*run)*elemSize,&(fieldarray[dd].elem(reordermap[run])),elemSize); + } + } + } + //*********************************************************************************************************************************** //*********************************************************************************************************************************** //DATATYPE HELPERS @@ -647,10 +690,11 @@ namespace QDP { //put lattice into u-field and reconstruct as well as reorder them on the fly: // Reconstruct the gauge field if(profile) swatch_reorder.start(); -#pragma omp parallel for firstprivate(nodeSites,obj_size,float_size) shared(buf,field) + /*#pragma omp parallel for firstprivate(nodeSites,obj_size,float_size) shared(buf,field) for(unsigned int run=0; run(buf+run*obj_size),float_size*obj_size); - } + }*/ + CvtToLayout(field,reinterpret_cast(buf),nodeSites,float_size*obj_size); delete [] buf; if(profile) swatch_reorder.stop(); @@ -717,12 +761,13 @@ namespace QDP { // Reconstruct the gauge field if(profile) swatch_reorder.start(); fieldarray.resize(arr_size); -#pragma omp parallel for firstprivate(nodeSites,arr_size,obj_size,float_size) shared(buf,fieldarray) + /*#pragma omp parallel for firstprivate(nodeSites,arr_size,obj_size,float_size) shared(buf,fieldarray) for(unsigned int run=0; run(buf+(dd+arr_size*run)*obj_size),float_size*obj_size); } - } + }*/ + CvtToLayout(fieldarray,reinterpret_cast(buf),nodeSites,arr_size,float_size*obj_size); delete [] buf; if(profile) swatch_reorder.stop(); @@ -1131,10 +1176,11 @@ namespace QDP { size_t float_size=sizeof(REAL); size_t obj_size=sizeof(T)/float_size; REAL* buf=new REAL[nodeSites*obj_size]; -#pragma omp parallel for firstprivate(nodeSites,obj_size,float_size) shared(buf,field) + /*#pragma omp parallel for firstprivate(nodeSites,obj_size,float_size) shared(buf,field) for(unsigned int run=0; run(buf+run*obj_size),&(field.elem(reordermap[run])),float_size*obj_size); - } + }*/ + CvtToHost(reinterpret_cast(buf),field,nodeSites,float_size*obj_size); if(profile) swatch_reorder.stop(); //determine datatype: @@ -1187,12 +1233,13 @@ namespace QDP { size_t obj_size=sizeof(T)/float_size; size_t arr_size=fieldarray.size(); REAL* buf=new REAL[nodeSites*obj_size*arr_size]; -#pragma omp parallel for firstprivate(nodeSites,arr_size,obj_size,float_size) shared(buf,fieldarray) + /*#pragma omp parallel for firstprivate(nodeSites,arr_size,obj_size,float_size) shared(buf,fieldarray) for(unsigned int run=0; run(buf+(dd+arr_size*run)*obj_size),&(fieldarray[dd].elem(reordermap[run])),float_size*obj_size); } - } + }*/ + CvtToHost(reinterpret_cast(buf),fieldarray,nodeSites,arr_size,float_size*obj_size); hid_t type_id; if(float_size==4){ diff --git a/lib/qdp_hdf5.cc b/lib/qdp_hdf5.cc index ca920f022..9302c992d 100644 --- a/lib/qdp_hdf5.cc +++ b/lib/qdp_hdf5.cc @@ -907,10 +907,11 @@ namespace QDP { //put lattice into u-field and reconstruct as well as reorder them on the fly: // Reconstruct the gauge field if(profile) swatch_reorder.start(); -#pragma omp parallel for firstprivate(nodeSites,obj_size,float_size) shared(buf,field) + /*#pragma omp parallel for firstprivate(nodeSites,obj_size,float_size) shared(buf,field) for(unsigned int run=0; run(buf+run*obj_size),float_size*obj_size); - } + }*/ + CvtToLayout(field,reinterpret_cast(buf),nodeSites,float_size*obj_size); delete [] buf; if(profile) swatch_reorder.stop(); @@ -975,12 +976,13 @@ namespace QDP { // Reconstruct the gauge field if(profile) swatch_reorder.start(); unsigned int arr_size=sizes[Nd]; -#pragma omp parallel for firstprivate(nodeSites,arr_size,obj_size,float_size) shared(buf,field) + /*#pragma omp parallel for firstprivate(nodeSites,arr_size,obj_size,float_size) shared(buf,field) for(unsigned int run=0; run(buf+(dd+arr_size*run)*obj_size),float_size*obj_size); } - } + }*/ + CvtToLayout(field,reinterpret_cast(buf),nodeSites,arr_size,float_size*obj_size); delete [] buf; if(profile) swatch_reorder.stop(); @@ -1811,10 +1813,11 @@ namespace QDP { size_t float_size=sizeof(REAL32); size_t obj_size=sizeof(ColorMatrixF3)/float_size; REAL32* buf=new REAL32[nodeSites*obj_size]; -#pragma omp parallel for firstprivate(nodeSites,obj_size,float_size) shared(buf,field) + /*#pragma omp parallel for firstprivate(nodeSites,obj_size,float_size) shared(buf,field) for(unsigned int run=0; run(buf+run*obj_size),&(field.elem(reordermap[run])),float_size*obj_size); - } + }*/ + CvtToHost(reinterpret_cast(buf),field,nodeSites,float_size*obj_size); if(profile) swatch_reorder.stop(); //write out the stuff: @@ -1884,10 +1887,11 @@ namespace QDP { size_t float_size=sizeof(REAL64); size_t obj_size=sizeof(ColorMatrixD3)/float_size; REAL64* buf=new REAL64[nodeSites*obj_size]; -#pragma omp parallel for firstprivate(nodeSites,obj_size,float_size) shared(buf,field) + /*#pragma omp parallel for firstprivate(nodeSites,obj_size,float_size) shared(buf,field) for(unsigned int run=0; run(buf+run*obj_size),&(field.elem(reordermap[run])),float_size*obj_size); - } + }*/ + CvtToHost(reinterpret_cast(buf),field,nodeSites,float_size*obj_size); if(profile) swatch_reorder.stop(); //write out the stuff: @@ -1959,12 +1963,13 @@ namespace QDP { size_t tot_size = nodeSites*field.size()*obj_size; REAL64* buf=new REAL64[tot_size]; unsigned int fsize=field.size(); -#pragma omp parallel for firstprivate(nodeSites,fsize,obj_size,float_size) shared(buf,field) + /*#pragma omp parallel for firstprivate(nodeSites,fsize,obj_size,float_size) shared(buf,field) for(unsigned int run=0; run(buf+(dd+fsize*run)*obj_size),&(field[dd].elem(reordermap[run])),float_size*obj_size); } - } + }*/ + CvtToHost(reinterpret_cast(buf),field,nodeSites,fsize,float_size*obj_size); if(profile) swatch_reorder.stop(); //write out the stuff: From d08630c12b8b5f6d9bda0f079bf46a88eb0ad044 Mon Sep 17 00:00:00 2001 From: Thorsten Kurth Date: Thu, 15 Jan 2015 11:05:53 -0800 Subject: [PATCH 7/8] debugging threading --- include/qdp_dispatch.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/qdp_dispatch.h b/include/qdp_dispatch.h index 1408bbeff..820c3673e 100644 --- a/include/qdp_dispatch.h +++ b/include/qdp_dispatch.h @@ -33,6 +33,7 @@ void dispatch_to_threads(int numSiteTable, Arg a, void (*func)(int,int,int, Arg* myId = omp_get_thread_num(); low = numSiteTable*myId/threads_num; high = numSiteTable*(myId+1)/threads_num; + high=(high > numSiteTable ? numSiteTable : high); func(low, high, myId, &a); } From 06704026119043c9f62e3a499a907d94b8155907 Mon Sep 17 00:00:00 2001 From: bjoo Date: Thu, 15 Jan 2015 14:25:27 -0500 Subject: [PATCH 8/8] Added upper guard to qdp_dispatch.h --- include/qdp_dispatch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/qdp_dispatch.h b/include/qdp_dispatch.h index 1408bbeff..e12fd615e 100644 --- a/include/qdp_dispatch.h +++ b/include/qdp_dispatch.h @@ -32,7 +32,8 @@ void dispatch_to_threads(int numSiteTable, Arg a, void (*func)(int,int,int, Arg* threads_num = omp_get_num_threads(); myId = omp_get_thread_num(); low = numSiteTable*myId/threads_num; - high = numSiteTable*(myId+1)/threads_num; + int hi = numSiteTable*(myId+1)/threads_num; + high = ( hi > numSiteTable ) ? numSiteTable : hi; func(low, high, myId, &a); }