From 745682ef30e806ba605708e04dc41c0788bd0ea8 Mon Sep 17 00:00:00 2001 From: sisong Date: Sat, 24 Sep 2022 13:05:21 +0800 Subject: [PATCH 01/20] remove isSkipSameRange from TDigestMatcher, for keep simple; --- libHDiffPatch/HDiff/diff.cpp | 14 +++-- libHDiffPatch/HDiff/diff.h | 4 +- libHDiffPatch/HDiff/match_block.cpp | 3 +- .../limit_mem_diff/digest_matcher.cpp | 53 ++----------------- .../limit_mem_diff/digest_matcher.h | 3 +- 5 files changed, 15 insertions(+), 62 deletions(-) diff --git a/libHDiffPatch/HDiff/diff.cpp b/libHDiffPatch/HDiff/diff.cpp index e6dc21a6..51db042d 100644 --- a/libHDiffPatch/HDiff/diff.cpp +++ b/libHDiffPatch/HDiff/diff.cpp @@ -926,9 +926,8 @@ void create_single_compressed_diff_stream(const hpatch_TStreamInput* newData, const hpatch_TStreamOutput* out_diff, const hdiff_TCompress* compressPlugin, size_t kMatchBlockSize,size_t patchStepMemSize){ - const bool isSkipSameRange=(compressPlugin!=0); TCoversBuf covers(newData->streamSize,oldData->streamSize); - get_match_covers_by_block(newData,oldData,&covers,kMatchBlockSize,isSkipSameRange); + get_match_covers_by_block(newData,oldData,&covers,kMatchBlockSize); serialize_single_compressed_diff(newData,oldData,true,covers, out_diff,compressPlugin,patchStepMemSize); } @@ -1056,20 +1055,20 @@ void __hdiff_private__create_compressed_diff(const TByte* newData,const TByte* n //====================== void get_match_covers_by_block(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, - hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize,bool kIsSkipSameRange){ + hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize){ assert(out_covers->push_cover!=0); - TDigestMatcher matcher(oldData,kMatchBlockSize,kIsSkipSameRange); + TDigestMatcher matcher(oldData,kMatchBlockSize); matcher.search_cover(newData,out_covers); //todo: + extend_cover_stream ? } void get_match_covers_by_block(const unsigned char* newData,const unsigned char* newData_end, const unsigned char* oldData,const unsigned char* oldData_end, - hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize,bool kIsSkipSameRange){ + hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize){ hdiff_TStreamInput oldData_stream; mem_as_hStreamInput(&oldData_stream,oldData,oldData_end); hdiff_TStreamInput newData_stream; mem_as_hStreamInput(&newData_stream,newData,newData_end); - get_match_covers_by_block(&newData_stream,&oldData_stream,out_covers,kMatchBlockSize,kIsSkipSameRange); + get_match_covers_by_block(&newData_stream,&oldData_stream,out_covers,kMatchBlockSize); } void get_match_covers_by_sstring(const unsigned char* newData,const unsigned char* newData_end, @@ -1159,9 +1158,8 @@ void create_compressed_diff_stream(const hpatch_TStreamInput* newData, const hpatch_TStreamInput* oldData, const hpatch_TStreamOutput* out_diff, const hdiff_TCompress* compressPlugin,size_t kMatchBlockSize){ - const bool isSkipSameRange=(compressPlugin!=0); TCoversBuf covers(newData->streamSize,oldData->streamSize); - get_match_covers_by_block(newData,oldData,&covers,kMatchBlockSize,isSkipSameRange); + get_match_covers_by_block(newData,oldData,&covers,kMatchBlockSize); stream_serialize(newData,oldData->streamSize,out_diff,compressPlugin,covers); } diff --git a/libHDiffPatch/HDiff/diff.h b/libHDiffPatch/HDiff/diff.h index dd7735cb..5149241f 100644 --- a/libHDiffPatch/HDiff/diff.h +++ b/libHDiffPatch/HDiff/diff.h @@ -175,10 +175,10 @@ void resave_single_compressed_diff(const hpatch_TStreamInput* in_diff, //same as create?compressed_diff_stream(), but not serialize diffData, only got covers void get_match_covers_by_block(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, - hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize,bool kIsSkipSameRange=false); + hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize); void get_match_covers_by_block(const unsigned char* newData,const unsigned char* newData_end, const unsigned char* oldData,const unsigned char* oldData_end, - hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize,bool kIsSkipSameRange=false); + hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize); //same as create?_diff(), but not serialize diffData, only got covers void get_match_covers_by_sstring(const unsigned char* newData,const unsigned char* newData_end, diff --git a/libHDiffPatch/HDiff/match_block.cpp b/libHDiffPatch/HDiff/match_block.cpp index 54cd7238..e7b0c8ae 100644 --- a/libHDiffPatch/HDiff/match_block.cpp +++ b/libHDiffPatch/HDiff/match_block.cpp @@ -33,7 +33,6 @@ #define _check(value,info) { if (!(value)) { throw std::runtime_error(info); } } namespace hdiff_private { - static const bool kIsSkipSameRange = false; typedef TMatchBlock::TPackedCover TPackedCover; template inline static @@ -88,7 +87,7 @@ namespace hdiff_private { void TMatchBlock::getBlockCovers(){ TOutputCovers covers(blockCovers); get_match_covers_by_block(newData,newData_end,oldData,oldData_end, - &covers,matchBlockSize,kIsSkipSameRange); + &covers,matchBlockSize); } void TMatchBlock::getPackedCover(){ diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp index b16180c0..6179dc76 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp @@ -131,8 +131,8 @@ static size_t posToBlockIndex(hpatch_StreamPos_t pos,size_t kMatchBlockSize,size TDigestMatcher::~TDigestMatcher(){ } -TDigestMatcher::TDigestMatcher(const hpatch_TStreamInput* oldData,size_t kMatchBlockSize,bool kIsSkipSameRange) -:m_oldData(oldData),m_isUseLargeSorted(true),m_kIsSkipSameRange(kIsSkipSameRange), +TDigestMatcher::TDigestMatcher(const hpatch_TStreamInput* oldData,size_t kMatchBlockSize) +:m_oldData(oldData),m_isUseLargeSorted(true), m_newCacheSize(0),m_oldCacheSize(0),m_oldMinCacheSize(0),m_backupCacheSize(0),m_kMatchBlockSize(0){ if (kMatchBlockSize>(oldData->streamSize+1)/2) kMatchBlockSize=(size_t)((oldData->streamSize+1)/2); @@ -331,21 +331,6 @@ struct TNewStreamCache:public TBlockStreamCache{ return roll(); } } - bool skip_same(unsigned char same){ - if (!TBlockStreamCache::resetPos(pos()+kMatchBlockSize)) return false; - while (true) { - const unsigned char* pdata=data(); - const unsigned char* pdata_end=pdata+dataLength(); - for (;pdata& filter, - bool kIsSkipSameRange,hpatch_TOutputCovers* out_covers) { + hpatch_TOutputCovers* out_covers) { TDigest_comp comp(blocksBase); TCover lastCover={0,0,0}; while (true) { @@ -588,11 +550,6 @@ static void tm_search_cover(const adler_uint_t* blocksBase,size_t blocksSize, if (range.first==range.second) { if (newStream.roll()) continue; else break; }//finish - if (kIsSkipSameRange&&is_same_data(newStream.data(),newStream.kMatchBlockSize)){ - if (!newStream.skip_same(*newStream.data())) break;//finish - continue; - } - hpatch_StreamPos_t newPosBack=newStream.pos(); TCover curCover; if (getBestMatch(blocksBase,blocksSize,range.first,range.second, @@ -620,10 +577,10 @@ void TDigestMatcher::search_cover(const hpatch_TStreamInput* newData,hpatch_TOut m_oldCacheSize,m_backupCacheSize,m_kMatchBlockSize); if (m_isUseLargeSorted) tm_search_cover(&m_blocks[0],m_blocks.size(),&m_sorted_larger[0],&m_sorted_larger[0]+m_blocks.size(), - oldStream,newStream,m_filter,m_kIsSkipSameRange,out_covers); + oldStream,newStream,m_filter,out_covers); else tm_search_cover(&m_blocks[0],m_blocks.size(),&m_sorted_limit[0],&m_sorted_limit[0]+m_blocks.size(), - oldStream,newStream,m_filter,m_kIsSkipSameRange,out_covers); + oldStream,newStream,m_filter,out_covers); } }//namespace hdiff_private diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h index 0d0eeef8..1b4c76ec 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h @@ -62,7 +62,7 @@ static inline adler_hash_t adler_to_hash(const uint64_t x){ return x; } class TDigestMatcher{ public: //throw std::runtime_error when data->read error or kMatchBlockSize error; - TDigestMatcher(const hpatch_TStreamInput* oldData,size_t kMatchBlockSize,bool kIsSkipSameRange); + TDigestMatcher(const hpatch_TStreamInput* oldData,size_t kMatchBlockSize); void search_cover(const hpatch_TStreamInput* newData,hpatch_TOutputCovers* out_covers); ~TDigestMatcher(); private: @@ -72,7 +72,6 @@ class TDigestMatcher{ std::vector m_sorted_limit; std::vector m_sorted_larger; bool m_isUseLargeSorted; - bool m_kIsSkipSameRange; TAutoMem m_mem; size_t m_newCacheSize; size_t m_oldCacheSize; From 6fd0d1928c3e9506279c9ce5eafc19de9c88e22d Mon Sep 17 00:00:00 2001 From: sisong Date: Sun, 25 Sep 2022 08:05:02 +0800 Subject: [PATCH 02/20] hpatchz.so add zstd; --- builds/android_ndk_jni_mk/build_libs_zstd.bat | 1 + builds/android_ndk_jni_mk/build_libs_zstd.sh | 1 + 2 files changed, 2 insertions(+) create mode 100644 builds/android_ndk_jni_mk/build_libs_zstd.bat create mode 100644 builds/android_ndk_jni_mk/build_libs_zstd.sh diff --git a/builds/android_ndk_jni_mk/build_libs_zstd.bat b/builds/android_ndk_jni_mk/build_libs_zstd.bat new file mode 100644 index 00000000..12f07607 --- /dev/null +++ b/builds/android_ndk_jni_mk/build_libs_zstd.bat @@ -0,0 +1 @@ +ndk-build NDK_PROJECT_PATH=. APP_BUILD_SCRIPT=Android.mk NDK_APPLICATION_MK=Application.mk ZSTD=1 \ No newline at end of file diff --git a/builds/android_ndk_jni_mk/build_libs_zstd.sh b/builds/android_ndk_jni_mk/build_libs_zstd.sh new file mode 100644 index 00000000..12f07607 --- /dev/null +++ b/builds/android_ndk_jni_mk/build_libs_zstd.sh @@ -0,0 +1 @@ +ndk-build NDK_PROJECT_PATH=. APP_BUILD_SCRIPT=Android.mk NDK_APPLICATION_MK=Application.mk ZSTD=1 \ No newline at end of file From a9a46f2242f4046d874841aabb3692b873759fd8 Mon Sep 17 00:00:00 2001 From: sisong Date: Sun, 25 Sep 2022 09:43:36 +0800 Subject: [PATCH 03/20] pass threadNum to class TDigestMatcher; --- bsdiff_wrapper/bsdiff_wrapper.cpp | 11 ++++--- bsdiff_wrapper/bsdiff_wrapper.h | 8 +++-- dirDiffPatch/dir_diff/dir_diff.cpp | 10 +++--- dirDiffPatch/dir_patch/dir_patch_types.h | 1 + hdiffz.cpp | 31 ++++++++++--------- libHDiffPatch/HDiff/diff.cpp | 17 +++++----- libHDiffPatch/HDiff/diff.h | 10 +++--- libHDiffPatch/HDiff/match_block.cpp | 29 +++++++++-------- libHDiffPatch/HDiff/match_block.h | 30 ++++++++++++------ .../limit_mem_diff/digest_matcher.cpp | 4 +-- .../limit_mem_diff/digest_matcher.h | 3 +- 11 files changed, 92 insertions(+), 62 deletions(-) diff --git a/bsdiff_wrapper/bsdiff_wrapper.cpp b/bsdiff_wrapper/bsdiff_wrapper.cpp index a61b20f1..03e63e47 100644 --- a/bsdiff_wrapper/bsdiff_wrapper.cpp +++ b/bsdiff_wrapper/bsdiff_wrapper.cpp @@ -206,27 +206,30 @@ void create_bsdiff(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* void create_bsdiff_block(unsigned char* newData,unsigned char* newData_end, unsigned char* oldData,unsigned char* oldData_end, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, - int kMinSingleMatchScore,bool isUseBigCacheMatch,size_t matchBlockSize){ + int kMinSingleMatchScore,bool isUseBigCacheMatch, + size_t matchBlockSize,size_t threadNum){ if (matchBlockSize==0){ _create_bsdiff(newData,newData_end,newData_end,oldData,oldData_end,oldData_end, out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,0); return; } - TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize); + TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize,threadNum); _create_bsdiff(newData,coversOp.matchBlock->newData_end_cur,newData_end, oldData,coversOp.matchBlock->oldData_end_cur,oldData_end, out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,&coversOp); } void create_bsdiff_block(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, - int kMinSingleMatchScore,bool isUseBigCacheMatch,size_t matchBlockSize){ + int kMinSingleMatchScore,bool isUseBigCacheMatch, + size_t matchBlockSize,size_t threadNum){ TAutoMem oldAndNewData; loadOldAndNewStream(oldAndNewData,oldData,newData); size_t old_size=oldData?(size_t)oldData->streamSize:0; unsigned char* pOldData=oldAndNewData.data(); unsigned char* pNewData=pOldData+old_size; create_bsdiff_block(pNewData,pNewData+(size_t)newData->streamSize,pOldData,pOldData+old_size, - out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,matchBlockSize); + out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch, + matchBlockSize,threadNum); } bool get_is_bsdiff(const hpatch_TStreamInput* diffData){ diff --git a/bsdiff_wrapper/bsdiff_wrapper.h b/bsdiff_wrapper/bsdiff_wrapper.h index e7ae86b2..33eaf930 100644 --- a/bsdiff_wrapper/bsdiff_wrapper.h +++ b/bsdiff_wrapper/bsdiff_wrapper.h @@ -55,10 +55,14 @@ void create_bsdiff_block(unsigned char* newData,unsigned char* newData_end, unsigned char* oldData,unsigned char* oldData_end, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, int kMinSingleMatchScore=kMinSingleMatchScore_default, - bool isUseBigCacheMatch=false,size_t matchBlockSize=kDefaultFastMatchBlockSize); + bool isUseBigCacheMatch=false, + size_t matchBlockSize=kDefaultFastMatchBlockSize, + size_t threadNum=1); void create_bsdiff_block(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, int kMinSingleMatchScore=kMinSingleMatchScore_default, - bool isUseBigCacheMatch=false,size_t matchBlockSize=kDefaultFastMatchBlockSize); + bool isUseBigCacheMatch=false, + size_t matchBlockSize=kDefaultFastMatchBlockSize, + size_t threadNum=1); #endif \ No newline at end of file diff --git a/dirDiffPatch/dir_diff/dir_diff.cpp b/dirDiffPatch/dir_diff/dir_diff.cpp index 9cd057d5..106a2c2a 100644 --- a/dirDiffPatch/dir_diff/dir_diff.cpp +++ b/dirDiffPatch/dir_diff/dir_diff.cpp @@ -467,7 +467,8 @@ void dir_diff(IDirDiffListener* listener,const TManifest& oldManifest, create_single_compressed_diff_block(newData,newData+newRefStream.stream->streamSize, oldData,oldData+oldRefStream.stream->streamSize, &ofStream,compressPlugin,(int)hdiffSets.matchScore, - hdiffSets.patchStepMemSize,hdiffSets.isUseBigCacheMatch); + hdiffSets.patchStepMemSize,hdiffSets.isUseBigCacheMatch, + hdiffSets.threadNum); else create_single_compressed_diff(newData,newData+newRefStream.stream->streamSize, oldData,oldData+oldRefStream.stream->streamSize, @@ -485,7 +486,7 @@ void dir_diff(IDirDiffListener* listener,const TManifest& oldManifest, create_compressed_diff_block(newData,newData+newRefStream.stream->streamSize, oldData,oldData+oldRefStream.stream->streamSize, out_diff,compressPlugin,(int)hdiffSets.matchScore, - hdiffSets.isUseBigCacheMatch); + hdiffSets.isUseBigCacheMatch,hdiffSets.threadNum); else create_compressed_diff(newData,newData+newRefStream.stream->streamSize, oldData,oldData+oldRefStream.stream->streamSize, @@ -498,10 +499,11 @@ void dir_diff(IDirDiffListener* listener,const TManifest& oldManifest, TOffsetStreamOutput ofStream(outDiffStream,writeToPos); if (hdiffSets.isSingleCompressedDiff){ create_single_compressed_diff_stream(newRefStream.stream,oldRefStream.stream,&ofStream, - compressPlugin,hdiffSets.matchBlockSize,hdiffSets.patchStepMemSize); + compressPlugin,hdiffSets.matchBlockSize, + hdiffSets.patchStepMemSize,hdiffSets.threadNum); }else{ create_compressed_diff_stream(newRefStream.stream,oldRefStream.stream,&ofStream, - compressPlugin,hdiffSets.matchBlockSize); + compressPlugin,hdiffSets.matchBlockSize,hdiffSets.threadNum); } diffDataSize=ofStream.outSize; if (checksumByteSize>0){ diff --git a/dirDiffPatch/dir_patch/dir_patch_types.h b/dirDiffPatch/dir_patch/dir_patch_types.h index b2a95435..090a4fd9 100644 --- a/dirDiffPatch/dir_patch/dir_patch_types.h +++ b/dirDiffPatch/dir_patch/dir_patch_types.h @@ -44,6 +44,7 @@ struct THDiffSets{ //diff by stream size_t patchStepMemSize; size_t matchBlockSize; + size_t threadNum; }; #ifndef _IS_NEED_DIR_DIFF_PATCH diff --git a/hdiffz.cpp b/hdiffz.cpp index 78f8201e..baa53f09 100644 --- a/hdiffz.cpp +++ b/hdiffz.cpp @@ -713,11 +713,11 @@ int hdiff_cmd_line(int argc, const char * argv[]){ diffSets.isSingleCompressedDiff =_kNULL_VALUE; diffSets.isUseBigCacheMatch =_kNULL_VALUE; diffSets.isUseFastMatchBlock=_kNULL_VALUE; + diffSets.threadNum=_THREAD_NUMBER_NULL; hpatch_BOOL isForceOverwrite=_kNULL_VALUE; hpatch_BOOL isOutputHelp=_kNULL_VALUE; hpatch_BOOL isOutputVersion=_kNULL_VALUE; hpatch_BOOL isOldPathInputEmpty=_kNULL_VALUE; - size_t threadNum = _THREAD_NUMBER_NULL; hdiff_TCompress* compressPlugin=0; #if (_IS_NEED_DIR_DIFF_PATCH) hpatch_BOOL isForceRunDirDiff=_kNULL_VALUE; @@ -819,10 +819,10 @@ int hdiff_cmd_line(int argc, const char * argv[]){ } break; #if (_IS_USED_MULTITHREAD) case 'p':{ - _options_check((threadNum==_THREAD_NUMBER_NULL)&&(op[2]=='-'),"-p-?"); + _options_check((diffSets.threadNum==_THREAD_NUMBER_NULL)&&(op[2]=='-'),"-p-?"); const char* pnum=op+3; - _options_check(a_to_size(pnum,strlen(pnum),&threadNum),"-p-?"); - _options_check(threadNum>=_THREAD_NUMBER_MIN,"-p-?"); + _options_check(a_to_size(pnum,strlen(pnum),&diffSets.threadNum),"-p-?"); + _options_check(diffSets.threadNum>=_THREAD_NUMBER_MIN,"-p-?"); } break; #endif case 'b':{ @@ -949,12 +949,12 @@ int hdiff_cmd_line(int argc, const char * argv[]){ if (kMaxOpenFileNumber_THREAD_NUMBER_MAX) - threadNum=_THREAD_NUMBER_MAX; + if (diffSets.threadNum==_THREAD_NUMBER_NULL) + diffSets.threadNum=_THREAD_NUMBER_DEFUALT; + else if (diffSets.threadNum>_THREAD_NUMBER_MAX) + diffSets.threadNum=_THREAD_NUMBER_MAX; if (compressPlugin!=0){ - compressPlugin->setParallelThreadNumber(compressPlugin,(int)threadNum); + compressPlugin->setParallelThreadNumber(compressPlugin,(int)diffSets.threadNum); } if (isOldPathInputEmpty==_kNULL_VALUE) @@ -1221,7 +1221,8 @@ static int hdiff_in_mem(const char* oldFileName,const char* newFileName,const ch if (diffSets.isBsDiff){ if (diffSets.isUseFastMatchBlock) create_bsdiff_block(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(),&diffData_out.base, - compressPlugin,(int)diffSets.matchScore,diffSets.isUseBigCacheMatch,diffSets.fastMatchBlockSize); + compressPlugin,(int)diffSets.matchScore,diffSets.isUseBigCacheMatch, + diffSets.fastMatchBlockSize,diffSets.threadNum); else create_bsdiff(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(),&diffData_out.base, compressPlugin,(int)diffSets.matchScore,diffSets.isUseBigCacheMatch); @@ -1231,7 +1232,8 @@ static int hdiff_in_mem(const char* oldFileName,const char* newFileName,const ch if (diffSets.isUseFastMatchBlock) create_single_compressed_diff_block(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(), &diffData_out.base,compressPlugin,(int)diffSets.matchScore, - diffSets.patchStepMemSize,diffSets.isUseBigCacheMatch,diffSets.fastMatchBlockSize); + diffSets.patchStepMemSize,diffSets.isUseBigCacheMatch, + diffSets.fastMatchBlockSize,diffSets.threadNum); else create_single_compressed_diff(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(), &diffData_out.base,compressPlugin,(int)diffSets.matchScore, @@ -1240,7 +1242,7 @@ static int hdiff_in_mem(const char* oldFileName,const char* newFileName,const ch if (diffSets.isUseFastMatchBlock) create_compressed_diff_block(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(), &diffData_out.base,compressPlugin,(int)diffSets.matchScore, - diffSets.isUseBigCacheMatch,diffSets.fastMatchBlockSize); + diffSets.isUseBigCacheMatch,diffSets.fastMatchBlockSize,diffSets.threadNum); else create_compressed_diff(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(), &diffData_out.base,compressPlugin,(int)diffSets.matchScore, @@ -1350,10 +1352,11 @@ static int hdiff_by_stream(const char* oldFileName,const char* newFileName,const try{ if (diffSets.isSingleCompressedDiff) create_single_compressed_diff_stream(&newData.base,&oldData.base, &diffData_out.base, - compressPlugin,diffSets.matchBlockSize,diffSets.patchStepMemSize); + compressPlugin,diffSets.matchBlockSize, + diffSets.patchStepMemSize,diffSets.threadNum); else create_compressed_diff_stream(&newData.base,&oldData.base, &diffData_out.base, - compressPlugin,diffSets.matchBlockSize); + compressPlugin,diffSets.matchBlockSize,diffSets.threadNum); diffData_out.base.streamSize=diffData_out.out_length; }catch(const std::exception& e){ check(!newData.fileError,HDIFF_OPENREAD_ERROR,"read newFile"); diff --git a/libHDiffPatch/HDiff/diff.cpp b/libHDiffPatch/HDiff/diff.cpp index 51db042d..3fa22e24 100644 --- a/libHDiffPatch/HDiff/diff.cpp +++ b/libHDiffPatch/HDiff/diff.cpp @@ -925,9 +925,10 @@ void create_single_compressed_diff_stream(const hpatch_TStreamInput* newData, const hpatch_TStreamInput* oldData, const hpatch_TStreamOutput* out_diff, const hdiff_TCompress* compressPlugin, - size_t kMatchBlockSize,size_t patchStepMemSize){ + size_t kMatchBlockSize,size_t patchStepMemSize, + size_t threadNum){ TCoversBuf covers(newData->streamSize,oldData->streamSize); - get_match_covers_by_block(newData,oldData,&covers,kMatchBlockSize); + get_match_covers_by_block(newData,oldData,&covers,kMatchBlockSize,threadNum); serialize_single_compressed_diff(newData,oldData,true,covers, out_diff,compressPlugin,patchStepMemSize); } @@ -1055,20 +1056,20 @@ void __hdiff_private__create_compressed_diff(const TByte* newData,const TByte* n //====================== void get_match_covers_by_block(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, - hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize){ + hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize,size_t threadNum){ assert(out_covers->push_cover!=0); - TDigestMatcher matcher(oldData,kMatchBlockSize); + TDigestMatcher matcher(oldData,kMatchBlockSize,threadNum); matcher.search_cover(newData,out_covers); //todo: + extend_cover_stream ? } void get_match_covers_by_block(const unsigned char* newData,const unsigned char* newData_end, const unsigned char* oldData,const unsigned char* oldData_end, - hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize){ + hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize,size_t threadNum){ hdiff_TStreamInput oldData_stream; mem_as_hStreamInput(&oldData_stream,oldData,oldData_end); hdiff_TStreamInput newData_stream; mem_as_hStreamInput(&newData_stream,newData,newData_end); - get_match_covers_by_block(&newData_stream,&oldData_stream,out_covers,kMatchBlockSize); + get_match_covers_by_block(&newData_stream,&oldData_stream,out_covers,kMatchBlockSize,threadNum); } void get_match_covers_by_sstring(const unsigned char* newData,const unsigned char* newData_end, @@ -1157,9 +1158,9 @@ static void stream_serialize(const hpatch_TStreamInput* newData, void create_compressed_diff_stream(const hpatch_TStreamInput* newData, const hpatch_TStreamInput* oldData, const hpatch_TStreamOutput* out_diff, - const hdiff_TCompress* compressPlugin,size_t kMatchBlockSize){ + const hdiff_TCompress* compressPlugin,size_t kMatchBlockSize,size_t threadNum){ TCoversBuf covers(newData->streamSize,oldData->streamSize); - get_match_covers_by_block(newData,oldData,&covers,kMatchBlockSize); + get_match_covers_by_block(newData,oldData,&covers,kMatchBlockSize,threadNum); stream_serialize(newData,oldData->streamSize,out_diff,compressPlugin,covers); } diff --git a/libHDiffPatch/HDiff/diff.h b/libHDiffPatch/HDiff/diff.h index 5149241f..00a33b79 100644 --- a/libHDiffPatch/HDiff/diff.h +++ b/libHDiffPatch/HDiff/diff.h @@ -91,7 +91,8 @@ void create_compressed_diff_stream(const hpatch_TStreamInput* newData, const hpatch_TStreamInput* oldData, const hpatch_TStreamOutput* out_diff, const hdiff_TCompress* compressPlugin=0, - size_t kMatchBlockSize=kMatchBlockSize_default); + size_t kMatchBlockSize=kMatchBlockSize_default, + size_t threadNum=1); //return patch_decompress(oldData+diff)==newData? bool check_compressed_diff(const unsigned char* newData,const unsigned char* newData_end, @@ -149,7 +150,8 @@ void create_single_compressed_diff_stream(const hpatch_TStreamInput* newData, const hpatch_TStreamOutput* out_diff, const hdiff_TCompress* compressPlugin=0, size_t kMatchBlockSize=kMatchBlockSize_default, - size_t patchStepMemSize=kDefaultPatchStepMemSize); + size_t patchStepMemSize=kDefaultPatchStepMemSize, + size_t threadNum=1); //return patch_single_?(oldData+diff)==newData? bool check_single_compressed_diff(const unsigned char* newData,const unsigned char* newData_end, @@ -175,10 +177,10 @@ void resave_single_compressed_diff(const hpatch_TStreamInput* in_diff, //same as create?compressed_diff_stream(), but not serialize diffData, only got covers void get_match_covers_by_block(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, - hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize); + hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize,size_t threadNum); void get_match_covers_by_block(const unsigned char* newData,const unsigned char* newData_end, const unsigned char* oldData,const unsigned char* oldData_end, - hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize); + hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize,size_t threadNum); //same as create?_diff(), but not serialize diffData, only got covers void get_match_covers_by_sstring(const unsigned char* newData,const unsigned char* newData_end, diff --git a/libHDiffPatch/HDiff/match_block.cpp b/libHDiffPatch/HDiff/match_block.cpp index e7b0c8ae..c5c0961c 100644 --- a/libHDiffPatch/HDiff/match_block.cpp +++ b/libHDiffPatch/HDiff/match_block.cpp @@ -87,7 +87,7 @@ namespace hdiff_private { void TMatchBlock::getBlockCovers(){ TOutputCovers covers(blockCovers); get_match_covers_by_block(newData,newData_end,oldData,oldData_end, - &covers,matchBlockSize); + &covers,matchBlockSize,threadNum); } void TMatchBlock::getPackedCover(){ @@ -265,39 +265,42 @@ using namespace hdiff_private; void create_compressed_diff_block(unsigned char* newData,unsigned char* newData_end, unsigned char* oldData,unsigned char* oldData_end, std::vector& out_diff,const hdiff_TCompress* compressPlugin, - int kMinSingleMatchScore,bool isUseBigCacheMatch,size_t matchBlockSize){ + int kMinSingleMatchScore,bool isUseBigCacheMatch, + size_t matchBlockSize,size_t threadNum){ if (matchBlockSize==0){ create_compressed_diff(newData,newData_end,oldData,oldData_end, out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch); return; } - TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize); + TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize,threadNum); create_compressed_diff(newData,coversOp.matchBlock->newData_end_cur,oldData,coversOp.matchBlock->oldData_end_cur, out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,&coversOp); } void create_compressed_diff_block(unsigned char* newData,unsigned char* newData_end, unsigned char* oldData,unsigned char* oldData_end, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, - int kMinSingleMatchScore,bool isUseBigCacheMatch,size_t matchBlockSize){ + int kMinSingleMatchScore,bool isUseBigCacheMatch, + size_t matchBlockSize,size_t threadNum){ if (matchBlockSize==0){ create_compressed_diff(newData,newData_end,oldData,oldData_end, out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch); return; } - TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize); + TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize,threadNum); create_compressed_diff(newData,coversOp.matchBlock->newData_end_cur,oldData,coversOp.matchBlock->oldData_end_cur, out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,&coversOp); } void create_compressed_diff_block(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, - int kMinSingleMatchScore,bool isUseBigCacheMatch,size_t matchBlockSize){ + int kMinSingleMatchScore,bool isUseBigCacheMatch, + size_t matchBlockSize,size_t threadNum){ TAutoMem oldAndNewData; loadOldAndNewStream(oldAndNewData,oldData,newData); size_t old_size=oldData?(size_t)oldData->streamSize:0; unsigned char* pOldData=oldAndNewData.data(); unsigned char* pNewData=pOldData+old_size; create_compressed_diff_block(pNewData,pNewData+(size_t)newData->streamSize,pOldData,pOldData+old_size, - out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,matchBlockSize); + out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,matchBlockSize,threadNum); } @@ -305,14 +308,14 @@ void create_single_compressed_diff_block(unsigned char* newData,unsigned char* n unsigned char* oldData,unsigned char* oldData_end, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, int kMinSingleMatchScore,size_t patchStepMemSize, - bool isUseBigCacheMatch,size_t matchBlockSize){ + bool isUseBigCacheMatch,size_t matchBlockSize,size_t threadNum){ if (matchBlockSize==0){ create_single_compressed_diff(newData,newData_end,oldData,oldData_end, out_diff,compressPlugin,kMinSingleMatchScore, patchStepMemSize,isUseBigCacheMatch); return; } - TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize); + TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize,threadNum); create_single_compressed_diff(newData,coversOp.matchBlock->newData_end_cur,oldData,coversOp.matchBlock->oldData_end_cur, out_diff,compressPlugin,kMinSingleMatchScore, patchStepMemSize,isUseBigCacheMatch,&coversOp); @@ -321,14 +324,14 @@ void create_single_compressed_diff_block(unsigned char* newData,unsigned char* n unsigned char* oldData,unsigned char* oldData_end, std::vector& out_diff,const hdiff_TCompress* compressPlugin, int kMinSingleMatchScore,size_t patchStepMemSize, - bool isUseBigCacheMatch,size_t matchBlockSize){ + bool isUseBigCacheMatch,size_t matchBlockSize,size_t threadNum){ if (matchBlockSize==0){ create_single_compressed_diff(newData,newData_end,oldData,oldData_end, out_diff,compressPlugin,kMinSingleMatchScore, patchStepMemSize,isUseBigCacheMatch); return; } - TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize); + TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize,threadNum); create_single_compressed_diff(newData,coversOp.matchBlock->newData_end_cur,oldData,coversOp.matchBlock->oldData_end_cur, out_diff,compressPlugin,kMinSingleMatchScore, patchStepMemSize,isUseBigCacheMatch,&coversOp); @@ -336,7 +339,7 @@ void create_single_compressed_diff_block(unsigned char* newData,unsigned char* n void create_single_compressed_diff_block(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, int kMinSingleMatchScore,size_t patchStepMemSize, - bool isUseBigCacheMatch,size_t matchBlockSize){ + bool isUseBigCacheMatch,size_t matchBlockSize,size_t threadNum){ TAutoMem oldAndNewData; loadOldAndNewStream(oldAndNewData,oldData,newData); size_t old_size=oldData?(size_t)oldData->streamSize:0; @@ -344,5 +347,5 @@ void create_single_compressed_diff_block(const hpatch_TStreamInput* newData,cons unsigned char* pNewData=pOldData+old_size; create_single_compressed_diff_block(pNewData,pNewData+(size_t)newData->streamSize,pOldData,pOldData+old_size, out_diff,compressPlugin,kMinSingleMatchScore, - patchStepMemSize,isUseBigCacheMatch,matchBlockSize); + patchStepMemSize,isUseBigCacheMatch,matchBlockSize,threadNum); } diff --git a/libHDiffPatch/HDiff/match_block.h b/libHDiffPatch/HDiff/match_block.h index 27aac96c..53f02504 100644 --- a/libHDiffPatch/HDiff/match_block.h +++ b/libHDiffPatch/HDiff/match_block.h @@ -41,12 +41,14 @@ namespace hdiff_private{ unsigned char* oldData_end; unsigned char* oldData_end_cur; const size_t matchBlockSize; + const size_t threadNum; typedef hpatch_TCover TPackedCover; TMatchBlock(unsigned char* _newData,unsigned char* _newData_end, - unsigned char* _oldData,unsigned char* _oldData_end,size_t _matchBlockSize) + unsigned char* _oldData,unsigned char* _oldData_end, + size_t _matchBlockSize,size_t _threadNum) :newData(_newData),newData_end(_newData_end),newData_end_cur(_newData_end), oldData(_oldData),oldData_end(_oldData_end),oldData_end_cur(_oldData_end), - matchBlockSize(_matchBlockSize){} + matchBlockSize(_matchBlockSize),threadNum(_threadNum){} void getBlockCovers(); void getPackedCover(); void packData(); @@ -73,8 +75,10 @@ namespace hdiff_private{ template struct TCoversOptimMB:public TCoversOptim{ TCoversOptimMB(unsigned char* newData,unsigned char* newData_end, - unsigned char* oldData,unsigned char* oldData_end,size_t matchBlockSize) - :TCoversOptim(&_matchBlock),_matchBlock(newData,newData_end,oldData,oldData_end,matchBlockSize){ + unsigned char* oldData,unsigned char* oldData_end, + size_t matchBlockSize,size_t threadNum) + :TCoversOptim(&_matchBlock),_matchBlock(newData,newData_end,oldData,oldData_end, + matchBlockSize,threadNum){ _matchBlock.getBlockCovers(); _matchBlock.getPackedCover(); _matchBlock.packData(); @@ -99,21 +103,24 @@ void create_compressed_diff_block(const hpatch_TStreamInput* newData,//will load const hdiff_TCompress* compressPlugin=0, int kMinSingleMatchScore=kMinSingleMatchScore_default, bool isUseBigCacheMatch=false, - size_t matchBlockSize=kDefaultFastMatchBlockSize); + size_t matchBlockSize=kDefaultFastMatchBlockSize, + size_t threadNum=1); void create_compressed_diff_block(unsigned char* newData,unsigned char* newData_end, unsigned char* oldData,unsigned char* oldData_end, const hpatch_TStreamOutput* out_diff, const hdiff_TCompress* compressPlugin=0, int kMinSingleMatchScore=kMinSingleMatchScore_default, bool isUseBigCacheMatch=false, - size_t matchBlockSize=kDefaultFastMatchBlockSize); + size_t matchBlockSize=kDefaultFastMatchBlockSize, + size_t threadNum=1); void create_compressed_diff_block(unsigned char* newData,unsigned char* newData_end, unsigned char* oldData,unsigned char* oldData_end, std::vector& out_diff, const hdiff_TCompress* compressPlugin=0, int kMinSingleMatchScore=kMinSingleMatchScore_default, bool isUseBigCacheMatch=false, - size_t matchBlockSize=kDefaultFastMatchBlockSize); + size_t matchBlockSize=kDefaultFastMatchBlockSize, + size_t threadNum=1); void create_single_compressed_diff_block(const hpatch_TStreamInput* newData,//will load in memory const hpatch_TStreamInput* oldData,//will load in memory @@ -121,20 +128,23 @@ void create_single_compressed_diff_block(const hpatch_TStreamInput* newData,//wi int kMinSingleMatchScore=kMinSingleMatchScore_default, size_t patchStepMemSize=kDefaultPatchStepMemSize, bool isUseBigCacheMatch=false, - size_t matchBlockSize=kDefaultFastMatchBlockSize); + size_t matchBlockSize=kDefaultFastMatchBlockSize, + size_t threadNum=1); void create_single_compressed_diff_block(unsigned char* newData,unsigned char* newData_end, unsigned char* oldData,unsigned char* oldData_end, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin=0, int kMinSingleMatchScore=kMinSingleMatchScore_default, size_t patchStepMemSize=kDefaultPatchStepMemSize, bool isUseBigCacheMatch=false, - size_t matchBlockSize=kDefaultFastMatchBlockSize); + size_t matchBlockSize=kDefaultFastMatchBlockSize, + size_t threadNum=1); void create_single_compressed_diff_block(unsigned char* newData,unsigned char* newData_end, unsigned char* oldData,unsigned char* oldData_end, std::vector& out_diff,const hdiff_TCompress* compressPlugin=0, int kMinSingleMatchScore=kMinSingleMatchScore_default, size_t patchStepMemSize=kDefaultPatchStepMemSize, bool isUseBigCacheMatch=false, - size_t matchBlockSize=kDefaultFastMatchBlockSize); + size_t matchBlockSize=kDefaultFastMatchBlockSize, + size_t threadNum=1); #endif //hdiff_match_block_h diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp index 6179dc76..1e793afd 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp @@ -131,8 +131,8 @@ static size_t posToBlockIndex(hpatch_StreamPos_t pos,size_t kMatchBlockSize,size TDigestMatcher::~TDigestMatcher(){ } -TDigestMatcher::TDigestMatcher(const hpatch_TStreamInput* oldData,size_t kMatchBlockSize) -:m_oldData(oldData),m_isUseLargeSorted(true), +TDigestMatcher::TDigestMatcher(const hpatch_TStreamInput* oldData,size_t kMatchBlockSize,size_t threadNum) +:m_oldData(oldData),m_isUseLargeSorted(true),m_threadNum(threadNum), m_newCacheSize(0),m_oldCacheSize(0),m_oldMinCacheSize(0),m_backupCacheSize(0),m_kMatchBlockSize(0){ if (kMatchBlockSize>(oldData->streamSize+1)/2) kMatchBlockSize=(size_t)((oldData->streamSize+1)/2); diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h index 1b4c76ec..2ead4993 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h @@ -62,7 +62,7 @@ static inline adler_hash_t adler_to_hash(const uint64_t x){ return x; } class TDigestMatcher{ public: //throw std::runtime_error when data->read error or kMatchBlockSize error; - TDigestMatcher(const hpatch_TStreamInput* oldData,size_t kMatchBlockSize); + TDigestMatcher(const hpatch_TStreamInput* oldData,size_t kMatchBlockSize,size_t threadNum); void search_cover(const hpatch_TStreamInput* newData,hpatch_TOutputCovers* out_covers); ~TDigestMatcher(); private: @@ -72,6 +72,7 @@ class TDigestMatcher{ std::vector m_sorted_limit; std::vector m_sorted_larger; bool m_isUseLargeSorted; + const size_t m_threadNum; TAutoMem m_mem; size_t m_newCacheSize; size_t m_oldCacheSize; From 6e2bee902dd59e7844597a8116725e0c4d7d7d31 Mon Sep 17 00:00:00 2001 From: sisong Date: Sun, 25 Sep 2022 15:09:38 +0800 Subject: [PATCH 04/20] add parallel qsort func: sort_parallel() for TDigestMatcher --- .../limit_mem_diff/digest_matcher.cpp | 5 +- .../HDiff/private_diff/qsort_parallel.h | 126 ++++++++++++++++++ 2 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 libHDiffPatch/HDiff/private_diff/qsort_parallel.h diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp index 1e793afd..729687f3 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp @@ -29,6 +29,7 @@ #include //std::runtime_error #include //std::sort,std::equal_range #include "../compress_detect.h" //_getUIntCost +#include "../qsort_parallel.h" namespace hdiff_private{ static const size_t kMinTrustMatchedLength=1024*16; static const size_t kMinMatchedLength = 16; @@ -214,9 +215,9 @@ void TDigestMatcher::getDigests(){ size_t kMaxCmpDeep= 1 + upperCount(kMinTrustMatchedLength,m_kMatchBlockSize); TIndex_comp comp(m_blocks.data(),m_blocks.size(),kMaxCmpDeep); if (m_isUseLargeSorted) - std::sort(m_sorted_larger.begin(),m_sorted_larger.end(),comp); + sort_parallel(m_sorted_larger.data(),m_sorted_larger.data()+m_sorted_larger.size(),comp,m_threadNum); else - std::sort(m_sorted_limit.begin(),m_sorted_limit.end(),comp); + sort_parallel(m_sorted_limit.data(),m_sorted_limit.data()+m_sorted_limit.size(),comp,m_threadNum); } struct TBlockStreamCache:public TStreamCache{ diff --git a/libHDiffPatch/HDiff/private_diff/qsort_parallel.h b/libHDiffPatch/HDiff/private_diff/qsort_parallel.h new file mode 100644 index 00000000..1541d3bc --- /dev/null +++ b/libHDiffPatch/HDiff/private_diff/qsort_parallel.h @@ -0,0 +1,126 @@ +// qsort_parallel.h +// parallel sort for HDiffz +/* + The MIT License (MIT) + Copyright (c) 2022 HouSisong + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, + copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following + conditions: + + The above copyright notice and this permission notice shall be + included in all copies of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef HDiff_qsort_parallel_h +#define HDiff_qsort_parallel_h +#include //sort +#include +#include "../../../libParallel/parallel_import.h" + +#if (_IS_USED_MULTITHREAD) + inline size_t __index_by_ratio(size_t size,size_t ratio,size_t ratio_base){ + return (size_t)(((hpatch_uint64_t)size)*ratio/ratio_base); + } + + template + struct _sort_parallel_TCmpi{ + inline _sort_parallel_TCmpi(const TValue* begin,TCmp& cmp):_begin(begin),_cmp(cmp){} + inline bool operator()(const size_t& x,const size_t& y) const{ + return _cmp(_begin[x],_begin[y]); + } + const TValue* _begin; + TCmp& _cmp; + }; + + template + static TValue* _sort_parallel_partition(TValue* begin,TValue* end,TCmp cmp, + size_t leftWeight=1,size_t rightWeight=1){ + const size_t size=end-begin; + size_t samples[kSampleSize]; + const size_t _kIndexStep=size/kSampleSize+1; + size_t curIndex=0; + for(size_t i=0;i(begin,cmp)); + size_t _pivot_i=__index_by_ratio(kSampleSize,leftWeight,(leftWeight+rightWeight)); + std::nth_element(samples,samples+_pivot_i,samples+kSampleSize,_sort_parallel_TCmpi(begin,cmp)); + size_t pivot=samples[_pivot_i]; + std::swap(begin[0],begin[pivot]); + TValue x(begin[0]); + size_t mid=0; + for (size_t j=mid+1;j + static void _sort_parallel_thread(TValue* begin,TValue* end,TCmp cmp,size_t threadNum){ +#if (_IS_USED_MULTITHREAD) + if (threadNum>1){ + const size_t rightWeight=(threadNum>>1); + const size_t leftWeight=threadNum-rightWeight; + TValue* mid; + const bool _kIsPartitionNotMerge=true; + + if (_kIsPartitionNotMerge){ // partition + //mid=begin+__index_by_ratio(size,leftWeight,threadNum); std::nth_element(begin,mid,end,cmp); + //mid=std::_Partition_by_median_guess_unchecked(begin, end, cmp).first; + mid=_sort_parallel_partition(begin,end,cmp,leftWeight,rightWeight); + }else{ + mid=begin+__index_by_ratio(end-begin,leftWeight,threadNum); + } + + std::thread threadRight(_sort_parallel_thread, + mid,end,cmp,rightWeight); + _sort_parallel_thread(begin,mid,cmp,leftWeight); + threadRight.join(); + + if (!_kIsPartitionNotMerge){ //merge + std::inplace_merge(begin,mid,end,cmp); + } + }else +#endif + { + std::sort(begin,end,cmp); + //printf("parallel sort size: %" PRIu64 " \n",(hpatch_StreamPos_t)(end-begin)); + } + } + + template + static void sort_parallel(TValue* begin,TValue* end,TCmp cmp,size_t threadNum){ + const size_t size=end-begin; + if (size<=1) return; +#if (_IS_USED_MULTITHREAD) + if ((threadNum>1)&&(size>=kMinQSortParallelSize)){ + const size_t maxThreanNum=size/(kMinQSortParallelSize/2); + threadNum=(threadNum<=maxThreanNum)?threadNum:maxThreanNum; + //std::random_shuffle(begin,end); + }else{ + threadNum=1; + } +#endif + _sort_parallel_thread(begin,end,cmp,threadNum); + } + +#endif From bc915e34752fc776fc5ce16ae9702246565af7f5 Mon Sep 17 00:00:00 2001 From: sisong Date: Mon, 26 Sep 2022 09:09:32 +0800 Subject: [PATCH 05/20] add parallel bloom filter insert func: filter_insert_parallel() for TDigestMatcher --- builds/codeblocks/HDiffZ.cbp | 4 +- .../limit_mem_diff/bloom_filter.h | 23 ++++++++++ .../limit_mem_diff/digest_matcher.cpp | 45 +++++++++++++++++-- .../limit_mem_diff/digest_matcher.h | 2 +- .../HDiff/private_diff/qsort_parallel.h | 31 +++++++------ 5 files changed, 83 insertions(+), 22 deletions(-) diff --git a/builds/codeblocks/HDiffZ.cbp b/builds/codeblocks/HDiffZ.cbp index 02b41ce9..0286ac49 100644 --- a/builds/codeblocks/HDiffZ.cbp +++ b/builds/codeblocks/HDiffZ.cbp @@ -332,10 +332,10 @@ - + - + diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/bloom_filter.h b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/bloom_filter.h index 556befc2..ae814f9c 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/bloom_filter.h +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/bloom_filter.h @@ -31,6 +31,7 @@ #include //memset #include #include //std::runtime_error +#include namespace hdiff_private{ class TBitSet{ @@ -42,6 +43,21 @@ class TBitSet{ //assert(bitIndex>kBaseShr] |= ((base_t)1<<(bitIndex&kBaseMask)); } +#if (_IS_USED_MULTITHREAD) + void set_MT(size_t bitIndex){ + //assert(bitIndex)==sizeof(base_t)); + base_t oldv; + base_t newv; + std::atomic& v=*(std::atomic*)&m_bits[bitIndex>>kBaseShr]; + do { + oldv=v.load(); + newv=oldv | ((base_t)1<<(bitIndex&kBaseMask)); + if (oldv==newv) + return; + } while(!v.compare_exchange_weak(oldv,newv)); + } +#endif inline bool is_hit(size_t bitIndex)const{ //assert(bitIndex>kBaseShr] & ((base_t)1<<(bitIndex&kBaseMask))); @@ -93,6 +109,13 @@ class TBloomFilter{ m_bitSet.set(hash1(data)); m_bitSet.set(hash2(data)); } +#if (_IS_USED_MULTITHREAD) + inline void insert_MT(T data){ + m_bitSet.set_MT(hash0(data)); + m_bitSet.set_MT(hash1(data)); + m_bitSet.set_MT(hash2(data)); + } +#endif inline bool is_hit(T data)const{ return m_bitSet.is_hit(hash0(data)) && m_bitSet.is_hit(hash1(data)) diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp index 729687f3..94f236ea 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp @@ -195,23 +195,62 @@ struct TIndex_comp{ } }; + + +static inline void _filter_insert(TBloomFilter* filter,const adler_uint_t* begin,const adler_uint_t* end){ + while (begin!=end){ + filter->insert(adler_to_hash(*begin++)); + } +} +#if (_IS_USED_MULTITHREAD) +static void _filter_insert_MT(TBloomFilter* filter,const adler_uint_t* begin,const adler_uint_t* end){ + while (begin!=end){ + filter->insert_MT(adler_to_hash(*begin++)); + } +} +#endif + +static void filter_insert_parallel(TBloomFilter& filter,const adler_uint_t* begin,const adler_uint_t* end, + size_t threadNum,size_t kMinParallelSize=4096){ + const size_t size=end-begin; +#if (_IS_USED_MULTITHREAD) + if ((threadNum>1)&&(size>=kMinParallelSize)) { + const size_t maxThreanNum=size/(kMinParallelSize/2); + threadNum=(threadNum<=maxThreanNum)?threadNum:maxThreanNum; + + const size_t step=size/threadNum; + const size_t threadCount=threadNum-1; + std::vector threads(threadCount); + for (size_t i=0;istreamSize); streamCache.resetPos(0,readPos,m_kMatchBlockSize); adler_uint_t adler=adler_start(streamCache.data(),m_kMatchBlockSize); - m_filter.insert(adler_to_hash(adler)); m_blocks[i]=adler; if (m_isUseLargeSorted) m_sorted_larger[i]=i; else m_sorted_limit[i]=(uint32_t)i; } + m_filter.init(blockCount); + filter_insert_parallel(m_filter,m_blocks.data(),m_blocks.data()+blockCount,m_threadNum); + size_t kMaxCmpDeep= 1 + upperCount(kMinTrustMatchedLength,m_kMatchBlockSize); TIndex_comp comp(m_blocks.data(),m_blocks.size(),kMaxCmpDeep); if (m_isUseLargeSorted) diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h index 2ead4993..b976687b 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h @@ -30,11 +30,11 @@ #ifndef digest_matcher_h #define digest_matcher_h +#include "../../../../libParallel/parallel_import.h" #include "bloom_filter.h" #include "covers.h" #include "adler_roll.h" #include "../mem_buf.h" - namespace hdiff_private{ typedef uint64_t adler_uint_t; diff --git a/libHDiffPatch/HDiff/private_diff/qsort_parallel.h b/libHDiffPatch/HDiff/private_diff/qsort_parallel.h index 1541d3bc..7d3f6db3 100644 --- a/libHDiffPatch/HDiff/private_diff/qsort_parallel.h +++ b/libHDiffPatch/HDiff/private_diff/qsort_parallel.h @@ -29,8 +29,10 @@ #ifndef HDiff_qsort_parallel_h #define HDiff_qsort_parallel_h #include //sort -#include #include "../../../libParallel/parallel_import.h" +#if (_IS_USED_MULTITHREAD) +#include +#endif #if (_IS_USED_MULTITHREAD) inline size_t __index_by_ratio(size_t size,size_t ratio,size_t ratio_base){ @@ -58,11 +60,12 @@ size_t ri=(curIndex+(size_t)rand()) % size; //muti thread safe samples[i]=ri; } - //std::sort(samples,samples+kSampleSize,_sort_parallel_TCmpi(begin,cmp)); + const bool _kIsSortNotNth=false; + if (_kIsSortNotNth) std::sort(samples,samples+kSampleSize,_sort_parallel_TCmpi(begin,cmp)); size_t _pivot_i=__index_by_ratio(kSampleSize,leftWeight,(leftWeight+rightWeight)); - std::nth_element(samples,samples+_pivot_i,samples+kSampleSize,_sort_parallel_TCmpi(begin,cmp)); + if (!_kIsSortNotNth) std::nth_element(samples,samples+_pivot_i,samples+kSampleSize,_sort_parallel_TCmpi(begin,cmp)); size_t pivot=samples[_pivot_i]; - std::swap(begin[0],begin[pivot]); + std::swap(begin[0], begin[pivot]); TValue x(begin[0]); size_t mid=0; for (size_t j=mid+1;j static void _sort_parallel_thread(TValue* begin,TValue* end,TCmp cmp,size_t threadNum){ -#if (_IS_USED_MULTITHREAD) if (threadNum>1){ const size_t rightWeight=(threadNum>>1); const size_t leftWeight=threadNum-rightWeight; TValue* mid; const bool _kIsPartitionNotMerge=true; - if (_kIsPartitionNotMerge){ // partition //mid=begin+__index_by_ratio(size,leftWeight,threadNum); std::nth_element(begin,mid,end,cmp); //mid=std::_Partition_by_median_guess_unchecked(begin, end, cmp).first; @@ -99,28 +99,27 @@ if (!_kIsPartitionNotMerge){ //merge std::inplace_merge(begin,mid,end,cmp); } - }else -#endif - { + }else{ std::sort(begin,end,cmp); //printf("parallel sort size: %" PRIu64 " \n",(hpatch_StreamPos_t)(end-begin)); } } +#endif template static void sort_parallel(TValue* begin,TValue* end,TCmp cmp,size_t threadNum){ const size_t size=end-begin; - if (size<=1) return; #if (_IS_USED_MULTITHREAD) if ((threadNum>1)&&(size>=kMinQSortParallelSize)){ const size_t maxThreanNum=size/(kMinQSortParallelSize/2); threadNum=(threadNum<=maxThreanNum)?threadNum:maxThreanNum; - //std::random_shuffle(begin,end); - }else{ - threadNum=1; - } + //try? std::random_shuffle(begin,end); + _sort_parallel_thread(begin,end,cmp,threadNum); + }else #endif - _sort_parallel_thread(begin,end,cmp,threadNum); + { + std::sort(begin,end,cmp); + } } #endif From 81dbcdd2a4d30bb9afae6d689ad8e101ef537976 Mon Sep 17 00:00:00 2001 From: sisong Date: Thu, 29 Sep 2022 11:38:43 +0800 Subject: [PATCH 06/20] used c++11's std::thread, so vc++ builder need updated to vc2012; --- builds/vc/HDiffZ.vcxproj | 4 ++++ builds/vc/HPatchZ.vcxproj | 4 ++++ builds/vc/bzip2.vcxproj | 4 ++++ builds/vc/lzma.vcxproj | 4 ++++ builds/vc/unitTest.vcxproj | 4 ++++ builds/vc/zlib.vcxproj | 4 ++++ builds/vc/zstd.vcxproj | 4 ++++ dirDiffPatch/dir_patch/new_dir_output.c | 9 ++++++--- .../private_diff/limit_mem_diff/bloom_filter.h | 15 +++------------ .../limit_mem_diff/digest_matcher.cpp | 7 ++++--- .../private_diff/limit_mem_diff/digest_matcher.h | 1 - .../HDiff/private_diff/qsort_parallel.h | 4 ++-- libParallel/parallel_channel.cpp | 8 ++++++++ libParallel/parallel_channel.h | 16 +++++++++++++++- 14 files changed, 66 insertions(+), 22 deletions(-) diff --git a/builds/vc/HDiffZ.vcxproj b/builds/vc/HDiffZ.vcxproj index 7d33774e..15c97b35 100644 --- a/builds/vc/HDiffZ.vcxproj +++ b/builds/vc/HDiffZ.vcxproj @@ -30,12 +30,14 @@ true false Unicode + v110 Application true false Unicode + v110 Application @@ -43,6 +45,7 @@ false Unicode true + v110 Application @@ -50,6 +53,7 @@ false Unicode true + v110 diff --git a/builds/vc/HPatchZ.vcxproj b/builds/vc/HPatchZ.vcxproj index 54f2346b..c240763d 100644 --- a/builds/vc/HPatchZ.vcxproj +++ b/builds/vc/HPatchZ.vcxproj @@ -30,12 +30,14 @@ true false Unicode + v110 Application true false Unicode + v110 Application @@ -43,6 +45,7 @@ false Unicode true + v110 Application @@ -50,6 +53,7 @@ false Unicode true + v110 diff --git a/builds/vc/bzip2.vcxproj b/builds/vc/bzip2.vcxproj index e97aca49..7eeaad31 100644 --- a/builds/vc/bzip2.vcxproj +++ b/builds/vc/bzip2.vcxproj @@ -30,12 +30,14 @@ true false Unicode + v110 StaticLibrary true false Unicode + v110 StaticLibrary @@ -43,6 +45,7 @@ false Unicode true + v110 StaticLibrary @@ -50,6 +53,7 @@ false Unicode true + v110 diff --git a/builds/vc/lzma.vcxproj b/builds/vc/lzma.vcxproj index c0ef04f3..9f173dd2 100644 --- a/builds/vc/lzma.vcxproj +++ b/builds/vc/lzma.vcxproj @@ -30,12 +30,14 @@ true false Unicode + v110 StaticLibrary true false Unicode + v110 StaticLibrary @@ -43,6 +45,7 @@ false Unicode true + v110 StaticLibrary @@ -50,6 +53,7 @@ false Unicode true + v110 diff --git a/builds/vc/unitTest.vcxproj b/builds/vc/unitTest.vcxproj index 18112d57..420989fb 100644 --- a/builds/vc/unitTest.vcxproj +++ b/builds/vc/unitTest.vcxproj @@ -27,23 +27,27 @@ Application true MultiByte + v110 Application true MultiByte + v110 Application false true MultiByte + v110 Application false true MultiByte + v110 diff --git a/builds/vc/zlib.vcxproj b/builds/vc/zlib.vcxproj index b723e9e1..71e5ab1b 100644 --- a/builds/vc/zlib.vcxproj +++ b/builds/vc/zlib.vcxproj @@ -30,12 +30,14 @@ true false Unicode + v110 StaticLibrary true false Unicode + v110 StaticLibrary @@ -43,6 +45,7 @@ false Unicode true + v110 StaticLibrary @@ -50,6 +53,7 @@ false Unicode true + v110 diff --git a/builds/vc/zstd.vcxproj b/builds/vc/zstd.vcxproj index 939af9c8..f97d272f 100644 --- a/builds/vc/zstd.vcxproj +++ b/builds/vc/zstd.vcxproj @@ -28,12 +28,14 @@ true false Unicode + v110 StaticLibrary true false Unicode + v110 StaticLibrary @@ -41,6 +43,7 @@ false Unicode true + v110 StaticLibrary @@ -48,6 +51,7 @@ false Unicode true + v110 diff --git a/dirDiffPatch/dir_patch/new_dir_output.c b/dirDiffPatch/dir_patch/new_dir_output.c index 436d8fa2..6922443f 100644 --- a/dirDiffPatch/dir_patch/new_dir_output.c +++ b/dirDiffPatch/dir_patch/new_dir_output.c @@ -91,9 +91,11 @@ hpatch_BOOL TDirPatcher_readFile(const char* oldFileName_utf8,hpatch_ICopyDataLi static hpatch_BOOL _tryCloseNewFile(TNewDirOutput* self){ + hpatch_BOOL result; + hpatch_FileError_t fileError; if (self->_curNewFile==0) return hpatch_TRUE; - hpatch_BOOL result=self->_listener->closeNewFile(self->_listener,self->_curNewFile); - hpatch_FileError_t fileError=self->_curNewFile->fileError; + result=self->_listener->closeNewFile(self->_listener,self->_curNewFile); + fileError=self->_curNewFile->fileError; hpatch_TFileStreamOutput_init(self->_curNewFile); return result&&(!fileError); } @@ -141,8 +143,9 @@ static hpatch_BOOL _openNewFile(hpatch_INewStreamListener* listener,size_t newRe hpatch_BOOL result=hpatch_TRUE; TNewDirOutput* self=(TNewDirOutput*)listener->listenerImport; const char* utf8fileName=0; + hpatch_StreamPos_t fileSize; assert((newRefIndexnewRefFileCount)&&(self->_curNewFile->base.write==0)); - hpatch_StreamPos_t fileSize=self->newRefSizeList[newRefIndex]; + fileSize=self->newRefSizeList[newRefIndex]; if (fileSize==0){ size_t newPathIndex=self->newRefList?self->newRefList[newRefIndex]:newRefIndex; check(_makeNewDirOrEmptyFile(listener,newPathIndex)); diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/bloom_filter.h b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/bloom_filter.h index ae814f9c..3458ccdf 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/bloom_filter.h +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/bloom_filter.h @@ -31,7 +31,7 @@ #include //memset #include #include //std::runtime_error -#include +#include "../../../../libParallel/parallel_channel.h" namespace hdiff_private{ class TBitSet{ @@ -46,16 +46,7 @@ class TBitSet{ #if (_IS_USED_MULTITHREAD) void set_MT(size_t bitIndex){ //assert(bitIndex)==sizeof(base_t)); - base_t oldv; - base_t newv; - std::atomic& v=*(std::atomic*)&m_bits[bitIndex>>kBaseShr]; - do { - oldv=v.load(); - newv=oldv | ((base_t)1<<(bitIndex&kBaseMask)); - if (oldv==newv) - return; - } while(!v.compare_exchange_weak(oldv,newv)); + atomic32_or(&m_bits[bitIndex>>kBaseShr],((base_t)1<<(bitIndex&kBaseMask))); } #endif inline bool is_hit(size_t bitIndex)const{ @@ -80,7 +71,7 @@ class TBitSet{ } private: inline static size_t bitSizeToCount(size_t bitSize){ return (bitSize+(kBaseTBits-1))/kBaseTBits; } - typedef size_t base_t; + typedef uint32_t base_t; enum { kBaseShr=(sizeof(base_t)==8)?6:((sizeof(base_t)==4)?5:0), kBaseTBits=(1< //std::runtime_error #include //std::sort,std::equal_range #include "../compress_detect.h" //_getUIntCost +#include "../../../../libParallel/parallel_channel.h" #include "../qsort_parallel.h" namespace hdiff_private{ static const size_t kMinTrustMatchedLength=1024*16; @@ -76,7 +77,7 @@ struct TStreamCache{ if (streamPos+kMinCacheDataSize>streamSize) return false; hpatch_StreamPos_t readPos=(streamPos>=kBackupCacheSize)?(streamPos-kBackupCacheSize):0; size_t readLen=((streamSize-readPos)>=cacheSize)?cacheSize:(size_t)(streamSize-readPos); - + unsigned char* dst=cache+cacheSize-readLen; if ((m_readPosEnd>readPos)&&(m_readPos<=readPos)){ size_t moveLen=(size_t)(m_readPosEnd-readPos); @@ -254,9 +255,9 @@ void TDigestMatcher::getDigests(){ size_t kMaxCmpDeep= 1 + upperCount(kMinTrustMatchedLength,m_kMatchBlockSize); TIndex_comp comp(m_blocks.data(),m_blocks.size(),kMaxCmpDeep); if (m_isUseLargeSorted) - sort_parallel(m_sorted_larger.data(),m_sorted_larger.data()+m_sorted_larger.size(),comp,m_threadNum); + sort_parallel(m_sorted_larger.data(),m_sorted_larger.data()+m_sorted_larger.size(),comp,m_threadNum); else - sort_parallel(m_sorted_limit.data(),m_sorted_limit.data()+m_sorted_limit.size(),comp,m_threadNum); + sort_parallel(m_sorted_limit.data(),m_sorted_limit.data()+m_sorted_limit.size(),comp,m_threadNum); } struct TBlockStreamCache:public TStreamCache{ diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h index b976687b..d6dff7f8 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h @@ -30,7 +30,6 @@ #ifndef digest_matcher_h #define digest_matcher_h -#include "../../../../libParallel/parallel_import.h" #include "bloom_filter.h" #include "covers.h" #include "adler_roll.h" diff --git a/libHDiffPatch/HDiff/private_diff/qsort_parallel.h b/libHDiffPatch/HDiff/private_diff/qsort_parallel.h index 7d3f6db3..91ef8458 100644 --- a/libHDiffPatch/HDiff/private_diff/qsort_parallel.h +++ b/libHDiffPatch/HDiff/private_diff/qsort_parallel.h @@ -31,7 +31,7 @@ #include //sort #include "../../../libParallel/parallel_import.h" #if (_IS_USED_MULTITHREAD) -#include +#include //if used vc++, need >= vc2012 #endif #if (_IS_USED_MULTITHREAD) @@ -106,7 +106,7 @@ } #endif - template + template static void sort_parallel(TValue* begin,TValue* end,TCmp cmp,size_t threadNum){ const size_t size=end-begin; #if (_IS_USED_MULTITHREAD) diff --git a/libParallel/parallel_channel.cpp b/libParallel/parallel_channel.cpp index 869f9497..33770a36 100644 --- a/libParallel/parallel_channel.cpp +++ b/libParallel/parallel_channel.cpp @@ -167,4 +167,12 @@ TChanData CChannel::accept(bool isWait){ return _import->accept(isWait); } + +#if (_NEED_MSVC_WIN32_atomic_func) +# include "windows.h" + void atomic32_or(uint32_t* p,uint32_t or_v){ + InterlockedOr((volatile LONG*)p,(LONG)or_v); + } +#endif + #endif //_IS_USED_MULTITHREAD diff --git a/libParallel/parallel_channel.h b/libParallel/parallel_channel.h index 07b11ef0..cdd0cd91 100644 --- a/libParallel/parallel_channel.h +++ b/libParallel/parallel_channel.h @@ -29,7 +29,8 @@ #define parallel_channel_h #include "parallel_import.h" #if (_IS_USED_MULTITHREAD) - +#include //uint32 +#include #include //for size_t ptrdiff_t struct CHLocker{ @@ -69,5 +70,18 @@ struct CHLocker{ _CChannel_import* _import; }; + +#if (_MSC_VER<1700) && (defined(WIN32)) //vc2012 support atomic +# define _NEED_MSVC_WIN32_atomic_func 1 + void atomic32_or(uint32_t* p,uint32_t or_v); +#else +# include + static inline void atomic32_or(uint32_t* p,uint32_t or_v){ + assert(sizeof(std::atomic)==sizeof(uint32_t)); + std::atomic& v=*(std::atomic*)p; + v.fetch_or(or_v); + } +#endif + #endif //_IS_USED_MULTITHREAD #endif //parallel_channel_h From 9cc0391ffaa54c5bbf4fa1168df7f7b1c728c30a Mon Sep 17 00:00:00 2001 From: sisong Date: Sat, 1 Oct 2022 20:40:04 +0800 Subject: [PATCH 07/20] add parallel search_cover for TDigestMatcher; clip by newDataSize/threadNum; --- hdiffz.cpp | 20 +- libHDiffPatch/HDiff/diff.cpp | 10 +- libHDiffPatch/HDiff/diff_types.h | 13 +- libHDiffPatch/HDiff/match_block.cpp | 7 +- libHDiffPatch/HDiff/match_block.h | 2 +- .../private_diff/limit_mem_diff/covers.h | 52 ++++- .../limit_mem_diff/digest_matcher.cpp | 191 ++++++++++++++---- .../limit_mem_diff/digest_matcher.h | 9 +- .../HDiff/private_diff/qsort_parallel.h | 16 +- libHDiffPatch/HPatch/patch_types.h | 1 + libParallel/parallel_channel.h | 5 +- 11 files changed, 243 insertions(+), 83 deletions(-) diff --git a/hdiffz.cpp b/hdiffz.cpp index baa53f09..33cc521d 100644 --- a/hdiffz.cpp +++ b/hdiffz.cpp @@ -1219,34 +1219,20 @@ static int hdiff_in_mem(const char* oldFileName,const char* newFileName,const ch try { #if (_IS_NEED_BSDIFF) if (diffSets.isBsDiff){ - if (diffSets.isUseFastMatchBlock) create_bsdiff_block(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(),&diffData_out.base, compressPlugin,(int)diffSets.matchScore,diffSets.isUseBigCacheMatch, - diffSets.fastMatchBlockSize,diffSets.threadNum); - else - create_bsdiff(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(),&diffData_out.base, - compressPlugin,(int)diffSets.matchScore,diffSets.isUseBigCacheMatch); + diffSets.isUseFastMatchBlock?diffSets.fastMatchBlockSize:0,diffSets.threadNum); }else #endif if (diffSets.isSingleCompressedDiff){ - if (diffSets.isUseFastMatchBlock) create_single_compressed_diff_block(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(), &diffData_out.base,compressPlugin,(int)diffSets.matchScore, diffSets.patchStepMemSize,diffSets.isUseBigCacheMatch, - diffSets.fastMatchBlockSize,diffSets.threadNum); - else - create_single_compressed_diff(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(), - &diffData_out.base,compressPlugin,(int)diffSets.matchScore, - diffSets.patchStepMemSize,diffSets.isUseBigCacheMatch); + diffSets.isUseFastMatchBlock?diffSets.fastMatchBlockSize:0,diffSets.threadNum); }else{ - if (diffSets.isUseFastMatchBlock) create_compressed_diff_block(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(), &diffData_out.base,compressPlugin,(int)diffSets.matchScore, - diffSets.isUseBigCacheMatch,diffSets.fastMatchBlockSize,diffSets.threadNum); - else - create_compressed_diff(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(), - &diffData_out.base,compressPlugin,(int)diffSets.matchScore, - diffSets.isUseBigCacheMatch); + diffSets.isUseBigCacheMatch,diffSets.isUseFastMatchBlock?diffSets.fastMatchBlockSize:0,diffSets.threadNum); } diffData_out.base.streamSize=diffData_out.out_length; }catch(const std::exception& e){ diff --git a/libHDiffPatch/HDiff/diff.cpp b/libHDiffPatch/HDiff/diff.cpp index 3fa22e24..1395d299 100644 --- a/libHDiffPatch/HDiff/diff.cpp +++ b/libHDiffPatch/HDiff/diff.cpp @@ -50,9 +50,9 @@ static const char* kHDiffSFVersionType="HDIFFSF20"; #define check(value) checki(value,"check "#value" error!") #if (_SSTRING_FAST_MATCH>0) -static const int kMinMatchLen = (_SSTRING_FAST_MATCH>5)?_SSTRING_FAST_MATCH:5; +static const int kMinMatchLen = (_SSTRING_FAST_MATCH>kCoverMinMatchLen)?_SSTRING_FAST_MATCH:kCoverMinMatchLen; #else -static const int kMinMatchLen = 5; //最小搜寻相等长度。 +static const int kMinMatchLen = kCoverMinMatchLen; //最小搜寻相等长度。 #endif static const int kMinMatchScore = 2; //最小搜寻覆盖收益. @@ -78,7 +78,7 @@ namespace{ return isCollinear(next)&&(linkSpaceLength(next)<=kMaxLinkSpaceLength); } inline bool isCollinear(const TOldCover& next)const{//覆盖线是否在同一条直线上. - return (oldPos-next.oldPos==newPos-next.newPos); + return cover_is_collinear(*this,next); } inline TInt linkSpaceLength(const TOldCover& next)const{//覆盖线间的间距. return next.oldPos-(oldPos+length); @@ -1058,8 +1058,8 @@ void __hdiff_private__create_compressed_diff(const TByte* newData,const TByte* n void get_match_covers_by_block(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, hpatch_TOutputCovers* out_covers,size_t kMatchBlockSize,size_t threadNum){ assert(out_covers->push_cover!=0); - TDigestMatcher matcher(oldData,kMatchBlockSize,threadNum); - matcher.search_cover(newData,out_covers); + TDigestMatcher matcher(oldData,newData,kMatchBlockSize,threadNum); + matcher.search_cover(out_covers); //todo: + extend_cover_stream ? } void get_match_covers_by_block(const unsigned char* newData,const unsigned char* newData_end, diff --git a/libHDiffPatch/HDiff/diff_types.h b/libHDiffPatch/HDiff/diff_types.h index 43a5c544..a8d4bf47 100644 --- a/libHDiffPatch/HDiff/diff_types.h +++ b/libHDiffPatch/HDiff/diff_types.h @@ -34,7 +34,12 @@ namespace hdiff_private{ template struct cover_cmp_by_new_t{ - inline bool operator ()(const TCover& x,const TCover& y){ return x.newPos struct cover_cmp_by_old_t{ @@ -45,6 +50,12 @@ namespace hdiff_private{ return x.length + inline static bool cover_is_collinear(const TCover& x,const TCover& y){ + return (x.oldPos+y.newPos==x.newPos+y.oldPos); + } + + static const int kCoverMinMatchLen=5; } #ifdef __cplusplus diff --git a/libHDiffPatch/HDiff/match_block.cpp b/libHDiffPatch/HDiff/match_block.cpp index c5c0961c..818c9169 100644 --- a/libHDiffPatch/HDiff/match_block.cpp +++ b/libHDiffPatch/HDiff/match_block.cpp @@ -28,6 +28,7 @@ #include "match_block.h" #include "diff.h" #include "private_diff/limit_mem_diff/stream_serialize.h" //TAutoMem +#include "private_diff/limit_mem_diff/covers.h" // tm_collate_covers() #include #include //std::runtime_error #define _check(value,info) { if (!(value)) { throw std::runtime_error(info); } } @@ -76,12 +77,16 @@ namespace hdiff_private { struct TOutputCovers:public hpatch_TOutputCovers{ TOutputCovers(std::vector& _blockCovers) :blockCovers(_blockCovers){ - blockCovers.clear(); push_cover=_push_cover; } + blockCovers.clear(); push_cover=_push_cover; collate_covers=_collate_covers; } static hpatch_BOOL _push_cover(struct hpatch_TOutputCovers* out_covers,const hpatch_TCover* cover){ TOutputCovers* self=(TOutputCovers*)out_covers; self->blockCovers.push_back(*cover); return hpatch_TRUE; } + static void _collate_covers(struct hpatch_TOutputCovers* out_covers){ + TOutputCovers* self=(TOutputCovers*)out_covers; + tm_collate_covers(self->blockCovers); + } std::vector& blockCovers; }; void TMatchBlock::getBlockCovers(){ diff --git a/libHDiffPatch/HDiff/match_block.h b/libHDiffPatch/HDiff/match_block.h index 53f02504..912e7ac7 100644 --- a/libHDiffPatch/HDiff/match_block.h +++ b/libHDiffPatch/HDiff/match_block.h @@ -94,7 +94,7 @@ namespace hdiff_private{ //optimize diff speed by match block -//note: newData&oldData will be changed +//note: newData&oldData in memory will be changed //see create_compressed_diff | create_single_compressed_diff void create_compressed_diff_block(const hpatch_TStreamInput* newData,//will load in memory diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/covers.h b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/covers.h index 950f1bd7..fb95cbc3 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/covers.h +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/covers.h @@ -27,7 +27,8 @@ #ifndef covers_h #define covers_h #include -#include "../../../HPatch/patch_types.h" +#include //std::sort +#include "../../../HDiff/diff_types.h" namespace hdiff_private{ typedef hpatch_TCover TCover; @@ -56,29 +57,72 @@ struct TCovers:public hpatch_TOutputCovers{ } }; +template +static void tm_collate_covers(std::vector<_TCover>& covers){ + if (covers.size()<=1) return; + std::sort(covers.begin(),covers.end(),cover_cmp_by_new_t<_TCover>()); + size_t backi=0; + for (size_t i=1;icovers[backi].newPos+covers[backi].length){ + if (cover_is_collinear(covers[i],covers[backi])){//insert i part to backi,del i + covers[backi].length=covers[i].newPos+covers[i].length-covers[backi].newPos; + }else{//del backi part, save i + covers[backi].length=covers[i].newPos-covers[backi].newPos; + if (covers[backi].length>=kCoverMinMatchLen) + ++backi; + covers[backi]=covers[i]; + } + } //else del i + }else if ((covers[i].newPos==covers[backi].newPos+covers[backi].length) + &&(covers[i].oldPos==covers[backi].oldPos+covers[backi].length)){ + covers[backi].length+=covers[i].length; //insert i all to backi,del i + }else{ //save i + ++backi; + covers[backi]=covers[i]; + } + } + covers.resize(backi+1); +} + class TCoversBuf:public TCovers{ public: inline TCoversBuf(hpatch_StreamPos_t dataSize0,hpatch_StreamPos_t dataSize1) :TCovers(0,0,(dataSize0|dataSize1)<((hpatch_StreamPos_t)1<<32)){ push_cover=_push_cover; + collate_covers=_collate_covers; } private: + template + inline void _update(std::vector<_TCover>& covers){ + _covers=covers.data(); + _coverCount=covers.size(); + } static hpatch_BOOL _push_cover(struct hpatch_TOutputCovers* out_covers,const TCover* cover){ TCoversBuf* self=(TCoversBuf*)out_covers; - ++self->_coverCount; if (self->_isCover32) { hpatch_TCover32 c32; c32.oldPos=(hpatch_uint32_t)cover->oldPos; c32.newPos=(hpatch_uint32_t)cover->newPos; c32.length=(hpatch_uint32_t)cover->length; self->m_covers_limit.push_back(c32); - self->_covers=self->m_covers_limit.data(); + self->_update(self->m_covers_limit); }else{ self->m_covers_larger.push_back(*cover); - self->_covers=self->m_covers_larger.data(); + self->_update(self->m_covers_larger); } return hpatch_TRUE; } + static void _collate_covers(struct hpatch_TOutputCovers* out_covers){ + TCoversBuf* self=(TCoversBuf*)out_covers; + if (self->_isCover32){ + tm_collate_covers(self->m_covers_limit); + self->_update(self->m_covers_limit); + }else{ + tm_collate_covers(self->m_covers_larger); + self->_update(self->m_covers_larger); + } + } std::vector m_covers_limit; std::vector m_covers_larger; }; diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp index 98bca37e..eb9f4fcd 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp @@ -35,21 +35,21 @@ namespace hdiff_private{ static const size_t kMinTrustMatchedLength=1024*16; static const size_t kMinMatchedLength = 16; static const size_t kMatchBlockSize_min=4;//sizeof(hpatch_uint32_t); -static const size_t kBestReadSize=1024*256; //for sequence read -static const size_t kMinReadSize=1024; //for random first read speed +static const size_t kBestReadSize=1024*512; //for sequence read +static const size_t kMinReadSize=1024*4; //for random first read speed static const size_t kMinBackupReadSize=256; -static const size_t kMaxMatchRange=1024*64; +static const size_t kBestMatchRange=1024*64; static const size_t kMaxLinkIndexFindSize=64; #define readStream(stream,pos,dst,n) { \ - if (((n)>0)&&(!(stream)->read(stream,pos,dst,dst+(n)))) \ + if (((n)>0)&&(!(stream)->read(stream,m_streamOffset+pos,dst,dst+(n)))) \ throw std::runtime_error("TStreamCache::_resetPos_continue() stream->read() error!"); } struct TStreamCache{ - TStreamCache(const hpatch_TStreamInput* _stream, - unsigned char* _cache,size_t _cacheSize) - :stream(_stream),m_readPos(0),m_readPosEnd(0), + TStreamCache(const hpatch_TStreamInput* _stream,unsigned char* _cache,size_t _cacheSize, + hpatch_StreamPos_t _streamOffset,void* _locker) + :stream(_stream),m_readPos(0),m_readPosEnd(0),m_streamOffset(_streamOffset),m_locker(_locker), cache(_cache),cacheSize(_cacheSize),cachePos(_cacheSize){ } inline hpatch_StreamPos_t streamSize()const{ return stream->streamSize; } inline hpatch_StreamPos_t pos()const { return m_readPosEnd-dataLength(); } @@ -77,7 +77,22 @@ struct TStreamCache{ if (streamPos+kMinCacheDataSize>streamSize) return false; hpatch_StreamPos_t readPos=(streamPos>=kBackupCacheSize)?(streamPos-kBackupCacheSize):0; size_t readLen=((streamSize-readPos)>=cacheSize)?cacheSize:(size_t)(streamSize-readPos); - +#if (_IS_USED_MULTITHREAD) + if (m_locker){ + CAutoLocker _autoLocker(m_locker); + _resetPos_continue_read(readPos,readLen); + }else +#endif + { + _resetPos_continue_read(readPos,readLen); + } + m_readPos=readPos; + m_readPosEnd=readPos+readLen; + cachePos=cacheSize-(size_t)(m_readPosEnd-streamPos); + return true; + } +private: + void _resetPos_continue_read(hpatch_StreamPos_t readPos,size_t readLen){ unsigned char* dst=cache+cacheSize-readLen; if ((m_readPosEnd>readPos)&&(m_readPos<=readPos)){ size_t moveLen=(size_t)(m_readPosEnd-readPos); @@ -90,16 +105,13 @@ struct TStreamCache{ }else{ readStream(stream,readPos,dst,readLen); } - m_readPos=readPos; - m_readPosEnd=readPos+readLen; - cachePos=cacheSize-(size_t)(m_readPosEnd-streamPos); - return true; } -private: const hpatch_TStreamInput* stream; protected: hpatch_StreamPos_t m_readPos; hpatch_StreamPos_t m_readPosEnd; + const hpatch_StreamPos_t m_streamOffset; + void* m_locker; unsigned char* cache; size_t cacheSize; size_t cachePos; @@ -133,8 +145,24 @@ static size_t posToBlockIndex(hpatch_StreamPos_t pos,size_t kMatchBlockSize,size TDigestMatcher::~TDigestMatcher(){ } -TDigestMatcher::TDigestMatcher(const hpatch_TStreamInput* oldData,size_t kMatchBlockSize,size_t threadNum) -:m_oldData(oldData),m_isUseLargeSorted(true),m_threadNum(threadNum), +size_t TDigestMatcher::getSearchThreadNum()const{ +#if (_IS_USED_MULTITHREAD) + const size_t kMinParallelSize=1024*64; + const size_t threadNum=m_threadNum; + hpatch_StreamPos_t size=m_newData->streamSize; + if ((threadNum>1)&&(size>=kMinParallelSize)&&(size/2>=m_kMatchBlockSize)) { + const size_t maxThreanNum=size/(kMinParallelSize/2); + return (threadNum<=maxThreanNum)?threadNum:maxThreanNum; + }else +#endif + { + return 1; + } +} + +TDigestMatcher::TDigestMatcher(const hpatch_TStreamInput* oldData,const hpatch_TStreamInput* newData, + size_t kMatchBlockSize,size_t threadNum) +:m_oldData(oldData),m_newData(newData),m_isUseLargeSorted(true),m_threadNum(threadNum), m_newCacheSize(0),m_oldCacheSize(0),m_oldMinCacheSize(0),m_backupCacheSize(0),m_kMatchBlockSize(0){ if (kMatchBlockSize>(oldData->streamSize+1)/2) kMatchBlockSize=(size_t)((oldData->streamSize+1)/2); @@ -159,7 +187,7 @@ m_newCacheSize(0),m_oldCacheSize(0),m_oldMinCacheSize(0),m_backupCacheSize(0),m_ m_oldCacheSize=upperCount(m_kMatchBlockSize+m_backupCacheSize,kBestReadSize)*kBestReadSize; m_oldMinCacheSize=upperCount(m_kMatchBlockSize+m_backupCacheSize,kMinReadSize)*kMinReadSize; assert(m_oldMinCacheSize<=m_oldCacheSize); - m_mem.realloc(m_newCacheSize+m_oldCacheSize); + m_mem.realloc((m_newCacheSize+m_oldCacheSize)*getSearchThreadNum()); getDigests(); } @@ -213,8 +241,8 @@ static void _filter_insert_MT(TBloomFilter* filter,const adler_uin static void filter_insert_parallel(TBloomFilter& filter,const adler_uint_t* begin,const adler_uint_t* end, size_t threadNum,size_t kMinParallelSize=4096){ - const size_t size=end-begin; #if (_IS_USED_MULTITHREAD) + const size_t size=end-begin; if ((threadNum>1)&&(size>=kMinParallelSize)) { const size_t maxThreanNum=size/(kMinParallelSize/2); threadNum=(threadNum<=maxThreanNum)?threadNum:maxThreanNum; @@ -234,11 +262,15 @@ static void filter_insert_parallel(TBloomFilter& filter,const adle } } + +#define __sort_indexs(TIndex,indexs,comp,m_threadNum) sort_parallel \ + (indexs.data(),indexs.data()+indexs.size(),comp,m_threadNum) + void TDigestMatcher::getDigests(){ if (m_blocks.empty()) return; const size_t blockCount=m_blocks.size(); - TStreamCache streamCache(m_oldData,m_mem.data(),m_newCacheSize+m_oldCacheSize); + TStreamCache streamCache(m_oldData,m_mem.data(),m_newCacheSize+m_oldCacheSize,0,0); for (size_t i=0;istreamSize); streamCache.resetPos(0,readPos,m_kMatchBlockSize); @@ -255,15 +287,16 @@ void TDigestMatcher::getDigests(){ size_t kMaxCmpDeep= 1 + upperCount(kMinTrustMatchedLength,m_kMatchBlockSize); TIndex_comp comp(m_blocks.data(),m_blocks.size(),kMaxCmpDeep); if (m_isUseLargeSorted) - sort_parallel(m_sorted_larger.data(),m_sorted_larger.data()+m_sorted_larger.size(),comp,m_threadNum); + __sort_indexs(uint64_t,m_sorted_larger,comp,m_threadNum); else - sort_parallel(m_sorted_limit.data(),m_sorted_limit.data()+m_sorted_limit.size(),comp,m_threadNum); + __sort_indexs(uint32_t,m_sorted_limit,comp,m_threadNum); } struct TBlockStreamCache:public TStreamCache{ TBlockStreamCache(const hpatch_TStreamInput* _stream,unsigned char* _cache, - size_t _cacheSize,size_t _backupCacheSize, size_t _kMatchBlockSize) - :TStreamCache(_stream,_cache,_cacheSize), + size_t _cacheSize,size_t _backupCacheSize, size_t _kMatchBlockSize, + hpatch_StreamPos_t _streamOffset,void* _locker) + :TStreamCache(_stream,_cache,_cacheSize,_streamOffset,_locker), backupCacheSize(_backupCacheSize),kMatchBlockSize(_kMatchBlockSize){ assert(cacheSize>=(backupCacheSize+kMatchBlockSize)); } inline bool resetPos(hpatch_StreamPos_t streamPos){ @@ -293,9 +326,9 @@ struct TBlockStreamCache:public TStreamCache{ struct TOldStreamCache:public TBlockStreamCache{ TOldStreamCache(const hpatch_TStreamInput* _stream,unsigned char* _cache, size_t _minCacheSize,size_t _maxCacheSize, - size_t _backupCacheSize,size_t _kMatchBlockSize) + size_t _backupCacheSize,size_t _kMatchBlockSize,void* _locker) :TBlockStreamCache(_stream,_cache+_maxCacheSize-_minCacheSize, - _minCacheSize, _backupCacheSize,_kMatchBlockSize), + _minCacheSize, _backupCacheSize,_kMatchBlockSize,0,_locker), minCacheSize(_minCacheSize),maxCacheSize(_maxCacheSize){ } inline bool resetPos(hpatch_StreamPos_t streamPos){ @@ -344,8 +377,10 @@ struct TOldStreamCache:public TBlockStreamCache{ struct TNewStreamCache:public TBlockStreamCache{ TNewStreamCache(const hpatch_TStreamInput* _stream,unsigned char* _cache, - size_t _cacheSize,size_t _backupCacheSize,size_t _kMatchBlockSize) - :TBlockStreamCache(_stream,_cache,_cacheSize,_backupCacheSize,_kMatchBlockSize){ + size_t _cacheSize,size_t _backupCacheSize,size_t _kMatchBlockSize, + hpatch_StreamPos_t _streamOffset,void* _locker) + :TBlockStreamCache(_stream,_cache,_cacheSize,_backupCacheSize,_kMatchBlockSize, + _streamOffset,_locker){ resetPos(0); } void toBestDataLength(){ @@ -500,7 +535,7 @@ static bool getBestMatch(const adler_uint_t* blocksBase,size_t blocksSize, if(best==0){ //继续找; best=left+(right-left)/2; hpatch_StreamPos_t _best_distance=~(hpatch_StreamPos_t)0; - const TIndex* end=(left+kMaxMatchRange>=right)?right:(left+kMaxMatchRange); + const TIndex* end=(left+kBestMatchRange>=right)?right:(left+kBestMatchRange); for (const TIndex* it=left;itpush_cover(out_covers,_pcover)) \ + throw std::runtime_error("TDigestMatcher::search_cover() push_cover error!"); } template -static void tm_search_cover(const adler_uint_t* blocksBase,size_t blocksSize, +static void tm_search_cover(const adler_uint_t* blocksBase, const TIndex* iblocks,const TIndex* iblocks_end, TOldStreamCache& oldStream,TNewStreamCache& newStream, const TBloomFilter& filter, - hpatch_TOutputCovers* out_covers) { + hpatch_TOutputCovers* out_covers, + hpatch_StreamPos_t _coverNewOffset,void* _coverLocker) { + const size_t blocksSize=iblocks_end-iblocks; TDigest_comp comp(blocksBase); TCover lastCover={0,0,0}; while (true) { @@ -598,8 +638,17 @@ static void tm_search_cover(const adler_uint_t* blocksBase,size_t blocksSize, tryLink(lastCover,curCover,oldStream,newStream); if (curCover.length>=kMinMatchedLength){ //matched - if (!out_covers->push_cover(out_covers,&curCover)) - throw std::runtime_error("TDigestMatcher::search_cover() push_cover error!"); + TCover _cover; + setCover(_cover,curCover.oldPos,curCover.newPos+_coverNewOffset,curCover.length); +#if (_IS_USED_MULTITHREAD) + if (_coverLocker){ + CAutoLocker _autoLocker(_coverLocker); + __push_cover(&_cover); + }else +#endif + { + __push_cover(&_cover); + } lastCover=curCover; if (!newStream.resetPos(curCover.newPos+curCover.length)) break;//finish continue; @@ -610,18 +659,78 @@ static void tm_search_cover(const adler_uint_t* blocksBase,size_t blocksSize, } } -void TDigestMatcher::search_cover(const hpatch_TStreamInput* newData,hpatch_TOutputCovers* out_covers){ - if (m_blocks.empty()) return; - if (newData->streamSize_search_cover_thread(out_covers,threadIndex,threadNum,mt_data); +} + +void TDigestMatcher::search_cover(hpatch_TOutputCovers* out_covers){ + if (m_blocks.empty()) return; + if (m_newData->streamSize1){ + const size_t threadCount=searchThreadNum-1; + mt_data_t mt_data; + std::vector threads(threadCount); + for (size_t i=0;i1) + out_covers->collate_covers(out_covers); } }//namespace hdiff_private diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h index d6dff7f8..bf670b6e 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h @@ -61,11 +61,13 @@ static inline adler_hash_t adler_to_hash(const uint64_t x){ return x; } class TDigestMatcher{ public: //throw std::runtime_error when data->read error or kMatchBlockSize error; - TDigestMatcher(const hpatch_TStreamInput* oldData,size_t kMatchBlockSize,size_t threadNum); - void search_cover(const hpatch_TStreamInput* newData,hpatch_TOutputCovers* out_covers); + TDigestMatcher(const hpatch_TStreamInput* oldData,const hpatch_TStreamInput* newData, + size_t kMatchBlockSize,size_t threadNum); + void search_cover(hpatch_TOutputCovers* out_covers); ~TDigestMatcher(); private: const hpatch_TStreamInput* m_oldData; + const hpatch_TStreamInput* m_newData; std::vector m_blocks; TBloomFilter m_filter; std::vector m_sorted_limit; @@ -80,6 +82,9 @@ class TDigestMatcher{ size_t m_kMatchBlockSize; void getDigests(); + size_t getSearchThreadNum()const; +public: //private for muti-thread + void _search_cover_thread(hpatch_TOutputCovers* out_covers,size_t threadIndex,size_t threadNum,void* mt_data); }; }//namespace hdiff_private diff --git a/libHDiffPatch/HDiff/private_diff/qsort_parallel.h b/libHDiffPatch/HDiff/private_diff/qsort_parallel.h index 91ef8458..f87a1d7e 100644 --- a/libHDiffPatch/HDiff/private_diff/qsort_parallel.h +++ b/libHDiffPatch/HDiff/private_diff/qsort_parallel.h @@ -65,12 +65,12 @@ size_t _pivot_i=__index_by_ratio(kSampleSize,leftWeight,(leftWeight+rightWeight)); if (!_kIsSortNotNth) std::nth_element(samples,samples+_pivot_i,samples+kSampleSize,_sort_parallel_TCmpi(begin,cmp)); size_t pivot=samples[_pivot_i]; - std::swap(begin[0], begin[pivot]); + std::swap(begin[0],begin[pivot]); TValue x(begin[0]); size_t mid=0; - for (size_t j=mid+1;j(begin,end,cmp,leftWeight,rightWeight); }else{ mid=begin+__index_by_ratio(end-begin,leftWeight,threadNum); @@ -108,12 +108,12 @@ template static void sort_parallel(TValue* begin,TValue* end,TCmp cmp,size_t threadNum){ - const size_t size=end-begin; #if (_IS_USED_MULTITHREAD) + const size_t size=end-begin; if ((threadNum>1)&&(size>=kMinQSortParallelSize)){ const size_t maxThreanNum=size/(kMinQSortParallelSize/2); threadNum=(threadNum<=maxThreanNum)?threadNum:maxThreanNum; - //try? std::random_shuffle(begin,end); + //std::random_shuffle(begin,end); //test shuffle befor parallel sort? _sort_parallel_thread(begin,end,cmp,threadNum); }else #endif diff --git a/libHDiffPatch/HPatch/patch_types.h b/libHDiffPatch/HPatch/patch_types.h index c03f7e44..41a56f02 100644 --- a/libHDiffPatch/HPatch/patch_types.h +++ b/libHDiffPatch/HPatch/patch_types.h @@ -264,6 +264,7 @@ typedef hpatch_BOOL hpatch_FileError_t;// 0: no error; other: error; //output covers typedef struct hpatch_TOutputCovers{ hpatch_BOOL (*push_cover)(struct hpatch_TOutputCovers* out_covers,const hpatch_TCover* cover); + void (*collate_covers)(struct hpatch_TOutputCovers* out_covers); // for support search covers by muti-thread } hpatch_TOutputCovers; typedef struct{ diff --git a/libParallel/parallel_channel.h b/libParallel/parallel_channel.h index cdd0cd91..9eee094a 100644 --- a/libParallel/parallel_channel.h +++ b/libParallel/parallel_channel.h @@ -77,9 +77,8 @@ struct CHLocker{ #else # include static inline void atomic32_or(uint32_t* p,uint32_t or_v){ - assert(sizeof(std::atomic)==sizeof(uint32_t)); - std::atomic& v=*(std::atomic*)p; - v.fetch_or(or_v); + static_assert(sizeof(std::atomic)==sizeof(uint32_t),"std::atomic size error!"); + ((std::atomic*)p)->fetch_or(or_v); } #endif From 7c08691d6126ef376c87914c31dfd25efab2af05 Mon Sep 17 00:00:00 2001 From: sisong Date: Mon, 3 Oct 2022 17:00:48 +0800 Subject: [PATCH 08/20] changed parallel search_cover model for TDigestMatcher; clip by fix len; --- .../limit_mem_diff/digest_matcher.cpp | 89 ++++++++++--------- .../limit_mem_diff/digest_matcher.h | 6 +- 2 files changed, 53 insertions(+), 42 deletions(-) diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp index eb9f4fcd..dfb6019c 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp @@ -35,11 +35,13 @@ namespace hdiff_private{ static const size_t kMinTrustMatchedLength=1024*16; static const size_t kMinMatchedLength = 16; static const size_t kMatchBlockSize_min=4;//sizeof(hpatch_uint32_t); -static const size_t kBestReadSize=1024*512; //for sequence read +static const size_t kBestReadSize=1024*256; //for sequence read static const size_t kMinReadSize=1024*4; //for random first read speed static const size_t kMinBackupReadSize=256; static const size_t kBestMatchRange=1024*64; -static const size_t kMaxLinkIndexFindSize=64; +static const size_t kMaxLinkIndexFindCount=64; +static const size_t kMinParallelSize=1024*64; +static const size_t kBestParallelSize=1024*1024*16; #define readStream(stream,pos,dst,n) { \ @@ -147,7 +149,6 @@ TDigestMatcher::~TDigestMatcher(){ size_t TDigestMatcher::getSearchThreadNum()const{ #if (_IS_USED_MULTITHREAD) - const size_t kMinParallelSize=1024*64; const size_t threadNum=m_threadNum; hpatch_StreamPos_t size=m_newData->streamSize; if ((threadNum>1)&&(size>=kMinParallelSize)&&(size/2>=m_kMatchBlockSize)) { @@ -512,7 +513,7 @@ static bool getBestMatch(const adler_uint_t* blocksBase,size_t blocksSize, if (best==0){ TIndex_comp comp(blocksBase,blocksSize,max_digests_n); size_t findCount=(right-left)*2+1; - if (findCount>kMaxLinkIndexFindSize) findCount=kMaxLinkIndexFindSize; + if (findCount>kMaxLinkIndexFindCount) findCount=kMaxLinkIndexFindCount; for (TIndex inc=1;(inc<=findCount);++inc) { //linkIndex附近找; TIndex fi; TIndex s=(inc>>1); if (inc&1){ @@ -667,34 +668,15 @@ struct mt_data_t{ CHLocker oldDataLocker; CHLocker newDataLocker; CHLocker coversLocker; + hpatch_StreamPos_t rollCount; + hpatch_StreamPos_t workCount; + volatile hpatch_StreamPos_t workIndex; }; -void TDigestMatcher::_search_cover_thread(hpatch_TOutputCovers* out_covers, - size_t threadIndex,size_t threadNum,void* mt_data){ - void* oldDataLocker=0; - void* newDataLocker=0; - void* coversLocker=0; - hpatch_StreamPos_t newOffset=0; - hpatch_TStreamInput newData=*m_newData; -#if (_IS_USED_MULTITHREAD) - if (mt_data){ - mt_data_t& mt=*(mt_data_t*)mt_data; - oldDataLocker=mt.oldDataLocker.locker; - newDataLocker=mt.newDataLocker.locker; - coversLocker =mt.coversLocker.locker; - - hpatch_StreamPos_t rollCount=newData.streamSize-(m_kMatchBlockSize-1); - hpatch_StreamPos_t step=rollCount/threadNum; - newOffset=step*threadIndex; - newData.streamSize=((threadIndex& workIndex=*(std::atomic*)&mt.workIndex; + while (true){ + hpatch_StreamPos_t curWorkIndex=workIndex++; + if (curWorkIndex>=mt.workCount) break; + //printf("%d ",(int)curWorkIndex); + hpatch_TStreamInput newData=*m_newData; + hpatch_StreamPos_t newOffset=mt.rollCount*curWorkIndex/mt.workCount; + newData.streamSize=((curWorkIndex+1_search_cover_thread(out_covers,threadIndex,threadNum,mt_data); } void TDigestMatcher::search_cover(hpatch_TOutputCovers* out_covers){ if (m_blocks.empty()) return; if (m_newData->streamSize1){ - const size_t threadCount=searchThreadNum-1; + size_t threadNum=getSearchThreadNum(); + const hpatch_StreamPos_t rollCount=m_newData->streamSize-(m_kMatchBlockSize-1); + size_t bestStep=(kBestParallelSize/2>m_kMatchBlockSize)?kBestParallelSize:2*m_kMatchBlockSize; + hpatch_StreamPos_t workCount=(rollCount+bestStep-1)/bestStep; + workCount=(threadNum>workCount)?threadNum:workCount; + if (threadNum>1){ mt_data_t mt_data; + mt_data.rollCount=rollCount; + mt_data.workCount=workCount; + mt_data.workIndex=0; + const size_t threadCount=threadNum-1; std::vector threads(threadCount); for (size_t i=0;icollate_covers(out_covers); }else #endif { - _search_cover(this,out_covers,0,1); + _search_cover(m_newData,0,out_covers,m_mem.data()); } - - if (searchThreadNum>1) - out_covers->collate_covers(out_covers); } }//namespace hdiff_private diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h index bf670b6e..85040cee 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h @@ -83,8 +83,12 @@ class TDigestMatcher{ void getDigests(); size_t getSearchThreadNum()const; + void _search_cover(const hpatch_TStreamInput* newData,hpatch_StreamPos_t newOffset, + hpatch_TOutputCovers* out_covers,unsigned char* pmem, + void* oldDataLocker=0,void* newDataLocker=0,void* coversLocker=0); public: //private for muti-thread - void _search_cover_thread(hpatch_TOutputCovers* out_covers,size_t threadIndex,size_t threadNum,void* mt_data); + void _search_cover_thread(hpatch_TOutputCovers* out_covers, + size_t threadIndex,size_t threadNum,void* mt_data); }; }//namespace hdiff_private From 0b07e429f4db132c7b1e52277d32ff52e8664463 Mon Sep 17 00:00:00 2001 From: sisong Date: Thu, 6 Oct 2022 20:15:11 +0800 Subject: [PATCH 09/20] libdivsufsort raname "*.c" to "*.cpp" file --- builds/vc2019/HDiffZ.vcxproj | 4 ++-- .../{divsufsort.c => divsufsort.cpp} | 0 .../{divsufsort64.c => divsufsort64.cpp} | 0 .../limit_mem_diff/digest_matcher.cpp | 7 +++---- .../HDiff/private_diff/qsort_parallel.h | 4 ++-- .../HDiff/private_diff/suffix_string.cpp | 19 +++++++++++-------- libParallel/parallel_channel.h | 2 +- 7 files changed, 19 insertions(+), 17 deletions(-) rename libHDiffPatch/HDiff/private_diff/libdivsufsort/{divsufsort.c => divsufsort.cpp} (100%) rename libHDiffPatch/HDiff/private_diff/libdivsufsort/{divsufsort64.c => divsufsort64.cpp} (100%) diff --git a/builds/vc2019/HDiffZ.vcxproj b/builds/vc2019/HDiffZ.vcxproj index 77f49a81..e54bead6 100644 --- a/builds/vc2019/HDiffZ.vcxproj +++ b/builds/vc2019/HDiffZ.vcxproj @@ -384,8 +384,8 @@ - - + + diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.cpp similarity index 100% rename from libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c rename to libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.cpp diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.c b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.cpp similarity index 100% rename from libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.c rename to libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.cpp diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp index dfb6019c..94a55276 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp @@ -152,8 +152,8 @@ size_t TDigestMatcher::getSearchThreadNum()const{ const size_t threadNum=m_threadNum; hpatch_StreamPos_t size=m_newData->streamSize; if ((threadNum>1)&&(size>=kMinParallelSize)&&(size/2>=m_kMatchBlockSize)) { - const size_t maxThreanNum=size/(kMinParallelSize/2); - return (threadNum<=maxThreanNum)?threadNum:maxThreanNum; + const hpatch_StreamPos_t maxThreanNum=size/(kMinParallelSize/2); + return (threadNum<=maxThreanNum)?threadNum:(size_t)maxThreanNum; }else #endif { @@ -288,7 +288,7 @@ void TDigestMatcher::getDigests(){ size_t kMaxCmpDeep= 1 + upperCount(kMinTrustMatchedLength,m_kMatchBlockSize); TIndex_comp comp(m_blocks.data(),m_blocks.size(),kMaxCmpDeep); if (m_isUseLargeSorted) - __sort_indexs(uint64_t,m_sorted_larger,comp,m_threadNum); + __sort_indexs(std::vector::value_type,m_sorted_larger,comp,m_threadNum); else __sort_indexs(uint32_t,m_sorted_limit,comp,m_threadNum); } @@ -695,7 +695,6 @@ void TDigestMatcher::_search_cover_thread(hpatch_TOutputCovers* out_covers, while (true){ hpatch_StreamPos_t curWorkIndex=workIndex++; if (curWorkIndex>=mt.workCount) break; - //printf("%d ",(int)curWorkIndex); hpatch_TStreamInput newData=*m_newData; hpatch_StreamPos_t newOffset=mt.rollCount*curWorkIndex/mt.workCount; newData.streamSize=((curWorkIndex+1 //sort +#include #include "../../../libParallel/parallel_import.h" #if (_IS_USED_MULTITHREAD) #include //if used vc++, need >= vc2012 #endif #if (_IS_USED_MULTITHREAD) - inline size_t __index_by_ratio(size_t size,size_t ratio,size_t ratio_base){ + inline static size_t __index_by_ratio(size_t size,size_t ratio,size_t ratio_base){ return (size_t)(((hpatch_uint64_t)size)*ratio/ratio_base); } diff --git a/libHDiffPatch/HDiff/private_diff/suffix_string.cpp b/libHDiffPatch/HDiff/private_diff/suffix_string.cpp index d364106e..35c13292 100644 --- a/libHDiffPatch/HDiff/private_diff/suffix_string.cpp +++ b/libHDiffPatch/HDiff/private_diff/suffix_string.cpp @@ -54,6 +54,12 @@ namespace hdiff_private{ +template +static void _clearVector(std::vector& v){ + std::vector _tmp; + v.swap(_tmp); +} + namespace { typedef TSuffixString::TInt TInt; typedef TSuffixString::TInt32 TInt32; @@ -256,24 +262,21 @@ void TSuffixString::clear(){ clear_cache(); m_src_begin=0; m_src_end=0; - std::vector _tmp_m; - m_SA_limit.swap(_tmp_m); - std::vector _tmp_g; - m_SA_large.swap(_tmp_g); + _clearVector(m_SA_limit); + _clearVector(m_SA_large); } + void TSuffixString::resetSuffixString(const TChar* src_begin,const TChar* src_end){ assert(src_begin<=src_end); m_src_begin=src_begin; m_src_end=src_end; if (isUseLargeSA()){ - std::vector _tmp_m; - m_SA_limit.swap(_tmp_m); + _clearVector(m_SA_limit); _suffixString_create(m_src_begin,m_src_end,m_SA_large); }else{ assert(sizeof(TInt32)==4); - std::vector _tmp_g; - m_SA_large.swap(_tmp_g); + _clearVector(m_SA_large); _suffixString_create(m_src_begin,m_src_end,m_SA_limit); } build_cache(); diff --git a/libParallel/parallel_channel.h b/libParallel/parallel_channel.h index 9eee094a..17ad4b02 100644 --- a/libParallel/parallel_channel.h +++ b/libParallel/parallel_channel.h @@ -71,7 +71,7 @@ struct CHLocker{ }; -#if (_MSC_VER<1700) && (defined(WIN32)) //vc2012 support atomic +#if (defined(_MSC_VER)) && (_MSC_VER<1700) && (defined(WIN32)) //vc2012 support atomic # define _NEED_MSVC_WIN32_atomic_func 1 void atomic32_or(uint32_t* p,uint32_t or_v); #else From d5120d95de569c1b83b2c4e45a7f8105d558c855 Mon Sep 17 00:00:00 2001 From: sisong Date: Thu, 6 Oct 2022 20:24:13 +0800 Subject: [PATCH 10/20] libdivsufsort omp_get_max_threads() changed to threadNum; --- .../private_diff/libdivsufsort/divsufsort.c.inc.h | 12 ++++++------ .../HDiff/private_diff/libdivsufsort/divsufsort.h | 6 +++--- .../HDiff/private_diff/libdivsufsort/divsufsort64.h | 4 ++-- .../HDiff/private_diff/libdivsufsort/utils.c.inc.h | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h index 595063c2..909db273 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h @@ -37,7 +37,7 @@ static saidx_t sort_typeBstar(const sauchar_t *T, saidx_t *SA, saidx_t *bucket_A, saidx_t *bucket_B, - saidx_t n) { + saidx_t n,size_t threadNum) { saidx_t *PAb, *ISAb, *buf; #ifdef _OPENMP saidx_t *curbuf; @@ -101,7 +101,7 @@ sort_typeBstar(const sauchar_t *T, saidx_t *SA, /* Sort the type B* substrings using sssort. */ #ifdef _OPENMP - tmp = omp_get_max_threads(); + tmp = (int)threadNum; buf = SA + m, bufsize = (n - (2 * m)) / tmp; c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; #pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) @@ -329,7 +329,7 @@ construct_BWT(const sauchar_t *T, saidx_t *SA, /*- Function -*/ saint_t -divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) { +divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n,size_t threadNum) { saidx_t *bucket_A, *bucket_B; saidx_t m; saint_t err = 0; @@ -345,7 +345,7 @@ divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) { /* Suffixsort. */ if((bucket_A != NULL) && (bucket_B != NULL)) { - m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, threadNum); construct_SA(T, SA, bucket_A, bucket_B, n, m); } else { err = -2; @@ -358,7 +358,7 @@ divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) { } saidx_t -divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) { +divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n,size_t threadNum) { saidx_t *B; saidx_t *bucket_A, *bucket_B; saidx_t m, pidx, i; @@ -373,7 +373,7 @@ divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) { /* Burrows-Wheeler Transform. */ if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { - m = sort_typeBstar(T, B, bucket_A, bucket_B, n); + m = sort_typeBstar(T, B, bucket_A, bucket_B, n, threadNum); pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); /* Copy to output string. */ diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.h index 79f40903..77f717cd 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.h @@ -87,7 +87,7 @@ typedef int32_t saidx_t; */ DIVSUFSORT_API saint_t -divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n); +divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n,size_t threadNum); /** * Constructs the burrows-wheeler transformed string of a given string. @@ -99,7 +99,7 @@ divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n); */ DIVSUFSORT_API saidx_t -divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n); +divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n,size_t threadNum); /** * Returns the version of the divsufsort library. @@ -123,7 +123,7 @@ DIVSUFSORT_API saint_t bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA /* can NULL */, - saidx_t n, saidx_t *idx); + saidx_t n, saidx_t *idx,size_t threadNum); /** * Inverse BW-transforms a given BWTed string. diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.h index 79b3965b..d7613196 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.h @@ -97,7 +97,7 @@ typedef llong_t saidx64_t; */ DIVSUFSORT_API saint_t -divsufsort64(const sauchar_t *T, saidx64_t *SA, saidx64_t n); +divsufsort64(const sauchar_t *T, saidx64_t *SA, saidx64_t n,size_t threadNum); /** * Constructs the burrows-wheeler transformed string of a given string. @@ -133,7 +133,7 @@ DIVSUFSORT_API saint_t bw_transform64(const sauchar_t *T, sauchar_t *U, saidx64_t *SA /* can NULL */, - saidx64_t n, saidx64_t *idx); + saidx64_t n, saidx64_t *idx,size_t threadNum); /** * Inverse BW-transforms a given BWTed string. diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/utils.c.inc.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/utils.c.inc.h index 90fb23ef..c5fa8433 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/utils.c.inc.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/utils.c.inc.h @@ -51,7 +51,7 @@ binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) { /* Burrows-Wheeler transform. */ saint_t bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA, - saidx_t n, saidx_t *idx) { + saidx_t n, saidx_t *idx,size_t threadNum) { saidx_t *A, i, j, p, t; saint_t c; @@ -64,7 +64,7 @@ bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA, } if((A = SA) == NULL) { - i = divbwt(T, U, NULL, n); + i = divbwt(T, U, NULL, n, threadNum); if(0 <= i) { *idx = i; i = 0; } return (saint_t)i; } From 4f91aaed815221e7d6c2c8aac07d9a90040a547a Mon Sep 17 00:00:00 2001 From: sisong Date: Fri, 7 Oct 2022 10:03:33 +0800 Subject: [PATCH 11/20] libdivsufsort openmp changed to std::thread; --- .../libdivsufsort/divsufsort.c.inc.h | 112 ++++++++++-------- 1 file changed, 60 insertions(+), 52 deletions(-) diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h index 909db273..472afec7 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h @@ -25,30 +25,48 @@ */ #include "divsufsort_private.h" -#ifdef _OPENMP -# include -#endif - +#include "../../../../libParallel/parallel_channel.h" +#include +#include /*- Private Functions -*/ +static void _sssort_thread(HLocker locker,saint_t* c0,saint_t* c1,saidx_t* j, + saidx_t *bucket_B,const sauchar_t *T, const saidx_t *PAb, + saidx_t *SA,saidx_t *buf, saidx_t bufsize,saidx_t n,saidx_t m){ + saidx_t k = 0; + saidx_t l; + for(;;) { + { + CAutoLocker __autoLocker(locker); + if(0 < (l = *j)) { + saint_t d0 = *c0, d1 = *c1; + do { + k = BUCKET_BSTAR(d0, d1); + if(--d1 <= d0) { + d1 = ALPHABET_SIZE - 1; + if(--d0 < 0) { break; } + } + } while(((l - k) <= 1) && (0 < (l = k))); + *c0 = d0, *c1 = d1, *j = k; + } + } + if(l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + buf, bufsize, 2, n, *(SA + k) == (m - 1)); + } +} + + /* Sorts suffixes of type B*. */ static saidx_t sort_typeBstar(const sauchar_t *T, saidx_t *SA, saidx_t *bucket_A, saidx_t *bucket_B, saidx_t n,size_t threadNum) { - saidx_t *PAb, *ISAb, *buf; -#ifdef _OPENMP - saidx_t *curbuf; - saidx_t l; -#endif - saidx_t i, j, k, t, m, bufsize; + saidx_t *PAb, *ISAb; + saidx_t i, j, k, t, m; saint_t c0, c1; -#ifdef _OPENMP - saint_t d0, d1; - int tmp; -#endif /* Initialize bucket arrays. */ for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } @@ -100,47 +118,37 @@ sort_typeBstar(const sauchar_t *T, saidx_t *SA, SA[--BUCKET_BSTAR(c0, c1)] = m - 1; /* Sort the type B* substrings using sssort. */ -#ifdef _OPENMP - tmp = (int)threadNum; - buf = SA + m, bufsize = (n - (2 * m)) / tmp; - c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; -#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) - { - tmp = omp_get_thread_num(); - curbuf = buf + tmp * bufsize; - k = 0; - for(;;) { - #pragma omp critical(sssort_lock) - { - if(0 < (l = j)) { - d0 = c0, d1 = c1; - do { - k = BUCKET_BSTAR(d0, d1); - if(--d1 <= d0) { - d1 = ALPHABET_SIZE - 1; - if(--d0 < 0) { break; } - } - } while(((l - k) <= 1) && (0 < (l = k))); - c0 = d0, c1 = d1, j = k; - } +#if (_IS_USED_MULTITHREAD) + if (threadNum>1){ + CHLocker locker; + const saidx_t bufsize = (n - (2 * m)) / (saidx_t)threadNum; + const size_t threadCount=threadNum-1; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; + std::vector threads(threadCount); + saidx_t* buf = SA + m; + for (size_t ti=0;ti Date: Fri, 7 Oct 2022 12:21:29 +0800 Subject: [PATCH 12/20] cmdline -m support -p-threadNum --- bsdiff_wrapper/bsdiff_wrapper.cpp | 21 +++++---- bsdiff_wrapper/bsdiff_wrapper.h | 6 ++- dirDiffPatch/dir_diff/dir_diff.cpp | 17 ++----- dirDiffPatch/dir_patch/dir_patch_types.h | 3 -- hdiffz.cpp | 23 +++++----- libHDiffPatch/HDiff/diff.cpp | 45 ++++++++++--------- libHDiffPatch/HDiff/diff.h | 16 ++++--- libHDiffPatch/HDiff/match_block.cpp | 16 +++---- .../HDiff/private_diff/suffix_string.cpp | 17 +++---- .../HDiff/private_diff/suffix_string.h | 4 +- test/_private_searchBestParams.cpp | 2 +- 11 files changed, 84 insertions(+), 86 deletions(-) diff --git a/bsdiff_wrapper/bsdiff_wrapper.cpp b/bsdiff_wrapper/bsdiff_wrapper.cpp index 03e63e47..0fc91016 100644 --- a/bsdiff_wrapper/bsdiff_wrapper.cpp +++ b/bsdiff_wrapper/bsdiff_wrapper.cpp @@ -152,10 +152,11 @@ static void serialize_bsdiff(const unsigned char* newData,const unsigned char* n void _create_bsdiff(const unsigned char* newData,const unsigned char* cur_newData_end,const unsigned char* newData_end, const unsigned char* oldData,const unsigned char* cur_oldData_end,const unsigned char* oldData_end, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, - int kMinSingleMatchScore,bool isUseBigCacheMatch,ICoverLinesListener* coverLinesListener){ + int kMinSingleMatchScore,bool isUseBigCacheMatch, + ICoverLinesListener* coverLinesListener,size_t threadNum){ std::vector covers; get_match_covers_by_sstring(newData,cur_newData_end,oldData,cur_oldData_end,covers, - kMinSingleMatchScore,isUseBigCacheMatch,coverLinesListener); + kMinSingleMatchScore,isUseBigCacheMatch,coverLinesListener,threadNum); if (covers.empty()||(covers[0].newPos!=0)||(covers[0].oldPos!=0)){//begin cover hpatch_TCover_sz lc; lc.newPos=0; @@ -186,13 +187,16 @@ using namespace hdiff_private; void create_bsdiff(const unsigned char* newData,const unsigned char* newData_end, const unsigned char* oldData,const unsigned char* oldData_end, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, - int kMinSingleMatchScore,bool isUseBigCacheMatch,ICoverLinesListener* coverLinesListener){ + int kMinSingleMatchScore,bool isUseBigCacheMatch, + ICoverLinesListener* coverLinesListener,size_t threadNum){ _create_bsdiff(newData,newData_end,newData_end,oldData,oldData_end,oldData_end, - out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,coverLinesListener); + out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch, + coverLinesListener,threadNum); } void create_bsdiff(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, - int kMinSingleMatchScore,bool isUseBigCacheMatch,ICoverLinesListener* coverLinesListener){ + int kMinSingleMatchScore,bool isUseBigCacheMatch, + ICoverLinesListener* coverLinesListener,size_t threadNum){ TAutoMem oldAndNewData; loadOldAndNewStream(oldAndNewData,oldData,newData); size_t old_size=oldData?(size_t)oldData->streamSize:0; @@ -200,7 +204,8 @@ void create_bsdiff(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* unsigned char* pNewData=pOldData+old_size; unsigned char* pNewDataEnd=pNewData+(size_t)newData->streamSize; _create_bsdiff(pNewData,pNewDataEnd,pNewDataEnd,pOldData,pOldData+old_size,pOldData+old_size, - out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,coverLinesListener); + out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch, + coverLinesListener,threadNum); } void create_bsdiff_block(unsigned char* newData,unsigned char* newData_end, @@ -210,13 +215,13 @@ void create_bsdiff_block(unsigned char* newData,unsigned char* newData_end, size_t matchBlockSize,size_t threadNum){ if (matchBlockSize==0){ _create_bsdiff(newData,newData_end,newData_end,oldData,oldData_end,oldData_end, - out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,0); + out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,0,threadNum); return; } TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize,threadNum); _create_bsdiff(newData,coversOp.matchBlock->newData_end_cur,newData_end, oldData,coversOp.matchBlock->oldData_end_cur,oldData_end, - out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,&coversOp); + out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,&coversOp,threadNum); } void create_bsdiff_block(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, diff --git a/bsdiff_wrapper/bsdiff_wrapper.h b/bsdiff_wrapper/bsdiff_wrapper.h index 33eaf930..6739dcd3 100644 --- a/bsdiff_wrapper/bsdiff_wrapper.h +++ b/bsdiff_wrapper/bsdiff_wrapper.h @@ -33,11 +33,13 @@ void create_bsdiff(const unsigned char* newData,const unsigned char* newData_end const unsigned char* oldData,const unsigned char* oldData_end, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, int kMinSingleMatchScore=kMinSingleMatchScore_default, - bool isUseBigCacheMatch=false,ICoverLinesListener* coverLinesListener=0); + bool isUseBigCacheMatch=false,ICoverLinesListener* coverLinesListener=0, + size_t threadNum=1); void create_bsdiff(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, int kMinSingleMatchScore=kMinSingleMatchScore_default, - bool isUseBigCacheMatch=false,ICoverLinesListener* coverLinesListener=0); + bool isUseBigCacheMatch=false,ICoverLinesListener* coverLinesListener=0, + size_t threadNum=1); bool get_is_bsdiff(const unsigned char* diffData,const unsigned char* diffData_end); bool get_is_bsdiff(const hpatch_TStreamInput* diffData); diff --git a/dirDiffPatch/dir_diff/dir_diff.cpp b/dirDiffPatch/dir_diff/dir_diff.cpp index 106a2c2a..07a6924a 100644 --- a/dirDiffPatch/dir_diff/dir_diff.cpp +++ b/dirDiffPatch/dir_diff/dir_diff.cpp @@ -463,17 +463,11 @@ void dir_diff(IDirDiffListener* listener,const TManifest& oldManifest, resLimit.close(); //close files if (hdiffSets.isSingleCompressedDiff){ TOffsetStreamOutput ofStream(outDiffStream,writeToPos); - if (hdiffSets.isUseFastMatchBlock) create_single_compressed_diff_block(newData,newData+newRefStream.stream->streamSize, oldData,oldData+oldRefStream.stream->streamSize, &ofStream,compressPlugin,(int)hdiffSets.matchScore, hdiffSets.patchStepMemSize,hdiffSets.isUseBigCacheMatch, - hdiffSets.threadNum); - else - create_single_compressed_diff(newData,newData+newRefStream.stream->streamSize, - oldData,oldData+oldRefStream.stream->streamSize, - &ofStream,compressPlugin,(int)hdiffSets.matchScore, - hdiffSets.patchStepMemSize,hdiffSets.isUseBigCacheMatch); + hdiffSets.matchBlockSize,hdiffSets.threadNum); diffDataSize=ofStream.outSize; if (checksumByteSize>0){ assert(outDiffStream->read_writed!=0); @@ -482,16 +476,11 @@ void dir_diff(IDirDiffListener* listener,const TManifest& oldManifest, } }else{ std::vector out_diff; - if (hdiffSets.isUseFastMatchBlock) create_compressed_diff_block(newData,newData+newRefStream.stream->streamSize, oldData,oldData+oldRefStream.stream->streamSize, out_diff,compressPlugin,(int)hdiffSets.matchScore, - hdiffSets.isUseBigCacheMatch,hdiffSets.threadNum); - else - create_compressed_diff(newData,newData+newRefStream.stream->streamSize, - oldData,oldData+oldRefStream.stream->streamSize, - out_diff,compressPlugin,(int)hdiffSets.matchScore, - hdiffSets.isUseBigCacheMatch); + hdiffSets.isUseBigCacheMatch, + hdiffSets.matchBlockSize,hdiffSets.threadNum); diffDataSize=out_diff.size(); _pushv(out_diff); } diff --git a/dirDiffPatch/dir_patch/dir_patch_types.h b/dirDiffPatch/dir_patch/dir_patch_types.h index 090a4fd9..1599fad9 100644 --- a/dirDiffPatch/dir_patch/dir_patch_types.h +++ b/dirDiffPatch/dir_patch/dir_patch_types.h @@ -38,10 +38,7 @@ struct THDiffSets{ hpatch_BOOL isSingleCompressedDiff; //diff in mem hpatch_BOOL isUseBigCacheMatch; - hpatch_BOOL isUseFastMatchBlock; size_t matchScore; - size_t fastMatchBlockSize; - //diff by stream size_t patchStepMemSize; size_t matchBlockSize; size_t threadNum; diff --git a/hdiffz.cpp b/hdiffz.cpp index 33cc521d..dcd2c7c5 100644 --- a/hdiffz.cpp +++ b/hdiffz.cpp @@ -712,7 +712,7 @@ int hdiff_cmd_line(int argc, const char * argv[]){ diffSets.isDiffInMem =_kNULL_VALUE; diffSets.isSingleCompressedDiff =_kNULL_VALUE; diffSets.isUseBigCacheMatch =_kNULL_VALUE; - diffSets.isUseFastMatchBlock=_kNULL_VALUE; + diffSets.matchBlockSize=0; diffSets.threadNum=_THREAD_NUMBER_NULL; hpatch_BOOL isForceOverwrite=_kNULL_VALUE; hpatch_BOOL isOutputHelp=_kNULL_VALUE; @@ -766,6 +766,8 @@ int hdiff_cmd_line(int argc, const char * argv[]){ } } break; case 's':{ + _options_check((diffSets.isDiffInMem==_kNULL_VALUE),"-s"); + _options_check((diffSets.matchBlockSize==0),"-block must run with -m"); diffSets.isDiffInMem=hpatch_FALSE; //diff by stream if (op[2]=='-'){ const char* pnum=op+3; @@ -826,16 +828,15 @@ int hdiff_cmd_line(int argc, const char * argv[]){ } break; #endif case 'b':{ - _options_check((diffSets.isUseFastMatchBlock==_kNULL_VALUE)&& + _options_check((diffSets.matchBlockSize==0)&& (op[2]=='l')&&(op[3]=='o')&&(op[4]=='c')&&(op[5]=='k')&& ((op[6]=='\0')||(op[6]=='-')),"-block?"); - diffSets.isUseFastMatchBlock=hpatch_TRUE; //use block match if (op[6]=='-'){ const char* pnum=op+7; - _options_check(kmg_to_size(pnum,strlen(pnum),&diffSets.fastMatchBlockSize),"-block-?"); - _options_check(kMatchBlockSize_min<=diffSets.fastMatchBlockSize,"-block-?"); + _options_check(kmg_to_size(pnum,strlen(pnum),&diffSets.matchBlockSize),"-block-?"); + _options_check(kMatchBlockSize_min<=diffSets.matchBlockSize,"-block-?"); }else{ - diffSets.fastMatchBlockSize=kDefaultFastMatchBlockSize; + diffSets.matchBlockSize=kDefaultFastMatchBlockSize; } } break; case 'c':{ @@ -970,13 +971,10 @@ int hdiff_cmd_line(int argc, const char * argv[]){ if (diffSets.isDoPatchCheck==_kNULL_VALUE) diffSets.isDoPatchCheck=hpatch_TRUE; assert(diffSets.isDoDiff||diffSets.isDoPatchCheck); - if (diffSets.isUseFastMatchBlock==_kNULL_VALUE) - diffSets.isUseFastMatchBlock=hpatch_FALSE; if (diffSets.isUseBigCacheMatch==_kNULL_VALUE) diffSets.isUseBigCacheMatch=hpatch_FALSE; if (diffSets.isDoDiff&&(!diffSets.isDiffInMem)){ _options_check(!diffSets.isUseBigCacheMatch, "-cache must run with -m"); - _options_check(!diffSets.isUseFastMatchBlock,"-block must run with -m"); } #if (_IS_NEED_DIR_DIFF_PATCH) @@ -1024,7 +1022,6 @@ int hdiff_cmd_line(int argc, const char * argv[]){ diffSets.isDiffInMem=hpatch_FALSE; //not need -m, set as -s diffSets.matchBlockSize=hpatch_kStreamCacheSize; //not used diffSets.isUseBigCacheMatch=hpatch_FALSE; - diffSets.isUseFastMatchBlock=hpatch_FALSE; } }else{ _return_check(hpatch_getPathStat(oldPath,&oldType,0),HDIFF_PATHTYPE_ERROR,"get oldPath type"); @@ -1221,18 +1218,18 @@ static int hdiff_in_mem(const char* oldFileName,const char* newFileName,const ch if (diffSets.isBsDiff){ create_bsdiff_block(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(),&diffData_out.base, compressPlugin,(int)diffSets.matchScore,diffSets.isUseBigCacheMatch, - diffSets.isUseFastMatchBlock?diffSets.fastMatchBlockSize:0,diffSets.threadNum); + diffSets.matchBlockSize,diffSets.threadNum); }else #endif if (diffSets.isSingleCompressedDiff){ create_single_compressed_diff_block(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(), &diffData_out.base,compressPlugin,(int)diffSets.matchScore, diffSets.patchStepMemSize,diffSets.isUseBigCacheMatch, - diffSets.isUseFastMatchBlock?diffSets.fastMatchBlockSize:0,diffSets.threadNum); + diffSets.matchBlockSize,diffSets.threadNum); }else{ create_compressed_diff_block(newMem.data(),newMem.data_end(),oldMem.data(),oldMem.data_end(), &diffData_out.base,compressPlugin,(int)diffSets.matchScore, - diffSets.isUseBigCacheMatch,diffSets.isUseFastMatchBlock?diffSets.fastMatchBlockSize:0,diffSets.threadNum); + diffSets.isUseBigCacheMatch,diffSets.matchBlockSize,diffSets.threadNum); } diffData_out.base.streamSize=diffData_out.out_length; }catch(const std::exception& e){ diff --git a/libHDiffPatch/HDiff/diff.cpp b/libHDiffPatch/HDiff/diff.cpp index 1395d299..2bc8218c 100644 --- a/libHDiffPatch/HDiff/diff.cpp +++ b/libHDiffPatch/HDiff/diff.cpp @@ -778,8 +778,8 @@ struct TDiffInsertCover:public IDiffInsertCover{ static void get_diff(const TByte* newData,const TByte* newData_end, const TByte* oldData,const TByte* oldData_end, TDiffData& out_diff,int kMinSingleMatchScore, - bool isUseBigCacheMatch,ICoverLinesListener* listener=0, - const TSuffixString* sstring=0){ + bool isUseBigCacheMatch,ICoverLinesListener* listener, + const TSuffixString* sstring,size_t threadNum){ assert(newData<=newData_end); assert(oldData<=oldData_end); TDiffData& diff=out_diff; @@ -794,7 +794,7 @@ static void get_diff(const TByte* newData,const TByte* newData_end, { TSuffixString _sstring_default(isUseBigCacheMatch); if (sstring==0){ - _sstring_default.resetSuffixString(oldData,oldData_end); + _sstring_default.resetSuffixString(oldData,oldData_end,threadNum); sstring=&_sstring_default; } search_cover(diff.covers,diff,*sstring); @@ -841,30 +841,32 @@ static void get_diff(const TByte* newData,const TByte* newData_end, void create_diff(const TByte* newData,const TByte* newData_end, const TByte* oldData,const TByte* oldData_end, std::vector& out_diff, - int kMinSingleMatchScore,bool isUseBigCacheMatch){ + int kMinSingleMatchScore,bool isUseBigCacheMatch,size_t threadNum){ TDiffData diff; get_diff(newData,newData_end,oldData,oldData_end,diff, - kMinSingleMatchScore,isUseBigCacheMatch); + kMinSingleMatchScore,isUseBigCacheMatch,0,0,threadNum); serialize_diff(diff,out_diff); } void create_compressed_diff(const TByte* newData,const TByte* newData_end, const TByte* oldData,const TByte* oldData_end, std::vector& out_diff,const hdiff_TCompress* compressPlugin, - int kMinSingleMatchScore,bool isUseBigCacheMatch,ICoverLinesListener* listener){ + int kMinSingleMatchScore,bool isUseBigCacheMatch, + ICoverLinesListener* listener,size_t threadNum){ TDiffData diff; get_diff(newData,newData_end,oldData,oldData_end,diff, - kMinSingleMatchScore,isUseBigCacheMatch,listener); + kMinSingleMatchScore,isUseBigCacheMatch,listener,0,threadNum); serialize_compressed_diff(diff,out_diff,compressPlugin); } void create_compressed_diff(const TByte* newData,const TByte* newData_end, const TByte* oldData,const TByte* oldData_end, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, - int kMinSingleMatchScore,bool isUseBigCacheMatch,ICoverLinesListener* listener){ + int kMinSingleMatchScore,bool isUseBigCacheMatch, + ICoverLinesListener* listener,size_t threadNum){ std::vector _out_diff; create_compressed_diff(newData,newData_end,oldData,oldData_end,_out_diff, - compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,listener); + compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,listener,threadNum); checki(out_diff->write(out_diff,0,_out_diff.data(),_out_diff.data()+_out_diff.size()),"create_compressed_diff() out_diff->write"); } @@ -894,11 +896,11 @@ void create_single_compressed_diff(const TByte* newData,const TByte* newData_end std::vector& out_diff, const hdiff_TCompress* compressPlugin,int kMinSingleMatchScore, size_t patchStepMemSize,bool isUseBigCacheMatch, - ICoverLinesListener* listener){ + ICoverLinesListener* listener,size_t threadNum){ TVectorAsStreamOutput outDiffStream(out_diff); create_single_compressed_diff(newData,newData_end,oldData,oldData_end,&outDiffStream, compressPlugin,kMinSingleMatchScore,patchStepMemSize, - isUseBigCacheMatch,listener); + isUseBigCacheMatch,listener,threadNum); } void create_single_compressed_diff(const TByte* newData,const TByte* newData_end, @@ -906,10 +908,10 @@ void create_single_compressed_diff(const TByte* newData,const TByte* newData_end const hpatch_TStreamOutput* out_diff, const hdiff_TCompress* compressPlugin,int kMinSingleMatchScore, size_t patchStepMemSize,bool isUseBigCacheMatch, - ICoverLinesListener* listener){ + ICoverLinesListener* listener,size_t threadNum){ TDiffData diff; get_diff(newData,newData_end,oldData,oldData_end,diff, - kMinSingleMatchScore,isUseBigCacheMatch,listener); + kMinSingleMatchScore,isUseBigCacheMatch,listener,0,threadNum); hpatch_TStreamInput _newStream; hpatch_TStreamInput _oldStream; @@ -1048,7 +1050,7 @@ void __hdiff_private__create_compressed_diff(const TByte* newData,const TByte* n const TSuffixString* sstring){ TDiffData diff; get_diff(newData,newData_end,oldData,oldData_end,diff, - kMinSingleMatchScore,false,0,sstring); + kMinSingleMatchScore,false,0,sstring,1); serialize_compressed_diff(diff,out_diff,compressPlugin); } @@ -1075,20 +1077,22 @@ void get_match_covers_by_block(const unsigned char* newData,const unsigned char* void get_match_covers_by_sstring(const unsigned char* newData,const unsigned char* newData_end, const unsigned char* oldData,const unsigned char* oldData_end, std::vector& out_covers,int kMinSingleMatchScore, - bool isUseBigCacheMatch,ICoverLinesListener* listener){ + bool isUseBigCacheMatch,ICoverLinesListener* listener, + size_t threadNum){ TDiffData diff; get_diff(newData,newData_end,oldData,oldData_end,diff, - kMinSingleMatchScore,isUseBigCacheMatch,listener); + kMinSingleMatchScore,isUseBigCacheMatch,listener,0,threadNum); void* pcovers=&diff.covers; out_covers.swap(*(std::vector*)pcovers); } void get_match_covers_by_sstring(const unsigned char* newData,const unsigned char* newData_end, const unsigned char* oldData,const unsigned char* oldData_end, hpatch_TOutputCovers* out_covers,int kMinSingleMatchScore, - bool isUseBigCacheMatch,ICoverLinesListener* listener){ + bool isUseBigCacheMatch,ICoverLinesListener* listener, + size_t threadNum){ std::vector covers; get_match_covers_by_sstring(newData,newData_end,oldData,oldData_end,covers, - kMinSingleMatchScore,isUseBigCacheMatch,listener); + kMinSingleMatchScore,isUseBigCacheMatch,listener,threadNum); const hpatch_TCover_sz* pcovers=covers.data(); for (size_t i=0;i& out_di void create_lite_diff(const unsigned char* newData,const unsigned char* newData_end, const unsigned char* oldData,const unsigned char* oldData_end, std::vector& out_lite_diff,const hdiffi_TCompress* compressPlugin, - int kMinSingleMatchScore,bool isUseBigCacheMatch){ + int kMinSingleMatchScore,bool isUseBigCacheMatch,size_t threadNum){ static const int _kMatchScore_optim4bin=6; TDiffData diff; - get_diff(newData,newData_end,oldData,oldData_end,diff,kMinSingleMatchScore-_kMatchScore_optim4bin,isUseBigCacheMatch); + get_diff(newData,newData_end,oldData,oldData_end,diff,kMinSingleMatchScore-_kMatchScore_optim4bin, + isUseBigCacheMatch,0,0,threadNum); hpatch_StreamPos_t oldPosEnd=0; hpatch_StreamPos_t newPosEnd=0; if (!diff.covers.empty()){ diff --git a/libHDiffPatch/HDiff/diff.h b/libHDiffPatch/HDiff/diff.h index 00a33b79..93b4d935 100644 --- a/libHDiffPatch/HDiff/diff.h +++ b/libHDiffPatch/HDiff/diff.h @@ -43,7 +43,7 @@ void create_diff(const unsigned char* newData,const unsigned char* newData_end, const unsigned char* oldData,const unsigned char* oldData_end, std::vector& out_diff, int kMinSingleMatchScore=kMinSingleMatchScore_default, - bool isUseBigCacheMatch=false); + bool isUseBigCacheMatch=false,size_t threadNum=1); //return patch(oldData+diff)==newData? bool check_diff(const unsigned char* newData,const unsigned char* newData_end, @@ -67,14 +67,14 @@ void create_compressed_diff(const unsigned char* newData,const unsigned char* ne const hdiff_TCompress* compressPlugin=0, int kMinSingleMatchScore=kMinSingleMatchScore_default, bool isUseBigCacheMatch=false, - ICoverLinesListener* listener=0); + ICoverLinesListener* listener=0,size_t threadNum=1); void create_compressed_diff(const unsigned char* newData,const unsigned char* newData_end, const unsigned char* oldData,const unsigned char* oldData_end, const hpatch_TStreamOutput* out_diff, const hdiff_TCompress* compressPlugin=0, int kMinSingleMatchScore=kMinSingleMatchScore_default, bool isUseBigCacheMatch=false, - ICoverLinesListener* listener=0); + ICoverLinesListener* listener=0,size_t threadNum=1); //create a compressed diff data by stream: // can control memory requires and run speed by different kMatchBlockSize value, @@ -129,14 +129,14 @@ void create_single_compressed_diff(const unsigned char* newData,const unsigned c int kMinSingleMatchScore=kMinSingleMatchScore_default, size_t patchStepMemSize=kDefaultPatchStepMemSize, bool isUseBigCacheMatch=false, - ICoverLinesListener* listener=0); + ICoverLinesListener* listener=0,size_t threadNum=1); void create_single_compressed_diff(const unsigned char* newData,const unsigned char* newData_end, const unsigned char* oldData,const unsigned char* oldData_end, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin=0, int kMinSingleMatchScore=kMinSingleMatchScore_default, size_t patchStepMemSize=kDefaultPatchStepMemSize, bool isUseBigCacheMatch=false, - ICoverLinesListener* listener=0); + ICoverLinesListener* listener=0,size_t threadNum=1); //create single compressed diff data by stream: // can control memory requires and run speed by different kMatchBlockSize value, // but out_diff size is larger than create_single_compressed_diff() @@ -187,10 +187,12 @@ void get_match_covers_by_sstring(const unsigned char* newData,const unsigned cha const unsigned char* oldData,const unsigned char* oldData_end, hpatch_TOutputCovers* out_covers, int kMinSingleMatchScore=kMinSingleMatchScore_default, - bool isUseBigCacheMatch=false,ICoverLinesListener* listener=0); + bool isUseBigCacheMatch=false,ICoverLinesListener* listener=0, + size_t threadNum=1); void get_match_covers_by_sstring(const unsigned char* newData,const unsigned char* newData_end, const unsigned char* oldData,const unsigned char* oldData_end, std::vector& out_covers, int kMinSingleMatchScore=kMinSingleMatchScore_default, - bool isUseBigCacheMatch=false,ICoverLinesListener* listener=0); + bool isUseBigCacheMatch=false,ICoverLinesListener* listener=0, + size_t threadNum=1); #endif diff --git a/libHDiffPatch/HDiff/match_block.cpp b/libHDiffPatch/HDiff/match_block.cpp index 818c9169..6796666f 100644 --- a/libHDiffPatch/HDiff/match_block.cpp +++ b/libHDiffPatch/HDiff/match_block.cpp @@ -274,12 +274,12 @@ void create_compressed_diff_block(unsigned char* newData,unsigned char* newData_ size_t matchBlockSize,size_t threadNum){ if (matchBlockSize==0){ create_compressed_diff(newData,newData_end,oldData,oldData_end, - out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch); + out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,0,threadNum); return; } TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize,threadNum); create_compressed_diff(newData,coversOp.matchBlock->newData_end_cur,oldData,coversOp.matchBlock->oldData_end_cur, - out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,&coversOp); + out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,&coversOp,threadNum); } void create_compressed_diff_block(unsigned char* newData,unsigned char* newData_end, unsigned char* oldData,unsigned char* oldData_end, @@ -288,12 +288,12 @@ void create_compressed_diff_block(unsigned char* newData,unsigned char* newData_ size_t matchBlockSize,size_t threadNum){ if (matchBlockSize==0){ create_compressed_diff(newData,newData_end,oldData,oldData_end, - out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch); + out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,0,threadNum); return; } TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize,threadNum); create_compressed_diff(newData,coversOp.matchBlock->newData_end_cur,oldData,coversOp.matchBlock->oldData_end_cur, - out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,&coversOp); + out_diff,compressPlugin,kMinSingleMatchScore,isUseBigCacheMatch,&coversOp,threadNum); } void create_compressed_diff_block(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, @@ -317,13 +317,13 @@ void create_single_compressed_diff_block(unsigned char* newData,unsigned char* n if (matchBlockSize==0){ create_single_compressed_diff(newData,newData_end,oldData,oldData_end, out_diff,compressPlugin,kMinSingleMatchScore, - patchStepMemSize,isUseBigCacheMatch); + patchStepMemSize,isUseBigCacheMatch,0,threadNum); return; } TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize,threadNum); create_single_compressed_diff(newData,coversOp.matchBlock->newData_end_cur,oldData,coversOp.matchBlock->oldData_end_cur, out_diff,compressPlugin,kMinSingleMatchScore, - patchStepMemSize,isUseBigCacheMatch,&coversOp); + patchStepMemSize,isUseBigCacheMatch,&coversOp,threadNum); } void create_single_compressed_diff_block(unsigned char* newData,unsigned char* newData_end, unsigned char* oldData,unsigned char* oldData_end, @@ -333,13 +333,13 @@ void create_single_compressed_diff_block(unsigned char* newData,unsigned char* n if (matchBlockSize==0){ create_single_compressed_diff(newData,newData_end,oldData,oldData_end, out_diff,compressPlugin,kMinSingleMatchScore, - patchStepMemSize,isUseBigCacheMatch); + patchStepMemSize,isUseBigCacheMatch,0,threadNum); return; } TCoversOptimMB coversOp(newData,newData_end,oldData,oldData_end,matchBlockSize,threadNum); create_single_compressed_diff(newData,coversOp.matchBlock->newData_end_cur,oldData,coversOp.matchBlock->oldData_end_cur, out_diff,compressPlugin,kMinSingleMatchScore, - patchStepMemSize,isUseBigCacheMatch,&coversOp); + patchStepMemSize,isUseBigCacheMatch,&coversOp,threadNum); } void create_single_compressed_diff_block(const hpatch_TStreamInput* newData,const hpatch_TStreamInput* oldData, const hpatch_TStreamOutput* out_diff,const hdiff_TCompress* compressPlugin, diff --git a/libHDiffPatch/HDiff/private_diff/suffix_string.cpp b/libHDiffPatch/HDiff/private_diff/suffix_string.cpp index 35c13292..70701bc0 100644 --- a/libHDiffPatch/HDiff/private_diff/suffix_string.cpp +++ b/libHDiffPatch/HDiff/private_diff/suffix_string.cpp @@ -110,7 +110,8 @@ namespace { }; template - static void _suffixString_create(const TChar* src,const TChar* src_end,std::vector& out_sstring){ + static void _suffixString_create(const TChar* src,const TChar* src_end, + std::vector& out_sstring,size_t threadNum){ TSAInt size=(TSAInt)(src_end-src); if (size<0) throw std::runtime_error("suffixString_create() error."); @@ -133,9 +134,9 @@ namespace { #ifdef _SA_SORTBY_DIVSUFSORT saint_t rt=-1; if (sizeof(TSAInt)==8) - rt=divsufsort64(src,(saidx64_t*)&out_sstring[0],(saidx64_t)size); + rt=divsufsort64(src,(saidx64_t*)&out_sstring[0],(saidx64_t)size,threadNum); else if (sizeof(TSAInt)==4) - rt=divsufsort(src,(saidx_t*)&out_sstring[0],(saidx_t)size); + rt=divsufsort(src,(saidx_t*)&out_sstring[0],(saidx_t)size,threadNum); #endif if (rt!=0) throw std::runtime_error("suffixString_create() error."); @@ -248,10 +249,10 @@ TSuffixString::TSuffixString(bool isUsedFastMatch) clear_cache(); } -TSuffixString::TSuffixString(const TChar* src_begin,const TChar* src_end,bool isUsedFastMatch) +TSuffixString::TSuffixString(const TChar* src_begin,const TChar* src_end,bool isUsedFastMatch,size_t threadNum) :m_src_begin(0),m_src_end(0),m_isUsedFastMatch(isUsedFastMatch),m_cached2char_range(0){ clear_cache(); - resetSuffixString(src_begin,src_end); + resetSuffixString(src_begin,src_end,threadNum); } TSuffixString::~TSuffixString(){ @@ -267,17 +268,17 @@ void TSuffixString::clear(){ } -void TSuffixString::resetSuffixString(const TChar* src_begin,const TChar* src_end){ +void TSuffixString::resetSuffixString(const TChar* src_begin,const TChar* src_end,size_t threadNum){ assert(src_begin<=src_end); m_src_begin=src_begin; m_src_end=src_end; if (isUseLargeSA()){ _clearVector(m_SA_limit); - _suffixString_create(m_src_begin,m_src_end,m_SA_large); + _suffixString_create(m_src_begin,m_src_end,m_SA_large,threadNum); }else{ assert(sizeof(TInt32)==4); _clearVector(m_SA_large); - _suffixString_create(m_src_begin,m_src_end,m_SA_limit); + _suffixString_create(m_src_begin,m_src_end,m_SA_limit,threadNum); } build_cache(); } diff --git a/libHDiffPatch/HDiff/private_diff/suffix_string.h b/libHDiffPatch/HDiff/private_diff/suffix_string.h index 43fae139..b5677753 100644 --- a/libHDiffPatch/HDiff/private_diff/suffix_string.h +++ b/libHDiffPatch/HDiff/private_diff/suffix_string.h @@ -83,8 +83,8 @@ class TSuffixString{ ~TSuffixString(); //throw std::runtime_error when create SA error - TSuffixString(const TChar* src_begin,const TChar* src_end,bool isUsedFastMatch=false); - void resetSuffixString(const TChar* src_begin,const TChar* src_end); + TSuffixString(const TChar* src_begin,const TChar* src_end,bool isUsedFastMatch=false,size_t threadNum=1); + void resetSuffixString(const TChar* src_begin,const TChar* src_end,size_t threadNum=1); inline const TChar* src_begin()const{ return m_src_begin; } inline const TChar* src_end()const{ return m_src_end; } diff --git a/test/_private_searchBestParams.cpp b/test/_private_searchBestParams.cpp index aa542637..b14f89bd 100644 --- a/test/_private_searchBestParams.cpp +++ b/test/_private_searchBestParams.cpp @@ -162,7 +162,7 @@ void doDiff(TDiffInfo& di){ di.oldFileSize=di.oldData.size(); di.newFileSize=di.newData.size(); const TByte* oldData0=di.oldData.data(); - di.sstring.resetSuffixString(oldData0,oldData0+di.oldData.size()); + di.sstring.resetSuffixString(oldData0,oldData0+di.oldData.size(),8); } di.diffSize=_compress_diff(di,0,0); From f2b1e0dfe69b700d349cc253f1673f9239811c6b Mon Sep 17 00:00:00 2001 From: sisong Date: Fri, 7 Oct 2022 13:21:50 +0800 Subject: [PATCH 13/20] libdivsufsort remove utils functions; --- .../libdivsufsort/divsufsort.c.inc.h | 35 -- .../private_diff/libdivsufsort/divsufsort.cpp | 4 +- .../private_diff/libdivsufsort/divsufsort.h | 97 ----- .../libdivsufsort/divsufsort64.cpp | 5 +- .../private_diff/libdivsufsort/divsufsort64.h | 6 - .../libdivsufsort/divsufsort_private.h | 51 ++- .../private_diff/libdivsufsort/sssort.c.inc.h | 37 +- .../private_diff/libdivsufsort/trsort.c.inc.h | 29 +- .../private_diff/libdivsufsort/utils.c.inc.h | 381 ------------------ 9 files changed, 44 insertions(+), 601 deletions(-) delete mode 100644 libHDiffPatch/HDiff/private_diff/libdivsufsort/utils.c.inc.h diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h index 472afec7..98158f13 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h @@ -365,41 +365,6 @@ divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n,size_t threadNum) { return err; } -saidx_t -divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n,size_t threadNum) { - saidx_t *B; - saidx_t *bucket_A, *bucket_B; - saidx_t m, pidx, i; - - /* Check arguments. */ - if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } - else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } - - if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); } - bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); - bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); - - /* Burrows-Wheeler Transform. */ - if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { - m = sort_typeBstar(T, B, bucket_A, bucket_B, n, threadNum); - pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); - - /* Copy to output string. */ - U[0] = T[n - 1]; - for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; } - for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; } - pidx += 1; - } else { - pidx = -2; - } - - free(bucket_B); - free(bucket_A); - if(A == NULL) { free(B); } - - return pidx; -} - const char * divsufsort_version(void) { return PROJECT_VERSION_FULL; diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.cpp b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.cpp index 7c825df1..bdc95b58 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.cpp +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.cpp @@ -6,8 +6,6 @@ #include "divsufsort_private.h" #include "divsufsort.c.inc.h" -#include "trsort.c.inc.h" -#define lg_table sssort_lg_table #include "sssort.c.inc.h" -#include "utils.c.inc.h" +#include "trsort.c.inc.h" diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.h index 77f717cd..182ed11a 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.h @@ -68,13 +68,6 @@ typedef int32_t saint_t; #define SAIDX_T typedef int32_t saidx_t; #endif /* SAIDX_T */ -#ifndef PRIdSAINT_T -#define PRIdSAINT_T PRId32 -#endif /* PRIdSAINT_T */ -#ifndef PRIdSAIDX_T -#define PRIdSAIDX_T PRId32 -#endif /* PRIdSAIDX_T */ - /*- Prototypes -*/ @@ -89,18 +82,6 @@ DIVSUFSORT_API saint_t divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n,size_t threadNum); -/** - * Constructs the burrows-wheeler transformed string of a given string. - * @param T[0..n-1] The input string. - * @param U[0..n-1] The output string. (can be T) - * @param A[0..n-1] The temporary array. (can be NULL) - * @param n The length of the given string. - * @return The primary index if no error occurred, -1 or -2 otherwise. - */ -DIVSUFSORT_API -saidx_t -divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n,size_t threadNum); - /** * Returns the version of the divsufsort library. * @return The version number string. @@ -109,84 +90,6 @@ DIVSUFSORT_API const char * divsufsort_version(void); - -/** - * Constructs the burrows-wheeler transformed string of a given string and suffix array. - * @param T[0..n-1] The input string. - * @param U[0..n-1] The output string. (can be T) - * @param SA[0..n-1] The suffix array. (can be NULL) - * @param n The length of the given string. - * @param idx The output primary index. - * @return 0 if no error occurred, -1 or -2 otherwise. - */ -DIVSUFSORT_API -saint_t -bw_transform(const sauchar_t *T, sauchar_t *U, - saidx_t *SA /* can NULL */, - saidx_t n, saidx_t *idx,size_t threadNum); - -/** - * Inverse BW-transforms a given BWTed string. - * @param T[0..n-1] The input string. - * @param U[0..n-1] The output string. (can be T) - * @param A[0..n-1] The temporary array. (can be NULL) - * @param n The length of the given string. - * @param idx The primary index. - * @return 0 if no error occurred, -1 or -2 otherwise. - */ -DIVSUFSORT_API -saint_t -inverse_bw_transform(const sauchar_t *T, sauchar_t *U, - saidx_t *A /* can NULL */, - saidx_t n, saidx_t idx); - -/** - * Checks the correctness of a given suffix array. - * @param T[0..n-1] The input string. - * @param SA[0..n-1] The input suffix array. - * @param n The length of the given string. - * @param verbose The verbose mode. - * @return 0 if no error occurred. - */ -DIVSUFSORT_API -saint_t -sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose); - -/** - * Search for the pattern P in the string T. - * @param T[0..Tsize-1] The input string. - * @param Tsize The length of the given string. - * @param P[0..Psize-1] The input pattern string. - * @param Psize The length of the given pattern string. - * @param SA[0..SAsize-1] The input suffix array. - * @param SAsize The length of the given suffix array. - * @param idx The output index. - * @return The count of matches if no error occurred, -1 otherwise. - */ -DIVSUFSORT_API -saidx_t -sa_search(const sauchar_t *T, saidx_t Tsize, - const sauchar_t *P, saidx_t Psize, - const saidx_t *SA, saidx_t SAsize, - saidx_t *left); - -/** - * Search for the character c in the string T. - * @param T[0..Tsize-1] The input string. - * @param Tsize The length of the given string. - * @param SA[0..SAsize-1] The input suffix array. - * @param SAsize The length of the given suffix array. - * @param c The input character. - * @param idx The output index. - * @return The count of matches if no error occurred, -1 otherwise. - */ -DIVSUFSORT_API -saidx_t -sa_simplesearch(const sauchar_t *T, saidx_t Tsize, - const saidx_t *SA, saidx_t SAsize, - saint_t c, saidx_t *left); - - #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.cpp b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.cpp index 0b374573..0fe021aa 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.cpp +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.cpp @@ -5,9 +5,6 @@ #define HAVE_CONFIG_H 1 #include #include "divsufsort_private.h" - #include "divsufsort.c.inc.h" -#include "trsort.c.inc.h" -#define lg_table sssort_lg_table #include "sssort.c.inc.h" -#include "utils.c.inc.h" +#include "trsort.c.inc.h" diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.h index d7613196..5da8283f 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.h @@ -78,12 +78,6 @@ typedef int32_t saint_t; #define SAIDX64_T typedef llong_t saidx64_t; #endif /* SAIDX64_T */ -#ifndef PRIdSAINT_T -#define PRIdSAINT_T PRId32 -#endif /* PRIdSAINT_T */ -#ifndef PRIdSAIDX64_T -#define PRIdSAIDX64_T PRId64 -#endif /* PRIdSAIDX64_T */ /*- Prototypes -*/ diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort_private.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort_private.h index 7e261c19..02c25a61 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort_private.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort_private.h @@ -64,24 +64,27 @@ extern "C" { # define SAIDX_T # define saidx_t saidx64_t # endif /* SAIDX_T */ -# ifndef PRIdSAIDX_T -# define PRIdSAIDX_T PRIdSAIDX64_T -# endif /* PRIdSAIDX_T */ +#define sastore_t saidx64_t # define divsufsort divsufsort64 -# define divbwt divbwt64 # define divsufsort_version divsufsort64_version -# define bw_transform bw_transform64 -# define inverse_bw_transform inverse_bw_transform64 -# define sufcheck sufcheck64 -# define sa_search sa_search64 -# define sa_simplesearch sa_simplesearch64 # define sssort sssort64 # define trsort trsort64 +#elif defined(BUILD_DIVSUFSORT40) +# include "divsufsort40.h" +# ifndef SAIDX_T +# define SAIDX_T +# define saidx_t saidx64_t +# endif /* SAIDX_T */ +#define sastore_t saidx40_t +# define divsufsort divsufsort40 +# define divsufsort_version divsufsort40_version +# define sssort sssort40 +# define trsort trsort40 #else # include "divsufsort.h" +#define sastore_t saidx32_t #endif - /*- Constants -*/ #if !defined(UINT8_MAX) # define UINT8_MAX (255) @@ -117,27 +120,27 @@ extern "C" { #endif /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ #if SS_BLOCKSIZE == 0 -# if defined(BUILD_DIVSUFSORT64) -# define SS_MISORT_STACKSIZE (96) -# else +# if !defined(BUILD_DIVSUFSORT64) # define SS_MISORT_STACKSIZE (64) +# else +# define SS_MISORT_STACKSIZE (96) # endif #elif SS_BLOCKSIZE <= 4096 # define SS_MISORT_STACKSIZE (16) #else # define SS_MISORT_STACKSIZE (24) #endif -#if defined(BUILD_DIVSUFSORT64) -# define SS_SMERGE_STACKSIZE (64) -#else +#if !defined(BUILD_DIVSUFSORT64) # define SS_SMERGE_STACKSIZE (32) +#else +# define SS_SMERGE_STACKSIZE (64) #endif /* for trsort.c */ #define TR_INSERTIONSORT_THRESHOLD (8) -#if defined(BUILD_DIVSUFSORT64) -# define TR_STACKSIZE (96) -#else +#if !defined(BUILD_DIVSUFSORT64) # define TR_STACKSIZE (64) +#else +# define TR_STACKSIZE (96) #endif @@ -199,6 +202,16 @@ sssort(const sauchar_t *Td, const saidx_t *PA, void trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth); +static const int lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; #ifdef __cplusplus } /* extern "C" */ diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/sssort.c.inc.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/sssort.c.inc.h index 7bcd3c58..d5e7ea16 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/sssort.c.inc.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/sssort.c.inc.h @@ -29,48 +29,13 @@ /*- Private Functions -*/ -static const saint_t lg_table[256]= { - -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 -}; - #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) static INLINE saint_t ss_ilg(saidx_t n) { #if SS_BLOCKSIZE == 0 -# if defined(BUILD_DIVSUFSORT64) - return (n >> 32) ? - ((n >> 48) ? - ((n >> 56) ? - 56 + lg_table[(n >> 56) & 0xff] : - 48 + lg_table[(n >> 48) & 0xff]) : - ((n >> 40) ? - 40 + lg_table[(n >> 40) & 0xff] : - 32 + lg_table[(n >> 32) & 0xff])) : - ((n & 0xffff0000) ? - ((n & 0xff000000) ? - 24 + lg_table[(n >> 24) & 0xff] : - 16 + lg_table[(n >> 16) & 0xff]) : - ((n & 0x0000ff00) ? - 8 + lg_table[(n >> 8) & 0xff] : - 0 + lg_table[(n >> 0) & 0xff])); -# else - return (n & 0xffff0000) ? - ((n & 0xff000000) ? - 24 + lg_table[(n >> 24) & 0xff] : - 16 + lg_table[(n >> 16) & 0xff]) : - ((n & 0x0000ff00) ? - 8 + lg_table[(n >> 8) & 0xff] : - 0 + lg_table[(n >> 0) & 0xff]); -# endif + return tr_ilg(n); #elif SS_BLOCKSIZE < 256 return lg_table[n]; #else diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/trsort.c.inc.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/trsort.c.inc.h index 00408b8d..4655df61 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/trsort.c.inc.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/trsort.c.inc.h @@ -29,21 +29,18 @@ /*- Private Functions -*/ -static const saint_t lg_table[256]= { - -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 -}; - static INLINE saint_t tr_ilg(saidx_t n) { -#if defined(BUILD_DIVSUFSORT64) +#if !defined(BUILD_DIVSUFSORT64) + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#else return (n >> 32) ? ((n >> 48) ? ((n >> 56) ? @@ -59,14 +56,6 @@ tr_ilg(saidx_t n) { ((n & 0x0000ff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff])); -#else - return (n & 0xffff0000) ? - ((n & 0xff000000) ? - 24 + lg_table[(n >> 24) & 0xff] : - 16 + lg_table[(n >> 16) & 0xff]) : - ((n & 0x0000ff00) ? - 8 + lg_table[(n >> 8) & 0xff] : - 0 + lg_table[(n >> 0) & 0xff]); #endif } diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/utils.c.inc.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/utils.c.inc.h deleted file mode 100644 index c5fa8433..00000000 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/utils.c.inc.h +++ /dev/null @@ -1,381 +0,0 @@ -/* - * utils.c for libdivsufsort - * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "divsufsort_private.h" - - -/*- Private Function -*/ - -/* Binary search for inverse bwt. */ -static -saidx_t -binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) { - saidx_t half, i; - for(i = 0, half = size >> 1; - 0 < size; - size = half, half >>= 1) { - if(A[i + half] < value) { - i += half + 1; - half -= (size & 1) ^ 1; - } - } - return i; -} - - -/*- Functions -*/ - -/* Burrows-Wheeler transform. */ -saint_t -bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA, - saidx_t n, saidx_t *idx,size_t threadNum) { - saidx_t *A, i, j, p, t; - saint_t c; - - /* Check arguments. */ - if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; } - if(n <= 1) { - if(n == 1) { U[0] = T[0]; } - *idx = n; - return 0; - } - - if((A = SA) == NULL) { - i = divbwt(T, U, NULL, n, threadNum); - if(0 <= i) { *idx = i; i = 0; } - return (saint_t)i; - } - - /* BW transform. */ - if(T == U) { - t = n; - for(i = 0, j = 0; i < n; ++i) { - p = t - 1; - t = A[i]; - if(0 <= p) { - c = T[j]; - U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; - A[j] = c; - j++; - } else { - *idx = i; - } - } - p = t - 1; - if(0 <= p) { - c = T[j]; - U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; - A[j] = c; - } else { - *idx = i; - } - } else { - U[0] = T[n - 1]; - for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; } - *idx = i + 1; - for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; } - } - - if(SA == NULL) { - /* Deallocate memory. */ - free(A); - } - - return 0; -} - -/* Inverse Burrows-Wheeler transform. */ -saint_t -inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A, - saidx_t n, saidx_t idx) { - saidx_t C[ALPHABET_SIZE]; - sauchar_t D[ALPHABET_SIZE]; - saidx_t *B; - saidx_t i, p; - saint_t c, d; - - /* Check arguments. */ - if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) || - (n < idx) || ((0 < n) && (idx == 0))) { - return -1; - } - if(n <= 1) { return 0; } - - if((B = A) == NULL) { - /* Allocate n*sizeof(saidx_t) bytes of memory. */ - if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; } - } - - /* Inverse BW transform. */ - for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; } - for(i = 0; i < n; ++i) { ++C[T[i]]; } - for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) { - p = C[c]; - if(0 < p) { - C[c] = i; - D[d++] = (sauchar_t)c; - i += p; - } - } - for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; } - for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; } - for(c = 0; c < d; ++c) { C[c] = C[D[c]]; } - for(i = 0, p = idx; i < n; ++i) { - U[i] = D[binarysearch_lower(C, d, p)]; - p = B[p - 1]; - } - - if(A == NULL) { - /* Deallocate memory. */ - free(B); - } - - return 0; -} - -/* Checks the suffix array SA of the string T. */ -saint_t -sufcheck(const sauchar_t *T, const saidx_t *SA, - saidx_t n, saint_t verbose) { - saidx_t C[ALPHABET_SIZE]; - saidx_t i, p, q, t; - saint_t c; - - if(verbose) { fprintf(stderr, "sufcheck: "); } - - /* Check arguments. */ - if((T == NULL) || (SA == NULL) || (n < 0)) { - if(verbose) { fprintf(stderr, "Invalid arguments.\n"); } - return -1; - } - if(n == 0) { - if(verbose) { fprintf(stderr, "Done.\n"); } - return 0; - } - - /* check range: [0..n-1] */ - for(i = 0; i < n; ++i) { - if((SA[i] < 0) || (n <= SA[i])) { - if(verbose) { - fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n" - " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", - n - 1, i, SA[i]); - } - return -2; - } - } - - /* check first characters. */ - for(i = 1; i < n; ++i) { - if(T[SA[i - 1]] > T[SA[i]]) { - if(verbose) { - fprintf(stderr, "Suffixes in wrong order.\n" - " T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d" - " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n", - i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]); - } - return -3; - } - } - - /* check suffixes. */ - for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; } - for(i = 0; i < n; ++i) { ++C[T[i]]; } - for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) { - t = C[i]; - C[i] = p; - p += t; - } - - q = C[T[n - 1]]; - C[T[n - 1]] += 1; - for(i = 0; i < n; ++i) { - p = SA[i]; - if(0 < p) { - c = T[--p]; - t = C[c]; - } else { - c = T[p = n - 1]; - t = q; - } - if((t < 0) || (p != SA[t])) { - if(verbose) { - fprintf(stderr, "Suffix in wrong position.\n" - " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n" - " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", - t, (0 <= t) ? SA[t] : -1, i, SA[i]); - } - return -4; - } - if(t != q) { - ++C[c]; - if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } - } - } - - if(1 <= verbose) { fprintf(stderr, "Done.\n"); } - return 0; -} - - -static -int -_compare(const sauchar_t *T, saidx_t Tsize, - const sauchar_t *P, saidx_t Psize, - saidx_t suf, saidx_t *match) { - saidx_t i, j; - saint_t r; - for(i = suf + *match, j = *match, r = 0; - (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { } - *match = j; - return (r == 0) ? -(j != Psize) : r; -} - -/* Search for the pattern P in the string T. */ -saidx_t -sa_search(const sauchar_t *T, saidx_t Tsize, - const sauchar_t *P, saidx_t Psize, - const saidx_t *SA, saidx_t SAsize, - saidx_t *idx) { - saidx_t size, lsize, rsize, half; - saidx_t match, lmatch, rmatch; - saidx_t llmatch, lrmatch, rlmatch, rrmatch; - saidx_t i, j, k; - saint_t r; - - if(idx != NULL) { *idx = -1; } - if((T == NULL) || (P == NULL) || (SA == NULL) || - (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; } - if((Tsize == 0) || (SAsize == 0)) { return 0; } - if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; } - - for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1; - 0 < size; - size = half, half >>= 1) { - match = MIN(lmatch, rmatch); - r = _compare(T, Tsize, P, Psize, SA[i + half], &match); - if(r < 0) { - i += half + 1; - half -= (size & 1) ^ 1; - lmatch = match; - } else if(r > 0) { - rmatch = match; - } else { - lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; - - /* left part */ - for(llmatch = lmatch, lrmatch = match, half = lsize >> 1; - 0 < lsize; - lsize = half, half >>= 1) { - lmatch = MIN(llmatch, lrmatch); - r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch); - if(r < 0) { - j += half + 1; - half -= (lsize & 1) ^ 1; - llmatch = lmatch; - } else { - lrmatch = lmatch; - } - } - - /* right part */ - for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1; - 0 < rsize; - rsize = half, half >>= 1) { - rmatch = MIN(rlmatch, rrmatch); - r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch); - if(r <= 0) { - k += half + 1; - half -= (rsize & 1) ^ 1; - rlmatch = rmatch; - } else { - rrmatch = rmatch; - } - } - - break; - } - } - - if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } - return k - j; -} - -/* Search for the character c in the string T. */ -saidx_t -sa_simplesearch(const sauchar_t *T, saidx_t Tsize, - const saidx_t *SA, saidx_t SAsize, - saint_t c, saidx_t *idx) { - saidx_t size, lsize, rsize, half; - saidx_t i, j, k, p; - saint_t r; - - if(idx != NULL) { *idx = -1; } - if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; } - if((Tsize == 0) || (SAsize == 0)) { return 0; } - - for(i = j = k = 0, size = SAsize, half = size >> 1; - 0 < size; - size = half, half >>= 1) { - p = SA[i + half]; - r = (p < Tsize) ? T[p] - c : -1; - if(r < 0) { - i += half + 1; - half -= (size & 1) ^ 1; - } else if(r == 0) { - lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; - - /* left part */ - for(half = lsize >> 1; - 0 < lsize; - lsize = half, half >>= 1) { - p = SA[j + half]; - r = (p < Tsize) ? T[p] - c : -1; - if(r < 0) { - j += half + 1; - half -= (lsize & 1) ^ 1; - } - } - - /* right part */ - for(half = rsize >> 1; - 0 < rsize; - rsize = half, half >>= 1) { - p = SA[k + half]; - r = (p < Tsize) ? T[p] - c : -1; - if(r <= 0) { - k += half + 1; - half -= (rsize & 1) ^ 1; - } - } - - break; - } - } - - if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } - return k - j; -} From 5c4012e21099df9109a180f09421f5d29c6a1f8b Mon Sep 17 00:00:00 2001 From: sisong Date: Fri, 7 Oct 2022 15:32:53 +0800 Subject: [PATCH 14/20] libdivsufsort rename saidx_t* to sastore_t* --- libHDiffPatch/HDiff/diff.cpp | 4 +- .../libdivsufsort/divsufsort.c.inc.h | 90 ++------------- .../private_diff/libdivsufsort/divsufsort.cpp | 9 +- .../private_diff/libdivsufsort/divsufsort.h | 10 +- .../libdivsufsort/divsufsort64.cpp | 14 ++- .../private_diff/libdivsufsort/divsufsort64.h | 90 --------------- .../libdivsufsort/divsufsort_private.h | 42 +------ .../private_diff/libdivsufsort/sssort.c.inc.h | 106 +++++++++--------- .../private_diff/libdivsufsort/trsort.c.inc.h | 62 +++++----- .../HDiff/private_diff/suffix_string.cpp | 4 +- 10 files changed, 119 insertions(+), 312 deletions(-) diff --git a/libHDiffPatch/HDiff/diff.cpp b/libHDiffPatch/HDiff/diff.cpp index 2bc8218c..0526487f 100644 --- a/libHDiffPatch/HDiff/diff.cpp +++ b/libHDiffPatch/HDiff/diff.cpp @@ -1407,8 +1407,8 @@ void create_lite_diff(const unsigned char* newData,const unsigned char* newData_ TDiffData diff; get_diff(newData,newData_end,oldData,oldData_end,diff,kMinSingleMatchScore-_kMatchScore_optim4bin, isUseBigCacheMatch,0,0,threadNum); - hpatch_StreamPos_t oldPosEnd=0; - hpatch_StreamPos_t newPosEnd=0; + size_t oldPosEnd=0; + size_t newPosEnd=0; if (!diff.covers.empty()){ const TOldCover& c=diff.covers.back(); oldPosEnd=c.oldPos+c.length; diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h index 98158f13..5bc5305b 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h @@ -32,8 +32,8 @@ /*- Private Functions -*/ static void _sssort_thread(HLocker locker,saint_t* c0,saint_t* c1,saidx_t* j, - saidx_t *bucket_B,const sauchar_t *T, const saidx_t *PAb, - saidx_t *SA,saidx_t *buf, saidx_t bufsize,saidx_t n,saidx_t m){ + saidx_t *bucket_B,const sauchar_t *T, const sastore_t *PAb, + sastore_t* SA,sastore_t *buf, saidx_t bufsize,saidx_t n,saidx_t m){ saidx_t k = 0; saidx_t l; for(;;) { @@ -61,10 +61,10 @@ static void _sssort_thread(HLocker locker,saint_t* c0,saint_t* c1,saidx_t* j, /* Sorts suffixes of type B*. */ static saidx_t -sort_typeBstar(const sauchar_t *T, saidx_t *SA, +sort_typeBstar(const sauchar_t *T, sastore_t* SA, saidx_t *bucket_A, saidx_t *bucket_B, saidx_t n,size_t threadNum) { - saidx_t *PAb, *ISAb; + sastore_t *PAb, *ISAb; saidx_t i, j, k, t, m; saint_t c0, c1; @@ -125,7 +125,7 @@ sort_typeBstar(const sauchar_t *T, saidx_t *SA, const size_t threadCount=threadNum-1; c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; std::vector threads(threadCount); - saidx_t* buf = SA + m; + sastore_t* buf = SA + m; for (size_t ti=0;ti c0)) { s = ~s; } - if(c0 != c2) { - if(0 <= c2) { BUCKET_B(c2, c1) = (saidx_t)(k - SA); } - k = SA + BUCKET_B(c2 = c0, c1); - } - assert(k < j); - *k-- = s; - } else if(s != 0) { - *j = ~s; -#ifndef NDEBUG - } else { - assert(T[s] == c1); -#endif - } - } - } - } - - /* Construct the BWTed string by using - the sorted order of type B suffixes. */ - k = SA + BUCKET_A(c2 = T[n - 1]); - *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1); - /* Scan the suffix array from left to right. */ - for(i = SA, j = SA + n, orig = SA; i < j; ++i) { - if(0 < (s = *i)) { - assert(T[s - 1] >= T[s]); - c0 = T[--s]; - *i = c0; - if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); } - if(c0 != c2) { - BUCKET_A(c2) = (saidx_t)(k - SA); - k = SA + BUCKET_A(c2 = c0); - } - assert(i < k); - *k++ = s; - } else if(s != 0) { - *i = ~s; - } else { - orig = i; - } - } - - return (saidx_t)(orig - SA); -} - - /*---------------------------------------------------------------------------*/ /*- Function -*/ saint_t -divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n,size_t threadNum) { +divsufsort(const sauchar_t *T, sastore_t* SA, saidx_t n,size_t threadNum) { saidx_t *bucket_A, *bucket_B; saidx_t m; saint_t err = 0; diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.cpp b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.cpp index bdc95b58..495e1ec6 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.cpp +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.cpp @@ -1,10 +1,9 @@ -#ifdef BUILD_DIVSUFSORT64 -# undef BUILD_DIVSUFSORT64 -#endif #define HAVE_CONFIG_H 1 -#include -#include "divsufsort_private.h" +# include "divsufsort.h" +typedef saidx32_t saidx_t; +typedef saidx_t sastore_t; +#include "divsufsort_private.h" #include "divsufsort.c.inc.h" #include "sssort.c.inc.h" #include "trsort.c.inc.h" diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.h index 182ed11a..0839904a 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.h @@ -64,10 +64,10 @@ typedef uint8_t sauchar_t; #define SAINT_T typedef int32_t saint_t; #endif /* SAINT_T */ -#ifndef SAIDX_T -#define SAIDX_T -typedef int32_t saidx_t; -#endif /* SAIDX_T */ +#ifndef SAIDX32_T +#define SAIDX32_T +typedef int32_t saidx32_t; +#endif /* SAIDX32_T */ /*- Prototypes -*/ @@ -80,7 +80,7 @@ typedef int32_t saidx_t; */ DIVSUFSORT_API saint_t -divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n,size_t threadNum); +divsufsort(const sauchar_t *T,saidx32_t *SA,saidx32_t n,size_t threadNum); /** * Returns the version of the divsufsort library. diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.cpp b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.cpp index 0fe021aa..52e2fcc8 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.cpp +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.cpp @@ -1,9 +1,13 @@ -#ifdef BUILD_DIVSUFSORT64 -# undef BUILD_DIVSUFSORT64 -#endif -#define BUILD_DIVSUFSORT64 1 +#define BUILD_DIVSUFSORT64 #define HAVE_CONFIG_H 1 -#include +# include "divsufsort64.h" +typedef saidx64_t saidx_t; +typedef saidx_t sastore_t; +# define divsufsort divsufsort64 +# define divsufsort_version divsufsort64_version +# define sssort sssort64 +# define trsort trsort64 + #include "divsufsort_private.h" #include "divsufsort.c.inc.h" #include "sssort.c.inc.h" diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.h index 5da8283f..9ac57587 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort64.h @@ -93,18 +93,6 @@ DIVSUFSORT_API saint_t divsufsort64(const sauchar_t *T, saidx64_t *SA, saidx64_t n,size_t threadNum); -/** - * Constructs the burrows-wheeler transformed string of a given string. - * @param T[0..n-1] The input string. - * @param U[0..n-1] The output string. (can be T) - * @param A[0..n-1] The temporary array. (can be NULL) - * @param n The length of the given string. - * @return The primary index if no error occurred, -1 or -2 otherwise. - */ -DIVSUFSORT_API -saidx64_t -divbwt64(const sauchar_t *T, sauchar_t *U, saidx64_t *A, saidx64_t n); - /** * Returns the version of the divsufsort library. * @return The version number string. @@ -113,84 +101,6 @@ DIVSUFSORT_API const char * divsufsort64_version(void); - -/** - * Constructs the burrows-wheeler transformed string of a given string and suffix array. - * @param T[0..n-1] The input string. - * @param U[0..n-1] The output string. (can be T) - * @param SA[0..n-1] The suffix array. (can be NULL) - * @param n The length of the given string. - * @param idx The output primary index. - * @return 0 if no error occurred, -1 or -2 otherwise. - */ -DIVSUFSORT_API -saint_t -bw_transform64(const sauchar_t *T, sauchar_t *U, - saidx64_t *SA /* can NULL */, - saidx64_t n, saidx64_t *idx,size_t threadNum); - -/** - * Inverse BW-transforms a given BWTed string. - * @param T[0..n-1] The input string. - * @param U[0..n-1] The output string. (can be T) - * @param A[0..n-1] The temporary array. (can be NULL) - * @param n The length of the given string. - * @param idx The primary index. - * @return 0 if no error occurred, -1 or -2 otherwise. - */ -DIVSUFSORT_API -saint_t -inverse_bw_transform64(const sauchar_t *T, sauchar_t *U, - saidx64_t *A /* can NULL */, - saidx64_t n, saidx64_t idx); - -/** - * Checks the correctness of a given suffix array. - * @param T[0..n-1] The input string. - * @param SA[0..n-1] The input suffix array. - * @param n The length of the given string. - * @param verbose The verbose mode. - * @return 0 if no error occurred. - */ -DIVSUFSORT_API -saint_t -sufcheck64(const sauchar_t *T, const saidx64_t *SA, saidx64_t n, saint_t verbose); - -/** - * Search for the pattern P in the string T. - * @param T[0..Tsize-1] The input string. - * @param Tsize The length of the given string. - * @param P[0..Psize-1] The input pattern string. - * @param Psize The length of the given pattern string. - * @param SA[0..SAsize-1] The input suffix array. - * @param SAsize The length of the given suffix array. - * @param idx The output index. - * @return The count of matches if no error occurred, -1 otherwise. - */ -DIVSUFSORT_API -saidx64_t -sa_search64(const sauchar_t *T, saidx64_t Tsize, - const sauchar_t *P, saidx64_t Psize, - const saidx64_t *SA, saidx64_t SAsize, - saidx64_t *left); - -/** - * Search for the character c in the string T. - * @param T[0..Tsize-1] The input string. - * @param Tsize The length of the given string. - * @param SA[0..SAsize-1] The input suffix array. - * @param SAsize The length of the given suffix array. - * @param c The input character. - * @param idx The output index. - * @return The count of matches if no error occurred, -1 otherwise. - */ -DIVSUFSORT_API -saidx64_t -sa_simplesearch64(const sauchar_t *T, saidx64_t Tsize, - const saidx64_t *SA, saidx64_t SAsize, - saint_t c, saidx64_t *left); - - #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort_private.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort_private.h index 02c25a61..336eb614 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort_private.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort_private.h @@ -27,10 +27,6 @@ #ifndef _DIVSUFSORT_PRIVATE_H #define _DIVSUFSORT_PRIVATE_H 1 -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - #if HAVE_CONFIG_H # include "config.h" #endif @@ -58,32 +54,6 @@ extern "C" { # include # endif #endif -#if defined(BUILD_DIVSUFSORT64) -# include "divsufsort64.h" -# ifndef SAIDX_T -# define SAIDX_T -# define saidx_t saidx64_t -# endif /* SAIDX_T */ -#define sastore_t saidx64_t -# define divsufsort divsufsort64 -# define divsufsort_version divsufsort64_version -# define sssort sssort64 -# define trsort trsort64 -#elif defined(BUILD_DIVSUFSORT40) -# include "divsufsort40.h" -# ifndef SAIDX_T -# define SAIDX_T -# define saidx_t saidx64_t -# endif /* SAIDX_T */ -#define sastore_t saidx40_t -# define divsufsort divsufsort40 -# define divsufsort_version divsufsort40_version -# define sssort sssort40 -# define trsort trsort40 -#else -# include "divsufsort.h" -#define sastore_t saidx32_t -#endif /*- Constants -*/ #if !defined(UINT8_MAX) @@ -194,13 +164,13 @@ extern "C" { /*- Private Prototypes -*/ /* sssort.c */ void -sssort(const sauchar_t *Td, const saidx_t *PA, - saidx_t *first, saidx_t *last, - saidx_t *buf, saidx_t bufsize, +sssort(const sauchar_t *Td, const sastore_t *PA, + sastore_t *first, sastore_t *last, + sastore_t *buf, saidx_t bufsize, saidx_t depth, saidx_t n, saint_t lastsuffix); /* trsort.c */ void -trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth); +trsort(sastore_t *ISA, sastore_t* SA, saidx_t n, saidx_t depth); static const int lg_table[256]= { -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, @@ -213,8 +183,4 @@ static const int lg_table[256]= { 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; -#ifdef __cplusplus -} /* extern "C" */ -#endif /* __cplusplus */ - #endif /* _DIVSUFSORT_PRIVATE_H */ diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/sssort.c.inc.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/sssort.c.inc.h index d5e7ea16..6b81ecfd 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/sssort.c.inc.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/sssort.c.inc.h @@ -104,7 +104,7 @@ ss_isqrt(saidx_t x) { static INLINE saint_t ss_compare(const sauchar_t *T, - const saidx_t *p1, const saidx_t *p2, + const sastore_t*p1, const sastore_t*p2, saidx_t depth) { const sauchar_t *U1, *U2, *U1n, *U2n; @@ -129,9 +129,9 @@ ss_compare(const sauchar_t *T, /* Insertionsort for small size groups */ static void -ss_insertionsort(const sauchar_t *T, const saidx_t *PA, - saidx_t *first, saidx_t *last, saidx_t depth) { - saidx_t *i, *j; +ss_insertionsort(const sauchar_t *T, const sastore_t *PA, + sastore_t *first, sastore_t *last, saidx_t depth) { + sastore_t *i, *j; saidx_t t; saint_t r; @@ -154,8 +154,8 @@ ss_insertionsort(const sauchar_t *T, const saidx_t *PA, static INLINE void -ss_fixdown(const sauchar_t *Td, const saidx_t *PA, - saidx_t *SA, saidx_t i, saidx_t size) { +ss_fixdown(const sauchar_t *Td, const sastore_t *PA, + sastore_t* SA, saidx_t i, saidx_t size) { saidx_t j, k; saidx_t v; saint_t c, d, e; @@ -171,7 +171,7 @@ ss_fixdown(const sauchar_t *Td, const saidx_t *PA, /* Simple top-down heapsort. */ static void -ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) { +ss_heapsort(const sauchar_t *Td, const sastore_t*PA, sastore_t* SA, saidx_t size) { saidx_t i, m; saidx_t t; @@ -195,10 +195,10 @@ ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) { /* Returns the median of three elements. */ static INLINE -saidx_t * -ss_median3(const sauchar_t *Td, const saidx_t *PA, - saidx_t *v1, saidx_t *v2, saidx_t *v3) { - saidx_t *t; +sastore_t* +ss_median3(const sauchar_t *Td, const sastore_t*PA, + sastore_t*v1, sastore_t*v2, sastore_t*v3) { + sastore_t*t; if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } if(Td[PA[*v2]] > Td[PA[*v3]]) { if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } @@ -209,10 +209,10 @@ ss_median3(const sauchar_t *Td, const saidx_t *PA, /* Returns the median of five elements. */ static INLINE -saidx_t * -ss_median5(const sauchar_t *Td, const saidx_t *PA, - saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) { - saidx_t *t; +sastore_t* +ss_median5(const sauchar_t *Td, const sastore_t*PA, + sastore_t*v1, sastore_t*v2, sastore_t*v3, sastore_t*v4, sastore_t*v5) { + sastore_t *t; if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } @@ -224,9 +224,9 @@ ss_median5(const sauchar_t *Td, const saidx_t *PA, /* Returns the pivot element. */ static INLINE -saidx_t * -ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) { - saidx_t *middle; +sastore_t* +ss_pivot(const sauchar_t *Td, const sastore_t*PA, sastore_t*first, sastore_t*last) { + sastore_t*middle; saidx_t t; t = (saidx_t)(last - first); @@ -252,10 +252,10 @@ ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) /* Binary partition for substrings. */ static INLINE -saidx_t * -ss_partition(const saidx_t *PA, - saidx_t *first, saidx_t *last, saidx_t depth) { - saidx_t *a, *b; +sastore_t* +ss_partition(const sastore_t*PA, + sastore_t*first, sastore_t*last, saidx_t depth) { + sastore_t *a, *b; saidx_t t; for(a = first - 1, b = last;;) { for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } @@ -272,13 +272,13 @@ ss_partition(const saidx_t *PA, /* Multikey introsort for medium size groups. */ static void -ss_mintrosort(const sauchar_t *T, const saidx_t *PA, - saidx_t *first, saidx_t *last, +ss_mintrosort(const sauchar_t *T, const sastore_t *PA, + sastore_t *first, sastore_t *last, saidx_t depth) { #define STACK_SIZE SS_MISORT_STACKSIZE - struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE]; + struct { sastore_t*a, *b, c; saint_t d; } stack[STACK_SIZE]; const sauchar_t *Td; - saidx_t *a, *b, *c, *d, *e, *f; + sastore_t*a, *b, *c, *d, *e, *f; saidx_t s, t; saint_t ssize; saint_t limit; @@ -414,7 +414,7 @@ ss_mintrosort(const sauchar_t *T, const saidx_t *PA, static INLINE void -ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) { +ss_blockswap(sastore_t*a, sastore_t*b, saidx_t n) { saidx_t t; for(; 0 < n; --n, ++a, ++b) { t = *a, *a = *b, *b = t; @@ -423,8 +423,8 @@ ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) { static INLINE void -ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) { - saidx_t *a, *b, t; +ss_rotate(sastore_t*first, sastore_t*middle, sastore_t*last) { + sastore_t*a, *b, t; saidx_t l, r; l = (saidx_t)(middle - first), r = (saidx_t)(last - middle); for(; (0 < l) && (0 < r);) { @@ -464,11 +464,11 @@ ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) { static void -ss_inplacemerge(const sauchar_t *T, const saidx_t *PA, - saidx_t *first, saidx_t *middle, saidx_t *last, +ss_inplacemerge(const sauchar_t *T, const sastore_t*PA, + sastore_t*first, sastore_t*middle, sastore_t*last, saidx_t depth) { - const saidx_t *p; - saidx_t *a, *b; + const sastore_t*p; + sastore_t*a, *b; saidx_t len, half; saint_t q, r; saint_t x; @@ -507,10 +507,10 @@ ss_inplacemerge(const sauchar_t *T, const saidx_t *PA, /* Merge-forward with internal buffer. */ static void -ss_mergeforward(const sauchar_t *T, const saidx_t *PA, - saidx_t *first, saidx_t *middle, saidx_t *last, - saidx_t *buf, saidx_t depth) { - saidx_t *a, *b, *c, *bufend; +ss_mergeforward(const sauchar_t *T, const sastore_t*PA, + sastore_t*first, sastore_t*middle, sastore_t*last, + sastore_t*buf, saidx_t depth) { + sastore_t*a, *b, *c, *bufend; saidx_t t; saint_t r; @@ -557,11 +557,11 @@ ss_mergeforward(const sauchar_t *T, const saidx_t *PA, /* Merge-backward with internal buffer. */ static void -ss_mergebackward(const sauchar_t *T, const saidx_t *PA, - saidx_t *first, saidx_t *middle, saidx_t *last, - saidx_t *buf, saidx_t depth) { - const saidx_t *p1, *p2; - saidx_t *a, *b, *c, *bufend; +ss_mergebackward(const sauchar_t *T, const sastore_t*PA, + sastore_t*first, sastore_t*middle, sastore_t*last, + sastore_t*buf, saidx_t depth) { + const sastore_t*p1, *p2; + sastore_t*a, *b, *c, *bufend; saidx_t t; saint_t r; saint_t x; @@ -616,9 +616,9 @@ ss_mergebackward(const sauchar_t *T, const saidx_t *PA, /* D&C based merge. */ static void -ss_swapmerge(const sauchar_t *T, const saidx_t *PA, - saidx_t *first, saidx_t *middle, saidx_t *last, - saidx_t *buf, saidx_t bufsize, saidx_t depth) { +ss_swapmerge(const sauchar_t *T, const sastore_t*PA, + sastore_t*first, sastore_t*middle, sastore_t*last, + sastore_t*buf, saidx_t bufsize, saidx_t depth) { #define STACK_SIZE SS_SMERGE_STACKSIZE #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) #define MERGE_CHECK(a, b, c)\ @@ -631,8 +631,8 @@ ss_swapmerge(const sauchar_t *T, const saidx_t *PA, *(b) = ~*(b);\ }\ } while(0) - struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE]; - saidx_t *l, *r, *lm, *rm; + struct { sastore_t*a, *b, *c; saint_t d; } stack[STACK_SIZE]; + sastore_t*l, *r, *lm, *rm; saidx_t m, len, half; saint_t ssize; saint_t check, next; @@ -709,13 +709,13 @@ ss_swapmerge(const sauchar_t *T, const saidx_t *PA, /* Substring sort */ void -sssort(const sauchar_t *T, const saidx_t *PA, - saidx_t *first, saidx_t *last, - saidx_t *buf, saidx_t bufsize, +sssort(const sauchar_t *T, const sastore_t*PA, + sastore_t *first, sastore_t*last, + sastore_t *buf, saidx_t bufsize, saidx_t depth, saidx_t n, saint_t lastsuffix) { - saidx_t *a; + sastore_t *a; #if SS_BLOCKSIZE != 0 - saidx_t *b, *middle, *curbuf; + sastore_t *b, *middle, *curbuf; saidx_t j, k, curbufsize, limit; #endif saidx_t i; @@ -769,7 +769,7 @@ sssort(const sauchar_t *T, const saidx_t *PA, if(lastsuffix != 0) { /* Insert last type B* suffix. */ - saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + sastore_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; for(a = first, i = *(first - 1); (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); ++a) { diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/trsort.c.inc.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/trsort.c.inc.h index 4655df61..21c46988 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/trsort.c.inc.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/trsort.c.inc.h @@ -65,8 +65,8 @@ tr_ilg(saidx_t n) { /* Simple insertionsort for small size groups. */ static void -tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { - saidx_t *a, *b; +tr_insertionsort(const sastore_t*ISAd, sastore_t*first, sastore_t*last) { + sastore_t*a, *b; saidx_t t, r; for(a = first + 1; a < last; ++a) { @@ -84,7 +84,7 @@ tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { static INLINE void -tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) { +tr_fixdown(const sastore_t*ISAd, sastore_t* SA, saidx_t i, saidx_t size) { saidx_t j, k; saidx_t v; saidx_t c, d, e; @@ -100,7 +100,7 @@ tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) { /* Simple top-down heapsort. */ static void -tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) { +tr_heapsort(const sastore_t*ISAd, sastore_t* SA, saidx_t size) { saidx_t i, m; saidx_t t; @@ -124,9 +124,9 @@ tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) { /* Returns the median of three elements. */ static INLINE -saidx_t * -tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) { - saidx_t *t; +sastore_t* +tr_median3(const sastore_t*ISAd, sastore_t*v1, sastore_t*v2, sastore_t*v3) { + sastore_t*t; if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } if(ISAd[*v2] > ISAd[*v3]) { if(ISAd[*v1] > ISAd[*v3]) { return v1; } @@ -137,10 +137,10 @@ tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) { /* Returns the median of five elements. */ static INLINE -saidx_t * -tr_median5(const saidx_t *ISAd, - saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) { - saidx_t *t; +sastore_t* +tr_median5(const sastore_t*ISAd, + sastore_t*v1, sastore_t*v2, sastore_t*v3, sastore_t*v4, sastore_t*v5) { + sastore_t*t; if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } @@ -152,9 +152,9 @@ tr_median5(const saidx_t *ISAd, /* Returns the pivot element. */ static INLINE -saidx_t * -tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { - saidx_t *middle; +sastore_t* +tr_pivot(const sastore_t*ISAd, sastore_t*first, sastore_t*last) { + sastore_t*middle; saidx_t t; t = (saidx_t)(last - first); @@ -208,10 +208,10 @@ trbudget_check(trbudget_t *budget, saidx_t size) { static INLINE void -tr_partition(const saidx_t *ISAd, - saidx_t *first, saidx_t *middle, saidx_t *last, - saidx_t **pa, saidx_t **pb, saidx_t v) { - saidx_t *a, *b, *c, *d, *e, *f; +tr_partition(const sastore_t*ISAd, + sastore_t*first, sastore_t*middle, sastore_t*last, + sastore_t**pa, sastore_t**pb, saidx_t v) { + sastore_t*a, *b, *c, *d, *e, *f; saidx_t t, s; saidx_t x = 0; @@ -250,12 +250,12 @@ tr_partition(const saidx_t *ISAd, static void -tr_copy(saidx_t *ISA, const saidx_t *SA, - saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, +tr_copy(sastore_t*ISA, const sastore_t* SA, + sastore_t*first, sastore_t*a, sastore_t*b, sastore_t*last, saidx_t depth) { /* sort suffixes of middle partition by using sorted order of suffixes of left and right partition. */ - saidx_t *c, *d, *e; + sastore_t*c, *d, *e; saidx_t s, v; v = (saidx_t)(b - SA - 1); @@ -275,10 +275,10 @@ tr_copy(saidx_t *ISA, const saidx_t *SA, static void -tr_partialcopy(saidx_t *ISA, const saidx_t *SA, - saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, +tr_partialcopy(sastore_t*ISA, const sastore_t* SA, + sastore_t*first, sastore_t*a, sastore_t*b, sastore_t*last, saidx_t depth) { - saidx_t *c, *d, *e; + sastore_t *c, *d, *e; saidx_t s, v; saidx_t rank, lastrank, newrank = -1; @@ -313,12 +313,12 @@ tr_partialcopy(saidx_t *ISA, const saidx_t *SA, static void -tr_introsort(saidx_t *ISA, const saidx_t *ISAd, - saidx_t *SA, saidx_t *first, saidx_t *last, +tr_introsort(sastore_t*ISA, const sastore_t*ISAd, + sastore_t* SA, sastore_t*first, sastore_t*last, trbudget_t *budget) { #define STACK_SIZE TR_STACKSIZE - struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE]; - saidx_t *a, *b, *c; + struct { const sastore_t*a; sastore_t*b, *c; saint_t d, e; }stack[STACK_SIZE]; + sastore_t*a, *b, *c; saidx_t t; saidx_t v, x = 0; saidx_t incr = (saidx_t)(ISAd - ISA); @@ -541,9 +541,9 @@ tr_introsort(saidx_t *ISA, const saidx_t *ISAd, /* Tandem repeat sort */ void -trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) { - saidx_t *ISAd; - saidx_t *first, *last; +trsort(sastore_t *ISA, sastore_t* SA, saidx_t n, saidx_t depth) { + sastore_t*ISAd; + sastore_t*first, *last; trbudget_t budget; saidx_t t, skip, unsorted; diff --git a/libHDiffPatch/HDiff/private_diff/suffix_string.cpp b/libHDiffPatch/HDiff/private_diff/suffix_string.cpp index 70701bc0..0bb2fa79 100644 --- a/libHDiffPatch/HDiff/private_diff/suffix_string.cpp +++ b/libHDiffPatch/HDiff/private_diff/suffix_string.cpp @@ -112,7 +112,7 @@ namespace { template static void _suffixString_create(const TChar* src,const TChar* src_end, std::vector& out_sstring,size_t threadNum){ - TSAInt size=(TSAInt)(src_end-src); + size_t size=(size_t)(src_end-src); if (size<0) throw std::runtime_error("suffixString_create() error."); out_sstring.resize(size); @@ -136,7 +136,7 @@ namespace { if (sizeof(TSAInt)==8) rt=divsufsort64(src,(saidx64_t*)&out_sstring[0],(saidx64_t)size,threadNum); else if (sizeof(TSAInt)==4) - rt=divsufsort(src,(saidx_t*)&out_sstring[0],(saidx_t)size,threadNum); + rt=divsufsort(src,(saidx32_t*)&out_sstring[0],(saidx32_t)size,threadNum); #endif if (rt!=0) throw std::runtime_error("suffixString_create() error."); From 3496e02a6af57d9ddf934c48b6df2bcc8b1af3c8 Mon Sep 17 00:00:00 2001 From: sisong Date: Fri, 7 Oct 2022 17:00:08 +0800 Subject: [PATCH 15/20] add parallel build big cache for TSuffixString; --- .../libdivsufsort/divsufsort.c.inc.h | 2 + .../limit_mem_diff/digest_matcher.cpp | 29 ++++----- .../HDiff/private_diff/suffix_string.cpp | 65 ++++++++++++++----- .../HDiff/private_diff/suffix_string.h | 8 +-- 4 files changed, 70 insertions(+), 34 deletions(-) diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h index 5bc5305b..a6e65c19 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h @@ -26,7 +26,9 @@ #include "divsufsort_private.h" #include "../../../../libParallel/parallel_channel.h" +#if (_IS_USED_MULTITHREAD) #include +#endif #include /*- Private Functions -*/ diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp index 94a55276..00a899e4 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp @@ -225,24 +225,23 @@ struct TIndex_comp{ } }; - - -static inline void _filter_insert(TBloomFilter* filter,const adler_uint_t* begin,const adler_uint_t* end){ +template +static void _filter_insert(TBloomFilter* filter,const adler_uint_t* begin,const adler_uint_t* end){ while (begin!=end){ - filter->insert(adler_to_hash(*begin++)); - } -} + adler_hash_t h=adler_to_hash(*begin++); #if (_IS_USED_MULTITHREAD) -static void _filter_insert_MT(TBloomFilter* filter,const adler_uint_t* begin,const adler_uint_t* end){ - while (begin!=end){ - filter->insert_MT(adler_to_hash(*begin++)); + if (isMT) + filter->insert_MT(h); + else +#endif + filter->insert(h); } } -#endif -static void filter_insert_parallel(TBloomFilter& filter,const adler_uint_t* begin,const adler_uint_t* end, - size_t threadNum,size_t kMinParallelSize=4096){ +static void filter_insert_parallel(TBloomFilter& filter,const adler_uint_t* begin, + const adler_uint_t* end,size_t threadNum){ #if (_IS_USED_MULTITHREAD) + const size_t kMinParallelSize=4096; const size_t size=end-begin; if ((threadNum>1)&&(size>=kMinParallelSize)) { const size_t maxThreanNum=size/(kMinParallelSize/2); @@ -252,14 +251,14 @@ static void filter_insert_parallel(TBloomFilter& filter,const adle const size_t threadCount=threadNum-1; std::vector threads(threadCount); for (size_t i=0;i,&filter,begin,begin+step); + _filter_insert(&filter,begin,end); for (size_t i=0;i(&filter,begin,end); } } diff --git a/libHDiffPatch/HDiff/private_diff/suffix_string.cpp b/libHDiffPatch/HDiff/private_diff/suffix_string.cpp index 0bb2fa79..69205e23 100644 --- a/libHDiffPatch/HDiff/private_diff/suffix_string.cpp +++ b/libHDiffPatch/HDiff/private_diff/suffix_string.cpp @@ -30,6 +30,10 @@ #include #include //memset #include //std::runtime_error +#include "../../../libParallel/parallel_import.h" +#if (_IS_USED_MULTITHREAD) +#include //if used vc++, need >= vc2012 +#endif //排序方法选择. #ifndef _SA_SORTBY #define _SA_SORTBY @@ -280,7 +284,7 @@ void TSuffixString::resetSuffixString(const TChar* src_begin,const TChar* src_en _clearVector(m_SA_large); _suffixString_create(m_src_begin,m_src_end,m_SA_limit,threadNum); } - build_cache(); + build_cache(threadNum); } #define _cached2(ix) (TChar*)m_cached_SA_begin+(isLarge? \ @@ -326,10 +330,10 @@ void TSuffixString::clear_cache(){ m_lower_bound=(t_lower_bound_func)_lower_bound_TInt32;//safe } -void TSuffixString::build_cache(){ +void TSuffixString::build_cache(size_t threadNum){ clear_cache(); #if (_SSTRING_FAST_MATCH>0) - if (m_isUsedFastMatch) m_fastMatch.buildMatchCache(m_src_begin,m_src_end); + if (m_isUsedFastMatch) m_fastMatch.buildMatchCache(m_src_begin,m_src_end,threadNum); #endif const size_t kUsedCacheMinSASize =2*(1<<20); //当字符串较大时再启用大缓存表. if (SASize()>kUsedCacheMinSASize){ @@ -363,21 +367,52 @@ void TSuffixString::build_cache(){ #if (_SSTRING_FAST_MATCH>0) - void TFastMatchForSString::buildMatchCache(const TChar* src_begin,const TChar* src_end){ + + template + static void _filter_insert(TBloomFilter* filter, + const TChar* src_begin,const TChar* src_end){ + const TChar* cur = src_begin; + TFastMatchForSString::THash h=TFastMatchForSString::getHash(cur); + cur+=TFastMatchForSString::kFMMinStrSize; + do { + #if (_IS_USED_MULTITHREAD) + if (isMT) + filter->insert_MT(h); + else + #endif + filter->insert(h); + if (cur=kFMMinStrSize){ - bf.init(srcSize-(kFMMinStrSize-1),kFMZoom); //alloc large memory - const TChar* cur = src_begin; - THash h = getHash(cur); - cur += kFMMinStrSize; - do { - bf.insert(h); //random write slow - if (cur1)&&(rollSize>=kMinParallelSize)) { + const size_t maxThreanNum=rollSize/(kMinParallelSize/2); + threadNum=(threadNum<=maxThreanNum)?threadNum:maxThreanNum; + + const size_t step=rollSize/threadNum; + const size_t threadCount=threadNum-1; + std::vector threads(threadCount); + for (size_t i=0;i,&bf,src_begin,src_begin+step+(kFMMinStrSize-1)); + _filter_insert(&bf,src_begin,src_end); + for (size_t i=0;i(&bf,src_begin,src_end); + } }else if ((srcSize>0)||(src_begin!=0)) bf.init(0,kFMZoom); else{ diff --git a/libHDiffPatch/HDiff/private_diff/suffix_string.h b/libHDiffPatch/HDiff/private_diff/suffix_string.h index b5677753..92a7dd51 100644 --- a/libHDiffPatch/HDiff/private_diff/suffix_string.h +++ b/libHDiffPatch/HDiff/private_diff/suffix_string.h @@ -63,14 +63,14 @@ class TFastMatchForSString{ inline TFastMatchForSString(){} inline void clear(){ bf.clear(); } - void buildMatchCache(const TChar* src_begin,const TChar* src_end); + void buildMatchCache(const TChar* src_begin,const TChar* src_end,size_t threadNum); static inline THash getHash(const TChar* datas) { return fast_adler32_start(datas,kFMMinStrSize); } + static inline THash rollHash(THash h,const TChar* cur) { return fast_adler32_roll(h,kFMMinStrSize,cur[-kFMMinStrSize],cur[0]); } inline bool isHit(THash h) const { return bf.is_hit(h); } private: - TBloomFilter bf; - static inline THash rollHash(THash h,const TChar* cur) { return fast_adler32_roll(h,kFMMinStrSize,cur[-kFMMinStrSize],cur[0]); } + TBloomFilter bf; }; #endif @@ -122,7 +122,7 @@ class TSuffixString{ const TChar* src_begin,const TChar* src_end, const void* SA_begin,size_t min_eq); t_lower_bound_func m_lower_bound; - void build_cache(); + void build_cache(size_t threadNum); void clear_cache(); }; From 77238a9a3ed37c942a0529127a9b86d3b268e17c Mon Sep 17 00:00:00 2001 From: sisong Date: Fri, 7 Oct 2022 21:14:43 +0800 Subject: [PATCH 16/20] add parallel search cover for diff in mem; --- libHDiffPatch/HDiff/diff.cpp | 286 +++++++++++------- .../limit_mem_diff/digest_matcher.cpp | 51 ++-- .../limit_mem_diff/digest_matcher.h | 3 +- .../HDiff/private_diff/suffix_string.cpp | 10 +- 4 files changed, 219 insertions(+), 131 deletions(-) diff --git a/libHDiffPatch/HDiff/diff.cpp b/libHDiffPatch/HDiff/diff.cpp index 0526487f..1bdcb016 100644 --- a/libHDiffPatch/HDiff/diff.cpp +++ b/libHDiffPatch/HDiff/diff.cpp @@ -41,6 +41,11 @@ #include "private_diff/limit_mem_diff/covers.h" #include "private_diff/limit_mem_diff/digest_matcher.h" #include "private_diff/limit_mem_diff/stream_serialize.h" +#include "../../libParallel/parallel_import.h" +#if (_IS_USED_MULTITHREAD) +#include //if used vc++, need >= vc2012 +#include +#endif using namespace hdiff_private; static const char* kHDiffVersionType ="HDIFF13"; @@ -96,7 +101,6 @@ struct TDiffData{ const TByte* newData_end; const TByte* oldData; const TByte* oldData_end; - std::vector covers; //选出的覆盖线. }; @@ -263,13 +267,14 @@ static void tryCollinear(TOldCover& lastCover,const TOldCover& matchCover,const //寻找合适的覆盖线. -static void search_cover(std::vector& covers,const TDiffData& diff, - const TSuffixString& sstring,TDiffLimit* diffLimit=0){ +static void _search_cover(std::vector& covers,const TDiffData& diff, + const TSuffixString& sstring,TDiffLimit* diffLimit=0){ if (sstring.SASize()<=0) return; TInt newPos=diffLimit?diffLimit->newPos:0; const TInt newEnd=diffLimit?diffLimit->newEnd:(diff.newData_end-diff.newData); if (newEnd-newPos<=kMinMatchLen) return; const TInt maxSearchNewPos=newEnd-kMinMatchLen; + const size_t cover_begin=covers.size(); TOldCover lastCover(0,0,0); while (newPos<=maxSearchNewPos) { @@ -286,12 +291,12 @@ static void search_cover(std::vector& covers,const TDiffData& diff, }//else matched if (tryLinkExtend(lastCover,matchCover,diff,diffLimit)){//use link - if (covers.empty()) + if (covers.size()==cover_begin) covers.push_back(lastCover); else covers.back()=lastCover; }else{ //use match - if (!covers.empty())//尝试共线; + if (covers.size()>cover_begin)//尝试共线; tryCollinear(covers.back(),matchCover,diff,diffLimit); covers.push_back(matchCover); } @@ -302,20 +307,20 @@ static void search_cover(std::vector& covers,const TDiffData& diff, //选择合适的覆盖线,去掉不合适的. -static void _select_cover(std::vector& covers,const TDiffData& diff,int kMinSingleMatchScore, +static void _select_cover(std::vector& covers,size_t cover_begin,const TDiffData& diff,int kMinSingleMatchScore, TCompressDetect& nocover_detect,TCompressDetect& cover_detect,TDiffLimit* diffLimit){ TOldCover lastCover(0,0,0); if (diffLimit) lastCover=diffLimit->lastCover_back; - const TInt coverSize_old=(TInt)covers.size(); - TInt insertIndex=0; - for (TInt i=0;i0)&&(covers[insertIndex-1].isCanLink(covers[i]))){ + if ((insertIndex>cover_begin)&&(covers[insertIndex-1].isCanLink(covers[i]))){ if (diffLimit){ const TOldCover& fc=covers[insertIndex-1]; hpatch_TCover cover={(size_t)(fc.oldPos+fc.length),(size_t)(fc.newPos+fc.length), @@ -331,7 +336,7 @@ static void _select_cover(std::vector& covers,const TDiffData& diff,i } } if (i+1& covers,const TDiffData& diff,i covers.resize(insertIndex); } -static void select_cover(std::vector& covers,const TDiffData& diff, +static void select_cover(std::vector& covers,size_t cover_begin,const TDiffData& diff, int kMinSingleMatchScore,TDiffLimit* diffLimit=0){ if (diffLimit==0){ TCompressDetect nocover_detect; TCompressDetect cover_detect; - _select_cover(covers,diff,kMinSingleMatchScore,nocover_detect,cover_detect,0); + _select_cover(covers,cover_begin,diff,kMinSingleMatchScore,nocover_detect,cover_detect,0); }else{ - _select_cover(covers,diff,kMinSingleMatchScore,diffLimit->nocover_detect,diffLimit->cover_detect,diffLimit); + _select_cover(covers,cover_begin,diff,kMinSingleMatchScore,diffLimit->nocover_detect,diffLimit->cover_detect,diffLimit); } } @@ -423,12 +428,12 @@ static void select_cover(std::vector& covers,const TDiffData& diff, } //尝试延长覆盖区域. -static void extend_cover(std::vector& covers,const TDiffData& diff, +static void extend_cover(std::vector& covers,size_t cover_begin,const TDiffData& diff, const TFixedFloatSmooth kExtendMinSameRatio,TDiffLimit* diffLimit=0){ TInt lastNewEnd=diffLimit?diffLimit->newPos:0; - for (TInt i=0; i<(TInt)covers.size(); ++i) { + for (size_t i=cover_begin; inewEnd:(TInt)(diff.newData_end-diff.newData); @@ -503,8 +508,8 @@ static void extend_cover(std::vector& covers,const TDiffData& diff, } //diff结果序列化输出. -static void serialize_diff(const TDiffData& diff,std::vector& out_diff){ - const TUInt coverCount=(TUInt)diff.covers.size(); +static void serialize_diff(const TDiffData& diff,const std::vector& covers,std::vector& out_diff){ + const TUInt coverCount=(TUInt)covers.size(); std::vector length_buf; std::vector inc_newPos_buf; std::vector inc_oldPos_buf; @@ -512,21 +517,21 @@ static void serialize_diff(const TDiffData& diff,std::vector& out_diff){ TInt oldPosBack=0; TInt lastNewEnd=0; for (TUInt i=0; i=lastNewEnd); - packUInt(inc_newPos_buf,(TUInt)(diff.covers[i].newPos-lastNewEnd)); //save inc_newPos - if (diff.covers[i].oldPos>=oldPosBack){ //save inc_oldPos - packUIntWithTag(inc_oldPos_buf,(TUInt)(diff.covers[i].oldPos-oldPosBack), 0, 1); + packUInt(length_buf, (TUInt)covers[i].length); + assert(covers[i].newPos>=lastNewEnd); + packUInt(inc_newPos_buf,(TUInt)(covers[i].newPos-lastNewEnd)); //save inc_newPos + if (covers[i].oldPos>=oldPosBack){ //save inc_oldPos + packUIntWithTag(inc_oldPos_buf,(TUInt)(covers[i].oldPos-oldPosBack), 0, 1); }else{ - packUIntWithTag(inc_oldPos_buf,(TUInt)(oldPosBack-diff.covers[i].oldPos), 1, 1);//sub safe + packUIntWithTag(inc_oldPos_buf,(TUInt)(oldPosBack-covers[i].oldPos), 1, 1);//sub safe } - oldPosBack=diff.covers[i].oldPos; - lastNewEnd=diff.covers[i].newPos+diff.covers[i].length; + oldPosBack=covers[i].oldPos; + lastNewEnd=covers[i].newPos+covers[i].length; } } - const TCovers _covers((void*)diff.covers.data(),diff.covers.size(), - sizeof(*diff.covers.data())==sizeof(hpatch_TCover32)); + const TCovers _covers((void*)covers.data(),covers.size(), + sizeof(*covers.data())==sizeof(hpatch_TCover32)); hpatch_TStreamInput _newDataStream; mem_as_hStreamInput(&_newDataStream,diff.newData,diff.newData_end); TNewDataDiffStream newDataDiffStream(_covers,&_newDataStream); @@ -581,32 +586,32 @@ static void serialize_diff(const TDiffData& diff,std::vector& out_diff){ pushBack(out_data,&_cstrEndTag,(&_cstrEndTag)+1); } -static void serialize_compressed_diff(const TDiffData& diff,std::vector& out_diff, +static void serialize_compressed_diff(const TDiffData& diff,std::vector& covers,std::vector& out_diff, const hdiff_TCompress* compressPlugin){ - const TUInt coverCount=(TUInt)diff.covers.size(); + const TUInt coverCount=(TUInt)covers.size(); std::vector cover_buf; { TInt lastOldEnd=0; TInt lastNewEnd=0; for (TUInt i=0; i=lastOldEnd){ //save inc_oldPos - packUIntWithTag(cover_buf,(TUInt)(diff.covers[i].oldPos-lastOldEnd), 0, 1); + if (covers[i].oldPos>=lastOldEnd){ //save inc_oldPos + packUIntWithTag(cover_buf,(TUInt)(covers[i].oldPos-lastOldEnd), 0, 1); }else{ - packUIntWithTag(cover_buf,(TUInt)(lastOldEnd-diff.covers[i].oldPos), 1, 1);//sub safe + packUIntWithTag(cover_buf,(TUInt)(lastOldEnd-covers[i].oldPos), 1, 1);//sub safe } - assert(diff.covers[i].newPos>=lastNewEnd); - packUInt(cover_buf,(TUInt)(diff.covers[i].newPos-lastNewEnd)); //save inc_newPos - packUInt(cover_buf,(TUInt)diff.covers[i].length); - lastOldEnd=diff.covers[i].oldPos+diff.covers[i].length;//! +length - lastNewEnd=diff.covers[i].newPos+diff.covers[i].length; + assert(covers[i].newPos>=lastNewEnd); + packUInt(cover_buf,(TUInt)(covers[i].newPos-lastNewEnd)); //save inc_newPos + packUInt(cover_buf,(TUInt)covers[i].length); + lastOldEnd=covers[i].oldPos+covers[i].length;//! +length + lastNewEnd=covers[i].newPos+covers[i].length; } } std::vector rle_ctrlBuf; std::vector rle_codeBuf; { - const TCovers _covers((void*)diff.covers.data(),diff.covers.size(), - sizeof(*diff.covers.data())==sizeof(hpatch_TCover32)); + const TCovers _covers((void*)covers.data(),covers.size(), + sizeof(*covers.data())==sizeof(hpatch_TCover32)); TNewDataSubDiffStream_mem newDataSubDiff(diff.newData,diff.newData_end, diff.oldData,diff.oldData_end,_covers); bytesRLE_save(rle_ctrlBuf,rle_codeBuf,&newDataSubDiff,kRle_bestSize); @@ -620,8 +625,8 @@ static void serialize_compressed_diff(const TDiffData& diff,std::vector& do_compress(compress_rle_ctrlBuf,rle_ctrlBuf,compressPlugin); do_compress(compress_rle_codeBuf,rle_codeBuf,compressPlugin); - const TCovers _covers((void*)diff.covers.data(),diff.covers.size(), - sizeof(*diff.covers.data())==sizeof(hpatch_TCover32)); + const TCovers _covers((void*)covers.data(),covers.size(), + sizeof(*covers.data())==sizeof(hpatch_TCover32)); hpatch_TStreamInput _newDataStream; mem_as_hStreamInput(&_newDataStream,diff.newData,diff.newData_end); TNewDataDiffStream newDataDiffStream(_covers,&_newDataStream); @@ -649,27 +654,97 @@ static void serialize_compressed_diff(const TDiffData& diff,std::vector& } -static void dispose_cover(std::vector& covers,const TDiffData& diff, - int kMinSingleMatchScore,TDiffLimit* diffLimit=0){ +static void _dispose_cover(std::vector& covers,size_t cover_begin,const TDiffData& diff, + int kMinSingleMatchScore,TDiffLimit* diffLimit=0){ TFixedFloatSmooth kExtendMinSameRatio=kMinSingleMatchScore*36+254; if (kExtendMinSameRatio<200) kExtendMinSameRatio=200; if (kExtendMinSameRatio>800) kExtendMinSameRatio=800; - extend_cover(covers,diff,kExtendMinSameRatio,diffLimit);//先尝试扩展. - select_cover(covers,diff,kMinSingleMatchScore,diffLimit); - extend_cover(covers,diff,kExtendMinSameRatio,diffLimit);//select_cover会删除一些覆盖线,所以重新扩展. + extend_cover(covers,cover_begin,diff,kExtendMinSameRatio,diffLimit);//先尝试扩展. + select_cover(covers,cover_begin,diff,kMinSingleMatchScore,diffLimit); + extend_cover(covers,cover_begin,diff,kExtendMinSameRatio,diffLimit);//select_cover会删除一些覆盖线,所以重新扩展. +} + + +static void search_and_dispose_cover(std::vector& covers,const TDiffData& diff, + const TSuffixString& sstring,int kMinSingleMatchScore, + TDiffLimit* diffLimit=0){ + const size_t cover_begin=covers.size(); + _search_cover(covers,diff,sstring,diffLimit); + if (covers.size()>cover_begin) + _dispose_cover(covers,cover_begin,diff,kMinSingleMatchScore,diffLimit); +} + +#if (_IS_USED_MULTITHREAD) + static void _search_and_dispose_cover_MT(std::vector* _covers,const TDiffData* _diff, + const TSuffixString* sstring,int kMinSingleMatchScore, + size_t workCount,size_t* pworkIndex){ + const size_t kPartPepeatSize=1024; + std::vector& covers=*_covers; + const TDiffData& diff=*_diff; + std::atomic& workIndex=*(std::atomic*)pworkIndex; + const size_t newSize=diff.newData_end-diff.newData; + while (true){ + size_t curWorkIndex=workIndex++; + if (curWorkIndex>=workCount) break; + size_t new_begin=(size_t)(newSize*(hpatch_uint64_t)curWorkIndex/workCount); + size_t new_end=((curWorkIndex+1& covers,const TDiffData& diff, + const TSuffixString& sstring,int kMinSingleMatchScore, + TDiffLimit* diffLimit=0,size_t threadNum=1){ +double t0=clock_s(); +#if (_IS_USED_MULTITHREAD) + const size_t kMinParallelSize=1024*64; + const size_t kBestParallelSize=1024*1024*16; + size_t newSize=diff.newData_end-diff.newData; + if ((threadNum>1)&&(diffLimit==0)&&(diff.oldData!=diff.oldData_end)&&(newSize>=kMinParallelSize)){ + const size_t maxThreanNum=newSize/(kMinParallelSize/2); + threadNum=(threadNum<=maxThreanNum)?threadNum:maxThreanNum; + size_t workCount=(newSize+kBestParallelSize-1)/kBestParallelSize; + workCount=(threadNum>workCount)?threadNum:workCount; + + size_t workIndex=0; + const size_t threadCount=threadNum-1; + std::vector threads(threadCount); + std::vector > threadCovers(threadCount); + for (size_t i=0;i tmp; tmp.swap(threadCovers[i]); } + } + tm_collate_covers(covers); + }else +#endif + { + search_and_dispose_cover(covers,diff,sstring,kMinSingleMatchScore,diffLimit); + } + double t1=clock_s(); + printf("search_and_dispose_cover time:%3.3f s\n",t1-t0); } static const hpatch_StreamPos_t _kNullCoverHitEndPos =~(hpatch_StreamPos_t)0; struct TDiffResearchCover:public IDiffResearchCover{ - TDiffResearchCover(TDiffData& diff_,const TSuffixString& sstring_,int kMinSingleMatchScore_) - :diff(diff_),sstring(sstring_),kMinSingleMatchScore(kMinSingleMatchScore_), + TDiffResearchCover(TDiffData& diff_,std::vector& covers_,const TSuffixString& sstring_,int kMinSingleMatchScore_) + :diff(diff_), covers(covers_),sstring(sstring_),kMinSingleMatchScore(kMinSingleMatchScore_), limitCoverIndex_back(~(size_t)0),limitCoverHitEndPos_back(_kNullCoverHitEndPos){ researchCover=_researchCover; } void _researchRange(TDiffLimit* diffLimit){ - search_cover(curCovers,diff,sstring,diffLimit); - if (curCovers.empty()) return; - dispose_cover(curCovers,diff,kMinSingleMatchScore,diffLimit); + search_and_dispose_cover(curCovers,diff,sstring,kMinSingleMatchScore,diffLimit); if (curCovers.empty()) return; reCovers.insert(reCovers.end(),curCovers.begin(),curCovers.end()); curCovers.clear(); @@ -677,7 +752,7 @@ struct TDiffResearchCover:public IDiffResearchCover{ inline void endResearchCover(){ if (limitCoverHitEndPos_back!=_kNullCoverHitEndPos){ - TOldCover& cover=diff.covers[limitCoverIndex_back]; + TOldCover& cover=covers[limitCoverIndex_back]; cover.oldPos+=(TInt)limitCoverHitEndPos_back; cover.newPos+=(TInt)limitCoverHitEndPos_back; cover.length-=(TInt)limitCoverHitEndPos_back; @@ -691,7 +766,7 @@ struct TDiffResearchCover:public IDiffResearchCover{ limitCoverIndex_back=limitCoverIndex; limitCoverHitEndPos_back=hitPos+hitLen; - const TOldCover& cover=diff.covers[limitCoverIndex]; + const TOldCover& cover=covers[limitCoverIndex]; TOldCover lastCover_back(0,0,0); if (endPosBack0) - lastCover_back=diff.covers[limitCoverIndex-1]; + lastCover_back=covers[limitCoverIndex-1]; if ((!reCovers.empty())&&(reCovers.back().newPos>lastCover_back.newPos)) lastCover_back=reCovers.back(); } @@ -721,17 +796,18 @@ struct TDiffResearchCover:public IDiffResearchCover{ void researchFinish(){ endResearchCover(); size_t insert=0; - for (size_t i=0;i0) - diff.covers[insert++]=diff.covers[i]; + for (size_t i=0;i0) + covers[insert++]=covers[i]; } - diff.covers.resize(insert); - diff.covers.insert(diff.covers.end(),reCovers.begin(),reCovers.end()); - std::inplace_merge(diff.covers.begin(),diff.covers.begin()+insert, - diff.covers.end(),cover_cmp_by_new_t()); + covers.resize(insert); + covers.insert(covers.end(),reCovers.begin(),reCovers.end()); + std::inplace_merge(covers.begin(),covers.begin()+insert, + covers.end(),cover_cmp_by_new_t()); } TDiffData& diff; + std::vector& covers; const TSuffixString& sstring; int kMinSingleMatchScore; std::vector reCovers; @@ -777,7 +853,8 @@ struct TDiffInsertCover:public IDiffInsertCover{ static void get_diff(const TByte* newData,const TByte* newData_end, const TByte* oldData,const TByte* oldData_end, - TDiffData& out_diff,int kMinSingleMatchScore, + TDiffData& out_diff,std::vector& covers, + int kMinSingleMatchScore, bool isUseBigCacheMatch,ICoverLinesListener* listener, const TSuffixString* sstring,size_t threadNum){ assert(newData<=newData_end); @@ -788,50 +865,49 @@ static void get_diff(const TByte* newData,const TByte* newData_end, diff.oldData=oldData; diff.oldData_end=oldData_end; - const bool isCover32=sizeof(*diff.covers.data())==sizeof(hpatch_TCover32); + const bool isCover32=sizeof(*covers.data())==sizeof(hpatch_TCover32); if (!isCover32) - assert(sizeof(*diff.covers.data())==sizeof(hpatch_TCover)); + assert(sizeof(*covers.data())==sizeof(hpatch_TCover)); { TSuffixString _sstring_default(isUseBigCacheMatch); if (sstring==0){ _sstring_default.resetSuffixString(oldData,oldData_end,threadNum); sstring=&_sstring_default; } - search_cover(diff.covers,diff,*sstring); - dispose_cover(diff.covers,diff,kMinSingleMatchScore); - assert_covers_safe(diff.covers,diff.newData_end-diff.newData,diff.oldData_end-diff.oldData); + search_and_dispose_cover_MT(covers,diff,*sstring,kMinSingleMatchScore,0,threadNum); + assert_covers_safe(covers,diff.newData_end-diff.newData,diff.oldData_end-diff.oldData); if (listener&&listener->search_cover_limit&& - listener->search_cover_limit(listener,diff.covers.data(),diff.covers.size(),isCover32)){ - TDiffResearchCover diffResearchCover(diff,*sstring,kMinSingleMatchScore); - listener->research_cover(listener,&diffResearchCover,diff.covers.data(),diff.covers.size(),isCover32); + listener->search_cover_limit(listener,covers.data(),covers.size(),isCover32)){ + TDiffResearchCover diffResearchCover(diff,covers,*sstring,kMinSingleMatchScore); + listener->research_cover(listener,&diffResearchCover,covers.data(),covers.size(),isCover32); diffResearchCover.researchFinish(); } sstring=0; _sstring_default.clear(); } if (listener&&listener->insert_cover){ - TDiffInsertCover diffInsertCover(diff.covers); + TDiffInsertCover diffInsertCover(covers); hpatch_StreamPos_t newDataSize=(size_t)(diff.newData_end-diff.newData); hpatch_StreamPos_t oldDataSize=(size_t)(diff.oldData_end-diff.oldData); - listener->insert_cover(listener,&diffInsertCover,diff.covers.data(),diff.covers.size(),isCover32, + listener->insert_cover(listener,&diffInsertCover,covers.data(),covers.size(),isCover32, &newDataSize,&oldDataSize); diff.newData_end=diff.newData+(size_t)newDataSize; diff.oldData_end=diff.oldData+(size_t)oldDataSize; - assert_covers_safe(diff.covers,diff.newData_end-diff.newData,diff.oldData_end-diff.oldData); + assert_covers_safe(covers,diff.newData_end-diff.newData,diff.oldData_end-diff.oldData); } if (listener&&listener->search_cover_finish){ hpatch_StreamPos_t newDataSize=(size_t)(diff.newData_end-diff.newData); hpatch_StreamPos_t oldDataSize=(size_t)(diff.oldData_end-diff.oldData); - size_t newCoverCount=diff.covers.size(); - listener->search_cover_finish(listener,diff.covers.data(),&newCoverCount,isCover32, + size_t newCoverCount=covers.size(); + listener->search_cover_finish(listener,covers.data(),&newCoverCount,isCover32, &newDataSize,&oldDataSize); - check(newCoverCount<=diff.covers.size()); - diff.covers.resize(newCoverCount); + check(newCoverCount<=covers.size()); + covers.resize(newCoverCount); diff.newData_end=diff.newData+(size_t)newDataSize; diff.oldData_end=diff.oldData+(size_t)oldDataSize; } if (listener){ - assert_covers_safe(diff.covers,diff.newData_end-diff.newData,diff.oldData_end-diff.oldData); + assert_covers_safe(covers,diff.newData_end-diff.newData,diff.oldData_end-diff.oldData); } } @@ -843,9 +919,10 @@ void create_diff(const TByte* newData,const TByte* newData_end, std::vector& out_diff, int kMinSingleMatchScore,bool isUseBigCacheMatch,size_t threadNum){ TDiffData diff; - get_diff(newData,newData_end,oldData,oldData_end,diff, + std::vector covers; + get_diff(newData,newData_end,oldData,oldData_end,diff,covers, kMinSingleMatchScore,isUseBigCacheMatch,0,0,threadNum); - serialize_diff(diff,out_diff); + serialize_diff(diff,covers,out_diff); } void create_compressed_diff(const TByte* newData,const TByte* newData_end, @@ -854,9 +931,10 @@ void create_compressed_diff(const TByte* newData,const TByte* newData_end, int kMinSingleMatchScore,bool isUseBigCacheMatch, ICoverLinesListener* listener,size_t threadNum){ TDiffData diff; - get_diff(newData,newData_end,oldData,oldData_end,diff, + std::vector covers; + get_diff(newData,newData_end,oldData,oldData_end,diff,covers, kMinSingleMatchScore,isUseBigCacheMatch,listener,0,threadNum); - serialize_compressed_diff(diff,out_diff,compressPlugin); + serialize_compressed_diff(diff,covers,out_diff,compressPlugin); } void create_compressed_diff(const TByte* newData,const TByte* newData_end, @@ -910,15 +988,16 @@ void create_single_compressed_diff(const TByte* newData,const TByte* newData_end size_t patchStepMemSize,bool isUseBigCacheMatch, ICoverLinesListener* listener,size_t threadNum){ TDiffData diff; - get_diff(newData,newData_end,oldData,oldData_end,diff, + std::vector covers; + get_diff(newData,newData_end,oldData,oldData_end,diff,covers, kMinSingleMatchScore,isUseBigCacheMatch,listener,0,threadNum); hpatch_TStreamInput _newStream; hpatch_TStreamInput _oldStream; mem_as_hStreamInput(&_newStream,diff.newData,diff.newData_end); mem_as_hStreamInput(&_oldStream,diff.oldData,diff.oldData_end); - const TCovers _covers((void*)diff.covers.data(),diff.covers.size(), - sizeof(*diff.covers.data())==sizeof(hpatch_TCover32)); + const TCovers _covers((void*)covers.data(),covers.size(), + sizeof(*covers.data())==sizeof(hpatch_TCover32)); serialize_single_compressed_diff(&_newStream,&_oldStream,false,_covers, out_diff,compressPlugin,patchStepMemSize); } @@ -1049,9 +1128,10 @@ void __hdiff_private__create_compressed_diff(const TByte* newData,const TByte* n const hdiff_TCompress* compressPlugin,int kMinSingleMatchScore, const TSuffixString* sstring){ TDiffData diff; - get_diff(newData,newData_end,oldData,oldData_end,diff, + std::vector covers; + get_diff(newData,newData_end,oldData,oldData_end,diff,covers, kMinSingleMatchScore,false,0,sstring,1); - serialize_compressed_diff(diff,out_diff,compressPlugin); + serialize_compressed_diff(diff,covers,out_diff,compressPlugin); } @@ -1080,9 +1160,12 @@ void get_match_covers_by_sstring(const unsigned char* newData,const unsigned cha bool isUseBigCacheMatch,ICoverLinesListener* listener, size_t threadNum){ TDiffData diff; - get_diff(newData,newData_end,oldData,oldData_end,diff, + std::vector covers; + assert(sizeof(TOldCover)==sizeof(hpatch_TCover_sz)); + { std::vector tmp; tmp.swap(out_covers); } + get_diff(newData,newData_end,oldData,oldData_end,diff,covers, kMinSingleMatchScore,isUseBigCacheMatch,listener,0,threadNum); - void* pcovers=&diff.covers; + void* pcovers=&covers; out_covers.swap(*(std::vector*)pcovers); } void get_match_covers_by_sstring(const unsigned char* newData,const unsigned char* newData_end, @@ -1343,9 +1426,9 @@ namespace{ subDiff[i]=pnew[i]-pold[i]; } -static void serialize_lite_diff(const TDiffData& diff,std::vector& out_diff, - const hdiffi_TCompress* compressPlugin){ - const TUInt coverCount=(TUInt)diff.covers.size(); +static void serialize_lite_diff(const TDiffData& diff,const std::vector& covers, + std::vector& out_diff,const hdiffi_TCompress* compressPlugin){ + const TUInt coverCount=(TUInt)covers.size(); std::vector subDiff; std::vector buf; hpi_packUInt(buf,coverCount); @@ -1354,7 +1437,7 @@ static void serialize_lite_diff(const TDiffData& diff,std::vector& out_di TUInt lastOldEnd=0; TUInt lastNewEnd=0; for (TUInt i=0; i covers; + get_diff(newData,newData_end,oldData,oldData_end,diff,covers, + kMinSingleMatchScore-_kMatchScore_optim4bin,isUseBigCacheMatch,0,0,threadNum); size_t oldPosEnd=0; size_t newPosEnd=0; - if (!diff.covers.empty()){ - const TOldCover& c=diff.covers.back(); + if (!covers.empty()){ + const TOldCover& c=covers.back(); oldPosEnd=c.oldPos+c.length; newPosEnd=c.newPos+c.length; } const size_t newSize=newData_end-newData; if (newPosEnd +#endif namespace hdiff_private{ static const size_t kMinTrustMatchedLength=1024*16; static const size_t kMinMatchedLength = 16; @@ -241,10 +244,10 @@ static void _filter_insert(TBloomFilter* filter,const adler_uint_t static void filter_insert_parallel(TBloomFilter& filter,const adler_uint_t* begin, const adler_uint_t* end,size_t threadNum){ #if (_IS_USED_MULTITHREAD) - const size_t kMinParallelSize=4096; + const size_t kInsertMinParallelSize=4096; const size_t size=end-begin; - if ((threadNum>1)&&(size>=kMinParallelSize)) { - const size_t maxThreanNum=size/(kMinParallelSize/2); + if ((threadNum>1)&&(size>=kInsertMinParallelSize)) { + const size_t maxThreanNum=size/(kInsertMinParallelSize/2); threadNum=(threadNum<=maxThreanNum)?threadNum:maxThreanNum; const size_t step=size/threadNum; @@ -663,15 +666,6 @@ static void tm_search_cover(const adler_uint_t* blocksBase, tm_search_cover(m_blocks.data(),indexs.data(),indexs.data()+indexs.size(), \ oldStream,newStream,m_filter,out_covers,coverNewOffset,coversLocker) -struct mt_data_t{ - CHLocker oldDataLocker; - CHLocker newDataLocker; - CHLocker coversLocker; - hpatch_StreamPos_t rollCount; - hpatch_StreamPos_t workCount; - volatile hpatch_StreamPos_t workIndex; -}; - void TDigestMatcher::_search_cover(const hpatch_TStreamInput* newData,hpatch_StreamPos_t newOffset, hpatch_TOutputCovers* out_covers,unsigned char* pmem, void* oldDataLocker,void* newDataLocker,void* coversLocker){ @@ -685,10 +679,20 @@ void TDigestMatcher::_search_cover(const hpatch_TStreamInput* newData,hpatch_Str __search_cover(m_sorted_limit,newOffset,coversLocker); } +#if (_IS_USED_MULTITHREAD) +struct mt_data_t{ + CHLocker oldDataLocker; + CHLocker newDataLocker; + CHLocker coversLocker; + hpatch_StreamPos_t rollCount; + hpatch_StreamPos_t workCount; + volatile hpatch_StreamPos_t workIndex; +}; +#endif + void TDigestMatcher::_search_cover_thread(hpatch_TOutputCovers* out_covers, - size_t threadIndex,size_t threadNum,void* mt_data){ + unsigned char* pmem,void* mt_data){ #if (_IS_USED_MULTITHREAD) - unsigned char* pmem=m_mem.data()+(m_newCacheSize+m_oldCacheSize)*threadIndex; mt_data_t& mt=*(mt_data_t*)mt_data; std::atomic& workIndex=*(std::atomic*)&mt.workIndex; while (true){ @@ -705,8 +709,8 @@ void TDigestMatcher::_search_cover_thread(hpatch_TOutputCovers* out_covers, } static inline void __search_cover_mt(TDigestMatcher* self,hpatch_TOutputCovers* out_covers, - size_t threadIndex,size_t threadNum,void* mt_data){ - self->_search_cover_thread(out_covers,threadIndex,threadNum,mt_data); + unsigned char* pmem,void* mt_data){ + self->_search_cover_thread(out_covers,pmem,mt_data); } void TDigestMatcher::search_cover(hpatch_TOutputCovers* out_covers){ @@ -714,20 +718,21 @@ void TDigestMatcher::search_cover(hpatch_TOutputCovers* out_covers){ if (m_newData->streamSizestreamSize-(m_kMatchBlockSize-1); - size_t bestStep=(kBestParallelSize/2>m_kMatchBlockSize)?kBestParallelSize:2*m_kMatchBlockSize; - hpatch_StreamPos_t workCount=(rollCount+bestStep-1)/bestStep; - workCount=(threadNum>workCount)?threadNum:workCount; if (threadNum>1){ + const hpatch_StreamPos_t rollCount=m_newData->streamSize-(m_kMatchBlockSize-1); + size_t bestStep=(kBestParallelSize/2>m_kMatchBlockSize)?kBestParallelSize:2*m_kMatchBlockSize; + hpatch_StreamPos_t workCount=(rollCount+bestStep-1)/bestStep; + workCount=(threadNum>workCount)?threadNum:workCount; mt_data_t mt_data; mt_data.rollCount=rollCount; mt_data.workCount=workCount; mt_data.workIndex=0; const size_t threadCount=threadNum-1; std::vector threads(threadCount); - for (size_t i=0;icollate_covers(out_covers); diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h index 85040cee..fd119ac0 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h @@ -87,8 +87,7 @@ class TDigestMatcher{ hpatch_TOutputCovers* out_covers,unsigned char* pmem, void* oldDataLocker=0,void* newDataLocker=0,void* coversLocker=0); public: //private for muti-thread - void _search_cover_thread(hpatch_TOutputCovers* out_covers, - size_t threadIndex,size_t threadNum,void* mt_data); + void _search_cover_thread(hpatch_TOutputCovers* out_covers,unsigned char* pmem,void* mt_data); }; }//namespace hdiff_private diff --git a/libHDiffPatch/HDiff/private_diff/suffix_string.cpp b/libHDiffPatch/HDiff/private_diff/suffix_string.cpp index 69205e23..13c43ae5 100644 --- a/libHDiffPatch/HDiff/private_diff/suffix_string.cpp +++ b/libHDiffPatch/HDiff/private_diff/suffix_string.cpp @@ -287,7 +287,7 @@ void TSuffixString::resetSuffixString(const TChar* src_begin,const TChar* src_en build_cache(threadNum); } -#define _cached2(ix) (TChar*)m_cached_SA_begin+(isLarge? \ +#define _cached2(ix,isLarge) (TChar*)m_cached_SA_begin+(isLarge? \ ((size_t*)m_cached2char_range)[ix]*sizeof(size_t) : ((TInt32*)m_cached2char_range)[ix]*sizeof(TInt32) ) TInt TSuffixString::lower_bound(const TChar* str,const TChar* str_end)const{ @@ -305,7 +305,7 @@ TInt TSuffixString::lower_bound(const TChar* str,const TChar* str_end)const{ if ((kMinStrLen>=2)&(m_cached2char_range!=0)){ size_t cc=((size_t)str[1]) | (((size_t)str[0])<<8); const bool isLarge=isUseLargeSA(); - return m_lower_bound(_cached2(cc),_cached2(cc+1), + return m_lower_bound(_cached2(cc,isLarge),_cached2(cc+1,isLarge), str,str_end,m_src_begin,m_src_end,m_cached_SA_begin,2); }else if (kMinStrLen>0){ size_t c=str[0]; @@ -395,9 +395,9 @@ void TSuffixString::build_cache(size_t threadNum){ const size_t rollSize=srcSize-(kFMMinStrSize-1); bf.init(rollSize,kFMZoom); //alloc large memory #if (_IS_USED_MULTITHREAD) - const size_t kMinParallelSize=4096; - if ((threadNum>1)&&(rollSize>=kMinParallelSize)) { - const size_t maxThreanNum=rollSize/(kMinParallelSize/2); + const size_t kInsertMinParallelSize=4096; + if ((threadNum>1)&&(rollSize>=kInsertMinParallelSize)) { + const size_t maxThreanNum=rollSize/(kInsertMinParallelSize/2); threadNum=(threadNum<=maxThreanNum)?threadNum:maxThreanNum; const size_t step=rollSize/threadNum; From c46c93df389a7eef46327738a0f4a209e9d23260 Mon Sep 17 00:00:00 2001 From: sisong Date: Sat, 8 Oct 2022 10:58:23 +0800 Subject: [PATCH 17/20] parallel diff with -m & -s is run ok; ref #150 #96 #18 --- hdiffz.cpp | 6 ++- libHDiffPatch/HDiff/diff.cpp | 13 ++--- .../limit_mem_diff/digest_matcher.cpp | 54 +++++++++---------- .../limit_mem_diff/digest_matcher.h | 3 ++ .../HDiff/private_diff/suffix_string.h | 5 +- 5 files changed, 43 insertions(+), 38 deletions(-) diff --git a/hdiffz.cpp b/hdiffz.cpp index dcd2c7c5..7b90cb71 100644 --- a/hdiffz.cpp +++ b/hdiffz.cpp @@ -146,7 +146,11 @@ static void printUsage(){ " matchScore>=0, DEFAULT -m-6, recommended bin: 0--4 text: 4--9 etc...\n" " -s[-matchBlockSize]\n" " all file load as Stream; fast;\n" - " requires O(oldFileSize*16/matchBlockSize+matchBlockSize*5)bytes of memory;\n" + " requires O(oldFileSize*16/matchBlockSize+matchBlockSize*5" +#if (_IS_USED_MULTITHREAD) + "*parallelThreadNumber" +#endif + ")bytes of memory;\n" " matchBlockSize>=4, DEFAULT -s-64, recommended 16,32,48,1k,64k,1m etc...\n" "special options:\n" " -block[-fastMatchBlockSize] \n" diff --git a/libHDiffPatch/HDiff/diff.cpp b/libHDiffPatch/HDiff/diff.cpp index 1bdcb016..da2e4e6d 100644 --- a/libHDiffPatch/HDiff/diff.cpp +++ b/libHDiffPatch/HDiff/diff.cpp @@ -679,7 +679,7 @@ static void search_and_dispose_cover(std::vector& covers,const TDiffD static void _search_and_dispose_cover_MT(std::vector* _covers,const TDiffData* _diff, const TSuffixString* sstring,int kMinSingleMatchScore, size_t workCount,size_t* pworkIndex){ - const size_t kPartPepeatSize=1024; + const size_t kPartPepeatSize=1024*2; std::vector& covers=*_covers; const TDiffData& diff=*_diff; std::atomic& workIndex=*(std::atomic*)pworkIndex; @@ -700,14 +700,13 @@ static void search_and_dispose_cover(std::vector& covers,const TDiffD } } #endif -#include "../../_clock_for_demo.h" + static void search_and_dispose_cover_MT(std::vector& covers,const TDiffData& diff, const TSuffixString& sstring,int kMinSingleMatchScore, - TDiffLimit* diffLimit=0,size_t threadNum=1){ -double t0=clock_s(); + TDiffLimit* diffLimit=0,size_t threadNum=1){ #if (_IS_USED_MULTITHREAD) - const size_t kMinParallelSize=1024*64; - const size_t kBestParallelSize=1024*1024*16; + const size_t kMinParallelSize=1024*1024*2; + const size_t kBestParallelSize=1024*1024*8; size_t newSize=diff.newData_end-diff.newData; if ((threadNum>1)&&(diffLimit==0)&&(diff.oldData!=diff.oldData_end)&&(newSize>=kMinParallelSize)){ const size_t maxThreanNum=newSize/(kMinParallelSize/2); @@ -733,8 +732,6 @@ double t0=clock_s(); { search_and_dispose_cover(covers,diff,sstring,kMinSingleMatchScore,diffLimit); } - double t1=clock_s(); - printf("search_and_dispose_cover time:%3.3f s\n",t1-t0); } static const hpatch_StreamPos_t _kNullCoverHitEndPos =~(hpatch_StreamPos_t)0; diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp index 77b4b0b3..53cd5a18 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp @@ -43,18 +43,17 @@ static const size_t kMinReadSize=1024*4; //for random first read speed static const size_t kMinBackupReadSize=256; static const size_t kBestMatchRange=1024*64; static const size_t kMaxLinkIndexFindCount=64; -static const size_t kMinParallelSize=1024*64; -static const size_t kBestParallelSize=1024*1024*16; +static const size_t kMinParallelSize=1024*1024*2; +static const size_t kBestParallelSize=1024*1024*8; #define readStream(stream,pos,dst,n) { \ - if (((n)>0)&&(!(stream)->read(stream,m_streamOffset+pos,dst,dst+(n)))) \ + if (((n)>0)&&(!(stream)->read(stream,pos,dst,dst+(n)))) \ throw std::runtime_error("TStreamCache::_resetPos_continue() stream->read() error!"); } struct TStreamCache{ - TStreamCache(const hpatch_TStreamInput* _stream,unsigned char* _cache,size_t _cacheSize, - hpatch_StreamPos_t _streamOffset,void* _locker) - :stream(_stream),m_readPos(0),m_readPosEnd(0),m_streamOffset(_streamOffset),m_locker(_locker), + TStreamCache(const hpatch_TStreamInput* _stream,unsigned char* _cache,size_t _cacheSize,void* _locker) + :stream(_stream),m_readPos(0),m_readPosEnd(0),m_locker(_locker), cache(_cache),cacheSize(_cacheSize),cachePos(_cacheSize){ } inline hpatch_StreamPos_t streamSize()const{ return stream->streamSize; } inline hpatch_StreamPos_t pos()const { return m_readPosEnd-dataLength(); } @@ -115,7 +114,6 @@ struct TStreamCache{ protected: hpatch_StreamPos_t m_readPos; hpatch_StreamPos_t m_readPosEnd; - const hpatch_StreamPos_t m_streamOffset; void* m_locker; unsigned char* cache; size_t cacheSize; @@ -154,7 +152,8 @@ size_t TDigestMatcher::getSearchThreadNum()const{ #if (_IS_USED_MULTITHREAD) const size_t threadNum=m_threadNum; hpatch_StreamPos_t size=m_newData->streamSize; - if ((threadNum>1)&&(size>=kMinParallelSize)&&(size/2>=m_kMatchBlockSize)) { + if ((threadNum>1)&&(m_oldData->streamSize>=m_kMatchBlockSize) + &&(size>=kMinParallelSize)&&(size/2>=m_kMatchBlockSize)) { const hpatch_StreamPos_t maxThreanNum=size/(kMinParallelSize/2); return (threadNum<=maxThreanNum)?threadNum:(size_t)maxThreanNum; }else @@ -273,7 +272,7 @@ void TDigestMatcher::getDigests(){ if (m_blocks.empty()) return; const size_t blockCount=m_blocks.size(); - TStreamCache streamCache(m_oldData,m_mem.data(),m_newCacheSize+m_oldCacheSize,0,0); + TStreamCache streamCache(m_oldData,m_mem.data(),m_newCacheSize+m_oldCacheSize,0); for (size_t i=0;istreamSize); streamCache.resetPos(0,readPos,m_kMatchBlockSize); @@ -296,10 +295,9 @@ void TDigestMatcher::getDigests(){ } struct TBlockStreamCache:public TStreamCache{ - TBlockStreamCache(const hpatch_TStreamInput* _stream,unsigned char* _cache, - size_t _cacheSize,size_t _backupCacheSize, size_t _kMatchBlockSize, - hpatch_StreamPos_t _streamOffset,void* _locker) - :TStreamCache(_stream,_cache,_cacheSize,_streamOffset,_locker), + TBlockStreamCache(const hpatch_TStreamInput* _stream,unsigned char* _cache,size_t _cacheSize, + size_t _backupCacheSize, size_t _kMatchBlockSize,void* _locker) + :TStreamCache(_stream,_cache,_cacheSize,_locker), backupCacheSize(_backupCacheSize),kMatchBlockSize(_kMatchBlockSize){ assert(cacheSize>=(backupCacheSize+kMatchBlockSize)); } inline bool resetPos(hpatch_StreamPos_t streamPos){ @@ -331,7 +329,7 @@ struct TOldStreamCache:public TBlockStreamCache{ size_t _minCacheSize,size_t _maxCacheSize, size_t _backupCacheSize,size_t _kMatchBlockSize,void* _locker) :TBlockStreamCache(_stream,_cache+_maxCacheSize-_minCacheSize, - _minCacheSize, _backupCacheSize,_kMatchBlockSize,0,_locker), + _minCacheSize, _backupCacheSize,_kMatchBlockSize,_locker), minCacheSize(_minCacheSize),maxCacheSize(_maxCacheSize){ } inline bool resetPos(hpatch_StreamPos_t streamPos){ @@ -379,11 +377,9 @@ struct TOldStreamCache:public TBlockStreamCache{ }; struct TNewStreamCache:public TBlockStreamCache{ - TNewStreamCache(const hpatch_TStreamInput* _stream,unsigned char* _cache, - size_t _cacheSize,size_t _backupCacheSize,size_t _kMatchBlockSize, - hpatch_StreamPos_t _streamOffset,void* _locker) - :TBlockStreamCache(_stream,_cache,_cacheSize,_backupCacheSize,_kMatchBlockSize, - _streamOffset,_locker){ + TNewStreamCache(const hpatch_TStreamInput* _stream,unsigned char* _cache,size_t _cacheSize, + size_t _backupCacheSize,size_t _kMatchBlockSize,void* _locker) + :TBlockStreamCache(_stream,_cache,_cacheSize,_backupCacheSize,_kMatchBlockSize,_locker){ resetPos(0); } void toBestDataLength(){ @@ -670,7 +666,7 @@ void TDigestMatcher::_search_cover(const hpatch_TStreamInput* newData,hpatch_Str hpatch_TOutputCovers* out_covers,unsigned char* pmem, void* oldDataLocker,void* newDataLocker,void* coversLocker){ TNewStreamCache newStream(newData,pmem,m_newCacheSize,m_backupCacheSize, - m_kMatchBlockSize,newOffset,newDataLocker); + m_kMatchBlockSize,newDataLocker); TOldStreamCache oldStream(m_oldData,pmem+m_newCacheSize,m_oldMinCacheSize, m_oldCacheSize,m_backupCacheSize,m_kMatchBlockSize,oldDataLocker); if (m_isUseLargeSorted) @@ -684,7 +680,6 @@ struct mt_data_t{ CHLocker oldDataLocker; CHLocker newDataLocker; CHLocker coversLocker; - hpatch_StreamPos_t rollCount; hpatch_StreamPos_t workCount; volatile hpatch_StreamPos_t workIndex; }; @@ -693,16 +688,20 @@ struct mt_data_t{ void TDigestMatcher::_search_cover_thread(hpatch_TOutputCovers* out_covers, unsigned char* pmem,void* mt_data){ #if (_IS_USED_MULTITHREAD) + const size_t kPartPepeatSize=m_kMatchBlockSize-1; mt_data_t& mt=*(mt_data_t*)mt_data; + const hpatch_StreamPos_t workCount=mt.workCount; + const hpatch_StreamPos_t rollCount=m_newData->streamSize-(m_kMatchBlockSize-1); std::atomic& workIndex=*(std::atomic*)&mt.workIndex; while (true){ hpatch_StreamPos_t curWorkIndex=workIndex++; - if (curWorkIndex>=mt.workCount) break; - hpatch_TStreamInput newData=*m_newData; - hpatch_StreamPos_t newOffset=mt.rollCount*curWorkIndex/mt.workCount; - newData.streamSize=((curWorkIndex+1=workCount) break; + hpatch_StreamPos_t new_begin=rollCount*curWorkIndex/workCount; + hpatch_StreamPos_t new_end=(curWorkIndex+1streamSize); + TStreamInputClip newClip; + TStreamInputClip_init(&newClip,m_newData,new_begin,new_end+kPartPepeatSize); + _search_cover(&newClip.base,new_begin,out_covers,pmem, mt.oldDataLocker.locker,mt.newDataLocker.locker,mt.coversLocker.locker); } #endif @@ -724,7 +723,6 @@ void TDigestMatcher::search_cover(hpatch_TOutputCovers* out_covers){ hpatch_StreamPos_t workCount=(rollCount+bestStep-1)/bestStep; workCount=(threadNum>workCount)?threadNum:workCount; mt_data_t mt_data; - mt_data.rollCount=rollCount; mt_data.workCount=workCount; mt_data.workIndex=0; const size_t threadCount=threadNum-1; diff --git a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h index fd119ac0..a6ead00e 100644 --- a/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h +++ b/libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.h @@ -65,6 +65,9 @@ class TDigestMatcher{ size_t kMatchBlockSize,size_t threadNum); void search_cover(hpatch_TOutputCovers* out_covers); ~TDigestMatcher(); +private: + TDigestMatcher(const TDigestMatcher &); //empty + TDigestMatcher &operator=(const TDigestMatcher &); //empty private: const hpatch_TStreamInput* m_oldData; const hpatch_TStreamInput* m_newData; diff --git a/libHDiffPatch/HDiff/private_diff/suffix_string.h b/libHDiffPatch/HDiff/private_diff/suffix_string.h index 92a7dd51..812e5579 100644 --- a/libHDiffPatch/HDiff/private_diff/suffix_string.h +++ b/libHDiffPatch/HDiff/private_diff/suffix_string.h @@ -79,7 +79,7 @@ class TSuffixString{ typedef ptrdiff_t TInt; typedef int32_t TInt32; typedef unsigned char TChar; - TSuffixString(bool isUsedFastMatch=false); + explicit TSuffixString(bool isUsedFastMatch=false); ~TSuffixString(); //throw std::runtime_error when create SA error @@ -98,6 +98,9 @@ class TSuffixString{ return (TInt)m_SA_limit[i]; } TInt lower_bound(const TChar* str,const TChar* str_end)const;//return index in SA; must str_end-str>=2 ! +private: + TSuffixString(const TSuffixString &); //empty + TSuffixString &operator=(const TSuffixString &); //empty private: const TChar* m_src_begin;//原字符串. const TChar* m_src_end; From 894fe2403ab6591b20e1e954758c5a66d5feb2a5 Mon Sep 17 00:00:00 2001 From: sisong Date: Sat, 8 Oct 2022 13:59:25 +0800 Subject: [PATCH 18/20] vc2012 build ok; --- builds/codeblocks/HDiffZ.cbp | 8 +--- builds/codeblocks/unitTest.cbp | 8 +--- builds/vc/HDiffZ.vcxproj | 4 +- builds/vc/unitTest.vcxproj | 4 +- builds/vc2019/unitTest.vcxproj | 4 +- .../bestParams.xcodeproj/project.pbxproj | 16 ++++---- builds/xcode/hdiffz.xcodeproj/project.pbxproj | 16 ++++---- .../xcode/unitTest.xcodeproj/project.pbxproj | 18 ++++---- libHDiffPatch/HDiff/diff.cpp | 27 ++++++++---- .../libdivsufsort/divsufsort.c.inc.h | 41 +++++++++++++------ 10 files changed, 82 insertions(+), 64 deletions(-) diff --git a/builds/codeblocks/HDiffZ.cbp b/builds/codeblocks/HDiffZ.cbp index 0286ac49..cce6f32e 100644 --- a/builds/codeblocks/HDiffZ.cbp +++ b/builds/codeblocks/HDiffZ.cbp @@ -320,12 +320,8 @@ - - - - + + diff --git a/builds/codeblocks/unitTest.cbp b/builds/codeblocks/unitTest.cbp index b2daa2d9..c08f4076 100644 --- a/builds/codeblocks/unitTest.cbp +++ b/builds/codeblocks/unitTest.cbp @@ -39,12 +39,8 @@ - - - - + + diff --git a/builds/vc/HDiffZ.vcxproj b/builds/vc/HDiffZ.vcxproj index 15c97b35..00679aa9 100644 --- a/builds/vc/HDiffZ.vcxproj +++ b/builds/vc/HDiffZ.vcxproj @@ -205,8 +205,8 @@ - - + + diff --git a/builds/vc/unitTest.vcxproj b/builds/vc/unitTest.vcxproj index 420989fb..397e3ccf 100644 --- a/builds/vc/unitTest.vcxproj +++ b/builds/vc/unitTest.vcxproj @@ -170,8 +170,8 @@ - - + + diff --git a/builds/vc2019/unitTest.vcxproj b/builds/vc2019/unitTest.vcxproj index 8bef32f0..7e9d1abc 100644 --- a/builds/vc2019/unitTest.vcxproj +++ b/builds/vc2019/unitTest.vcxproj @@ -327,8 +327,8 @@ - - + + diff --git a/builds/xcode/bestParams.xcodeproj/project.pbxproj b/builds/xcode/bestParams.xcodeproj/project.pbxproj index 61171768..9dab8533 100644 --- a/builds/xcode/bestParams.xcodeproj/project.pbxproj +++ b/builds/xcode/bestParams.xcodeproj/project.pbxproj @@ -14,8 +14,8 @@ 80D1B2A7199A40A700F6B3A8 /* patch.c in Sources */ = {isa = PBXBuildFile; fileRef = 80D1B2A2199A40A700F6B3A8 /* patch.c */; }; D6241ACD21DA5C9600E2846A /* liblzma.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D6EF751F21C13CE4000EBBCC /* liblzma.a */; }; D639ACDE208B458500811BB3 /* _private_searchBestParams.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D639ACDD208B458500811BB3 /* _private_searchBestParams.cpp */; }; - D66166271E189F5A005C570A /* divsufsort.c in Sources */ = {isa = PBXBuildFile; fileRef = D661661E1E189F5A005C570A /* divsufsort.c */; }; - D66166281E189F5A005C570A /* divsufsort64.c in Sources */ = {isa = PBXBuildFile; fileRef = D66166211E189F5A005C570A /* divsufsort64.c */; }; + D66166271E189F5A005C570A /* divsufsort.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D661661E1E189F5A005C570A /* divsufsort.cpp */; }; + D66166281E189F5A005C570A /* divsufsort64.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D66166211E189F5A005C570A /* divsufsort64.cpp */; }; D68CD9151F469DDB00A61337 /* adler_roll.c in Sources */ = {isa = PBXBuildFile; fileRef = D68CD90D1F469DDB00A61337 /* adler_roll.c */; }; D68CD9161F469DDB00A61337 /* digest_matcher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D68CD9111F469DDB00A61337 /* digest_matcher.cpp */; }; D68CD9171F469DDB00A61337 /* stream_serialize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D68CD9131F469DDB00A61337 /* stream_serialize.cpp */; }; @@ -53,10 +53,10 @@ 80D1B2A3199A40A700F6B3A8 /* patch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = patch.h; sourceTree = ""; }; D639ACDD208B458500811BB3 /* _private_searchBestParams.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = _private_searchBestParams.cpp; path = ../../test/_private_searchBestParams.cpp; sourceTree = ""; }; D661661D1E189F5A005C570A /* config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = ""; }; - D661661E1E189F5A005C570A /* divsufsort.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = divsufsort.c; sourceTree = ""; }; + D661661E1E189F5A005C570A /* divsufsort.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = divsufsort.cpp; sourceTree = ""; }; D661661F1E189F5A005C570A /* divsufsort.c.inc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort.c.inc.h; sourceTree = ""; }; D66166201E189F5A005C570A /* divsufsort.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort.h; sourceTree = ""; }; - D66166211E189F5A005C570A /* divsufsort64.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = divsufsort64.c; sourceTree = ""; }; + D66166211E189F5A005C570A /* divsufsort64.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = divsufsort64.cpp; sourceTree = ""; }; D66166221E189F5A005C570A /* divsufsort64.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort64.h; sourceTree = ""; }; D66166231E189F5A005C570A /* divsufsort_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort_private.h; sourceTree = ""; }; D66166241E189F5A005C570A /* sssort.c.inc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sssort.c.inc.h; sourceTree = ""; }; @@ -154,10 +154,10 @@ isa = PBXGroup; children = ( D661661D1E189F5A005C570A /* config.h */, - D661661E1E189F5A005C570A /* divsufsort.c */, + D661661E1E189F5A005C570A /* divsufsort.cpp */, D661661F1E189F5A005C570A /* divsufsort.c.inc.h */, D66166201E189F5A005C570A /* divsufsort.h */, - D66166211E189F5A005C570A /* divsufsort64.c */, + D66166211E189F5A005C570A /* divsufsort64.cpp */, D66166221E189F5A005C570A /* divsufsort64.h */, D66166231E189F5A005C570A /* divsufsort_private.h */, D66166241E189F5A005C570A /* sssort.c.inc.h */, @@ -264,11 +264,11 @@ files = ( D639ACDE208B458500811BB3 /* _private_searchBestParams.cpp in Sources */, D6EE50601F2990AD002854A2 /* compress_detect.cpp in Sources */, - D66166281E189F5A005C570A /* divsufsort64.c in Sources */, + D66166281E189F5A005C570A /* divsufsort64.cpp in Sources */, D68CD9161F469DDB00A61337 /* digest_matcher.cpp in Sources */, 80D1B2A5199A40A700F6B3A8 /* bytes_rle.cpp in Sources */, 80D1B2A6199A40A700F6B3A8 /* suffix_string.cpp in Sources */, - D66166271E189F5A005C570A /* divsufsort.c in Sources */, + D66166271E189F5A005C570A /* divsufsort.cpp in Sources */, 0D7FEAB2283124970029772D /* hpatch_lite.c in Sources */, 80D1B2A4199A40A700F6B3A8 /* diff.cpp in Sources */, 80D1B2A7199A40A700F6B3A8 /* patch.c in Sources */, diff --git a/builds/xcode/hdiffz.xcodeproj/project.pbxproj b/builds/xcode/hdiffz.xcodeproj/project.pbxproj index fcfb2528..3e65ad3c 100644 --- a/builds/xcode/hdiffz.xcodeproj/project.pbxproj +++ b/builds/xcode/hdiffz.xcodeproj/project.pbxproj @@ -24,8 +24,8 @@ D690AB9C1F20792A0089DC57 /* hdiffz_import_patch.c in Sources */ = {isa = PBXBuildFile; fileRef = D690AB9B1F20792A0089DC57 /* hdiffz_import_patch.c */; }; D690ABB71F2079E80089DC57 /* diff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D690ABA01F2079E80089DC57 /* diff.cpp */; }; D690ABB81F2079E80089DC57 /* bytes_rle.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D690ABA31F2079E80089DC57 /* bytes_rle.cpp */; }; - D690ABB91F2079E80089DC57 /* divsufsort.c in Sources */ = {isa = PBXBuildFile; fileRef = D690ABA71F2079E80089DC57 /* divsufsort.c */; }; - D690ABBA1F2079E80089DC57 /* divsufsort64.c in Sources */ = {isa = PBXBuildFile; fileRef = D690ABAA1F2079E80089DC57 /* divsufsort64.c */; }; + D690ABB91F2079E80089DC57 /* divsufsort.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D690ABA71F2079E80089DC57 /* divsufsort.cpp */; }; + D690ABBA1F2079E80089DC57 /* divsufsort64.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D690ABAA1F2079E80089DC57 /* divsufsort64.cpp */; }; D690ABBB1F2079E80089DC57 /* suffix_string.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D690ABB21F2079E80089DC57 /* suffix_string.cpp */; }; D690ABBC1F2079E80089DC57 /* patch.c in Sources */ = {isa = PBXBuildFile; fileRef = D690ABB51F2079E80089DC57 /* patch.c */; }; D690AEFB1F2097C90089DC57 /* libbz2.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = D690AEFA1F2097C90089DC57 /* libbz2.tbd */; }; @@ -96,10 +96,10 @@ D690ABA31F2079E80089DC57 /* bytes_rle.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bytes_rle.cpp; sourceTree = ""; }; D690ABA41F2079E80089DC57 /* bytes_rle.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bytes_rle.h; sourceTree = ""; }; D690ABA61F2079E80089DC57 /* config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = ""; }; - D690ABA71F2079E80089DC57 /* divsufsort.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = divsufsort.c; sourceTree = ""; }; + D690ABA71F2079E80089DC57 /* divsufsort.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = divsufsort.cpp; sourceTree = ""; }; D690ABA81F2079E80089DC57 /* divsufsort.c.inc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort.c.inc.h; sourceTree = ""; }; D690ABA91F2079E80089DC57 /* divsufsort.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort.h; sourceTree = ""; }; - D690ABAA1F2079E80089DC57 /* divsufsort64.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = divsufsort64.c; sourceTree = ""; }; + D690ABAA1F2079E80089DC57 /* divsufsort64.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = divsufsort64.cpp; sourceTree = ""; }; D690ABAB1F2079E80089DC57 /* divsufsort64.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort64.h; sourceTree = ""; }; D690ABAC1F2079E80089DC57 /* divsufsort_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort_private.h; sourceTree = ""; }; D690ABAD1F2079E80089DC57 /* sssort.c.inc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sssort.c.inc.h; sourceTree = ""; }; @@ -302,10 +302,10 @@ isa = PBXGroup; children = ( D690ABA61F2079E80089DC57 /* config.h */, - D690ABA71F2079E80089DC57 /* divsufsort.c */, + D690ABA71F2079E80089DC57 /* divsufsort.cpp */, D690ABA81F2079E80089DC57 /* divsufsort.c.inc.h */, D690ABA91F2079E80089DC57 /* divsufsort.h */, - D690ABAA1F2079E80089DC57 /* divsufsort64.c */, + D690ABAA1F2079E80089DC57 /* divsufsort64.cpp */, D690ABAB1F2079E80089DC57 /* divsufsort64.h */, D690ABAC1F2079E80089DC57 /* divsufsort_private.h */, D690ABAD1F2079E80089DC57 /* sssort.c.inc.h */, @@ -447,8 +447,8 @@ D61DE5472344D16900F6FCCF /* dir_diff_tools.cpp in Sources */, D69EECDF220C49DE001EF085 /* parallel_channel.cpp in Sources */, D690ABB71F2079E80089DC57 /* diff.cpp in Sources */, - D690ABBA1F2079E80089DC57 /* divsufsort64.c in Sources */, - D690ABB91F2079E80089DC57 /* divsufsort.c in Sources */, + D690ABBA1F2079E80089DC57 /* divsufsort64.cpp in Sources */, + D690ABB91F2079E80089DC57 /* divsufsort.cpp in Sources */, D690AB9C1F20792A0089DC57 /* hdiffz_import_patch.c in Sources */, D690AB9C1F20792A0089DC57 /* hdiffz_import_patch.c in Sources */, D68CD8F21F4426B100A61337 /* stream_serialize.cpp in Sources */, diff --git a/builds/xcode/unitTest.xcodeproj/project.pbxproj b/builds/xcode/unitTest.xcodeproj/project.pbxproj index 56650345..5278f902 100644 --- a/builds/xcode/unitTest.xcodeproj/project.pbxproj +++ b/builds/xcode/unitTest.xcodeproj/project.pbxproj @@ -17,8 +17,8 @@ D6BD9BB017578491004886DE /* bytes_rle.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D6BD9BA517578491004886DE /* bytes_rle.cpp */; }; D6BD9BB117578491004886DE /* suffix_string.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D6BD9BA817578491004886DE /* suffix_string.cpp */; }; D6BD9BB217578491004886DE /* patch.c in Sources */ = {isa = PBXBuildFile; fileRef = D6BD9BAB17578491004886DE /* patch.c */; }; - D6C701511E17E5BC00C19D84 /* divsufsort.c in Sources */ = {isa = PBXBuildFile; fileRef = D6C701481E17E5BC00C19D84 /* divsufsort.c */; }; - D6C701521E17E5BC00C19D84 /* divsufsort64.c in Sources */ = {isa = PBXBuildFile; fileRef = D6C7014B1E17E5BC00C19D84 /* divsufsort64.c */; }; + D6C701511E17E5BC00C19D84 /* divsufsort.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D6C701481E17E5BC00C19D84 /* divsufsort.cpp */; }; + D6C701521E17E5BC00C19D84 /* divsufsort64.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D6C7014B1E17E5BC00C19D84 /* divsufsort64.cpp */; }; D6E150A01F238B2C00C2AD3D /* libbz2.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = D6E1509E1F238B2C00C2AD3D /* libbz2.tbd */; }; D6E150A11F238B2C00C2AD3D /* libz.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = D6E1509F1F238B2C00C2AD3D /* libz.tbd */; }; D6EE50631F2A0AA3002854A2 /* compress_detect.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D6EE50611F2A0AA3002854A2 /* compress_detect.cpp */; }; @@ -62,10 +62,10 @@ D6BD9BAB17578491004886DE /* patch.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = patch.c; sourceTree = ""; }; D6BD9BAC17578491004886DE /* patch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = patch.h; sourceTree = ""; }; D6C701471E17E5BC00C19D84 /* config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = ""; }; - D6C701481E17E5BC00C19D84 /* divsufsort.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = divsufsort.c; sourceTree = ""; }; + D6C701481E17E5BC00C19D84 /* divsufsort.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = divsufsort.cpp; sourceTree = ""; }; D6C701491E17E5BC00C19D84 /* divsufsort.c.inc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort.c.inc.h; sourceTree = ""; }; D6C7014A1E17E5BC00C19D84 /* divsufsort.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort.h; sourceTree = ""; }; - D6C7014B1E17E5BC00C19D84 /* divsufsort64.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = divsufsort64.c; sourceTree = ""; }; + D6C7014B1E17E5BC00C19D84 /* divsufsort64.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = divsufsort64.cpp; sourceTree = ""; }; D6C7014C1E17E5BC00C19D84 /* divsufsort64.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort64.h; sourceTree = ""; }; D6C7014D1E17E5BC00C19D84 /* divsufsort_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort_private.h; sourceTree = ""; }; D6C7014E1E17E5BC00C19D84 /* sssort.c.inc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sssort.c.inc.h; sourceTree = ""; }; @@ -171,10 +171,10 @@ isa = PBXGroup; children = ( D6C701471E17E5BC00C19D84 /* config.h */, - D6C701481E17E5BC00C19D84 /* divsufsort.c */, + D6C701481E17E5BC00C19D84 /* divsufsort.cpp */, D6C701491E17E5BC00C19D84 /* divsufsort.c.inc.h */, D6C7014A1E17E5BC00C19D84 /* divsufsort.h */, - D6C7014B1E17E5BC00C19D84 /* divsufsort64.c */, + D6C7014B1E17E5BC00C19D84 /* divsufsort64.cpp */, D6C7014C1E17E5BC00C19D84 /* divsufsort64.h */, D6C7014D1E17E5BC00C19D84 /* divsufsort_private.h */, D6C7014E1E17E5BC00C19D84 /* sssort.c.inc.h */, @@ -269,10 +269,10 @@ D68CD8FD1F44367A00A61337 /* digest_matcher.cpp in Sources */, D6BD9BB017578491004886DE /* bytes_rle.cpp in Sources */, D6BD9BB117578491004886DE /* suffix_string.cpp in Sources */, - D6BD9BB217578491004886DE /* patch.c in Sources */, - D6C701521E17E5BC00C19D84 /* divsufsort64.c in Sources */, + D6BD9BB217578491004886DE /* patch.cpp in Sources */, + D6C701521E17E5BC00C19D84 /* divsufsort64.cpp in Sources */, 0D7FEAAC283124810029772D /* hpatch_lite.c in Sources */, - D6C701511E17E5BC00C19D84 /* divsufsort.c in Sources */, + D6C701511E17E5BC00C19D84 /* divsufsort.cpp in Sources */, D68CD8FC1F44367A00A61337 /* adler_roll.c in Sources */, D639ACDC2089C7A200811BB3 /* unit_test.cpp in Sources */, D68CD8FE1F44367A00A61337 /* stream_serialize.cpp in Sources */, diff --git a/libHDiffPatch/HDiff/diff.cpp b/libHDiffPatch/HDiff/diff.cpp index da2e4e6d..a1c948cd 100644 --- a/libHDiffPatch/HDiff/diff.cpp +++ b/libHDiffPatch/HDiff/diff.cpp @@ -676,13 +676,18 @@ static void search_and_dispose_cover(std::vector& covers,const TDiffD } #if (_IS_USED_MULTITHREAD) - static void _search_and_dispose_cover_MT(std::vector* _covers,const TDiffData* _diff, - const TSuffixString* sstring,int kMinSingleMatchScore, - size_t workCount,size_t* pworkIndex){ + struct mt_data_t{ + const TDiffData* diff; + const TSuffixString* sstring; + int kMinSingleMatchScore; + volatile size_t workIndex; + }; + + static void _search_and_dispose_cover_MT(std::vector* _covers,size_t workCount,mt_data_t* mt){ const size_t kPartPepeatSize=1024*2; std::vector& covers=*_covers; - const TDiffData& diff=*_diff; - std::atomic& workIndex=*(std::atomic*)pworkIndex; + const TDiffData& diff=*mt->diff; + std::atomic& workIndex=*(std::atomic*)&mt->workIndex; const size_t newSize=diff.newData_end-diff.newData; while (true){ size_t curWorkIndex=workIndex++; @@ -694,7 +699,7 @@ static void search_and_dispose_cover(std::vector& covers,const TDiffD diff_part.newData=diff.newData+new_begin; diff_part.newData_end=diff.newData+new_end; size_t coverCountBack=covers.size(); - search_and_dispose_cover(covers,diff_part,*sstring,kMinSingleMatchScore,0); + search_and_dispose_cover(covers,diff_part,*mt->sstring,mt->kMinSingleMatchScore,0); for (size_t i=coverCountBack;i& covers,const TDi size_t workCount=(newSize+kBestParallelSize-1)/kBestParallelSize; workCount=(threadNum>workCount)?threadNum:workCount; - size_t workIndex=0; const size_t threadCount=threadNum-1; std::vector threads(threadCount); std::vector > threadCovers(threadCount); + mt_data_t mt_data; + mt_data.diff=&diff; + mt_data.sstring=&sstring; + mt_data.kMinSingleMatchScore=kMinSingleMatchScore; + mt_data.workIndex=0; for (size_t i=0;i #include "../../../../libParallel/parallel_channel.h" #if (_IS_USED_MULTITHREAD) #include #endif -#include /*- Private Functions -*/ -static void _sssort_thread(HLocker locker,saint_t* c0,saint_t* c1,saidx_t* j, - saidx_t *bucket_B,const sauchar_t *T, const sastore_t *PAb, - sastore_t* SA,sastore_t *buf, saidx_t bufsize,saidx_t n,saidx_t m){ +struct mt_data_t{ + CHLocker locker; + const sauchar_t* T; + sastore_t* SA; + const saidx_t* bucket_B; + const sastore_t* PAb; + saidx_t bufsize; + saidx_t n; + saidx_t m; +}; + +static void _sssort_thread(saint_t* c0,saint_t* c1,saidx_t* j, + sastore_t *buf,mt_data_t* mt){ saidx_t k = 0; saidx_t l; + const saidx_t* bucket_B=mt->bucket_B; for(;;) { { - CAutoLocker __autoLocker(locker); + CAutoLocker __autoLocker(mt->locker.locker); if(0 < (l = *j)) { saint_t d0 = *c0, d1 = *c1; do { @@ -54,8 +65,9 @@ static void _sssort_thread(HLocker locker,saint_t* c0,saint_t* c1,saidx_t* j, } } if(l == 0) { break; } - sssort(T, PAb, SA + k, SA + l, - buf, bufsize, 2, n, *(SA + k) == (m - 1)); + sastore_t* SA=mt->SA; + sssort(mt->T, mt->PAb, SA + k, SA + l, + buf, mt->bufsize, 2, mt->n, *(SA + k) == (mt->m - 1)); } } @@ -122,18 +134,23 @@ sort_typeBstar(const sauchar_t *T, sastore_t* SA, /* Sort the type B* substrings using sssort. */ #if (_IS_USED_MULTITHREAD) if (threadNum>1){ - CHLocker locker; const saidx_t bufsize = (n - (2 * m)) / (saidx_t)threadNum; const size_t threadCount=threadNum-1; c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; + mt_data_t mt_data; + mt_data.T=T; + mt_data.SA=SA; + mt_data.bucket_B=bucket_B; + mt_data.PAb=PAb; + mt_data.bufsize=bufsize; + mt_data.n=n; + mt_data.m=m; std::vector threads(threadCount); sastore_t* buf = SA + m; for (size_t ti=0;ti Date: Sat, 8 Oct 2022 16:45:48 +0800 Subject: [PATCH 19/20] update version; fix CI builds; --- CHANGELOG.md | 10 ++++++- LICENSE | 2 +- README.md | 16 +++++----- README_cmdline_cn.md | 2 +- builds/codeblocks/unitTest.cbp | 7 +++-- builds/vc/unitTest.vcxproj | 2 ++ builds/vc2019/unitTest.vcxproj | 2 ++ .../bestParams.xcodeproj/project.pbxproj | 29 ++++++++++++++++--- builds/xcode/hdiffz.xcodeproj/project.pbxproj | 4 +-- .../xcode/unitTest.xcodeproj/project.pbxproj | 27 +++++++++++++++-- libHDiffPatch/HDiff/diff_for_hpatch_lite.h | 3 +- .../libdivsufsort/divsufsort.c.inc.h | 10 +++---- .../private_diff/libdivsufsort/divsufsort.h | 2 +- .../private_diff/libdivsufsort/divsufsort64.h | 2 +- .../HDiff/private_diff/suffix_string.cpp | 4 +-- libHDiffPatch/HPatch/patch_types.h | 2 +- 16 files changed, 91 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 595a8b3f..45a12085 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,15 @@ # HDiffPatch Change Log full changelog at: https://github.com/sisong/HDiffPatch/commits - + +## [v4.4.0](https://github.com/sisong/HDiffPatch/tree/v4.4.0) - 2022-10-09 +### Changed +* optimize diff -m & -s speed by muti-thread parallel, requires C++11. + +## [v4.3.0](https://github.com/sisong/HDiffPatch/tree/v4.3.0) - 2022-09-23 +### Changed +* recode some patch error code: decompresser errors, file error, disk space full error, jni error + ## [v4.2.0](https://github.com/sisong/HDiffPatch/tree/v4.2.0) - 2022-05-15 ### Added * add function create_lite_diff() & hpatch_lite_open(),hpatch_lite_patch(); optimized hpatch on MCU,NB-IoT... (demo [HPatchLite](https://github.com/sisong/HPatchLite)) diff --git a/LICENSE b/LICENSE index a289e344..89298f56 100644 --- a/LICENSE +++ b/LICENSE @@ -1,7 +1,7 @@ MIT License HDiffPatch -Copyright (c) 2012-2021 housisong +Copyright (c) 2012-2022 housisong Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index f0da1fea..1b382496 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # [HDiffPatch](https://github.com/sisong/HDiffPatch) -[![release](https://img.shields.io/badge/release-v4.3.0-blue.svg)](https://github.com/sisong/HDiffPatch/releases) +[![release](https://img.shields.io/badge/release-v4.4.0-blue.svg)](https://github.com/sisong/HDiffPatch/releases) [![license](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/sisong/HDiffPatch/blob/master/LICENSE) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-blue.svg)](https://github.com/sisong/HDiffPatch/pulls) [![+issue Welcome](https://img.shields.io/github/issues-raw/sisong/HDiffPatch?color=green&label=%2Bissue%20welcome)](https://github.com/sisong/HDiffPatch/issues) @@ -75,7 +75,7 @@ memory options: matchScore>=0, DEFAULT -m-6, recommended bin: 0--4 text: 4--9 etc... -s[-matchBlockSize] all file load as Stream; fast; - requires O(oldFileSize*16/matchBlockSize+matchBlockSize*5) bytes of memory; + requires O(oldFileSize*16/matchBlockSize+matchBlockSize*5*parallelThreadNumber)bytes of memory; matchBlockSize>=4, DEFAULT -s-64, recommended 16,32,48,1k,64k,1m etc... special options: -block[-fastMatchBlockSize] @@ -99,7 +99,7 @@ special options: if parallelThreadNumber>1 then open multi-thread Parallel mode; DEFAULT -p-4; requires more memory! -c-compressType[-compressLevel] - set outDiffFile Compress type & level, DEFAULT uncompress; + set outDiffFile Compress type, DEFAULT uncompress; for resave diffFile,recompress diffFile to outDiffFile by new set; support compress type & level & dict: (re. https://github.com/sisong/lzbench/blob/master/lzbench171_sorted.md ) @@ -121,7 +121,7 @@ special options: support run by multi-thread parallel, fast! WARNING: code not compatible with it compressed by -c-lzma! -c-zstd[-{0..22}[-dictBits]] DEFAULT level 20 - dictBits can 10--31, DEFAULT 24. + dictBits can 10--31, DEFAULT 23. support run by multi-thread parallel, fast! -C-checksumType set outDiffFile Checksum type for directory diff, DEFAULT -C-fadler64; @@ -164,9 +164,9 @@ special options: if used -f and write path is exist directory, will always return error. --patch swap to hpatchz mode. - -h or -? - output Help info (this usage). -v output Version info. + -h (or -?) + output usage info. ``` ## **patch** command line usage: @@ -217,9 +217,9 @@ special options: if patch output file, will always return error; if patch output directory, will overwrite, but not delete needless existing files in directory. - -h or -? - output Help info (this usage). -v output Version info. + -h (or -?) + output usage info. ``` --- diff --git a/README_cmdline_cn.md b/README_cmdline_cn.md index 4ba9b664..b50b4e05 100644 --- a/README_cmdline_cn.md +++ b/README_cmdline_cn.md @@ -18,7 +18,7 @@ 数据的可压缩性相关,一般输入数据的可压缩性越大,这个值就可以越大。 -s[-matchBlockSize] 所有文件当作文件流加载;一般速度比较快; - 需要的内存大小: O(旧版本文件大小*16/matchBlockSize+matchBlockSize*5); + 需要的内存大小: O(旧版本文件大小*16/matchBlockSize+matchBlockSize*5*parallelThreadNumber); 匹配块大小matchBlockSize>=4, 默认为64, 推荐16,32,48,1k,64k,1m等; 一般匹配块越大,内存占用越小,速度越快,但补丁包可能变大。 其他选项: diff --git a/builds/codeblocks/unitTest.cbp b/builds/codeblocks/unitTest.cbp index c08f4076..e1a72fcc 100644 --- a/builds/codeblocks/unitTest.cbp +++ b/builds/codeblocks/unitTest.cbp @@ -35,6 +35,7 @@ + @@ -47,12 +48,14 @@ - + - + + + diff --git a/builds/vc/unitTest.vcxproj b/builds/vc/unitTest.vcxproj index 397e3ccf..cf4566d9 100644 --- a/builds/vc/unitTest.vcxproj +++ b/builds/vc/unitTest.vcxproj @@ -180,6 +180,8 @@ CompileAsC + + diff --git a/builds/vc2019/unitTest.vcxproj b/builds/vc2019/unitTest.vcxproj index 7e9d1abc..fdda5b61 100644 --- a/builds/vc2019/unitTest.vcxproj +++ b/builds/vc2019/unitTest.vcxproj @@ -339,6 +339,8 @@ CompileAsC CompileAsC + + diff --git a/builds/xcode/bestParams.xcodeproj/project.pbxproj b/builds/xcode/bestParams.xcodeproj/project.pbxproj index 9dab8533..c6a74e26 100644 --- a/builds/xcode/bestParams.xcodeproj/project.pbxproj +++ b/builds/xcode/bestParams.xcodeproj/project.pbxproj @@ -8,6 +8,8 @@ /* Begin PBXBuildFile section */ 0D7FEAB2283124970029772D /* hpatch_lite.c in Sources */ = {isa = PBXBuildFile; fileRef = 0D7FEAAF283124970029772D /* hpatch_lite.c */; }; + 0DD93B1B28F16B5D0097699C /* parallel_channel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0DD93B1728F16B5D0097699C /* parallel_channel.cpp */; }; + 0DD93B1C28F16B5D0097699C /* parallel_import.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0DD93B1928F16B5D0097699C /* parallel_import.cpp */; }; 80D1B2A4199A40A700F6B3A8 /* diff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 80D1B298199A40A700F6B3A8 /* diff.cpp */; }; 80D1B2A5199A40A700F6B3A8 /* bytes_rle.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 80D1B29B199A40A700F6B3A8 /* bytes_rle.cpp */; }; 80D1B2A6199A40A700F6B3A8 /* suffix_string.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 80D1B29F199A40A700F6B3A8 /* suffix_string.cpp */; }; @@ -41,6 +43,10 @@ 0D7FEAAF283124970029772D /* hpatch_lite.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = hpatch_lite.c; sourceTree = ""; }; 0D7FEAB0283124970029772D /* hpatch_lite_input_cache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hpatch_lite_input_cache.h; sourceTree = ""; }; 0D7FEAB1283124970029772D /* hpatch_lite_types.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hpatch_lite_types.h; sourceTree = ""; }; + 0DD93B1728F16B5D0097699C /* parallel_channel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parallel_channel.cpp; sourceTree = ""; }; + 0DD93B1828F16B5D0097699C /* parallel_import.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parallel_import.h; sourceTree = ""; }; + 0DD93B1928F16B5D0097699C /* parallel_import.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parallel_import.cpp; sourceTree = ""; }; + 0DD93B1A28F16B5D0097699C /* parallel_channel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parallel_channel.h; sourceTree = ""; }; 80D1B298199A40A700F6B3A8 /* diff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = diff.cpp; sourceTree = ""; }; 80D1B299199A40A700F6B3A8 /* diff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = diff.h; sourceTree = ""; }; 80D1B29B199A40A700F6B3A8 /* bytes_rle.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bytes_rle.cpp; sourceTree = ""; }; @@ -103,6 +109,18 @@ path = HPatchLite; sourceTree = ""; }; + 0DD93B1628F16B5D0097699C /* libParallel */ = { + isa = PBXGroup; + children = ( + 0DD93B1728F16B5D0097699C /* parallel_channel.cpp */, + 0DD93B1828F16B5D0097699C /* parallel_import.h */, + 0DD93B1928F16B5D0097699C /* parallel_import.cpp */, + 0DD93B1A28F16B5D0097699C /* parallel_channel.h */, + ); + name = libParallel; + path = ../../libParallel; + sourceTree = ""; + }; 80D1B296199A40A700F6B3A8 /* libHDiffPatch */ = { isa = PBXGroup; children = ( @@ -196,6 +214,7 @@ isa = PBXGroup; children = ( D639ACDD208B458500811BB3 /* _private_searchBestParams.cpp */, + 0DD93B1628F16B5D0097699C /* libParallel */, 80D1B296199A40A700F6B3A8 /* libHDiffPatch */, D6CB33621664FD2900AB34B7 /* Products */, D690A7B21F0E6F260089DC57 /* Frameworks */, @@ -266,6 +285,8 @@ D6EE50601F2990AD002854A2 /* compress_detect.cpp in Sources */, D66166281E189F5A005C570A /* divsufsort64.cpp in Sources */, D68CD9161F469DDB00A61337 /* digest_matcher.cpp in Sources */, + 0DD93B1B28F16B5D0097699C /* parallel_channel.cpp in Sources */, + 0DD93B1C28F16B5D0097699C /* parallel_import.cpp in Sources */, 80D1B2A5199A40A700F6B3A8 /* bytes_rle.cpp in Sources */, 80D1B2A6199A40A700F6B3A8 /* suffix_string.cpp in Sources */, D66166271E189F5A005C570A /* divsufsort.cpp in Sources */, @@ -285,7 +306,7 @@ buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; CLANG_WARN_BOOL_CONVERSION = YES; @@ -307,7 +328,7 @@ COPY_PHASE_STRIP = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; - GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_C_LANGUAGE_STANDARD = "compiler-default"; GCC_DYNAMIC_NO_PIC = NO; GCC_ENABLE_OBJC_EXCEPTIONS = YES; GCC_NO_COMMON_BLOCKS = YES; @@ -340,7 +361,7 @@ buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; CLANG_WARN_BOOL_CONVERSION = YES; @@ -362,7 +383,7 @@ COPY_PHASE_STRIP = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_STRICT_OBJC_MSGSEND = YES; - GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_C_LANGUAGE_STANDARD = "compiler-default"; GCC_ENABLE_OBJC_EXCEPTIONS = YES; GCC_NO_COMMON_BLOCKS = YES; GCC_PREPROCESSOR_DEFINITIONS = ""; diff --git a/builds/xcode/hdiffz.xcodeproj/project.pbxproj b/builds/xcode/hdiffz.xcodeproj/project.pbxproj index 3e65ad3c..537c636f 100644 --- a/builds/xcode/hdiffz.xcodeproj/project.pbxproj +++ b/builds/xcode/hdiffz.xcodeproj/project.pbxproj @@ -466,7 +466,7 @@ CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; - CLANG_CXX_LANGUAGE_STANDARD = "compiler-default"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; @@ -527,7 +527,7 @@ CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; - CLANG_CXX_LANGUAGE_STANDARD = "compiler-default"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; diff --git a/builds/xcode/unitTest.xcodeproj/project.pbxproj b/builds/xcode/unitTest.xcodeproj/project.pbxproj index 5278f902..00cc8e87 100644 --- a/builds/xcode/unitTest.xcodeproj/project.pbxproj +++ b/builds/xcode/unitTest.xcodeproj/project.pbxproj @@ -8,6 +8,8 @@ /* Begin PBXBuildFile section */ 0D7FEAAC283124810029772D /* hpatch_lite.c in Sources */ = {isa = PBXBuildFile; fileRef = 0D7FEAA9283124810029772D /* hpatch_lite.c */; }; + 0DD93B1428F16B010097699C /* parallel_channel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0DD93B1028F16B010097699C /* parallel_channel.cpp */; }; + 0DD93B1528F16B010097699C /* parallel_import.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0DD93B1228F16B010097699C /* parallel_import.cpp */; }; D639ACDC2089C7A200811BB3 /* unit_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D639ACDB2089C7A200811BB3 /* unit_test.cpp */; }; D64989871F602E99000FCC24 /* liblzma.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D64989861F602E99000FCC24 /* liblzma.a */; }; D68CD8FC1F44367A00A61337 /* adler_roll.c in Sources */ = {isa = PBXBuildFile; fileRef = D68CD8F41F44367A00A61337 /* adler_roll.c */; }; @@ -41,6 +43,10 @@ 0D7FEAA9283124810029772D /* hpatch_lite.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = hpatch_lite.c; sourceTree = ""; }; 0D7FEAAA283124810029772D /* hpatch_lite_input_cache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hpatch_lite_input_cache.h; sourceTree = ""; }; 0D7FEAAB283124810029772D /* hpatch_lite_types.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hpatch_lite_types.h; sourceTree = ""; }; + 0DD93B1028F16B010097699C /* parallel_channel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parallel_channel.cpp; sourceTree = ""; }; + 0DD93B1128F16B010097699C /* parallel_import.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parallel_import.h; sourceTree = ""; }; + 0DD93B1228F16B010097699C /* parallel_import.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parallel_import.cpp; sourceTree = ""; }; + 0DD93B1328F16B010097699C /* parallel_channel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parallel_channel.h; sourceTree = ""; }; 801FA5621999090000F7E57B /* pack_uint.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pack_uint.h; sourceTree = ""; }; D639ACDB2089C7A200811BB3 /* unit_test.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = unit_test.cpp; path = ../../test/unit_test.cpp; sourceTree = ""; }; D64989861F602E99000FCC24 /* liblzma.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = liblzma.a; path = "../../../../housisong/Library/Developer/Xcode/DerivedData/HDiffPatch-eyllrfrhmsokhaatowcgosnaofne/Build/Products/Release/liblzma.a"; sourceTree = ""; }; @@ -104,6 +110,18 @@ path = HPatchLite; sourceTree = ""; }; + 0DD93B0F28F16B010097699C /* libParallel */ = { + isa = PBXGroup; + children = ( + 0DD93B1028F16B010097699C /* parallel_channel.cpp */, + 0DD93B1128F16B010097699C /* parallel_import.h */, + 0DD93B1228F16B010097699C /* parallel_import.cpp */, + 0DD93B1328F16B010097699C /* parallel_channel.h */, + ); + name = libParallel; + path = ../../libParallel; + sourceTree = ""; + }; D68CD8F31F44367A00A61337 /* limit_mem_diff */ = { isa = PBXGroup; children = ( @@ -188,6 +206,7 @@ isa = PBXGroup; children = ( D639ACDB2089C7A200811BB3 /* unit_test.cpp */, + 0DD93B0F28F16B010097699C /* libParallel */, D6BD9B9F17578491004886DE /* libHDiffPatch */, D6CB33621664FD2900AB34B7 /* Products */, D6E1509D1F238B2C00C2AD3D /* Frameworks */, @@ -268,8 +287,10 @@ D6BD9BAF17578491004886DE /* diff.cpp in Sources */, D68CD8FD1F44367A00A61337 /* digest_matcher.cpp in Sources */, D6BD9BB017578491004886DE /* bytes_rle.cpp in Sources */, + 0DD93B1428F16B010097699C /* parallel_channel.cpp in Sources */, + 0DD93B1528F16B010097699C /* parallel_import.cpp in Sources */, D6BD9BB117578491004886DE /* suffix_string.cpp in Sources */, - D6BD9BB217578491004886DE /* patch.cpp in Sources */, + D6BD9BB217578491004886DE /* patch.c in Sources */, D6C701521E17E5BC00C19D84 /* divsufsort64.cpp in Sources */, 0D7FEAAC283124810029772D /* hpatch_lite.c in Sources */, D6C701511E17E5BC00C19D84 /* divsufsort.cpp in Sources */, @@ -287,7 +308,7 @@ buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; - CLANG_CXX_LANGUAGE_STANDARD = "compiler-default"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; CLANG_WARN_BOOL_CONVERSION = YES; @@ -343,7 +364,7 @@ buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; - CLANG_CXX_LANGUAGE_STANDARD = "compiler-default"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; CLANG_WARN_BOOL_CONVERSION = YES; diff --git a/libHDiffPatch/HDiff/diff_for_hpatch_lite.h b/libHDiffPatch/HDiff/diff_for_hpatch_lite.h index 9c26fe56..3c703edf 100644 --- a/libHDiffPatch/HDiff/diff_for_hpatch_lite.h +++ b/libHDiffPatch/HDiff/diff_for_hpatch_lite.h @@ -19,7 +19,8 @@ const int kLiteMatchScore_default = 6; void create_lite_diff(const hpi_byte* newData,const hpi_byte* newData_end, const hpi_byte* oldData,const hpi_byte* oldData_end, std::vector& out_lite_diff,const hdiffi_TCompress* compressPlugin, - int kMinSingleMatchScore=kLiteMatchScore_default,bool isUseBigCacheMatch=false); + int kMinSingleMatchScore=kLiteMatchScore_default, + bool isUseBigCacheMatch=false,size_t threadNum=1); bool check_lite_diff_open(const hpi_byte* lite_diff,const hpi_byte* lite_diff_end, hpi_compressType* out_compress_type); diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h index 0812cc1b..0fdce47a 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h @@ -77,7 +77,7 @@ static saidx_t sort_typeBstar(const sauchar_t *T, sastore_t* SA, saidx_t *bucket_A, saidx_t *bucket_B, - saidx_t n,size_t threadNum) { + saidx_t n,int threadNum) { sastore_t *PAb, *ISAb; saidx_t i, j, k, t, m; saint_t c0, c1; @@ -135,7 +135,7 @@ sort_typeBstar(const sauchar_t *T, sastore_t* SA, #if (_IS_USED_MULTITHREAD) if (threadNum>1){ const saidx_t bufsize = (n - (2 * m)) / (saidx_t)threadNum; - const size_t threadCount=threadNum-1; + const int threadCount=threadNum-1; c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; mt_data_t mt_data; mt_data.T=T; @@ -147,11 +147,11 @@ sort_typeBstar(const sauchar_t *T, sastore_t* SA, mt_data.m=m; std::vector threads(threadCount); sastore_t* buf = SA + m; - for (size_t ti=0;ti Date: Sat, 8 Oct 2022 17:05:08 +0800 Subject: [PATCH 20/20] fix CI make; --- Makefile | 2 +- .../HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 722fcc9e..6ddf2519 100644 --- a/Makefile +++ b/Makefile @@ -291,7 +291,7 @@ else endif CFLAGS += $(DEF_FLAGS) -CXXFLAGS += $(DEF_FLAGS) +CXXFLAGS += $(DEF_FLAGS) -std=c++11 .PHONY: all install clean diff --git a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h index 0fdce47a..fde3cfa9 100644 --- a/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h +++ b/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.c.inc.h @@ -33,6 +33,7 @@ /*- Private Functions -*/ +#if (_IS_USED_MULTITHREAD) struct mt_data_t{ CHLocker locker; const sauchar_t* T; @@ -70,7 +71,7 @@ static void _sssort_thread(saint_t* c0,saint_t* c1,saidx_t* j, buf, mt->bufsize, 2, mt->n, *(SA + k) == (mt->m - 1)); } } - +#endif /* Sorts suffixes of type B*. */ static