From a2de38d5c875986b9dc62d1f88f4aa04921e5cb5 Mon Sep 17 00:00:00 2001 From: tsa96 Date: Tue, 23 Jul 2024 03:23:26 +0100 Subject: [PATCH] refactor: split --include and --exclude into file and archive variants --- src/create.cpp | 115 +++++++++++++++++++++++++------------------------ src/create.hpp | 4 +- src/main.cpp | 31 ++++++++----- 3 files changed, 81 insertions(+), 69 deletions(-) diff --git a/src/create.cpp b/src/create.cpp index 3e1f196..137ca46 100644 --- a/src/create.cpp +++ b/src/create.cpp @@ -20,9 +20,13 @@ #include "log.hpp" -static auto enterVPK( std::ofstream& writer, std::string_view vpkPath, std::string_view vpkPathRel, const std::vector& excluded, const std::vector& included ) -> bool; +static auto enterVPK( std::ofstream& writer, std::string_view vpkPath, std::string_view vpkPathRel, const std::vector& excludes, const std::vector& includes ) -> bool; +static auto buildRegexCollection( const std::vector& regexStrings, std::string_view collectionType ) -> std::vector; +static auto matchPath( const std::string& path, const std::vector& regexes ) -> bool; -auto createFromRoot( std::string_view root_, std::string_view indexLocation, bool skipArchives, const std::vector& excluded, const std::vector& included ) -> int { +auto createFromRoot( std::string_view root_, std::string_view indexLocation, bool skipArchives, + const std::vector& fileExcludes, const std::vector& fileIncludes, + const std::vector& archiveExcludes, const std::vector& archiveIncludes ) -> int { const std::filesystem::path root{ root_ }; const std::filesystem::path indexPath{ root / indexLocation }; @@ -36,25 +40,13 @@ auto createFromRoot( std::string_view root_, std::string_view indexLocation, boo return 1; } - Log_Info( "Compiling exclusion regexes..." 
); - std::vector exclusionREs; - exclusionREs.reserve( excluded.size() ); - for ( const auto& exclusion : excluded ) { - exclusionREs.emplace_back( exclusion, std::regex::ECMAScript | std::regex::icase | std::regex::optimize ); - } - if (! skipArchives ) { - exclusionREs.emplace_back( R"(.*_[0-9][0-9][0-9]\.vpk)", std::regex::ECMAScript | std::regex::icase | std::regex::optimize ); - } - - Log_Info( "Compiling inclusion regexes..." ); - std::vector inclusionREs; - if (! included.empty() ) { - inclusionREs.reserve( included.size() ); - for ( const auto& inclusion: included ) { - inclusionREs.emplace_back( inclusion, std::regex::ECMAScript | std::regex::icase | std::regex::optimize ); - } - } + std::vector archiveExclusionREs = buildRegexCollection(archiveExcludes, "archive exclusion"); + std::vector archiveInclusionREs = buildRegexCollection(archiveIncludes, "archive inclusion"); + std::vector fileExclusionREs = buildRegexCollection(fileExcludes, "file exclusion"); + std::vector fileInclusionREs = buildRegexCollection(fileIncludes, "file inclusion"); + // We always pass some regexes in from main.cpp, so no need for an ugly check if we actually + // compiled anything - fileExclusionREs will always be non-empty. 
Log_Info( "Done in {}", std::chrono::duration_cast( std::chrono::high_resolution_clock::now() - start ) ); unsigned count{ 0 }; @@ -73,34 +65,35 @@ auto createFromRoot( std::string_view root_, std::string_view indexLocation, boo auto pathRel{ std::filesystem::relative( path, root ).string() }; sourcepp::string::normalizeSlashes( pathRel ); - auto breaker{ false }; - for ( const auto& exclusion : exclusionREs ) { - if ( std::regex_match( pathRel, exclusion ) ) { - breaker = true; - break; + if ( path.ends_with( ".vpk" ) ) { + static const std::regex numberedVpkRegex { R"(.*_[0-9][0-9][0-9]\.vpk)", std::regex::ECMAScript | std::regex::icase | std::regex::optimize }; + + if ( skipArchives || std::regex_match( pathRel, numberedVpkRegex ) ) { + continue; } - } - if ( breaker ) - continue; - if (! inclusionREs.empty() ) { - for ( const auto& inclusion: inclusionREs ) { - if ( std::regex_match( pathRel, inclusion ) ) { - breaker = true; - break; - } + if ( !archiveExclusionREs.empty() && matchPath( pathRel, archiveExclusionREs ) ) { + continue; } - if (! breaker ) + + if ( !archiveExclusionREs.empty() && !matchPath( pathRel, archiveInclusionREs ) ) { continue; - } + } - if ( !skipArchives && path.ends_with( ".vpk" ) ) { - // We've already ignored numbered archives in the exclusion regexes - if ( enterVPK( writer, path, pathRel, exclusionREs, inclusionREs ) ) { + if ( enterVPK( writer, path, pathRel, fileExclusionREs, fileInclusionREs ) ) { Log_Info( "Processed VPK at `{}`", path ); continue; } + Log_Warn( "Unable to open VPK at `{}`. 
Treating as a regular file...", path ); + } else { + if ( !fileInclusionREs.empty() && matchPath( pathRel, fileExclusionREs ) ) { + continue; + } + + if ( !fileExclusionREs.empty() && !matchPath( pathRel, fileInclusionREs ) ) { + continue; + } } // open file @@ -156,7 +149,7 @@ auto createFromRoot( std::string_view root_, std::string_view indexLocation, boo return 0; } -static auto enterVPK( std::ofstream& writer, std::string_view vpkPath, std::string_view vpkPathRel, const std::vector& excluded, const std::vector& included ) -> bool { +static auto enterVPK( std::ofstream& writer, std::string_view vpkPath, std::string_view vpkPathRel, const std::vector& excludes, const std::vector& includes ) -> bool { using namespace vpkpp; auto vpk = VPK::open( std::string{ vpkPath } ); @@ -166,25 +159,12 @@ static auto enterVPK( std::ofstream& writer, std::string_view vpkPath, std::stri for ( const auto& [ entryDirectory, entries ] : vpk->getBakedEntries() ) { for ( const auto& entry : entries ) { - auto breaker{ false }; - for ( const auto& exclusion : excluded ) { - if ( std::regex_match( entry.path, exclusion ) ) { - breaker = true; - break; - } - } - if ( breaker ) + if ( !excludes.empty() && matchPath( entry.path, excludes) ) { continue; + } - if (! included.empty() ) { - for ( const auto& inclusion: included ) { - if ( std::regex_match( entry.path, inclusion ) ) { - breaker = true; - break; - } - } - if (! 
breaker ) - continue; + if ( !includes.empty() && !matchPath( entry.path, includes ) ) { + continue; } auto entryData{ vpk->readEntry( entry ) }; @@ -214,3 +194,24 @@ static auto enterVPK( std::ofstream& writer, std::string_view vpkPath, std::stri return true; } + +static auto buildRegexCollection( const std::vector& regexStrings, std::string_view collectionType ) -> std::vector { + std::vector collection {}; + + if ( !regexStrings.empty() ) { + Log_Info( "Compiling {} regexes...", collectionType ); + collection.reserve( regexStrings.size() ); + + for ( const auto& item : regexStrings ) { + collection.emplace_back( item, std::regex::ECMAScript | std::regex::icase | std::regex::optimize ); + } + } + + return collection; +} + +static auto matchPath( const std::string& path, const std::vector& regexes ) -> bool { + return std::any_of( regexes.begin(), regexes.end(), [&]( const auto& item ) { + return std::regex_match( path, item ); + }); +} diff --git a/src/create.hpp b/src/create.hpp index 7051331..69086dd 100644 --- a/src/create.hpp +++ b/src/create.hpp @@ -6,4 +6,6 @@ #include #include -auto createFromRoot( std::string_view root, std::string_view indexLocation, bool skipArchives, const std::vector& excluded, const std::vector& included ) -> int; +auto createFromRoot( std::string_view root_, std::string_view indexLocation, bool skipArchives, + const std::vector& fileExcludes, const std::vector& fileIncludes, + const std::vector& archiveExcludes, const std::vector& archiveIncludes) -> int; diff --git a/src/main.cpp b/src/main.cpp index c217efd..23d1a12 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -39,8 +39,10 @@ auto main( int argc, char* argv[] ) -> int { bool newIndex{ false }; std::string root; bool skipArchives{ false }; - std::vector excludes; - std::vector includes; + std::vector fileExcludes; + std::vector fileIncludes; + std::vector archiveExcludes; + std::vector archiveIncludes; std::string indexLocation; bool overwrite{ false }; const auto programFile{ 
std::filesystem::path( argv[ 0 ] ).filename() }; @@ -64,13 +66,20 @@ auto main( int argc, char* argv[] ) -> int { .help( "Don't parse files stored in VPKs, parse the entire VPK instead." ) .metavar( "skip-archives" ) .absent( false ); - params.add_parameter( excludes, "--exclude", "-e" ) + params.add_parameter( fileExcludes, "--exclude", "-e" ) .help( "RegExp pattern(s) to exclude files when creating the index." ) .metavar( "excluded" ) .minargs( 1 ); - params.add_parameter( includes, "--include" ) + params.add_parameter( fileIncludes, "--include" ) .help( "RegExp pattern(s) to include files when creating the index. If not present, all files not matching an exclusion will be included." ) .metavar( "included" ); + params.add_parameter( archiveExcludes, "--exclude-archives", "-E" ) + .help( "RegExp pattern(s) to exclude VPKs when creating the index." ) + .metavar( "excluded-archives" ) + .minargs( 1 ); + params.add_parameter( archiveIncludes, "--include-archives" ) + .help( "RegExp pattern(s) to include VPKs when creating the index. If not present, all VPKs not matching an exclusion will be included." ) + .metavar( "included-archives" ); params.add_parameter( indexLocation, "--index", "-i" ) .help( "The index file to use." 
) .metavar( "index-loc" ) @@ -119,17 +128,17 @@ auto main( int argc, char* argv[] ) -> int { } // stuff we ignore during the building of the index, the "standard" useless stuff is hardcoded - excludes.emplace_back( "sdk_content.*" ); - excludes.emplace_back( ".*\\.vmf_autosave.*" ); - excludes.emplace_back( ".*\\.vmx" ); - excludes.emplace_back( ".*\\.log" ); - excludes.emplace_back( ".*verifier_index\\.rsv" ); + fileExcludes.emplace_back( "sdk_content.*" ); + fileExcludes.emplace_back( ".*\\.vmf_autosave.*" ); + fileExcludes.emplace_back( ".*\\.vmx" ); + fileExcludes.emplace_back( ".*\\.log" ); + fileExcludes.emplace_back( ".*verifier_index\\.rsv" ); - ret = createFromRoot( root, indexLocation, skipArchives, excludes, includes ); + ret = createFromRoot( root, indexLocation, skipArchives, fileExcludes, fileIncludes, archiveExcludes, archiveIncludes ); } else { if ( overwrite ) Log_Error( "current action doesn't support `--overwrite`, please remove it." ); - if (! excludes.empty() ) + if ( !fileExcludes.empty() ) Log_Error( "current action doesn't support `--exclude`, please remove it." ); ret = verify( root, indexLocation );