diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index ad0fd740..7d3218e4 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -1,609 +1,615 @@ -// Archive/ZipItem.cpp - -#ifndef _WIN32 -#include -#include -#endif - -#include "StdAfx.h" - -#include "../../../../C/CpuArch.h" -#include "../../../../C/7zCrc.h" - -#include "../../../Common/IntToString.h" -#include "../../../Common/MyLinux.h" -#include "../../../Common/StringConvert.h" - -#include "../../../Windows/PropVariantUtils.h" - -#include "../Common/ItemNameUtils.h" - -#include "ZipItem.h" - -namespace NArchive { -namespace NZip { - -using namespace NFileHeader; - - -/* -const char *k_SpecName_NTFS_STREAM = "@@NTFS@STREAM@"; -const char *k_SpecName_MAC_RESOURCE_FORK = "@@MAC@RESOURCE-FORK@"; -*/ - -static const CUInt32PCharPair g_ExtraTypes[] = -{ - { NExtraID::kZip64, "Zip64" }, - { NExtraID::kNTFS, "NTFS" }, - { NExtraID::kUnix0, "UNIX" }, - { NExtraID::kStrongEncrypt, "StrongCrypto" }, - { NExtraID::kUnixTime, "UT" }, - { NExtraID::kUnix1, "UX" }, - { NExtraID::kUnix2, "Ux" }, - { NExtraID::kUnixN, "ux" }, - { NExtraID::kIzUnicodeComment, "uc" }, - { NExtraID::kIzUnicodeName, "up" }, - { NExtraID::kIzNtSecurityDescriptor, "SD" }, - { NExtraID::kWzAES, "WzAES" }, - { NExtraID::kApkAlign, "ApkAlign" } -}; - -void CExtraSubBlock::PrintInfo(AString &s) const -{ - for (unsigned i = 0; i < ARRAY_SIZE(g_ExtraTypes); i++) - { - const CUInt32PCharPair &pair = g_ExtraTypes[i]; - if (pair.Value == ID) - { - s += pair.Name; - if (ID == NExtraID::kUnixTime) - { - if (Data.Size() >= 1) - { - s += ':'; - const Byte flags = Data[0]; - if (flags & 1) s += 'M'; - if (flags & 2) s += 'A'; - if (flags & 4) s += 'C'; - const UInt32 size = (UInt32)(Data.Size()) - 1; - if (size % 4 == 0) - { - s += ':'; - s.Add_UInt32(size / 4); - } - } - } - /* - if (ID == NExtraID::kApkAlign && Data.Size() >= 2) - { - char sz[32]; - sz[0] = ':'; - ConvertUInt32ToHex(GetUi16(Data), sz + 1); - s += sz; - for (unsigned j = 2; j < Data.Size(); j++) - { - char sz[32]; - sz[0] = '-'; - ConvertUInt32ToHex(Data[j], sz + 1); - s += sz; - } - } - */ - return; - } - } - { - char sz[32]; - sz[0] = '0'; - sz[1] = 'x'; - ConvertUInt32ToHex(ID, sz + 2); - s += sz; - } -} - - -void CExtraBlock::PrintInfo(AString &s) const -{ - if (Error) - s.Add_OptSpaced("Extra_ERROR"); - - if (MinorError) - s.Add_OptSpaced("Minor_Extra_ERROR"); - - if (IsZip64 || IsZip64_Error) - { - s.Add_OptSpaced("Zip64"); - if (IsZip64_Error) - s += "_ERROR"; - } - - FOR_VECTOR (i, SubBlocks) - { - s.Add_Space_if_NotEmpty(); - SubBlocks[i].PrintInfo(s); - } -} - - -bool CExtraSubBlock::ExtractNtfsTime(unsigned index, FILETIME &ft) const -{ - ft.dwHighDateTime = ft.dwLowDateTime = 0; - UInt32 size = (UInt32)Data.Size(); - if (ID != NExtraID::kNTFS || size < 32) - return false; - const Byte *p = (const Byte *)Data; - p += 4; // for reserved - size -= 4; - while (size > 4) - { - UInt16 tag = GetUi16(p); - unsigned attrSize = GetUi16(p + 2); - p += 4; - size -= 4; - if (attrSize > size) - attrSize = size; - - if (tag == NNtfsExtra::kTagTime && attrSize >= 24) - { - p += 8 * index; - ft.dwLowDateTime = GetUi32(p); - ft.dwHighDateTime = GetUi32(p + 4); - return true; - } - p += attrSize; - size -= attrSize; - } - return false; -} - -bool CExtraSubBlock::Extract_UnixTime(bool isCentral, unsigned index, UInt32 &res) const -{ - /* Info-Zip : - The central-header extra field contains the modification - time only, or no timestamp at all. - Size of Data is used to flag its presence or absence - If "Flags" indicates that Modtime is present in the local header - field, it MUST be present in the central header field, too - */ - - res = 0; - UInt32 size = (UInt32)Data.Size(); - if (ID != NExtraID::kUnixTime || size < 5) - return false; - const Byte *p = (const Byte *)Data; - const Byte flags = *p++; - size--; - if (isCentral) - { - if (index != NUnixTime::kMTime || - (flags & (1 << NUnixTime::kMTime)) == 0 || - size < 4) - return false; - res = GetUi32(p); - return true; - } - for (unsigned i = 0; i < 3; i++) - if ((flags & (1 << i)) != 0) - { - if (size < 4) - return false; - if (index == i) - { - res = GetUi32(p); - return true; - } - p += 4; - size -= 4; - } - return false; -} - - -// Info-ZIP's abandoned "Unix1 timestamps & owner ID info" - -bool CExtraSubBlock::Extract_Unix01_Time(unsigned index, UInt32 &res) const -{ - res = 0; - const unsigned offset = index * 4; - if (Data.Size() < offset + 4) - return false; - if (ID != NExtraID::kUnix0 && - ID != NExtraID::kUnix1) - return false; - const Byte *p = (const Byte *)Data + offset; - res = GetUi32(p); - return true; -} - -/* -// PKWARE's Unix "extra" is similar to Info-ZIP's abandoned "Unix1 timestamps" -bool CExtraSubBlock::Extract_Unix_Time(unsigned index, UInt32 &res) const -{ - res = 0; - const unsigned offset = index * 4; - if (ID != NExtraID::kUnix0 || Data.Size() < offset) - return false; - const Byte *p = (const Byte *)Data + offset; - res = GetUi32(p); - return true; -} -*/ - -bool CExtraBlock::GetNtfsTime(unsigned index, FILETIME &ft) const -{ - FOR_VECTOR (i, SubBlocks) - { - const CExtraSubBlock &sb = SubBlocks[i]; - if (sb.ID == NFileHeader::NExtraID::kNTFS) - return sb.ExtractNtfsTime(index, ft); - } - return false; -} - -bool CExtraBlock::GetUnixTime(bool isCentral, unsigned index, UInt32 &res) const -{ - { - FOR_VECTOR (i, SubBlocks) - { - const CExtraSubBlock &sb = SubBlocks[i]; - if (sb.ID == NFileHeader::NExtraID::kUnixTime) - return sb.Extract_UnixTime(isCentral, index, res); - } - } - - switch (index) - { - case NUnixTime::kMTime: index = NUnixExtra::kMTime; break; - case NUnixTime::kATime: index = NUnixExtra::kATime; break; - default: return false; - } - - { - FOR_VECTOR (i, SubBlocks) - { - const CExtraSubBlock &sb = SubBlocks[i]; - if (sb.ID == NFileHeader::NExtraID::kUnix0 || - sb.ID == NFileHeader::NExtraID::kUnix1) - return sb.Extract_Unix01_Time(index, res); - } - } - return false; -} - - -bool CLocalItem::IsDir() const -{ - return NItemName::HasTailSlash(Name, GetCodePage()); -} - -bool CItem::IsDir() const -{ - // FIXME: we can check InfoZip UTF-8 name at first. - if (NItemName::HasTailSlash(Name, GetCodePage())) - return true; - - Byte hostOS = GetHostOS(); - - if (Size == 0 && PackSize == 0 && !Name.IsEmpty() && Name.Back() == '\\') - { - // do we need to use CharPrevExA? - // .NET Framework 4.5 : System.IO.Compression::CreateFromDirectory() probably writes backslashes to headers? - // so we support that case - switch (hostOS) - { - case NHostOS::kFAT: - case NHostOS::kNTFS: - case NHostOS::kHPFS: - case NHostOS::kVFAT: - return true; - } - } - - if (!FromCentral) - return false; - - UInt16 highAttrib = (UInt16)((ExternalAttrib >> 16 ) & 0xFFFF); - - switch (hostOS) - { - case NHostOS::kAMIGA: - switch (highAttrib & NAmigaAttrib::kIFMT) - { - case NAmigaAttrib::kIFDIR: return true; - case NAmigaAttrib::kIFREG: return false; - default: return false; // change it throw kUnknownAttributes; - } - case NHostOS::kFAT: - case NHostOS::kNTFS: - case NHostOS::kHPFS: - case NHostOS::kVFAT: - return ((ExternalAttrib & FILE_ATTRIBUTE_DIRECTORY) != 0); - case NHostOS::kAtari: - case NHostOS::kMac: - case NHostOS::kVMS: - case NHostOS::kVM_CMS: - case NHostOS::kAcorn: - case NHostOS::kMVS: - return false; // change it throw kUnknownAttributes; - case NHostOS::kUnix: - return MY_LIN_S_ISDIR(highAttrib); - default: - return false; - } -} - -UInt32 CItem::GetWinAttrib() const -{ - UInt32 winAttrib = 0; - switch (GetHostOS()) - { - case NHostOS::kFAT: - case NHostOS::kNTFS: - if (FromCentral) - winAttrib = ExternalAttrib; - break; - case NHostOS::kUnix: - // do we need to clear 16 low bits in this case? - if (FromCentral) - { - /* - Some programs write posix attributes in high 16 bits of ExternalAttrib - Also some programs can write additional marker flag: - 0x8000 - p7zip - 0x4000 - Zip in MacOS - no marker - Info-Zip - - Client code has two options to detect posix field: - 1) check 0x8000 marker. In that case we must add 0x8000 marker here. - 2) check that high 4 bits (file type bits in posix field) of attributes are not zero. - */ - - winAttrib = ExternalAttrib & 0xFFFF0000; - - // #ifndef _WIN32 - winAttrib |= 0x8000; // add posix mode marker - // #endif - } - break; - } - if (IsDir()) // test it; - winAttrib |= FILE_ATTRIBUTE_DIRECTORY; - return winAttrib; -} - -bool CItem::GetPosixAttrib(UInt32 &attrib) const -{ - // some archivers can store PosixAttrib in high 16 bits even with HostOS=FAT. - if (FromCentral && GetHostOS() == NHostOS::kUnix) - { - attrib = ExternalAttrib >> 16; - return (attrib != 0); - } - attrib = 0; - if (IsDir()) - attrib = MY_LIN_S_IFDIR; - return false; -} - - -bool CExtraSubBlock::CheckIzUnicode(const AString &s) const -{ - size_t size = Data.Size(); - if (size < 1 + 4) - return false; - const Byte *p = (const Byte *)Data; - if (p[0] > 1) - return false; - if (CrcCalc(s, s.Len()) != GetUi32(p + 1)) - return false; - size -= 5; - p += 5; - for (size_t i = 0; i < size; i++) - if (p[i] == 0) - return false; - return Check_UTF8_Buf((const char *)(const void *)p, size, false); -} - - -void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, bool useSpecifiedCodePage, UINT codePage) const -{ - bool isUtf8 = IsUtf8(); - // bool ignore_Utf8_Errors = true; - - if (!isUtf8) - { - { - const unsigned id = isComment ? - NFileHeader::NExtraID::kIzUnicodeComment: - NFileHeader::NExtraID::kIzUnicodeName; - const CObjectVector &subBlocks = GetMainExtra().SubBlocks; - - FOR_VECTOR (i, subBlocks) - { - const CExtraSubBlock &sb = subBlocks[i]; - if (sb.ID == id) - { - if (sb.CheckIzUnicode(s)) - { - // const unsigned kIzUnicodeHeaderSize = 5; - if (Convert_UTF8_Buf_To_Unicode( - (const char *)(const void *)(const Byte *)sb.Data + 5, - sb.Data.Size() - 5, res)) - return; - } - break; - } - } - } - - if (useSpecifiedCodePage) - isUtf8 = (codePage == CP_UTF8); - #ifdef _WIN32 - else if (GetHostOS() == NFileHeader::NHostOS::kUnix) - { - /* Some ZIP archives in Unix use UTF-8 encoding without Utf8 flag in header. - We try to get name as UTF-8. - Do we need to do it in POSIX version also? */ - isUtf8 = true; - - /* 21.02: we want to ignore UTF-8 errors to support file paths that are mixed - of UTF-8 and non-UTF-8 characters. */ - // ignore_Utf8_Errors = false; - // ignore_Utf8_Errors = true; - } - #endif - } - - #ifndef _WIN32 - - // Convert OEM char set to UTF-8 if needed - // Use system locale to select code page - - // locale -> code page translation tables generated from Wine source code - - const char *lcToOemTable[] = { - "af_ZA", "CP850", "ar_SA", "CP720", "ar_LB", "CP720", "ar_EG", "CP720", - "ar_DZ", "CP720", "ar_BH", "CP720", "ar_IQ", "CP720", "ar_JO", "CP720", - "ar_KW", "CP720", "ar_LY", "CP720", "ar_MA", "CP720", "ar_OM", "CP720", - "ar_QA", "CP720", "ar_SY", "CP720", "ar_TN", "CP720", "ar_AE", "CP720", - "ar_YE", "CP720", "ast_ES", "CP850", "az_AZ", "CP866", "az_AZ", "CP857", - "be_BY", "CP866", "bg_BG", "CP866", "br_FR", "CP850", "ca_ES", "CP850", - "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP850", "cs_CZ", "CP852", - "cy_GB", "CP850", "da_DK", "CP850", "de_AT", "CP850", "de_LI", "CP850", - "de_LU", "CP850", "de_CH", "CP850", "de_DE", "CP850", "el_GR", "CP737", - "en_AU", "CP850", "en_CA", "CP850", "en_GB", "CP850", "en_IE", "CP850", - "en_JM", "CP850", "en_BZ", "CP850", "en_PH", "CP437", "en_ZA", "CP437", - "en_TT", "CP850", "en_US", "CP437", "en_ZW", "CP437", "en_NZ", "CP850", - "es_PA", "CP850", "es_BO", "CP850", "es_CR", "CP850", "es_DO", "CP850", - "es_SV", "CP850", "es_EC", "CP850", "es_GT", "CP850", "es_HN", "CP850", - "es_NI", "CP850", "es_CL", "CP850", "es_MX", "CP850", "es_ES", "CP850", - "es_CO", "CP850", "es_ES", "CP850", "es_PE", "CP850", "es_AR", "CP850", - "es_PR", "CP850", "es_VE", "CP850", "es_UY", "CP850", "es_PY", "CP850", - "et_EE", "CP775", "eu_ES", "CP850", "fa_IR", "CP720", "fi_FI", "CP850", - "fo_FO", "CP850", "fr_FR", "CP850", "fr_BE", "CP850", "fr_CA", "CP850", - "fr_LU", "CP850", "fr_MC", "CP850", "fr_CH", "CP850", "ga_IE", "CP437", - "gd_GB", "CP850", "gv_IM", "CP850", "gl_ES", "CP850", "he_IL", "CP862", - "hr_HR", "CP852", "hu_HU", "CP852", "id_ID", "CP850", "is_IS", "CP850", - "it_IT", "CP850", "it_CH", "CP850", "iv_IV", "CP437", "ja_JP", "CP932", - "kk_KZ", "CP866", "ko_KR", "CP949", "ky_KG", "CP866", "lt_LT", "CP775", - "lv_LV", "CP775", "mk_MK", "CP866", "mn_MN", "CP866", "ms_BN", "CP850", - "ms_MY", "CP850", "nl_BE", "CP850", "nl_NL", "CP850", "nl_SR", "CP850", - "nn_NO", "CP850", "nb_NO", "CP850", "pl_PL", "CP852", "pt_BR", "CP850", - "pt_PT", "CP850", "rm_CH", "CP850", "ro_RO", "CP852", "ru_RU", "CP866", - "sk_SK", "CP852", "sl_SI", "CP852", "sq_AL", "CP852", "sr_RS", "CP855", - "sr_RS", "CP852", "sv_SE", "CP850", "sv_FI", "CP850", "sw_KE", "CP437", - "th_TH", "CP874", "tr_TR", "CP857", "tt_RU", "CP866", "uk_UA", "CP866", - "ur_PK", "CP720", "uz_UZ", "CP866", "uz_UZ", "CP857", "vi_VN", "CP1258", - "wa_BE", "CP850", "zh_HK", "CP950", "zh_SG", "CP936"}; - - const char *lcToAnsiTable[] = { - "af_ZA", "CP1252", "ar_SA", "CP1256", "ar_LB", "CP1256", "ar_EG", "CP1256", - "ar_DZ", "CP1256", "ar_BH", "CP1256", "ar_IQ", "CP1256", "ar_JO", "CP1256", - "ar_KW", "CP1256", "ar_LY", "CP1256", "ar_MA", "CP1256", "ar_OM", "CP1256", - "ar_QA", "CP1256", "ar_SY", "CP1256", "ar_TN", "CP1256", "ar_AE", "CP1256", - "ar_YE", "CP1256","ast_ES", "CP1252", "az_AZ", "CP1251", "az_AZ", "CP1254", - "be_BY", "CP1251", "bg_BG", "CP1251", "br_FR", "CP1252", "ca_ES", "CP1252", - "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP1252", "cs_CZ", "CP1250", - "cy_GB", "CP1252", "da_DK", "CP1252", "de_AT", "CP1252", "de_LI", "CP1252", - "de_LU", "CP1252", "de_CH", "CP1252", "de_DE", "CP1252", "el_GR", "CP1253", - "en_AU", "CP1252", "en_CA", "CP1252", "en_GB", "CP1252", "en_IE", "CP1252", - "en_JM", "CP1252", "en_BZ", "CP1252", "en_PH", "CP1252", "en_ZA", "CP1252", - "en_TT", "CP1252", "en_US", "CP1252", "en_ZW", "CP1252", "en_NZ", "CP1252", - "es_PA", "CP1252", "es_BO", "CP1252", "es_CR", "CP1252", "es_DO", "CP1252", - "es_SV", "CP1252", "es_EC", "CP1252", "es_GT", "CP1252", "es_HN", "CP1252", - "es_NI", "CP1252", "es_CL", "CP1252", "es_MX", "CP1252", "es_ES", "CP1252", - "es_CO", "CP1252", "es_ES", "CP1252", "es_PE", "CP1252", "es_AR", "CP1252", - "es_PR", "CP1252", "es_VE", "CP1252", "es_UY", "CP1252", "es_PY", "CP1252", - "et_EE", "CP1257", "eu_ES", "CP1252", "fa_IR", "CP1256", "fi_FI", "CP1252", - "fo_FO", "CP1252", "fr_FR", "CP1252", "fr_BE", "CP1252", "fr_CA", "CP1252", - "fr_LU", "CP1252", "fr_MC", "CP1252", "fr_CH", "CP1252", "ga_IE", "CP1252", - "gd_GB", "CP1252", "gv_IM", "CP1252", "gl_ES", "CP1252", "he_IL", "CP1255", - "hr_HR", "CP1250", "hu_HU", "CP1250", "id_ID", "CP1252", "is_IS", "CP1252", - "it_IT", "CP1252", "it_CH", "CP1252", "iv_IV", "CP1252", "ja_JP", "CP932", - "kk_KZ", "CP1251", "ko_KR", "CP949", "ky_KG", "CP1251", "lt_LT", "CP1257", - "lv_LV", "CP1257", "mk_MK", "CP1251", "mn_MN", "CP1251", "ms_BN", "CP1252", - "ms_MY", "CP1252", "nl_BE", "CP1252", "nl_NL", "CP1252", "nl_SR", "CP1252", - "nn_NO", "CP1252", "nb_NO", "CP1252", "pl_PL", "CP1250", "pt_BR", "CP1252", - "pt_PT", "CP1252", "rm_CH", "CP1252", "ro_RO", "CP1250", "ru_RU", "CP1251", - "sk_SK", "CP1250", "sl_SI", "CP1250", "sq_AL", "CP1250", "sr_RS", "CP1251", - "sr_RS", "CP1250", "sv_SE", "CP1252", "sv_FI", "CP1252", "sw_KE", "CP1252", - "th_TH", "CP874", "tr_TR", "CP1254", "tt_RU", "CP1251", "uk_UA", "CP1251", - "ur_PK", "CP1256", "uz_UZ", "CP1251", "uz_UZ", "CP1254", "vi_VN", "CP1258", - "wa_BE", "CP1252", "zh_HK", "CP950", "zh_SG", "CP936"}; - - bool isAnsi = false; - bool isOem = false; - - if (!isUtf8 && - MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS && - MadeByVersion.Version >= 20) { - isAnsi = true; - } else if (!isUtf8 && - (MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS || - MadeByVersion.HostOS == NFileHeader::NHostOS::kFAT)) { - isOem = true; - } - - if (isOem || isAnsi) { - - const char *legacyCp = nullptr; - int tableLen = sizeof(isOem ? lcToOemTable : lcToAnsiTable) / sizeof(char *); - int lcLen = 0, i; - - // Detect required code page name from current locale - char *lc = setlocale(LC_CTYPE, ""); - - if (lc && lc[0]) { - // Compare up to the dot, if it exists, e.g. en_US.UTF-8 - for (lcLen = 0; lc[lcLen] != '.' && lc[lcLen] != '\0'; ++lcLen); - - for (i = 0; i < tableLen; i += 2) - if (strncmp(lc, (isOem ? lcToOemTable[i] : lcToAnsiTable[i]), lcLen) == 0) { - legacyCp = isOem ? lcToOemTable[i + 1] : lcToAnsiTable[i + 1]; - break; // Stop searching once a match is found - } - } - - if (legacyCp) { - iconv_t cd; - if ((cd = iconv_open("UTF-8", legacyCp)) != (iconv_t)-1) { - - AString s_utf8; - const char* src = s.Ptr(); - size_t slen = s.Len(); - size_t dlen = slen * 4; - char* dest = s_utf8.GetBuf_SetEnd(dlen + 1); // (source length * 4) + null termination - - char* srcPtr = const_cast(src); // iconv requires non-const input pointer - char* destPtr = dest; - size_t done = iconv(cd, &srcPtr, &slen, &destPtr, &dlen); - if (done == (size_t)-1) { - // Handle iconv error - iconv_close(cd); - // Add proper error handling or logging here - return; - } - - // Null-terminate the result - *destPtr = '\0'; - - iconv_close(cd); - - if (ConvertUTF8ToUnicode(s_utf8, res) /*|| ignore_Utf8_Errors*/) { - return; - } - } - } - } - #endif - - if (isUtf8) - { - ConvertUTF8ToUnicode(s, res); - return; - } - - MultiByteToUnicodeString2(res, s, useSpecifiedCodePage ? codePage : GetCodePage()); -} - -}} +// Archive/ZipItem.cpp + +#ifndef _WIN32 +#include +#include +#endif + +#include "StdAfx.h" + +#include "../../../../C/CpuArch.h" +#include "../../../../C/7zCrc.h" + +#include "../../../Common/IntToString.h" +#include "../../../Common/MyLinux.h" +#include "../../../Common/StringConvert.h" + +#include "../../../Windows/PropVariantUtils.h" + +#include "../Common/ItemNameUtils.h" + +#include "ZipItem.h" + +namespace NArchive { +namespace NZip { + +using namespace NFileHeader; + + +/* +const char *k_SpecName_NTFS_STREAM = "@@NTFS@STREAM@"; +const char *k_SpecName_MAC_RESOURCE_FORK = "@@MAC@RESOURCE-FORK@"; +*/ + +static const CUInt32PCharPair g_ExtraTypes[] = +{ + { NExtraID::kZip64, "Zip64" }, + { NExtraID::kNTFS, "NTFS" }, + { NExtraID::kUnix0, "UNIX" }, + { NExtraID::kStrongEncrypt, "StrongCrypto" }, + { NExtraID::kUnixTime, "UT" }, + { NExtraID::kUnix1, "UX" }, + { NExtraID::kUnix2, "Ux" }, + { NExtraID::kUnixN, "ux" }, + { NExtraID::kIzUnicodeComment, "uc" }, + { NExtraID::kIzUnicodeName, "up" }, + { NExtraID::kIzNtSecurityDescriptor, "SD" }, + { NExtraID::kWzAES, "WzAES" }, + { NExtraID::kApkAlign, "ApkAlign" } +}; + +void CExtraSubBlock::PrintInfo(AString &s) const +{ + for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_ExtraTypes); i++) + { + const CUInt32PCharPair &pair = g_ExtraTypes[i]; + if (pair.Value == ID) + { + s += pair.Name; + if (ID == NExtraID::kUnixTime) + { + if (Data.Size() >= 1) + { + s.Add_Colon(); + const Byte flags = Data[0]; + if (flags & 1) s.Add_Char('M'); + if (flags & 2) s.Add_Char('A'); + if (flags & 4) s.Add_Char('C'); + const UInt32 size = (UInt32)(Data.Size()) - 1; + if (size % 4 == 0) + { + s.Add_Colon(); + s.Add_UInt32(size / 4); + } + } + } + /* + if (ID == NExtraID::kApkAlign && Data.Size() >= 2) + { + char sz[32]; + sz[0] = ':'; + ConvertUInt32ToHex(GetUi16(Data), sz + 1); + s += sz; + for (unsigned j = 2; j < Data.Size(); j++) + { + char sz[32]; + sz[0] = '-'; + ConvertUInt32ToHex(Data[j], sz + 1); + s += sz; + } + } + */ + return; + } + } + { + char sz[16]; + sz[0] = '0'; + sz[1] = 'x'; + ConvertUInt32ToHex(ID, sz + 2); + s += sz; + } +} + + +void CExtraBlock::PrintInfo(AString &s) const +{ + if (Error) + s.Add_OptSpaced("Extra_ERROR"); + + if (MinorError) + s.Add_OptSpaced("Minor_Extra_ERROR"); + + if (IsZip64 || IsZip64_Error) + { + s.Add_OptSpaced("Zip64"); + if (IsZip64_Error) + s += "_ERROR"; + } + + FOR_VECTOR (i, SubBlocks) + { + s.Add_Space_if_NotEmpty(); + SubBlocks[i].PrintInfo(s); + } +} + + +bool CExtraSubBlock::ExtractNtfsTime(unsigned index, FILETIME &ft) const +{ + ft.dwHighDateTime = ft.dwLowDateTime = 0; + UInt32 size = (UInt32)Data.Size(); + if (ID != NExtraID::kNTFS || size < 32) + return false; + const Byte *p = (const Byte *)Data; + p += 4; // for reserved + size -= 4; + while (size > 4) + { + UInt16 tag = GetUi16(p); + unsigned attrSize = GetUi16(p + 2); + p += 4; + size -= 4; + if (attrSize > size) + attrSize = size; + + if (tag == NNtfsExtra::kTagTime && attrSize >= 24) + { + p += 8 * index; + ft.dwLowDateTime = GetUi32(p); + ft.dwHighDateTime = GetUi32(p + 4); + return true; + } + p += attrSize; + size -= attrSize; + } + return false; +} + +bool CExtraSubBlock::Extract_UnixTime(bool isCentral, unsigned index, UInt32 &res) const +{ + /* Info-Zip : + The central-header extra field contains the modification + time only, or no timestamp at all. + Size of Data is used to flag its presence or absence + If "Flags" indicates that Modtime is present in the local header + field, it MUST be present in the central header field, too + */ + + res = 0; + UInt32 size = (UInt32)Data.Size(); + if (ID != NExtraID::kUnixTime || size < 5) + return false; + const Byte *p = (const Byte *)Data; + const Byte flags = *p++; + size--; + if (isCentral) + { + if (index != NUnixTime::kMTime || + (flags & (1 << NUnixTime::kMTime)) == 0 || + size < 4) + return false; + res = GetUi32(p); + return true; + } + for (unsigned i = 0; i < 3; i++) + if ((flags & (1 << i)) != 0) + { + if (size < 4) + return false; + if (index == i) + { + res = GetUi32(p); + return true; + } + p += 4; + size -= 4; + } + return false; +} + + +// Info-ZIP's abandoned "Unix1 timestamps & owner ID info" + +bool CExtraSubBlock::Extract_Unix01_Time(unsigned index, UInt32 &res) const +{ + res = 0; + const unsigned offset = index * 4; + if (Data.Size() < offset + 4) + return false; + if (ID != NExtraID::kUnix0 && + ID != NExtraID::kUnix1) + return false; + const Byte *p = (const Byte *)Data + offset; + res = GetUi32(p); + return true; +} + +/* +// PKWARE's Unix "extra" is similar to Info-ZIP's abandoned "Unix1 timestamps" +bool CExtraSubBlock::Extract_Unix_Time(unsigned index, UInt32 &res) const +{ + res = 0; + const unsigned offset = index * 4; + if (ID != NExtraID::kUnix0 || Data.Size() < offset) + return false; + const Byte *p = (const Byte *)Data + offset; + res = GetUi32(p); + return true; +} +*/ + +bool CExtraBlock::GetNtfsTime(unsigned index, FILETIME &ft) const +{ + FOR_VECTOR (i, SubBlocks) + { + const CExtraSubBlock &sb = SubBlocks[i]; + if (sb.ID == NFileHeader::NExtraID::kNTFS) + return sb.ExtractNtfsTime(index, ft); + } + return false; +} + +bool CExtraBlock::GetUnixTime(bool isCentral, unsigned index, UInt32 &res) const +{ + { + FOR_VECTOR (i, SubBlocks) + { + const CExtraSubBlock &sb = SubBlocks[i]; + if (sb.ID == NFileHeader::NExtraID::kUnixTime) + return sb.Extract_UnixTime(isCentral, index, res); + } + } + + switch (index) + { + case NUnixTime::kMTime: index = NUnixExtra::kMTime; break; + case NUnixTime::kATime: index = NUnixExtra::kATime; break; + default: return false; + } + + { + FOR_VECTOR (i, SubBlocks) + { + const CExtraSubBlock &sb = SubBlocks[i]; + if (sb.ID == NFileHeader::NExtraID::kUnix0 || + sb.ID == NFileHeader::NExtraID::kUnix1) + return sb.Extract_Unix01_Time(index, res); + } + } + return false; +} + + +bool CLocalItem::IsDir() const +{ + return NItemName::HasTailSlash(Name, GetCodePage()); +} + +bool CItem::IsDir() const +{ + // FIXME: we can check InfoZip UTF-8 name at first. + if (NItemName::HasTailSlash(Name, GetCodePage())) + return true; + + Byte hostOS = GetHostOS(); + + if (Size == 0 && PackSize == 0 && !Name.IsEmpty() && Name.Back() == '\\') + { + // do we need to use CharPrevExA? + // .NET Framework 4.5 : System.IO.Compression::CreateFromDirectory() probably writes backslashes to headers? + // so we support that case + switch (hostOS) + { + case NHostOS::kFAT: + case NHostOS::kNTFS: + case NHostOS::kHPFS: + case NHostOS::kVFAT: + return true; + default: break; + } + } + + if (!FromCentral) + return false; + + UInt16 highAttrib = (UInt16)((ExternalAttrib >> 16 ) & 0xFFFF); + + switch (hostOS) + { + case NHostOS::kAMIGA: + switch (highAttrib & NAmigaAttrib::kIFMT) + { + case NAmigaAttrib::kIFDIR: return true; + case NAmigaAttrib::kIFREG: return false; + default: return false; // change it throw kUnknownAttributes; + } + case NHostOS::kFAT: + case NHostOS::kNTFS: + case NHostOS::kHPFS: + case NHostOS::kVFAT: + return ((ExternalAttrib & FILE_ATTRIBUTE_DIRECTORY) != 0); + case NHostOS::kAtari: + case NHostOS::kMac: + case NHostOS::kVMS: + case NHostOS::kVM_CMS: + case NHostOS::kAcorn: + case NHostOS::kMVS: + return false; // change it throw kUnknownAttributes; + case NHostOS::kUnix: + return MY_LIN_S_ISDIR(highAttrib); + default: + return false; + } +} + +UInt32 CItem::GetWinAttrib() const +{ + UInt32 winAttrib = 0; + switch (GetHostOS()) + { + case NHostOS::kFAT: + case NHostOS::kNTFS: + if (FromCentral) + winAttrib = ExternalAttrib; + break; + case NHostOS::kUnix: + // do we need to clear 16 low bits in this case? + if (FromCentral) + { + /* + Some programs write posix attributes in high 16 bits of ExternalAttrib + Also some programs can write additional marker flag: + 0x8000 - p7zip + 0x4000 - Zip in MacOS + no marker - Info-Zip + + Client code has two options to detect posix field: + 1) check 0x8000 marker. In that case we must add 0x8000 marker here. + 2) check that high 4 bits (file type bits in posix field) of attributes are not zero. + */ + + winAttrib = ExternalAttrib & 0xFFFF0000; + + // #ifndef _WIN32 + winAttrib |= 0x8000; // add posix mode marker + // #endif + } + break; + default: break; + } + if (IsDir()) // test it; + winAttrib |= FILE_ATTRIBUTE_DIRECTORY; + return winAttrib; +} + +bool CItem::GetPosixAttrib(UInt32 &attrib) const +{ + // some archivers can store PosixAttrib in high 16 bits even with HostOS=FAT. + if (FromCentral && GetHostOS() == NHostOS::kUnix) + { + attrib = ExternalAttrib >> 16; + return (attrib != 0); + } + attrib = 0; + if (IsDir()) + attrib = MY_LIN_S_IFDIR; + return false; +} + + +bool CExtraSubBlock::CheckIzUnicode(const AString &s) const +{ + size_t size = Data.Size(); + if (size < 1 + 4) + return false; + const Byte *p = (const Byte *)Data; + if (p[0] > 1) + return false; + if (CrcCalc(s, s.Len()) != GetUi32(p + 1)) + return false; + size -= 5; + p += 5; + for (size_t i = 0; i < size; i++) + if (p[i] == 0) + return false; + return Check_UTF8_Buf((const char *)(const void *)p, size, false); +} + + +void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, bool useSpecifiedCodePage, UINT codePage) const +{ + bool isUtf8 = IsUtf8(); + // bool ignore_Utf8_Errors = true; + + if (!isUtf8) + { + { + const unsigned id = isComment ? + NFileHeader::NExtraID::kIzUnicodeComment: + NFileHeader::NExtraID::kIzUnicodeName; + const CObjectVector &subBlocks = GetMainExtra().SubBlocks; + + FOR_VECTOR (i, subBlocks) + { + const CExtraSubBlock &sb = subBlocks[i]; + if (sb.ID == id) + { + if (sb.CheckIzUnicode(s)) + { + // const unsigned kIzUnicodeHeaderSize = 5; + if (Convert_UTF8_Buf_To_Unicode( + (const char *)(const void *)(const Byte *)sb.Data + 5, + sb.Data.Size() - 5, res)) + return; + } + break; + } + } + } + + if (useSpecifiedCodePage) + isUtf8 = (codePage == CP_UTF8); + #ifdef _WIN32 + else if (GetHostOS() == NFileHeader::NHostOS::kUnix) + { + /* Some ZIP archives in Unix use UTF-8 encoding without Utf8 flag in header. + We try to get name as UTF-8. + Do we need to do it in POSIX version also? */ + isUtf8 = true; + + /* 21.02: we want to ignore UTF-8 errors to support file paths that are mixed + of UTF-8 and non-UTF-8 characters. */ + // ignore_Utf8_Errors = false; + // ignore_Utf8_Errors = true; + } + #endif + } + + #ifndef _WIN32 + + // Convert OEM char set to UTF-8 if needed + // Use system locale to select code page + + // locale -> code page translation tables generated from Wine source code + + const char *lcToOemTable[] = { + "af_ZA", "CP850", "ar_SA", "CP720", "ar_LB", "CP720", "ar_EG", "CP720", + "ar_DZ", "CP720", "ar_BH", "CP720", "ar_IQ", "CP720", "ar_JO", "CP720", + "ar_KW", "CP720", "ar_LY", "CP720", "ar_MA", "CP720", "ar_OM", "CP720", + "ar_QA", "CP720", "ar_SY", "CP720", "ar_TN", "CP720", "ar_AE", "CP720", + "ar_YE", "CP720", "ast_ES", "CP850", "az_AZ", "CP866", "az_AZ", "CP857", + "be_BY", "CP866", "bg_BG", "CP866", "br_FR", "CP850", "ca_ES", "CP850", + "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP850", "cs_CZ", "CP852", + "cy_GB", "CP850", "da_DK", "CP850", "de_AT", "CP850", "de_LI", "CP850", + "de_LU", "CP850", "de_CH", "CP850", "de_DE", "CP850", "el_GR", "CP737", + "en_AU", "CP850", "en_CA", "CP850", "en_GB", "CP850", "en_IE", "CP850", + "en_JM", "CP850", "en_BZ", "CP850", "en_PH", "CP437", "en_ZA", "CP437", + "en_TT", "CP850", "en_US", "CP437", "en_ZW", "CP437", "en_NZ", "CP850", + "es_PA", "CP850", "es_BO", "CP850", "es_CR", "CP850", "es_DO", "CP850", + "es_SV", "CP850", "es_EC", "CP850", "es_GT", "CP850", "es_HN", "CP850", + "es_NI", "CP850", "es_CL", "CP850", "es_MX", "CP850", "es_ES", "CP850", + "es_CO", "CP850", "es_ES", "CP850", "es_PE", "CP850", "es_AR", "CP850", + "es_PR", "CP850", "es_VE", "CP850", "es_UY", "CP850", "es_PY", "CP850", + "et_EE", "CP775", "eu_ES", "CP850", "fa_IR", "CP720", "fi_FI", "CP850", + "fo_FO", "CP850", "fr_FR", "CP850", "fr_BE", "CP850", "fr_CA", "CP850", + "fr_LU", "CP850", "fr_MC", "CP850", "fr_CH", "CP850", "ga_IE", "CP437", + "gd_GB", "CP850", "gv_IM", "CP850", "gl_ES", "CP850", "he_IL", "CP862", + "hr_HR", "CP852", "hu_HU", "CP852", "id_ID", "CP850", "is_IS", "CP850", + "it_IT", "CP850", "it_CH", "CP850", "iv_IV", "CP437", "ja_JP", "CP932", + "kk_KZ", "CP866", "ko_KR", "CP949", "ky_KG", "CP866", "lt_LT", "CP775", + "lv_LV", "CP775", "mk_MK", "CP866", "mn_MN", "CP866", "ms_BN", "CP850", + "ms_MY", "CP850", "nl_BE", "CP850", "nl_NL", "CP850", "nl_SR", "CP850", + "nn_NO", "CP850", "nb_NO", "CP850", "pl_PL", "CP852", "pt_BR", "CP850", + "pt_PT", "CP850", "rm_CH", "CP850", "ro_RO", "CP852", "ru_RU", "CP866", + "sk_SK", "CP852", "sl_SI", "CP852", "sq_AL", "CP852", "sr_RS", "CP855", + "sr_RS", "CP852", "sv_SE", "CP850", "sv_FI", "CP850", "sw_KE", "CP437", + "th_TH", "CP874", "tr_TR", "CP857", "tt_RU", "CP866", "uk_UA", "CP866", + "ur_PK", "CP720", "uz_UZ", "CP866", "uz_UZ", "CP857", "vi_VN", "CP1258", + "wa_BE", "CP850", "zh_HK", "CP950", "zh_SG", "CP936"}; + + const char *lcToAnsiTable[] = { + "af_ZA", "CP1252", "ar_SA", "CP1256", "ar_LB", "CP1256", "ar_EG", "CP1256", + "ar_DZ", "CP1256", "ar_BH", "CP1256", "ar_IQ", "CP1256", "ar_JO", "CP1256", + "ar_KW", "CP1256", "ar_LY", "CP1256", "ar_MA", "CP1256", "ar_OM", "CP1256", + "ar_QA", "CP1256", "ar_SY", "CP1256", "ar_TN", "CP1256", "ar_AE", "CP1256", + "ar_YE", "CP1256","ast_ES", "CP1252", "az_AZ", "CP1251", "az_AZ", "CP1254", + "be_BY", "CP1251", "bg_BG", "CP1251", "br_FR", "CP1252", "ca_ES", "CP1252", + "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP1252", "cs_CZ", "CP1250", + "cy_GB", "CP1252", "da_DK", "CP1252", "de_AT", "CP1252", "de_LI", "CP1252", + "de_LU", "CP1252", "de_CH", "CP1252", "de_DE", "CP1252", "el_GR", "CP1253", + "en_AU", "CP1252", "en_CA", "CP1252", "en_GB", "CP1252", "en_IE", "CP1252", + "en_JM", "CP1252", "en_BZ", "CP1252", "en_PH", "CP1252", "en_ZA", "CP1252", + "en_TT", "CP1252", "en_US", "CP1252", "en_ZW", "CP1252", "en_NZ", "CP1252", + "es_PA", "CP1252", "es_BO", "CP1252", "es_CR", "CP1252", "es_DO", "CP1252", + "es_SV", "CP1252", "es_EC", "CP1252", "es_GT", "CP1252", "es_HN", "CP1252", + "es_NI", "CP1252", "es_CL", "CP1252", "es_MX", "CP1252", "es_ES", "CP1252", + "es_CO", "CP1252", "es_ES", "CP1252", "es_PE", "CP1252", "es_AR", "CP1252", + "es_PR", "CP1252", "es_VE", "CP1252", "es_UY", "CP1252", "es_PY", "CP1252", + "et_EE", "CP1257", "eu_ES", "CP1252", "fa_IR", "CP1256", "fi_FI", "CP1252", + "fo_FO", "CP1252", "fr_FR", "CP1252", "fr_BE", "CP1252", "fr_CA", "CP1252", + "fr_LU", "CP1252", "fr_MC", "CP1252", "fr_CH", "CP1252", "ga_IE", "CP1252", + "gd_GB", "CP1252", "gv_IM", "CP1252", "gl_ES", "CP1252", "he_IL", "CP1255", + "hr_HR", "CP1250", "hu_HU", "CP1250", "id_ID", "CP1252", "is_IS", "CP1252", + "it_IT", "CP1252", "it_CH", "CP1252", "iv_IV", "CP1252", "ja_JP", "CP932", + "kk_KZ", "CP1251", "ko_KR", "CP949", "ky_KG", "CP1251", "lt_LT", "CP1257", + "lv_LV", "CP1257", "mk_MK", "CP1251", "mn_MN", "CP1251", "ms_BN", "CP1252", + "ms_MY", "CP1252", "nl_BE", "CP1252", "nl_NL", "CP1252", "nl_SR", "CP1252", + "nn_NO", "CP1252", "nb_NO", "CP1252", "pl_PL", "CP1250", "pt_BR", "CP1252", + "pt_PT", "CP1252", "rm_CH", "CP1252", "ro_RO", "CP1250", "ru_RU", "CP1251", + "sk_SK", "CP1250", "sl_SI", "CP1250", "sq_AL", "CP1250", "sr_RS", "CP1251", + "sr_RS", "CP1250", "sv_SE", "CP1252", "sv_FI", "CP1252", "sw_KE", "CP1252", + "th_TH", "CP874", "tr_TR", "CP1254", "tt_RU", "CP1251", "uk_UA", "CP1251", + "ur_PK", "CP1256", "uz_UZ", "CP1251", "uz_UZ", "CP1254", "vi_VN", "CP1258", + "wa_BE", "CP1252", "zh_HK", "CP950", "zh_SG", "CP936"}; + + bool isAnsi = false; + bool isOem = false; + + if (!isUtf8 && + MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS && + MadeByVersion.Version >= 20) { + isAnsi = true; + } else if (!isUtf8 && + (MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS || + MadeByVersion.HostOS == NFileHeader::NHostOS::kFAT)) { + isOem = true; + } + + if (isOem || isAnsi) { + + const char *legacyCp = nullptr; + int tableLen = sizeof(isOem ? lcToOemTable : lcToAnsiTable) / sizeof(char *); + int lcLen = 0, i; + + // Detect required code page name from current locale + char *lc = setlocale(LC_CTYPE, ""); + + if (lc && lc[0]) { + // Compare up to the dot, if it exists, e.g. en_US.UTF-8 + for (lcLen = 0; lc[lcLen] != '.' && lc[lcLen] != ':' && lc[lcLen] != '\0'; ++lcLen); + + for (i = 0; i < tableLen; i += 2) + if (strncmp(lc, (isOem ? lcToOemTable[i] : lcToAnsiTable[i]), lcLen) == 0) { + legacyCp = isOem ? lcToOemTable[i + 1] : lcToAnsiTable[i + 1]; + break; // Stop searching once a match is found + } + } + + if (legacyCp) { + iconv_t cd; + if ((cd = iconv_open("UTF-8", legacyCp)) != (iconv_t)-1) { + + AString s_utf8; + + size_t slen = s.Len(); + char* src = s.Ptr_non_const(); + + size_t dlen = slen * 4 + 1; // (source length * 4) + null termination + char* dst = s_utf8.GetBuf_SetEnd(dlen); + + memset(dst, 0, dlen); + + size_t done = iconv(cd, &src, &slen, &dst, &dlen); + + if (done == (size_t)-1) { + iconv_close(cd); + + // iconv failed. Falling back to default behavior + MultiByteToUnicodeString2(res, s, useSpecifiedCodePage ? codePage : GetCodePage()); + return; + } + + // Null-terminate the result + *dst = '\0'; + + iconv_close(cd); + + if (ConvertUTF8ToUnicode(s_utf8, res) /*|| ignore_Utf8_Errors*/) { + return; + } + } + } + } + #endif + + if (isUtf8) + { + ConvertUTF8ToUnicode(s, res); + return; + } + + MultiByteToUnicodeString2(res, s, useSpecifiedCodePage ? codePage : GetCodePage()); +} + +}}