diff --git a/erts/preloaded/ebin/prim_zip.beam b/erts/preloaded/ebin/prim_zip.beam index 2ab80d4b494d..44ad467c9295 100644 Binary files a/erts/preloaded/ebin/prim_zip.beam and b/erts/preloaded/ebin/prim_zip.beam differ diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl index 5095998225bc..d7052f999482 100644 --- a/erts/preloaded/src/erlang.erl +++ b/erts/preloaded/src/erlang.erl @@ -7128,20 +7128,17 @@ follows: `{spawn_executable, FileName}`. The external program starts using `Dir` as its working directory. `Dir` must be a string. -- **`{env, Env}`** - Types: -   `Name = ``t:os:env_var_name/0` -   `Val = ``t:os:env_var_value/0`` | false` -   `Env = [{Name, Val}]` - - Only valid for `{spawn, Command}`, and `{spawn_executable, FileName}`. The - environment of the started process is extended using the environment +- **`{env, Env}`** - Only valid for `{spawn, Command}`, and `{spawn_executable, FileName}`. + The environment of the started process is extended using the environment specifications in `Env`. - `Env` is to be a list of tuples `{Name, Val}`, where `Name` is the name of an - environment variable, and `Val` is the value it is to have in the spawned port - process. Both `Name` and `Val` must be strings. The one exception is `Val` - being the atom `false` (in analogy with `os:getenv/1`), which removes the - environment variable. + `Env` is to be a list of tuples `{Name, Val}`, where `Name` is a `t:os:env_var_name/0` + representing the name of an environment variable, and `Val` is a `t:os:env_var_name/0` + representing the value it is to have in the spawned port process. Both `Name` and `Val` must + be strings. + + If `Val` is set to the atom `false` or the empty string (that is `""` or `[]`), open_port + will consider those variables unset just as if `os:unsetenv/1` had been called. For information about encoding requirements, see documentation of the types for `Name` and `Val`. @@ -7327,7 +7324,7 @@ by passing command-line flag [`+Q`](erl_cmd.md#max_ports) to [erl](erl_cmd.md). | stream | {line, L :: non_neg_integer()} | {cd, Dir :: string() | binary()} - | {env, Env :: [{Name :: os:env_var_name(), Val :: os:env_var_value() | false}]} + | {env, Env :: [{Name :: os:env_var_name(), Val :: os:env_var_value() | [] | false}]} | {args, [string() | binary()]} | {arg0, string() | binary()} | exit_status diff --git a/erts/preloaded/src/prim_zip.erl b/erts/preloaded/src/prim_zip.erl index 528c62f5a398..f1cfb4dafce9 100644 --- a/erts/preloaded/src/prim_zip.erl +++ b/erts/preloaded/src/prim_zip.erl @@ -533,7 +533,7 @@ dos_date_time_to_datetime(DosDate, DosTime) -> <> = <>, <> = <>, {{YearFrom1980+1980, Month, Day}, - {Hour, Min, Sec}}. + {Hour, Min, Sec * 2}}. cd_file_header_from_bin(< - openzip_open(F, []). - --doc false. -openzip_open(F, Options) -> - case ?CATCH(do_openzip_open(F, Options)) of - {ok, OpenZip} -> - {ok, OpenZip}; - Error -> - {error, Error} - end. - -do_openzip_open(F, Options) -> - Opts = get_openzip_options(Options), - #openzip_opts{output = Output, open_opts = OpO, cwd = CWD} = Opts, - Input = get_input(F), - In0 = Input({open, F, OpO -- [write]}, []), - {[#zip_comment{comment = C} | Files], In1} = - get_central_dir(In0, fun raw_file_info_etc/5, Input), - Z = zlib:open(), - {ok, #openzip{zip_comment = C, - files = Files, - in = In1, - input = Input, - output = Output, - zlib = Z, - cwd = CWD}}. - -%% retrieve all files from an open archive --doc false. -openzip_get(OpenZip) -> - case ?CATCH(do_openzip_get(OpenZip)) of - {ok, Result} -> {ok, Result}; - Error -> {error, Error} - end. - -do_openzip_get(#openzip{files = Files, in = In0, input = Input, - output = Output, zlib = Z, cwd = CWD}) -> - ZipOpts = #unzip_opts{output = Output, input = Input, - file_filter = fun all/1, open_opts = [], - feedback = fun silent/1, cwd = CWD}, - R = get_z_files(Files, Z, In0, ZipOpts, []), - {ok, R}; -do_openzip_get(_) -> - throw(einval). - -%% retrieve the crc32 checksum from an open archive -openzip_get_crc32(FileName, #openzip{files = Files}) -> - case file_name_search(FileName, Files) of - {_,#zip_file_extra{crc32=CRC}} -> {ok, CRC}; - _ -> throw(file_not_found) - end. - -%% retrieve a file from an open archive --doc false. -openzip_get(FileName, OpenZip) -> - case ?CATCH(do_openzip_get(FileName, OpenZip)) of - {ok, Result} -> {ok, Result}; - Error -> {error, Error} - end. - -do_openzip_get(F, #openzip{files = Files, in = In0, input = Input, - output = Output, zlib = Z, cwd = CWD}) -> - %%case lists:keysearch(F, #zip_file.name, Files) of - case file_name_search(F, Files) of - {#zip_file{offset = Offset},_}=ZFile -> - In1 = Input({seek, bof, Offset}, In0), - case get_z_file(In1, Z, Input, Output, [], fun silent/1, - CWD, ZFile, fun all/1) of - {file, R, _In2} -> {ok, R}; - _ -> throw(file_not_found) - end; - _ -> throw(file_not_found) - end; -do_openzip_get(_, _) -> - throw(einval). - -file_name_search(Name,Files) -> - Fun = fun({ZipFile,_}) -> - not string:equal(ZipFile#zip_file.name, Name, - _IgnoreCase = false, _Norm = nfc) - end, - case lists:dropwhile(Fun, Files) of - [ZFile|_] -> ZFile; - [] -> false - end. - -%% %% add a file to an open archive -%% openzip_add(File, OpenZip) -> -%% case ?CATCH do_openzip_add(File, OpenZip) of -%% {ok, Result} -> {ok, Result}; -%% Error -> {error, Error} -%% end. - -%% do_openzip_add(File, #open_zip{files = Files, in = In0, -%% opts = Opts} = OpenZip0) -> -%% throw(nyi), -%% Z = zlib:open(), -%% R = get_z_files(Files, In0, Z, Opts, []), -%% zlib:close(Z), -%% {ok, R}; -%% do_openzip_add(_, _) -> -%% throw(einval). - -%% get file list from open archive --doc false. -openzip_list_dir(#openzip{zip_comment = Comment, - files = Files}) -> - {ZipFiles,_Extras} = lists:unzip(Files), - {ok, [#zip_comment{comment = Comment} | ZipFiles]}; -openzip_list_dir(_) -> - {error, einval}. - --doc false. -openzip_list_dir(#openzip{files = Files}, [names_only]) -> - {ZipFiles,_Extras} = lists:unzip(Files), - Names = [Name || {#zip_file{name=Name},_} <- ZipFiles], - {ok, Names}; -openzip_list_dir(_, _) -> - {error, einval}. - -%% close an open archive --doc false. -openzip_close(#openzip{in = In0, input = Input, zlib = Z}) -> - Input(close, In0), - zlib:close(Z); -openzip_close(_) -> - {error, einval}. - %% Extract from a zip archive with options %% %% Accepted options: @@ -454,6 +370,15 @@ Options: with option `memory` specified, which means that no files are overwritten, existing files are excluded from the result. +- **`skip_directories`** - By default empty directories within zip archives are + extracted. With option `skip_directories` set, empty directories are no longer + created. + +- **`{extra, Extras}`** - The zip "extra" features to respect. The supported + "extra" features are "extended timestamps" and "UID and GID" handling. + By default only "extended timestamps" is enabled when unzipping. + See `t:extra/0` for more details. + - **`verbose`** - Prints an informational message for each extracted file. - **`memory`** - Instead of extracting to the current directory, the result is @@ -469,7 +394,8 @@ Options: Options :: [Option], Option :: {file_list, FileList} | cooked | keep_old_files | verbose | memory | - {file_filter, FileFilter} | {cwd, CWD}, + {file_filter, FileFilter} | {cwd, CWD} | + {extra, extra()}, FileList :: [file:name()], FileBinList :: [{file:name(),binary()}], FileFilter :: fun((ZipFile) -> boolean()), @@ -488,10 +414,11 @@ unzip(F, Options) -> do_unzip(F, Options) -> Opts = get_unzip_options(F, Options), - #unzip_opts{input = Input, open_opts = OpO} = Opts, + #unzip_opts{input = Input, open_opts = OpO, + extra = ExtraOpts} = Opts, In0 = Input({open, F, OpO -- [write]}, []), RawIterator = fun raw_file_info_etc/5, - {Info, In1} = get_central_dir(In0, RawIterator, Input), + {Info, In1} = get_central_dir(In0, RawIterator, Input, ExtraOpts), %% get rid of zip-comment Z = zlib:open(), Files = try @@ -504,7 +431,7 @@ do_unzip(F, Options) -> %% Iterate over all files in a zip archive -doc """ -Calls `Fun(FileInArchive, GetInfo , GetBin, AccIn)` on successive files in the +Calls `Fun(FileInArchive, GetInfo, GetBin, AccIn)` on successive files in the `Archive`, starting with `AccIn == Acc0`. `FileInArchive` is the name that the file has in the archive. @@ -565,15 +492,31 @@ _Example:_ Archive :: file:name() | {file:name(), binary()}, Reason :: term()). +foldl(Fun, Acc0, {_Filename, Binary}) -> + foldl(Fun, Acc0, Binary); foldl(Fun, Acc0, Archive) when is_function(Fun, 4) -> - ZipFun = - fun({Name, GetInfo, GetBin}, A) -> - A2 = Fun(Name, GetInfo, GetBin, A), - {true, false, A2} - end, - case prim_zip:open(ZipFun, Acc0, Archive) of - {ok, PrimZip, Acc1} -> - ok = prim_zip:close(PrimZip), + case zip_open(Archive,[memory]) of + {ok, Handle} -> + {ok, Files} = zip_list_dir(Handle), + Acc1 = + lists:foldl( + fun(#zip_comment{}, Acc) -> + Acc; + (#zip_file{ name = Name, info = Info }, Acc) -> + GetInfo = fun() -> Info end, + GetBin = case lists:last(Name) of + $/ -> fun() -> <<>> end; + _ -> + fun() -> + case zip_get(Name, Handle) of + {ok, {Name, Data}} -> Data; + {error, Error} -> throw({Name, Error}) + end + end + end, + Fun(Name, GetInfo, GetBin, Acc) + end, Acc0, Files), + ok = zip_close(Handle), {ok, Acc1}; {error, bad_eocd} -> {error, "Not an archive file"}; @@ -649,6 +592,11 @@ Options: zip archive (acting like `file:set_cwd/1` in Kernel, but without changing the global `cwd` property.). +- **`{extra, Extras}`** - The zip "extra" features to respect. The supported + "extra" features are "extended timestamps" and "UID and GID" handling. + By default both these "extra" features are enabled. + See `t:extra/0` for more details. + - **`{compress, What}`** - Controls what types of files to be compressed. Defaults to `all`. The following values of `What` are allowed: @@ -707,7 +655,7 @@ do_zip(F, Files, Options) -> {ok, Out3} catch C:R:Stk -> - zlib:close(Z), + ?CATCH(zlib:close(Z)), Output({close, F}, Out0), erlang:raise(C, R, Stk) end. @@ -738,13 +686,22 @@ One option is available: which is faster but does not allow a remote (Erlang) file server to be used. Adding `cooked` to the mode list overrides the default and opens the zip file without option `raw`. + +- **`skip_directories`** - By default empty directories within zip archives are + listed. With option `skip_directories` set, empty directories are no longer + listed. + +- **`{extra, Extras}`** - The zip "extra" features to respect. The supported + "extra" features are "extended timestamps" and "UID and GID" handling. + By default only "extended timestamps" is enabled when listing files. + See `t:extra/0` for more details. """. -spec(list_dir(Archive, Options) -> RetValue when Archive :: file:name() | binary(), RetValue :: {ok, CommentAndFiles} | {error, Reason :: term()}, CommentAndFiles :: [zip_comment() | zip_file()], Options :: [Option], - Option :: cooked). + Option :: cooked | {extra, extra()}). list_dir(F, Options) -> case ?CATCH(do_list_dir(F, Options)) of @@ -755,11 +712,126 @@ list_dir(F, Options) -> do_list_dir(F, Options) -> Opts = get_list_dir_options(F, Options), #list_dir_opts{input = Input, open_opts = OpO, - raw_iterator = RawIterator} = Opts, + raw_iterator = RawIterator, + skip_dirs = SkipDirs, + extra = ExtraOpts} = Opts, In0 = Input({open, F, OpO}, []), - {Info, In1} = get_central_dir(In0, RawIterator, Input), + {Info, In1} = get_central_dir(In0, RawIterator, Input, ExtraOpts), Input(close, In1), - {ok, Info}. + if SkipDirs -> + {ok, + lists:filter( + fun(#zip_file{ name = Name }) -> + lists:last(Name) =/= $/; + (#zip_comment{}) -> + true + end, Info)}; + true -> + {ok, Info} + end. + +-doc(#{equiv => zip_open/2}). +-spec(zip_open(Archive) -> {ok, ZipHandle} | {error, Reason} when + Archive :: file:name() | binary(), + ZipHandle :: handle(), + Reason :: term()). + +zip_open(Archive) -> zip_open(Archive, []). + +-doc """ +Opens a zip archive, and reads and saves its directory. This means that later +reading files from the archive is faster than unzipping files one at a time with +[`unzip/1,2`](`unzip/1`). + +The options are equivalent to those in `unzip/2`. + +The archive must be closed with `zip_close/1`. + +The `ZipHandle` is closed if the process that originally opened the archive +dies. +""". +-spec(zip_open(Archive, Options) -> {ok, ZipHandle} | {error, Reason} when + Archive :: file:name() | binary(), + ZipHandle :: handle(), + Options :: [Option], + Option :: cooked | memory | {cwd, CWD :: file:filename()} | {extra, extra()}, + Reason :: term()). + +zip_open(Archive, Options) -> + Self = self(), + Pid = spawn_link(fun() -> server_init(Self) end), + request(Self, Pid, {open, Archive, Options}). + +-doc(#{equiv => zip_get/2}). +-spec(zip_get(ZipHandle) -> {ok, [Result]} | {error, Reason} when + ZipHandle :: handle(), + Result :: file:name() | {file:name(), binary()}, + Reason :: term()). + +zip_get(Pid) when is_pid(Pid) -> + request(self(), Pid, get). + +-doc """ +Closes a zip archive, previously opened with [`zip_open/1,2`](`zip_open/1`). All +resources are closed, and the handle is not to be used after closing. +""". +-spec(zip_close(ZipHandle) -> ok | {error, einval} when + ZipHandle :: handle()). + +zip_close(Pid) when is_pid(Pid) -> + request(self(), Pid, close). + +-doc """ +Extracts one or all files from an open archive. + +The files are unzipped to memory or to file, depending on the options specified +to function [`zip_open/1,2`](`zip_open/1`) when opening the archive. +""". +-spec(zip_get(FileName, ZipHandle) -> {ok, Result} | {error, Reason} when + FileName :: file:name(), + ZipHandle :: handle(), + Result :: file:name() | {file:name(), binary()}, + Reason :: term()). + +zip_get(FileName, Pid) when is_pid(Pid) -> + request(self(), Pid, {get, FileName}). + +-doc "Extracts one crc32 checksum from an open archive.". +-doc(#{since => <<"OTP 26.0">>}). +-spec(zip_get_crc32(FileName, ZipHandle) -> {ok, CRC} | {error, Reason} when + FileName :: file:name(), + ZipHandle :: handle(), + CRC :: non_neg_integer(), + Reason :: term()). + +zip_get_crc32(FileName, Pid) when is_pid(Pid) -> + request(self(), Pid, {get_crc32, FileName}). + +-doc """ +Returns the file list of an open zip archive. The first returned element is the +zip archive comment. +""". +-spec(zip_list_dir(ZipHandle) -> {ok, Result} | {error, Reason} when + Result :: [zip_comment() | zip_file()], + ZipHandle :: handle(), + Reason :: term()). + +zip_list_dir(Pid) when is_pid(Pid) -> + request(self(), Pid, list_dir). + +request(Self, Pid, Req) -> + Pid ! {Self, Req}, + receive + {Pid, R} -> R + end. + +zip_t(Pid) when is_pid(Pid) -> + Openzip = request(self(), Pid, get_state), + openzip_t(Openzip). + +zip_tt(Pid) when is_pid(Pid) -> + Openzip = request(self(), Pid, get_state), + openzip_tt(Openzip). %% Print zip directory in short form @@ -772,7 +844,6 @@ to `tar t`.) ZipHandle :: handle()). t(F) when is_pid(F) -> zip_t(F); -t(F) when is_record(F, openzip) -> openzip_t(F); t(F) -> t(F, fun raw_short_print_info_etc/5). t(F, RawPrint) -> @@ -785,7 +856,7 @@ do_t(F, RawPrint) -> Input = get_input(F), OpO = [raw], In0 = Input({open, F, OpO}, []), - {_Info, In1} = get_central_dir(In0, RawPrint, Input), + {_Info, In1} = get_central_dir(In0, RawPrint, Input, ?EXTRA_OPTIONS), Input(close, In1), ok. @@ -800,7 +871,6 @@ the Erlang shell. (Similar to `tar tv`.) ZipHandle :: handle()). tt(F) when is_pid(F) -> zip_tt(F); -tt(F) when is_record(F, openzip) -> openzip_tt(F); tt(F) -> t(F, fun raw_long_print_info_etc/5). @@ -827,6 +897,15 @@ get_unzip_opt([keep_old_files | Rest], Opts) -> Keep = fun keep_old_file/1, Filter = fun_and_1(Keep, Opts#unzip_opts.file_filter), get_unzip_opt(Rest, Opts#unzip_opts{file_filter = Filter}); +get_unzip_opt([skip_directories | Rest], Opts) -> + get_unzip_opt(Rest, Opts#unzip_opts{skip_dirs = true}); +get_unzip_opt([{extra, What} = O| Rest], Opts) when is_list(What) -> + case lists:all(fun(E) -> lists:member(E, ?EXTRA_OPTIONS) end, What) of + true -> + get_zip_opt(Rest, Opts#unzip_opts{extra = What}); + false -> + throw({bad_option, O}) + end; get_unzip_opt([Unknown | _Rest], _Opts) -> throw({bad_option, Unknown}). @@ -837,6 +916,15 @@ get_list_dir_opt([cooked | Rest], #list_dir_opts{open_opts = OpO} = Opts) -> get_list_dir_opt([names_only | Rest], Opts) -> get_list_dir_opt(Rest, Opts#list_dir_opts{ raw_iterator = fun(A, B, C, D, E) -> raw_name_only(A, B, C, D, E) end}); +get_list_dir_opt([skip_directories | Rest], Opts) -> + get_list_dir_opt(Rest, Opts#list_dir_opts{skip_dirs = true}); +get_list_dir_opt([{extra, What} = O| Rest], Opts) when is_list(What) -> + case lists:all(fun(E) -> lists:member(E, ?EXTRA_OPTIONS) end, What) of + true -> + get_zip_opt(Rest, Opts#list_dir_opts{extra = What}); + false -> + throw({bad_option, O}) + end; %% get_list_dir_opt([{file_output, F} | Rest], Opts) -> %% get_list_dir_opt(Rest, Opts#list_dir_opts{file_output = F}); %% get_list_dir_opt([{file_filter, F} | Rest], Opts) -> @@ -886,6 +974,13 @@ get_zip_opt([{uncompress, Which} = O| Rest], Opts) -> throw({bad_option, O}) end, get_zip_opt(Rest, Opts#zip_opts{uncompress = Which2}); +get_zip_opt([{extra, What} = O| Rest], Opts) when is_list(What) -> + case lists:all(fun(E) -> lists:member(E, ?EXTRA_OPTIONS) end, What) of + true -> + get_zip_opt(Rest, Opts#zip_opts{extra = What}); + false -> + throw({bad_option, O}) + end; get_zip_opt([Unknown | _Rest], _Opts) -> throw({bad_option, Unknown}). @@ -926,7 +1021,8 @@ get_zip_options(Files, Options) -> feedback = fun silent/1, cwd = "", compress = all, - uncompress = Suffixes + uncompress = Suffixes, + extra = ?EXTRA_OPTIONS }, Opts1 = #zip_opts{comment = Comment} = get_zip_opt(Options, Opts), %% UTF-8 encode characters in the interval from 127 to 255. @@ -939,14 +1035,18 @@ get_unzip_options(F, Options) -> input = get_input(F), open_opts = [raw], feedback = fun silent/1, - cwd = "" + skip_dirs = false, + cwd = "", + extra = [extended_timestamp] }, get_unzip_opt(Options, Opts). get_openzip_options(Options) -> Opts = #openzip_opts{open_opts = [raw, read], output = fun file_io/2, - cwd = ""}, + cwd = "", + skip_dirs = false, + extra = ?EXTRA_OPTIONS}, get_openzip_opt(Options, Opts). get_input(F) when is_binary(F) -> @@ -974,7 +1074,9 @@ get_zip_input(_) -> get_list_dir_options(F, Options) -> Opts = #list_dir_opts{raw_iterator = fun raw_file_info_public/5, input = get_input(F), - open_opts = [raw]}, + open_opts = [raw], + skip_dirs = false, + extra = [extended_timestamp]}, get_list_dir_opt(Options, Opts). %% aliases for erl_tar compatibility @@ -1062,25 +1164,33 @@ extract(F, O) -> unzip(F, O). %% put the central directory, at the end of the zip archive put_central_dir(LHS, Pos, Out0, - #zip_opts{output = Output, comment = Comment}) -> - {Out1, Sz} = put_cd_files_loop(LHS, Output, Out0, 0), + #zip_opts{output = Output, comment = Comment, extra = ExtraOpts}) -> + {Out1, Sz} = put_cd_files_loop(LHS, Output, ExtraOpts, Out0, 0), put_eocd(length(LHS), Pos, Sz, Comment, Output, Out1). -put_cd_files_loop([], _Output, Out, Sz) -> +put_cd_files_loop([], _Output, _ExtraOpts, Out, Sz) -> {Out, Sz}; -put_cd_files_loop([{LH, Name, Pos} | LHRest], Output, Out0, Sz0) -> - CDFH = cd_file_header_from_lh_and_pos(LH, Pos), +put_cd_files_loop([{LH, Name, Pos} | LHRest], Output, ExtraOpts, Out0, Sz0) -> + Extra = cd_file_header_extra_from_lh_and_pos(LH, Pos, ExtraOpts), + CDFH = cd_file_header_from_lh_pos_and_extra(LH, Pos, Extra), BCDFH = cd_file_header_to_bin(CDFH), - B = [<>, BCDFH, Name], + B = [<>, BCDFH, Name, Extra], Out1 = Output({write, B}, Out0), Sz1 = Sz0 + ?CENTRAL_FILE_HEADER_SZ + - LH#local_file_header.file_name_length, - put_cd_files_loop(LHRest, Output, Out1, Sz1). + CDFH#cd_file_header.file_name_length + CDFH#cd_file_header.extra_field_length, + put_cd_files_loop(LHRest, Output, ExtraOpts, Out1, Sz1). + +cd_file_header_extra_from_lh_and_pos( + #local_file_header{ comp_size = CompSize, + uncomp_size = UnCompSize, + info = FI }, Pos, ExtraOpts) -> + encode_extra(UnCompSize, CompSize, Pos, + FI#file_info{ atime = undefined }, ExtraOpts). %% put end marker of central directory, the last record in the archive -put_eocd(N, Pos, Sz, Comment, Output, Out0) -> - %% BComment = list_to_binary(Comment), - CommentSz = length(Comment), % size(BComment), +put_eocd(N, Pos, Sz, Comment, Output, Out0) when + Pos < ?MAX_INT32, N < ?MAX_INT16, Sz < ?MAX_INT32 -> + CommentSz = length(Comment), EOCD = #eocd{disk_num = 0, start_disk_num = 0, entries_on_disk = N, @@ -1089,8 +1199,37 @@ put_eocd(N, Pos, Sz, Comment, Output, Out0) -> offset = Pos, zip_comment_length = CommentSz}, BEOCD = eocd_to_bin(EOCD), - B = [<>, BEOCD, Comment], % BComment], - Output({write, B}, Out0). + B = [<>, BEOCD, Comment], + Output({write, B}, Out0); +put_eocd(N, Pos, Sz, Comment, Output, Out0) -> + %% Zip64 eocd + EOCD64 = #eocd{os_made_by = ?OS_MADE_BY_UNIX, + version_made_by = ?VERSION_MADE_BY, + extract_version = ?VERSION_NEEDED_ZIP64, + disk_num = 0, + start_disk_num = 0, + entries_on_disk = N, + entries = N, + size = Sz, + offset = Pos, + extra = <<>> }, + BEOCD64 = eocd64_to_bin(EOCD64), + B = [<>, BEOCD64], + Out1 = Output({write, B}, Out0), + Out2 = Output({write, <> %% Total disks + }, Out1), + CommentSz = length(Comment), + EOCD = #eocd{disk_num = 0, + start_disk_num = 0, + entries_on_disk = min(N,?MAX_INT16), + entries = min(N,?MAX_INT16), + size = min(Sz,?MAX_INT32), + offset = min(Pos, ?MAX_INT32), + zip_comment_length = CommentSz}, + Output({write, [<>, eocd_to_bin(EOCD), Comment]}, Out2). get_filename({Name, _}, Type) -> get_filename(Name, Type); @@ -1125,13 +1264,16 @@ get_comp_method(F, _, #zip_opts{compress = Compress, uncompress = Uncompress}, _ end. put_z_files([], _Z, Out, Pos, _Opts, Acc) -> - {Out, lists:reverse(Acc, []), Pos}; + {Out, lists:reverse(Acc), Pos}; put_z_files([F | Rest], Z, Out0, Pos0, #zip_opts{input = Input, output = Output, open_opts = OpO, - feedback = FB, cwd = CWD} = Opts, Acc) -> + feedback = FB, cwd = CWD, extra = ExtraOpts} = Opts, Acc) -> + + %% {Pos0, _} = Output({position, cur, 0}, Out0), %% Assert correct Pos0 + In0 = [], F1 = add_cwd(CWD, F), - FileInfo = Input({file_info, F1}, In0), + FileInfo = Input({file_info, F1, [{time, posix}]}, In0), Type = FileInfo#file_info.type, UncompSize = case Type of @@ -1142,38 +1284,108 @@ put_z_files([F | Rest], Z, Out0, Pos0, %% UTF-8 encode characters in the interval from 127 to 255. {FileName, GPFlag} = encode_string(FileName0), CompMethod = get_comp_method(FileName, UncompSize, Opts, Type), - LH = local_file_header_from_info_method_name(FileInfo, UncompSize, CompMethod, FileName, GPFlag), + + %% Add any extra data needed and patch + Extra = encode_extra(UncompSize, FileInfo, ExtraOpts), + + LH = local_file_header_from_info_method_name(FileInfo, UncompSize, CompMethod, + FileName, GPFlag, Extra), BLH = local_file_header_to_bin(LH), B = [<>, BLH], Out1 = Output({write, B}, Out0), Out2 = Output({write, FileName}, Out1), - {Out3, CompSize, CRC} = put_z_file(CompMethod, UncompSize, Out2, F1, + + %% Start of extra data + Pos1 = Pos0 + ?LOCAL_FILE_HEADER_SZ + LH#local_file_header.file_name_length, + + Out3 = Output({write, Extra}, Out2), + + {Out4, CompSize, CRC} = put_z_file(CompMethod, UncompSize, Out3, F1, 0, Input, Output, OpO, Z, Type), + + Pos2 = Pos1 + LH#local_file_header.extra_field_length + CompSize, FB(FileName0), - Patch = <>, - Out4 = Output({pwrite, Pos0 + ?LOCAL_FILE_HEADER_CRC32_OFFSET, Patch}, Out3), - Out5 = Output({seek, eof, 0}, Out4), - Pos1 = Pos0 + ?LOCAL_FILE_HEADER_SZ + LH#local_file_header.file_name_length, - Pos2 = Pos1 + CompSize, - LH2 = LH#local_file_header{comp_size = CompSize, crc32 = CRC}, + + %% Patch the CRC + Patch = <>, + Out5 = Output({pwrite, Pos0 + ?LOCAL_FILE_HEADER_CRC32_OFFSET, Patch}, Out4), + + Out6 = + %% If UncompSize > 4GB we always put the CompSize in the extra field + if UncompSize >= ?MAX_INT32 -> + %% 4 bytes for extra header + size and 8 bytes for UnComp:64 + Output({pwrite, Pos1 + 2 + 2 + 8, <>}, Out5); + true -> + %% Patch comp size if not zip64 + Output({pwrite, Pos0 + ?LOCAL_FILE_HEADER_CRC32_OFFSET + 4, <>}, Out5) + end, + + Out7 = Output({seek, eof, 0}, Out6), + + %% {Pos2, _} = Output({position, cur, 0}, Out7), %% Assert correct Pos2 + + LH2 = LH#local_file_header{uncomp_size = UncompSize, comp_size = CompSize, crc32 = CRC}, ThisAcc = [{LH2, FileName, Pos0}], - {Out6, SubAcc, Pos3} = + {Out8, SubAcc, Pos3} = case Type of regular -> - {Out5, ThisAcc, Pos2}; + {Out7, ThisAcc, Pos2}; directory -> Files = Input({list_dir, F1}, []), RevFiles = reverse_join_files(F, Files, []), - put_z_files(RevFiles, Z, Out5, Pos2, Opts, ThisAcc) + put_z_files(RevFiles, Z, Out7, Pos2, Opts, ThisAcc) end, Acc2 = lists:reverse(SubAcc) ++ Acc, - put_z_files(Rest, Z, Out6, Pos3, Opts, Acc2). + put_z_files(Rest, Z, Out8, Pos3, Opts, Acc2). reverse_join_files(Dir, [File | Files], Acc) -> reverse_join_files(Dir, Files, [filename:join([Dir, File]) | Acc]); reverse_join_files(_Dir, [], Acc) -> Acc. +encode_extra(UnCompSize, FileInfo, ExtraOpts) -> + encode_extra(UnCompSize, 0, 0, FileInfo, ExtraOpts). +encode_extra(UnCompSize, CompSize, Pos, FileInfo, ExtraOpts) -> + %% zip64 needs to be first so that we can patch the CompSize + [encode_extra_zip64(UnCompSize, CompSize, Pos), + [encode_extra_extended_timestamp(FileInfo) || lists:member(extended_timestamp, ExtraOpts)], + [encode_extra_uid_gid(FileInfo) || lists:member(uid_gid, ExtraOpts)]]. + +encode_extra_header(Header, Value) -> + [<>, Value]. + +encode_extra_zip64(UncompSize, CompSize, Pos) when UncompSize >= ?MAX_INT32 -> + encode_extra_header(?X0001_ZIP64, [<>, + [<> || Pos >= ?MAX_INT32]]); +encode_extra_zip64(_UncompSize, _CompSize, Pos) when Pos >= ?MAX_INT32 -> + encode_extra_header(?X0001_ZIP64, <>); +encode_extra_zip64(_, _, _) -> + <<>>. + +encode_extra_extended_timestamp(FI) -> + {Mbit, MSystemTime} = + case datetime_to_system_time(FI#file_info.mtime) of + undefined -> {0, <<>>}; + Mtime -> + {1, <<(datetime_to_system_time(Mtime)):32/little>>} + end, + + {Abit, ASystemTime} = + case datetime_to_system_time(FI#file_info.atime) of + undefined -> {0, <<>>}; + Atime -> + {2, <<(datetime_to_system_time(Atime)):32/little>>} + end, + + encode_extra_header(?X5455_EXTENDED_TIMESTAMP, [Abit bor Mbit, MSystemTime, ASystemTime]). + +encode_extra_uid_gid(#file_info{ uid = Uid, gid = Gid }) + when Uid =/= undefined, Gid =/= undefined -> + encode_extra_header(?X7875_UNIX3,<<1, 4, Uid:32/little, + 4, Gid:32/little>>); +encode_extra_uid_gid(_) -> + <<>>. + %% flag for zlib -define(MAX_WBITS, 15). @@ -1185,43 +1397,44 @@ put_z_file(_Method, 0, Out, _F, Pos, _Input, _Output, _OpO, _Z, regular) -> put_z_file(?STORED, UncompSize, Out0, F, Pos0, Input, Output, OpO, _Z, regular) -> In0 = [], In1 = Input({open, F, OpO -- [write]}, In0), - {Data, In2} = Input({read, UncompSize}, In1), - Out1 = Output({write, Data}, Out0), - CRC = erlang:crc32(Data), + CRC0 = 0, + {Out1, Pos1, In2, CRC} = + put_z_data_loop(UncompSize, In1, Out0, Pos0, Input, Output, CRC0, fun(Data, _Sync) -> Data end), Input(close, In2), - {Out1, Pos0+erlang:iolist_size(Data), CRC}; + {Out1, Pos1, CRC}; put_z_file(?DEFLATED, UncompSize, Out0, F, Pos0, Input, Output, OpO, Z, regular) -> In0 = [], In1 = Input({open, F, OpO -- [write]}, In0), ok = zlib:deflateInit(Z, default, deflated, -?MAX_WBITS, 8, default), CRC0 = 0, - {Out1, Pos1, CRC} = - put_z_data_loop(UncompSize, In1, Out0, Pos0, Input, Output, CRC0, Z), + {Out1, Pos1, In2, CRC} = + put_z_data_loop(UncompSize, In1, Out0, Pos0, Input, Output, CRC0, + fun(Data, Sync) -> zlib:deflate(Z, Data, Sync) end), ok = zlib:deflateEnd(Z), - Input(close, In1), + Input(close, In2), {Out1, Pos1, CRC}. -%% zlib is finished with the last chunk compressed -get_sync(N, N) -> finish; -get_sync(_, _) -> full. - %% compress data -put_z_data_loop(0, _In, Out, Pos, _Input, _Output, CRC0, _Z) -> - {Out, Pos, CRC0}; -put_z_data_loop(UncompSize, In0, Out0, Pos0, Input, Output, CRC0, Z) -> +put_z_data_loop(0, In, Out, Pos, _Input, _Output, CRC0, _DeflateFun) -> + {Out, Pos, In, CRC0}; +put_z_data_loop(UncompSize, In0, Out0, Pos0, Input, Output, CRC0, DeflateFun) -> N = erlang:min(?WRITE_BLOCK_SIZE, UncompSize), case Input({read, N}, In0) of {eof, _In1} -> {Out0, Pos0}; {Uncompressed, In1} -> CRC1 = erlang:crc32(CRC0, Uncompressed), - Compressed = zlib:deflate(Z, Uncompressed, get_sync(N, UncompSize)), + Compressed = DeflateFun(Uncompressed, get_sync(N, UncompSize)), Sz = erlang:iolist_size(Compressed), Out1 = Output({write, Compressed}, Out0), put_z_data_loop(UncompSize - N, In1, Out1, Pos0 + Sz, - Input, Output, CRC1, Z) + Input, Output, CRC1, DeflateFun) end. +%% zlib is finished with the last chunk compressed +get_sync(N, N) -> finish; +get_sync(_, _) -> full. + %% raw iterators over central dir %% name only @@ -1245,11 +1458,9 @@ print_file_name(FileName) -> %% for printing directory (tt/1) raw_long_print_info_etc(#cd_file_header{comp_size = CompSize, - uncomp_size = UncompSize, - last_mod_date = LMDate, - last_mod_time = LMTime}, + uncomp_size = UncompSize} = CDFH, FileName, FileComment, _BExtraField, Acc) -> - MTime = dos_date_time_to_datetime(LMDate, LMTime), + MTime = file_header_mtime_to_datetime(CDFH), print_header(CompSize, MTime, UncompSize, FileName, FileComment), Acc; raw_long_print_info_etc(EOCD, _, Comment, _, Acc) when is_record(EOCD, eocd) -> @@ -1257,7 +1468,7 @@ raw_long_print_info_etc(EOCD, _, Comment, _, Acc) when is_record(EOCD, eocd) -> Acc. print_header(CompSize, MTime, UncompSize, FileName, FileComment) -> - io:format("~8w ~s ~8w ~2w% ~ts ~ts\n", + io:format("~10w ~s ~10w ~3w% ~ts ~ts\n", [CompSize, time_to_string(MTime), UncompSize, get_percent(CompSize, UncompSize), FileName, FileComment]). @@ -1291,8 +1502,8 @@ month(11) -> "Nov"; month(12) -> "Dec". %% zip header functions -cd_file_header_from_lh_and_pos(LH, Pos) -> - #local_file_header{version_needed = VersionNeeded, +cd_file_header_from_lh_pos_and_extra(LH, Pos, Extra) -> + #local_file_header{version_needed = LHVersionNeeded, gp_flag = GPFlag, comp_method = CompMethod, last_mod_time = LastModTime, @@ -1301,31 +1512,49 @@ cd_file_header_from_lh_and_pos(LH, Pos) -> comp_size = CompSize, uncomp_size = UncompSize, file_name_length = FileNameLength, - extra_field_length = ExtraFieldLength, - type = Type} = LH, - #cd_file_header{version_made_by = ?VERSION_MADE_BY, + extra_field_length = _ExtraFieldLength, + info = #file_info{ type = Type, mode = Mode }} = LH, + + VersionNeeded = + if Pos >= ?MAX_INT32 -> + ?VERSION_NEEDED_ZIP64; + true -> + LHVersionNeeded + end, + + #cd_file_header{os_made_by = ?OS_MADE_BY_UNIX, + version_made_by = ?VERSION_MADE_BY, version_needed = VersionNeeded, gp_flag = GPFlag, comp_method = CompMethod, last_mod_time = LastModTime, last_mod_date = LastModDate, crc32 = CRC32, - comp_size = CompSize, - uncomp_size = UncompSize, + comp_size = + if UncompSize >= ?MAX_INT32 -> + ?MAX_INT32; + true -> + CompSize + end, + uncomp_size = min(UncompSize, ?MAX_INT32), file_name_length = FileNameLength, - extra_field_length = ExtraFieldLength, + extra_field_length = iolist_size(Extra), file_comment_length = 0, % FileCommentLength, disk_num_start = 0, % DiskNumStart, internal_attr = 0, % InternalAttr, external_attr = % ExternalAttr - case Type of - regular -> ?CENTRAL_REGULAR_FILE_EXT_ATTRIBUTES; - directory -> ?CENTRAL_DIRECTORY_FILE_EXT_ATTRIBUTES - end, - local_header_offset = Pos}. + if Mode =:= undefined -> + case Type of + regular -> ?DEFAULT_REGULAR_FILE_MODE; + directory -> ?DEFAULT_DIRECTORY_FILE_MODE + end; + true -> Mode band 8#777 + end bsl 16, + local_header_offset = min(Pos, ?MAX_INT32)}. cd_file_header_to_bin( - #cd_file_header{version_made_by = VersionMadeBy, + #cd_file_header{os_made_by = OsMadeBy, + version_made_by = VersionMadeBy, version_needed = VersionNeeded, gp_flag = GPFlag, comp_method = CompMethod, @@ -1341,7 +1570,7 @@ cd_file_header_to_bin( internal_attr = InternalAttr, external_attr = ExternalAttr, local_header_offset = LocalHeaderOffset}) -> - < <>. + StartDiskNum:16/little, + EntriesOnDisk:16/little, + Entries:16/little, + Size:32/little, + Offset:32/little, + ZipCommentLength:16/little>>. + +eocd64_to_bin( + #eocd{os_made_by = OsMadeBy, + version_made_by = VersionMadeBy, + extract_version = ExtractVersion, + disk_num = DiskNum, + start_disk_num = StartDiskNum, + entries_on_disk = EntriesOnDisk, + entries = Entries, + size = Size, + offset = Offset, + extra = Extra}) -> + <>. %% put together a local file header -local_file_header_from_info_method_name(#file_info{mtime = MTime, type = Type}, - UncompSize, - CompMethod, Name, GPFlag) -> - {ModDate, ModTime} = dos_date_time_from_datetime(MTime), - #local_file_header{version_needed = 20, +local_file_header_from_info_method_name(#file_info{mtime = MTime, + atime = ATime, + uid = Uid, + gid = Gid} = Info, + UncompSize, CompMethod, + Name, GPFlag, Extra ) -> + CreationTime = os:system_time(second), + {ModDate, ModTime} = dos_date_time_from_datetime( + calendar:system_time_to_local_time( + datetime_to_system_time(MTime), second)), + VersionNeeded = if UncompSize >= ?MAX_INT32 -> + ?VERSION_NEEDED_ZIP64; + true -> + case CompMethod of + ?STORED -> ?VERSION_NEEDED_STORE; + ?DEFLATED -> ?VERSION_NEEDED_DEFLATE + end + end, + #local_file_header{version_needed = VersionNeeded, gp_flag = GPFlag, comp_method = CompMethod, last_mod_time = ModTime, last_mod_date = ModDate, + mtime = datetime_to_system_time(MTime), + atime = datetime_to_system_time(ATime), + ctime = datetime_to_system_time(CreationTime), + uid = Uid, + gid = Gid, crc32 = -1, - comp_size = -1, - uncomp_size = UncompSize, + comp_size = ?MAX_INT32, + uncomp_size = min(UncompSize, ?MAX_INT32), file_name_length = length(Name), - extra_field_length = 0, - type = Type}. + extra_field_length = iolist_size(Extra), + info = Info}. + +%% +%% Functions used by zip server to work with archives. +%% +openzip_open(F, Options) -> + case ?CATCH(do_openzip_open(F, Options)) of + {ok, OpenZip} -> + {ok, OpenZip}; + Error -> + {error, Error} + end. + +do_openzip_open(F, Options) -> + Opts = get_openzip_options(Options), + #openzip_opts{output = Output, open_opts = OpO, cwd = CWD, + skip_dirs = SkipDirs, extra = ExtraOpts} = Opts, + Input = get_input(F), + In0 = Input({open, F, OpO -- [write]}, []), + {[#zip_comment{comment = C} | Files], In1} = + get_central_dir(In0, fun raw_file_info_etc/5, Input, ExtraOpts), + Z = zlib:open(), + {ok, #openzip{zip_comment = C, + files = Files, + in = In1, + input = Input, + output = Output, + zlib = Z, + cwd = CWD, + skip_dirs = SkipDirs, + extra = ExtraOpts}}. + +%% retrieve all files from an open archive +openzip_get(OpenZip) -> + case ?CATCH(do_openzip_get(OpenZip)) of + {ok, Result} -> {ok, Result}; + Error -> {error, Error} + end. + +do_openzip_get(#openzip{files = Files, in = In0, input = Input, + output = Output, zlib = Z, cwd = CWD, skip_dirs = SkipDirs, + extra = ExtraOpts}) -> + ZipOpts = #unzip_opts{output = Output, input = Input, + file_filter = fun all/1, open_opts = [], + feedback = fun silent/1, cwd = CWD, skip_dirs = SkipDirs, + extra = ExtraOpts}, + R = get_z_files(Files, Z, In0, ZipOpts, []), + {ok, R}; +do_openzip_get(_) -> + throw(einval). +%% retrieve the crc32 checksum from an open archive +openzip_get_crc32(FileName, #openzip{files = Files}) -> + case file_name_search(FileName, Files) of + {_,#zip_file_extra{crc32=CRC}} -> {ok, CRC}; + _ -> throw(file_not_found) + end. + +%% retrieve a file from an open archive +openzip_get(FileName, OpenZip) -> + case ?CATCH(do_openzip_get(FileName, OpenZip)) of + {ok, Result} -> {ok, Result}; + Error -> {error, Error} + end. + +do_openzip_get(F, #openzip{files = Files, in = In0, input = Input, + output = Output, zlib = Z, cwd = CWD, extra = ExtraOpts}) -> + %%case lists:keysearch(F, #zip_file.name, Files) of + case file_name_search(F, Files) of + {#zip_file{offset = Offset},_}=ZFile -> + In1 = Input({seek, bof, Offset}, In0), + case get_z_file(In1, Z, Input, Output, [], fun silent/1, + CWD, ZFile, fun all/1, false, ExtraOpts) of + {file, R, _In2} -> {ok, R}; + _ -> throw(file_not_found) + end; + _ -> throw(file_not_found) + end; +do_openzip_get(_, _) -> + throw(einval). + +file_name_search(Name,Files) -> + Fun = fun({ZipFile,_}) -> + not string:equal(ZipFile#zip_file.name, Name, + _IgnoreCase = false, _Norm = nfc) + end, + case lists:dropwhile(Fun, Files) of + [ZFile|_] -> ZFile; + [] -> false + end. + +%% get file list from open archive +openzip_list_dir(#openzip{zip_comment = Comment, + files = Files}) -> + {ZipFiles,_Extras} = lists:unzip(Files), + {ok, [#zip_comment{comment = Comment} | ZipFiles]}; +openzip_list_dir(_) -> + {error, einval}. + +%% close an open archive +openzip_close(#openzip{in = In0, input = Input, zlib = Z}) -> + Input(close, In0), + zlib:close(Z); +openzip_close(_) -> + {error, einval}. + +%% small, simple, stupid zip-archive server server_init(Parent) -> %% we want to know if our parent dies process_flag(trap_exit, true), server_loop(Parent, not_open). -%% small, simple, stupid zip-archive server server_loop(Parent, OpenZip) -> receive {From, {open, Archive, Options}} -> @@ -1442,9 +1815,6 @@ server_loop(Parent, OpenZip) -> {From, list_dir} -> From ! {self(), openzip_list_dir(OpenZip)}, server_loop(Parent, OpenZip); - {From, {list_dir, Opts}} -> - From ! {self(), openzip_list_dir(OpenZip, Opts)}, - server_loop(Parent, OpenZip); {From, get_state} -> From ! {self(), OpenZip}, server_loop(Parent, OpenZip); @@ -1455,118 +1825,6 @@ server_loop(Parent, OpenZip) -> {error, bad_msg} end. --doc(#{equiv => zip_open/2}). --spec(zip_open(Archive) -> {ok, ZipHandle} | {error, Reason} when - Archive :: file:name() | binary(), - ZipHandle :: handle(), - Reason :: term()). - -zip_open(Archive) -> zip_open(Archive, []). - --doc """ -Opens a zip archive, and reads and saves its directory. This means that later -reading files from the archive is faster than unzipping files one at a time with -[`unzip/1,2`](`unzip/1`). - -The archive must be closed with `zip_close/1`. - -The `ZipHandle` is closed if the process that originally opened the archive -dies. -""". --spec(zip_open(Archive, Options) -> {ok, ZipHandle} | {error, Reason} when - Archive :: file:name() | binary(), - ZipHandle :: handle(), - Options :: [Option], - Option :: cooked | memory | {cwd, CWD :: file:filename()}, - Reason :: term()). - -zip_open(Archive, Options) -> - Self = self(), - Pid = spawn_link(fun() -> server_init(Self) end), - request(Self, Pid, {open, Archive, Options}). - --doc(#{equiv => zip_get/2}). --spec(zip_get(ZipHandle) -> {ok, [Result]} | {error, Reason} when - ZipHandle :: handle(), - Result :: file:name() | {file:name(), binary()}, - Reason :: term()). - -zip_get(Pid) when is_pid(Pid) -> - request(self(), Pid, get). - --doc """ -Closes a zip archive, previously opened with [`zip_open/1,2`](`zip_open/1`). All -resources are closed, and the handle is not to be used after closing. -""". --spec(zip_close(ZipHandle) -> ok | {error, einval} when - ZipHandle :: handle()). - -zip_close(Pid) when is_pid(Pid) -> - request(self(), Pid, close). - --doc """ -Extracts one or all files from an open archive. - -The files are unzipped to memory or to file, depending on the options specified -to function [`zip_open/1,2`](`zip_open/1`) when opening the archive. -""". --spec(zip_get(FileName, ZipHandle) -> {ok, Result} | {error, Reason} when - FileName :: file:name(), - ZipHandle :: handle(), - Result :: file:name() | {file:name(), binary()}, - Reason :: term()). - -zip_get(FileName, Pid) when is_pid(Pid) -> - request(self(), Pid, {get, FileName}). - --doc "Extracts one crc32 checksum from an open archive.". --doc(#{since => <<"OTP 26.0">>}). --spec(zip_get_crc32(FileName, ZipHandle) -> {ok, CRC} | {error, Reason} when - FileName :: file:name(), - ZipHandle :: handle(), - CRC :: non_neg_integer(), - Reason :: term()). - -zip_get_crc32(FileName, Pid) when is_pid(Pid) -> - request(self(), Pid, {get_crc32, FileName}). - --doc """ -Returns the file list of an open zip archive. The first returned element is the -zip archive comment. -""". --spec(zip_list_dir(ZipHandle) -> {ok, Result} | {error, Reason} when - Result :: [zip_comment() | zip_file()], - ZipHandle :: handle(), - Reason :: term()). - -zip_list_dir(Pid) when is_pid(Pid) -> - request(self(), Pid, list_dir). - --doc false. -zip_list_dir(Pid, Opts) when is_pid(Pid) -> - request(self(), Pid, {list_dir, Opts}). - --doc false. -zip_get_state(Pid) when is_pid(Pid) -> - request(self(), Pid, get_state). - -request(Self, Pid, Req) -> - Pid ! {Self, Req}, - receive - {Pid, R} -> R - end. - --doc false. -zip_t(Pid) when is_pid(Pid) -> - Openzip = request(self(), Pid, get_state), - openzip_t(Openzip). - --doc false. -zip_tt(Pid) when is_pid(Pid) -> - Openzip = request(self(), Pid, get_state), - openzip_tt(Openzip). - --doc false. openzip_tt(#openzip{zip_comment = ZipComment, files = Files}) -> print_comment(ZipComment), lists_foreach(fun({#zip_file{comp_size = CompSize, @@ -1579,7 +1837,6 @@ openzip_tt(#openzip{zip_comment = ZipComment, files = Files}) -> end, Files), ok. --doc false. openzip_t(#openzip{zip_comment = ZipComment, files = Files}) -> print_comment(ZipComment), lists_foreach(fun({#zip_file{name = FileName},_}) -> @@ -1602,27 +1859,40 @@ get_openzip_opt([memory | Rest], Opts) -> get_openzip_opt(Rest, Opts#openzip_opts{output = fun binary_io/2}); get_openzip_opt([{cwd, CWD} | Rest], Opts) -> get_openzip_opt(Rest, Opts#openzip_opts{cwd = CWD}); +get_openzip_opt([skip_directories | Rest], Opts) -> + get_openzip_opt(Rest, Opts#openzip_opts{skip_dirs = true}); +get_openzip_opt([{extra, What} = O| Rest], Opts) when is_list(What) -> + case lists:all(fun(E) -> lists:member(E, ?EXTRA_OPTIONS) end, What) of + true -> + get_zip_opt(Rest, Opts#openzip_opts{extra = What}); + false -> + throw({bad_option, O}) + end; get_openzip_opt([Unknown | _Rest], _Opts) -> throw({bad_option, Unknown}). %% get the central directory from the archive -get_central_dir(In0, RawIterator, Input) -> - {B, In1} = get_end_of_central_dir(In0, ?END_OF_CENTRAL_DIR_SZ, Input), - {EOCD, BComment} = eocd_and_comment_from_bin(B), - In2 = Input({seek, bof, EOCD#eocd.offset}, In1), +get_central_dir(In0, RawIterator, Input, ExtraOpts) -> + {Size, In1} = Input({position, eof, 0}, In0), + {{EOCD, BComment}, In2} = + get_end_of_central_dir( + In1, ?END_OF_CENTRAL_DIR_SZ, + min(16#ffff + ?END_OF_CENTRAL_DIR_SZ + ?END_OF_CENTRAL_DIR_64_LOCATOR_SZ, Size), + Input), + EOCD#eocd.disk_num == 0 orelse throw(multiple_disks_not_supported), + In3 = Input({seek, bof, EOCD#eocd.offset}, In2), N = EOCD#eocd.entries, Acc0 = [], %% There is no encoding flag for the archive comment. Comment = heuristic_to_string(BComment), Out0 = RawIterator(EOCD, "", Comment, <<>>, Acc0), - get_cd_loop(N, In2, RawIterator, Input, Out0). + get_cd_loop(N, In3, RawIterator, Input, ExtraOpts, Out0). -get_cd_loop(0, In, _RawIterator, _Input, Acc) -> +get_cd_loop(0, In, _RawIterator, _Input, _ExtraOpts, Acc) -> {lists:reverse(Acc), In}; -get_cd_loop(N, In0, RawIterator, Input, Acc0) -> - {B, In1} = Input({read, ?CENTRAL_FILE_HEADER_SZ}, In0), - BCD = case B of - <> -> XBCD; +get_cd_loop(N, In0, RawIterator, Input, ExtraOpts, Acc0) -> + {BCD, In1} = case Input({read, ?CENTRAL_FILE_HEADER_SZ}, In0) of + {<>, In} -> {XBCD, In}; _ -> throw(bad_central_directory) end, CD = cd_file_header_from_bin(BCD), @@ -1632,17 +1902,85 @@ get_cd_loop(N, In0, RawIterator, Input, Acc0) -> ToRead = FileNameLen + ExtraLen + CommentLen, GPFlag = CD#cd_file_header.gp_flag, {B2, In2} = Input({read, ToRead}, In1), - {FileName, Comment, BExtra} = - get_name_extra_comment(B2, FileNameLen, ExtraLen, CommentLen, GPFlag), - Acc1 = RawIterator(CD, FileName, Comment, BExtra, Acc0), - get_cd_loop(N-1, In2, RawIterator, Input, Acc1). - -get_name_extra_comment(B, FileNameLen, ExtraLen, CommentLen, GPFlag) -> + {FileName, BExtra, Comment} = + get_filename_extra_comment(B2, FileNameLen, ExtraLen, CommentLen, GPFlag), + + ExtraCD = + update_extra_fields(CD, BExtra, ExtraOpts), + + Acc1 = RawIterator(ExtraCD, FileName, Comment, BExtra, Acc0), + get_cd_loop(N-1, In2, RawIterator, Input, ExtraOpts, Acc1). + +%% We parse and apply some extra fields defined by Info-ZIP. For details see: +%% proginfo/extrafld.txt in unzip. https://fossies.org/linux/unzip/proginfo/extrafld.txt +-spec update_extra_fields(#local_file_header{} | #cd_file_header{}, binary(), extra()) -> + #local_file_header{} | #cd_file_header{}. +update_extra_fields(FileHeader, BExtra, ExtraOpts) -> + %% We depend on some fields in the records to be at the same position + #local_file_header.comp_size = #cd_file_header.comp_size, + #local_file_header.uncomp_size = #cd_file_header.uncomp_size, + #local_file_header.mtime = #cd_file_header.mtime, + #local_file_header.atime = #cd_file_header.atime, + #local_file_header.ctime = #cd_file_header.ctime, + #local_file_header.uid = #cd_file_header.uid, + #local_file_header.gid = #cd_file_header.gid, + + ExtendedTimestamp = lists:member(extended_timestamp, ExtraOpts), + UidGid = lists:member(uid_gid, ExtraOpts), + + lists:foldl( + fun({?X0001_ZIP64, Data}, Acc) -> + update_zip64(Acc, Data); + ({?X5455_EXTENDED_TIMESTAMP, Data}, Acc) when ExtendedTimestamp -> + update_extended_timestamp(Acc, Data); + ({?X7875_UNIX3, Data}, Acc) when UidGid -> + update_unix3(Acc, Data); + (_, Acc) -> + Acc + end, FileHeader, parse_extra(BExtra)). + +update_zip64(FH, <>) when element(#cd_file_header.uncomp_size, FH) == ?MAX_INT32 -> + update_zip64(setelement(#cd_file_header.uncomp_size, FH, UnComp), Rest); +update_zip64(FH, <>) when element(#cd_file_header.comp_size, FH) == ?MAX_INT32 -> + update_zip64(setelement(#cd_file_header.comp_size, FH, Comp), Rest); +update_zip64(FH, <>) when element(#cd_file_header.local_header_offset, FH) == ?MAX_INT32 -> + update_zip64(setelement(#cd_file_header.local_header_offset, FH, LocalHeaderOffset), Rest); +update_zip64(FH, <>) when element(#cd_file_header.disk_num_start, FH) == ?MAX_INT32 -> + update_zip64(setelement(#cd_file_header.disk_num_start, FH, DiskNumStart), Rest); +update_zip64(FH, <<>>) -> + FH. + +update_extended_timestamp(FileHeader, <<_:5,HasCre:1,HasAcc:1,HasMod:1,Data/binary>> ) -> + {FHMod, DataMod} = update_extended_timestamp(FileHeader, HasMod, Data, #cd_file_header.mtime), + {FHAcc, DataAcc} = update_extended_timestamp(FHMod, HasAcc, DataMod, #cd_file_header.atime), + {FHCre, <<>>} = update_extended_timestamp(FHAcc, HasCre, DataAcc, #cd_file_header.ctime), + FHCre. + +update_extended_timestamp(FH, 1, <>, Field) -> + {setelement(Field, FH, Value), Rest}; +%% It seems like sometimes bits are set, but the data does not include any payload +update_extended_timestamp(FH, 1, <<>>, _Field) -> + {FH, <<>>}; +update_extended_timestamp(FH, 0, Data, _Field) -> + {FH, Data}. + +update_unix3(FH, <<1, UidSize, Uid:(UidSize*8)/little, GidSize, Gid:(GidSize*8)/little>>) -> + setelement(#cd_file_header.gid, setelement(#cd_file_header.uid, FH, Uid), Gid); +update_unix3(FH, <>) when Vsn =/= 1 -> + FH. + +parse_extra(<>) -> + [{Tag, Data} | parse_extra(Rest)]; +parse_extra(<<>>) -> + []. + +get_filename_extra_comment(B, FileNameLen, ExtraLen, CommentLen, GPFlag) -> try <> = B, {binary_to_chars(BFileName, GPFlag), + BExtra, %% Appendix D says: "If general purpose bit 11 is unset, the %% file name and comment should conform to the original ZIP %% character encoding." However, it seems that at least Linux @@ -1651,8 +1989,7 @@ get_name_extra_comment(B, FileNameLen, ExtraLen, CommentLen, GPFlag) -> %% binary_to_chars/1 could (should?) be called (it can fail), %% but the choice is to employ heuristics in this case too %% (it does not fail). - heuristic_to_string(BComment), - BExtra} + heuristic_to_string(BComment)} catch _:_ -> throw(bad_central_directory) @@ -1661,27 +1998,154 @@ get_name_extra_comment(B, FileNameLen, ExtraLen, CommentLen, GPFlag) -> %% get end record, containing the offset to the central directory %% the end record is always at the end of the file BUT alas it is %% of variable size (yes that's dumb!) -get_end_of_central_dir(_In, Sz, _Input) when Sz > 16#ffff -> - throw(bad_eocd); -get_end_of_central_dir(In0, Sz, Input) -> +get_end_of_central_dir(In0, Sz, MaxCentralDirSize, Input) -> In1 = Input({seek, eof, -Sz}, In0), {B, In2} = Input({read, Sz}, In1), - case find_eocd_header(B) of + case find_eocd(B) of + none when Sz =:= MaxCentralDirSize -> + throw(bad_eocd); none -> - get_end_of_central_dir(In2, Sz+Sz, Input); + get_end_of_central_dir(In2, min(Sz+Sz, MaxCentralDirSize), MaxCentralDirSize, Input); + {EOCD64Location, EOCD, Comment} -> + case find_eocd64(In2, EOCD64Location, EOCD, Comment, Input) of + none -> + throw(bad_eocd64); + {EOCD64, In3} -> + {EOCD64, In3} + end; Header -> {Header, In2} end. %% find the end record by matching for it -find_eocd_header(<>) -> - Rest; -find_eocd_header(<<_:8, Rest/binary>>) - when byte_size(Rest) > ?END_OF_CENTRAL_DIR_SZ-4 -> - find_eocd_header(Rest); -find_eocd_header(_) -> +%% The ?END_OF_CENTRAL_DIR_MAGIC could be in the comment, +%% so we need to match for the entire structure and make sure +%% the comment size consumes all of the binary. +find_eocd(<>) -> + if DiskNum =:= ?MAX_INT16; + StartDiskNum =:= ?MAX_INT16; + EntriesOnDisk =:= ?MAX_INT16, + Entries =:= ?MAX_INT16; + Size =:= ?MAX_INT32; + Offset =:= ?MAX_INT32 -> + {{EOCD64StartDiskNum, EOCD64Offset, EOCD64TotalDisk}, + #eocd{disk_num = DiskNum, + start_disk_num = StartDiskNum, + entries_on_disk = EntriesOnDisk, + entries = Entries, + size = Size, + offset = Offset, + zip_comment_length = ZipCommentLength}, + Comment}; + true -> + none + end; +find_eocd(<>) -> + if DiskNum =:= ?MAX_INT16; + StartDiskNum =:= ?MAX_INT16; + EntriesOnDisk =:= ?MAX_INT16; + Entries =:= ?MAX_INT16; + Size =:= ?MAX_INT32; + Offset =:= ?MAX_INT32 -> + %% There should be a eocd64 locator before this entry + none; + true -> + {#eocd{disk_num = DiskNum, + start_disk_num = StartDiskNum, + entries_on_disk = EntriesOnDisk, + entries = Entries, + size = Size, + offset = Offset, + zip_comment_length = ZipCommentLength}, + Comment} + end; +find_eocd(<<_:8, Rest/binary>>) when byte_size(Rest) > ?END_OF_CENTRAL_DIR_SZ-4 -> + find_eocd(Rest); +find_eocd(_) -> none. +find_eocd64(In0,{_EOCD64StartDiskNum, EOCD64Offset, _EOCD64TotalDisk}, EOCD, Comment, Input) -> + maybe + In1 = Input({seek, bof, EOCD64Offset}, In0), + + {<>, In2} + ?= Input({read, 4 + 8}, In1), + + {<>, In3} + ?= Input({read, EOCDSize}, In2), + + {{EOCD#eocd{ + eocd = EOCD, + version_made_by = VersionMadeBy, + os_made_by = os_id_to_atom(OsMadeBy), + extract_version = ExtractVersion, + disk_num = DiskNum, + start_disk_num = StartDiskNum, + entries_on_disk = EntriesOnDisk, + entries = Entries, + size = Size, + offset = Offset, + extra = parse_extra(Extra)}, Comment}, In3} + else + {eof, InEOF} -> + {eof, InEOF}; + _ -> + none + end. + + +%% Taken from APPNOTE.TXT version 6.3.10 section 4.4.2.2 +os_id_to_atom(0) -> ~"MS-DOS and OS/2"; +os_id_to_atom(1) -> ~"Amiga"; +os_id_to_atom(2) -> ~"OpenVMS"; +os_id_to_atom(3) -> ~"UNIX"; +os_id_to_atom(4) -> ~"VM/CMS"; +os_id_to_atom(5) -> ~"Atari ST"; +os_id_to_atom(6) -> ~"OS/2 H.P.F.S"; +os_id_to_atom(7) -> ~"Macintosh"; +os_id_to_atom(8) -> ~"Z-System"; +os_id_to_atom(9) -> ~"CP/M"; +os_id_to_atom(10) -> ~"Windows NTFS"; +os_id_to_atom(11) -> ~"MVS"; +os_id_to_atom(12) -> ~"VSE"; +os_id_to_atom(13) -> ~"Acorn Risc"; +os_id_to_atom(14) -> ~"VFAT"; +os_id_to_atom(15) -> ~"alternate MVS"; +os_id_to_atom(16) -> ~"BeOS"; +os_id_to_atom(17) -> ~"Tandem"; +os_id_to_atom(18) -> ~"OS/400"; +os_id_to_atom(19) -> ~"OS X (Darwin)"; +os_id_to_atom(No) -> No. + %% from a central directory record, filter and accumulate what we need %% with zip_file_extra @@ -1708,36 +2172,39 @@ raw_file_info_public(CD, FileName, FileComment, BExtraField, Acc0) -> %% make a file_info from a central directory header cd_file_header_to_file_info(FileName, - #cd_file_header{uncomp_size = UncompSize, - last_mod_time = ModTime, - last_mod_date = ModDate}, - ExtraField) -> - T = dos_date_time_to_datetime(ModDate, ModTime), + #cd_file_header{uncomp_size = UncompSize} = CDFH, + _ExtraField) -> + M = file_header_mtime_to_datetime(CDFH), + A = file_header_atime_to_datetime(CDFH), + C = file_header_ctime_to_datetime(CDFH), Type = case lists:last(FileName) of $/ -> directory; _ -> regular end, - FI = #file_info{size = UncompSize, - type = Type, - access = read_write, - atime = T, - mtime = T, - ctime = T, - mode = 8#066, - links = 1, - major_device = 0, - minor_device = 0, - inode = 0, - uid = 0, - gid = 0}, - add_extra_info(FI, ExtraField). - -%% Currently, we ignore all the extra fields. -add_extra_info(FI, _) -> - FI. - - + Mode = + if CDFH#cd_file_header.os_made_by =:= ~"UNIX" -> + (CDFH#cd_file_header.external_attr bsr 16) band 8#777; + true -> + if Type =:= directory -> + ?DEFAULT_DIRECTORY_FILE_MODE; + true -> + ?DEFAULT_REGULAR_FILE_MODE + end + end, + #file_info{size = UncompSize, + type = Type, + access = read_write, + atime = A, + mtime = M, + ctime = C, + mode = Mode, + links = 1, + major_device = 0, + minor_device = 0, + inode = 0, + uid = CDFH#cd_file_header.uid, + gid = CDFH#cd_file_header.gid}. %% get all files using file list %% (the offset list is already filtered on which file to get... isn't it?) @@ -1748,14 +2215,15 @@ get_z_files([#zip_comment{comment = _} | Rest], Z, In, Opts, Acc) -> get_z_files([{#zip_file{offset = Offset},_} = ZFile | Rest], Z, In0, #unzip_opts{input = Input, output = Output, open_opts = OpO, file_filter = Filter, feedback = FB, - cwd = CWD} = Opts, Acc0) -> + cwd = CWD, skip_dirs = SkipDirs, extra = ExtraOpts} = Opts, Acc0) -> case Filter(ZFile) of true -> In1 = Input({seek, bof, Offset}, In0), {In2, Acc1} = case get_z_file(In1, Z, Input, Output, OpO, FB, - CWD, ZFile, Filter) of - {file, GZD, Inx} -> {Inx, [GZD | Acc0]}; + CWD, ZFile, Filter, SkipDirs, ExtraOpts) of + {Type, GZD, Inx} when Type =:= file; Type =:= dir -> + {Inx, [GZD | Acc0]}; {_, Inx} -> {Inx, Acc0} end, get_z_files(Rest, Z, In2, Opts, Acc1); @@ -1765,7 +2233,7 @@ get_z_files([{#zip_file{offset = Offset},_} = ZFile | Rest], Z, In0, %% get a file from the archive, reading chunks get_z_file(In0, Z, Input, Output, OpO, FB, - CWD, {ZipFile,Extra}, Filter) -> + CWD, {ZipFile,ZipExtra}, Filter, SkipDirs, ExtraOpts) -> case Input({read, ?LOCAL_FILE_HEADER_SZ}, In0) of {eof, In1} -> {eof, In1}; @@ -1777,64 +2245,89 @@ get_z_file(In0, Z, Input, Output, OpO, FB, file_name_length = FileNameLen, extra_field_length = ExtraLen} = LH, + {BFileN, In3} = Input({read, FileNameLen + ExtraLen}, In1), + {FileName, BLHExtra} = + get_filename_extra(FileNameLen, ExtraLen, BFileN, GPFlag), + LHExtra = + update_extra_fields(LH, BLHExtra, ExtraOpts), + {CompSize,CRC32} = case GPFlag band 8 =:= 8 of true -> {ZipFile#zip_file.comp_size, - Extra#zip_file_extra.crc32}; - false -> {LH#local_file_header.comp_size, - LH#local_file_header.crc32} + ZipExtra#zip_file_extra.crc32}; + false -> {LHExtra#local_file_header.comp_size, + LHExtra#local_file_header.crc32} end, - {BFileN, In3} = Input({read, FileNameLen + ExtraLen}, In1), - {FileName, _} = - get_file_name_extra(FileNameLen, ExtraLen, BFileN, GPFlag), + ReadAndWrite = case check_valid_location(CWD, FileName) of {true,FileName1} -> true; {false,FileName1} -> - Filter({ZipFile#zip_file{name = FileName1},Extra}) + Filter({ZipFile#zip_file{name = FileName1},ZipExtra}) end, - case ReadAndWrite of + + IsDir = lists:last(FileName) =:= $/, + + case ReadAndWrite andalso not (IsDir andalso SkipDirs) of true -> - case lists:last(FileName) of - $/ -> - %% perhaps this should always be done? - Output({ensure_dir,FileName1},[]), - {dir, In3}; - _ -> - %% FileInfo = local_file_header_to_file_info(LH) - %%{Out, In4, CRC, UncompSize} = - {Out, In4, CRC, _UncompSize} = - get_z_data(CompMethod, In3, FileName1, - CompSize, Input, Output, OpO, Z), - In5 = skip_z_data_descriptor(GPFlag, Input, In4), - %% TODO This should be fixed some day: - %% In5 = Input({set_file_info, FileName, - %% FileInfo#file_info{size=UncompSize}}, In4), - FB(FileName), - CRC =:= CRC32 orelse throw({bad_crc, FileName}), - {file, Out, In5} - end; + {Type, Out, In} = + case lists:last(FileName) of + $/ -> + Out1 = Output({ensure_path,FileName1},[]), + {dir, Out1, In3}; + _ -> + {Out1, In4, CRC, _UncompSize} = + get_z_data(CompMethod, In3, FileName1, + CompSize, Input, Output, OpO, Z), + In5 = skip_z_data_descriptor(GPFlag, Input, In4), + + FB(FileName), + CRC =:= CRC32 orelse throw({bad_crc, FileName}), + {file, Out1, In5} + end, + + FileInfo = local_file_header_to_file_info( + Output({file_info, FileName1}, Out), + LHExtra, ZipFile), + + Out2 = Output({set_file_info, FileName1, FileInfo, [{time, local}]}, Out), + {Type, Out2, In}; false -> {ignore, In3} end; - _ -> - throw(bad_local_file_header) + Else -> + throw({bad_local_file_header, Else}) end. +local_file_header_to_file_info(FI, LFH, ZipFile) -> + %% Validate that local_file_header mtime is the same as cd_file_header + FI#file_info{ mode = ZipFile#zip_file.info#file_info.mode, + mtime = file_header_mtime_to_datetime(LFH), + atime = file_header_atime_to_datetime(LFH), + ctime = file_header_ctime_to_datetime(LFH) + }. + + %% make sure FileName doesn't have relative path that points over CWD check_valid_location(CWD, FileName) -> + TrailingSlash = case lists:last(FileName) of + $/ -> "/"; + _ -> "" + end, %% check for directory traversal exploit - case check_dir_level(filename:split(FileName), 0) of - {FileOrDir,Level} when Level < 0 -> - CWD1 = if CWD == "" -> "./"; - true -> CWD - end, - error_logger:format("Illegal path: ~ts, extracting in ~ts~n", - [add_cwd(CWD,FileName),CWD1]), - {false,add_cwd(CWD, FileOrDir)}; - _ -> - {true,add_cwd(CWD, FileName)} - end. + {IsValid, Cwd, Name} = + case check_dir_level(filename:split(FileName), 0) of + {FileOrDir,Level} when Level < 0 -> + CWD1 = if CWD == "" -> "./"; + true -> CWD + end, + error_logger:format("Illegal path: ~ts, extracting in ~ts~n", + [add_cwd(CWD,FileName),CWD1]), + {false, CWD, FileOrDir}; + _ -> + {true, CWD, FileName} + end, + {IsValid, string:trim(add_cwd(Cwd, Name), trailing, "/") ++ TrailingSlash}. check_dir_level([FileOrDir], Level) -> {FileOrDir,Level}; @@ -1845,7 +2338,7 @@ check_dir_level([".." | Parts], Level) -> check_dir_level([_Dir | Parts], Level) -> check_dir_level(Parts, Level+1). -get_file_name_extra(FileNameLen, ExtraLen, B, GPFlag) -> +get_filename_extra(FileNameLen, ExtraLen, B, GPFlag) -> try <> = B, {binary_to_chars(BFileName, GPFlag), BExtra} @@ -1907,6 +2400,41 @@ skip_z_data_descriptor(GPFlag, Input, In0) when GPFlag band 8 =:= 8 -> skip_z_data_descriptor(_GPFlag, _Input, In0) -> In0. +%% If we have mtime we use that, otherwise use dos time +file_header_mtime_to_datetime(FH) -> + #cd_file_header.mtime = #local_file_header.mtime, + case element(#cd_file_header.mtime, FH) of + undefined -> + dos_date_time_to_datetime( + element(#cd_file_header.last_mod_date, FH), + element(#cd_file_header.last_mod_time, FH)); + MTime -> + calendar:system_time_to_local_time(MTime, second) + end. + +%% If we have atime we use that, otherwise use dos time +file_header_atime_to_datetime(FH) -> + #cd_file_header.atime = #local_file_header.atime, + case element(#cd_file_header.atime, FH) of + undefined -> + dos_date_time_to_datetime( + element(#cd_file_header.last_mod_date, FH), + element(#cd_file_header.last_mod_time, FH)); + Atime -> + calendar:system_time_to_local_time(Atime, second) + end. + +%% Normally ctime will not be set, but if it is we use that. If it is not set +%% we return undefined so that when we later do write_file_info ctime will remain +%% the time that the file was created when extracted from the archive. +file_header_ctime_to_datetime(FH) -> + #cd_file_header.ctime = #local_file_header.ctime, + case element(#cd_file_header.ctime, FH) of + undefined -> undefined; + Ctime -> + calendar:system_time_to_local_time(Ctime, second) + end. + %% convert between erlang datetime and the MSDOS date and time %% that's stored in the zip archive %% MSDOS Time MSDOS Date @@ -1916,17 +2444,24 @@ dos_date_time_to_datetime(DosDate, DosTime) -> <> = <>, <> = <>, {{YearFrom1980+1980, Month, Day}, - {Hour, Min, Sec}}. + {Hour, Min, Sec * 2}}. -dos_date_time_from_datetime(Seconds) when is_integer(Seconds) -> - DateTime = calendar:now_to_datetime({0, Seconds, 0}), - dos_date_time_from_datetime(DateTime); dos_date_time_from_datetime({{Year, Month, Day}, {Hour, Min, Sec}}) -> YearFrom1980 = Year-1980, - <> = <>, + <> = <>, <> = <>, {DosDate, DosTime}. +%% Convert a local datetime or universal time seconds to +%% system time (aka POSIX time, aka Unix time) +datetime_to_system_time(undefined) -> + undefined; +datetime_to_system_time(PosixTime) when is_integer(PosixTime) -> + PosixTime; +datetime_to_system_time(DateTime) -> + erlang:universaltime_to_posixtime( + erlang:localtime_to_universaltime(DateTime)). + %% A pwrite-like function for iolists (used by memory-option) pwrite_binary(B, Pos, Bin) when byte_size(B) =:= Pos -> @@ -1960,7 +2495,6 @@ skip_bin(B, Pos) when is_binary(B) -> end. binary_to_chars(B, GPFlag) -> - ?SHOW_GP_BIT_11(B, GPFlag band ?GP_BIT_11), case GPFlag band ?GP_BIT_11 of 0 -> binary_to_list(B); @@ -1992,27 +2526,7 @@ encode_string(String) -> {String, 0} end. -%% ZIP header manipulations -eocd_and_comment_from_bin(<>) -> - {#eocd{disk_num = DiskNum, - start_disk_num = StartDiskNum, - entries_on_disk = EntriesOnDisk, - entries = Entries, - size = Size, - offset = Offset, - zip_comment_length = ZipCommentLength}, - Comment}; -eocd_and_comment_from_bin(_) -> - throw(bad_eocd). - -cd_file_header_from_bin(<>) -> #cd_file_header{version_made_by = VersionMadeBy, + os_made_by = os_id_to_atom(OsMadeBy), version_needed = VersionNeeded, gp_flag = GPFlag, comp_method = CompMethod, @@ -2070,34 +2585,23 @@ local_file_header_from_bin(< throw(bad_local_file_header). -%% make a file_info from a local directory header -%% local_file_header_to_file_info( -%% #local_file_header{last_mod_time = ModTime, -%% last_mod_date = ModDate, -%% uncomp_size = UncompSize}) -> -%% T = dos_date_time_to_datetime(ModDate, ModTime), -%% FI = #file_info{size = UncompSize, -%% type = regular, -%% access = read_write, -%% atime = T, -%% mtime = T, -%% ctime = T, -%% mode = 8#066, -%% links = 1, -%% major_device = 0, -%% minor_device = 0, -%% inode = 0, -%% uid = 0, -%% gid = 0}, -%% FI. - %% io functions +binary_io({file_info, FN, Opts}, A) -> + FI = binary_io({file_info, FN}, A), + case proplists:get_value(time, Opts, local) of + local -> FI; + posix -> FI#file_info{ atime = datetime_to_system_time(FI#file_info.atime), + mtime = datetime_to_system_time(FI#file_info.mtime), + ctime = datetime_to_system_time(FI#file_info.ctime) } + end; binary_io({file_info, {_Filename, _B, #file_info{} = FI}}, _A) -> FI; binary_io({file_info, {_Filename, #file_info{} = FI, _B}}, _A) -> FI; binary_io({file_info, {_Filename, B}}, A) -> binary_io({file_info, B}, A); +binary_io({file_info, Filename}, A) when is_list(Filename) -> + binary_io({file_info, {Filename, <<>>}}, A); binary_io({file_info, B}, _) -> {Type, Size} = if @@ -2107,7 +2611,11 @@ binary_io({file_info, B}, _) -> Now = calendar:local_time(), #file_info{size = Size, type = Type, access = read_write, atime = Now, - mtime = Now, ctime = Now, mode = 0, + mtime = Now, ctime = Now, mode = + if + Type =:= directory -> ?DEFAULT_DIRECTORY_FILE_MODE; + true -> ?DEFAULT_REGULAR_FILE_MODE + end, links = 1, major_device = 0, minor_device = 0, inode = 0, uid = 0, gid = 0}; @@ -2142,6 +2650,9 @@ binary_io({seek, cur, Pos}, {OldPos, B}) -> {OldPos + Pos, B}; binary_io({seek, eof, Pos}, {_OldPos, B}) -> {byte_size(B) + Pos, B}; +binary_io({position, Loc, Adj}, File) -> + {Pos, _} = NewFile = binary_io({seek, Loc, Adj}, File), + {Pos, NewFile}; binary_io({pwrite, Pos, Data}, {OldPos, B}) -> {OldPos, pwrite_binary(B, Pos, Data)}; binary_io({write, Data}, {Pos, B}) -> @@ -2154,14 +2665,21 @@ binary_io({list_dir, _F}, _B) -> []; binary_io({set_file_info, _F, _FI}, B) -> B; -binary_io({ensure_dir, _Dir}, B) -> - B. +binary_io({set_file_info, _F, _FI, _O}, B) -> + B; +binary_io({ensure_path, Dir}, _B) -> + {Dir, <<>>}. file_io({file_info, F}, _) -> case file:read_file_info(F) of {ok, Info} -> Info; {error, E} -> throw(E) end; +file_io({file_info, F, Opts}, _) -> + case file:read_file_info(F, Opts) of + {ok, Info} -> Info; + {error, E} -> throw(E) + end; file_io({open, FN, Opts}, _) -> case lists:member(write, Opts) of true -> ok = filelib:ensure_dir(FN); @@ -2188,6 +2706,11 @@ file_io({seek, S, Pos}, H) -> {ok, _NewPos} -> H; {error, Error} -> throw(Error) end; +file_io({position, S, Pos}, H) -> + case file:position(H, {S, Pos}) of + {ok, NewPos} -> {NewPos, H}; + {error, Error} -> throw(Error) + end; file_io({write, Data}, H) -> case file:write(H, Data) of ok -> H; @@ -2215,6 +2738,11 @@ file_io({set_file_info, F, FI}, H) -> ok -> H; {error, Error} -> throw(Error) end; -file_io({ensure_dir, Dir}, H) -> - ok = filelib:ensure_dir(Dir), - H. +file_io({set_file_info, F, FI, O}, H) -> + case file:write_file_info(F, FI, O) of + ok -> H; + {error, Error} -> throw(Error) + end; +file_io({ensure_path, Dir}, _H) -> + ok = filelib:ensure_path(Dir), + Dir. diff --git a/lib/stdlib/test/zip_SUITE.erl b/lib/stdlib/test/zip_SUITE.erl index 3810c0362c04..9cb7055ecdda 100644 --- a/lib/stdlib/test/zip_SUITE.erl +++ b/lib/stdlib/test/zip_SUITE.erl @@ -20,53 +20,186 @@ -module(zip_SUITE). -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, - init_per_group/2,end_per_group/2, borderline/1, atomic/1, + init_per_group/2,end_per_group/2, + init_per_testcase/2, end_per_testcase/2]). + +-export([borderline/1, atomic/1, bad_zip/1, unzip_from_binary/1, unzip_to_binary/1, zip_to_binary/1, unzip_options/1, zip_options/1, list_dir_options/1, aliases/1, - openzip_api/1, zip_api/1, open_leak/1, unzip_jar/1, + zip_api/1, open_leak/1, unzip_jar/1, unzip_traversal_exploit/1, compress_control/1, foldl/1,fd_leak/1,unicode/1,test_zip_dir/1, - explicit_file_info/1]). + explicit_file_info/1, mode/1, + zip64_central_headers/0, unzip64_central_headers/0, + zip64_central_headers/1, unzip64_central_headers/1, + zip64_central_directory/1, + basic_timestamp/1, extended_timestamp/1, + uid_gid/1]). + +-export([zip/5, unzip/3]). + +-import(proplists,[get_value/2, get_value/3]). -include_lib("common_test/include/ct.hrl"). -include_lib("kernel/include/file.hrl"). -include_lib("stdlib/include/zip.hrl"). +-include_lib("stdlib/include/assert.hrl"). suite() -> [{ct_hooks,[ts_install_cth]}]. all() -> [borderline, atomic, bad_zip, unzip_from_binary, unzip_to_binary, zip_to_binary, unzip_options, - zip_options, list_dir_options, aliases, openzip_api, + zip_options, list_dir_options, aliases, zip_api, open_leak, unzip_jar, compress_control, foldl, - unzip_traversal_exploit,fd_leak,unicode,test_zip_dir, - explicit_file_info]. + unzip_traversal_exploit, fd_leak, unicode, test_zip_dir, + explicit_file_info, {group, zip_group}, {group, zip64_group}]. groups() -> - []. + zip_groups(). + +%% zip - Use zip unix tools +%% ezip - Use erlang zip on disk +%% emzip - Use erlang zip in memory +-define(ZIP_MODES,[zip, ezip, emzip]). +%% -define(ZIP_MODES,[emzip]). +-define(UNZIP_MODES,[unzip, unezip, unemzip]). +%% How much memory the zip/unzip 64 testcases that zip/unzip from/to are expected to use +-define(EMZIP64_MEM_USAGE, (8 * (1 bsl 30))). + +zip_groups() -> + + ZipGroup= + [{zip_group,[],[{group,ZipMode} || ZipMode <- ?ZIP_MODES]}] ++ + [{ZipMode, [], [{group,UnZipMode} || UnZipMode <- ?UNZIP_MODES]} + || ZipMode <- ?ZIP_MODES] ++ + [{G, [parallel], zip_testcases()} || G <- ?UNZIP_MODES], + + Zip64Group = [{zip64_group,[],[{group,z64(ZipMode)} || ZipMode <- ?ZIP_MODES]}] ++ + [{z64(ZipMode), [sequence], [zip64_central_headers]++ + [{group,z64(UnZipMode)} || UnZipMode <- ?UNZIP_MODES]} + || ZipMode <- ?ZIP_MODES] ++ + [{z64(G), [], zip64_testcases()} || G <- ?UNZIP_MODES], + + ZipGroup ++ Zip64Group. + +z64(Mode) when is_atom(Mode) -> + list_to_atom(lists:concat([z64_,Mode])); +z64(Modes) when is_list(Modes) -> + [z64(M) || M <- Modes]. + +un_z64(Mode) -> + case atom_to_list(Mode) of + "z64_" ++ ModeString -> list_to_atom(ModeString); + _ -> Mode + end. + +zip_testcases() -> + [mode, basic_timestamp, extended_timestamp, uid_gid]. + +zip64_testcases() -> + [unzip64_central_headers, + zip64_central_directory]. init_per_suite(Config) -> - Config. + {ok, Started} = application:ensure_all_started(os_mon), + cleanup_priv_dir(Config), + [{started, Started} | Config]. -end_per_suite(_Config) -> +end_per_suite(Config) -> + [application:stop(App) || App <- lists:reverse(get_value(started, Config))], + cleanup_priv_dir(Config), ok. -init_per_group(_GroupName, Config) -> - Config. +cleanup_priv_dir(Config) -> + %% Cleanup potential files in priv_dir + Pdir = get_value(pdir, Config, get_value(priv_dir,Config)), + ct:log("Cleaning up ~s",[Pdir]), + [ case file:delete(File) of + {error, eperm} -> file:del_dir_r(File); + _ -> ok + end || File <- filelib:wildcard(filename:join(Pdir, "*"))]. + +init_per_group(zip64_group, Config) -> + PrivDir = get_value(priv_dir, Config), + + case {erlang:system_info(wordsize), disc_free(PrivDir), memsize()} of + {4, _, _} -> + {skip, "Zip64 tests only work on 64-bit systems"}; + {8, error, _} -> + {skip, "Failed to query disk space for priv_dir. " + "Is it on a remote file system?~n"}; + {8, N,M} when N >= 16 * (1 bsl 20), M >= ?EMZIP64_MEM_USAGE -> + ct:log("Free disk: ~w KByte~n", [N]), + ct:log("Free memory: ~w MByte~n", [M div (1 bsl 20)]), + OneMB = <<0:(8 bsl 20)>>, + Large4GB = filename:join(PrivDir, "large.txt"), + ok = file:write_file(Large4GB, lists:duplicate(4 bsl 10, OneMB)), + Medium4MB = filename:join(PrivDir, "medium.txt"), + ok = file:write_file(Medium4MB, lists:duplicate(4, OneMB)), + + [{large, Large4GB},{medium,Medium4MB}|Config]; + {8,N,M} -> + ct:log("Free disk: ~w KByte~n", [N]), + ct:log("Free memory: ~w MByte~n", [M div (1 bsl 20)]), + {skip,"Less than 16 GByte free disk or less then 8 GB free mem"} + end; +init_per_group(Group, Config) -> + case lists:member(Group, ?ZIP_MODES ++ ?UNZIP_MODES ++ z64(?ZIP_MODES ++ ?UNZIP_MODES)) of + true -> + case get_value(zip, Config) of + undefined -> + case un_z64(Group) =/= zip orelse has_zip() of + true -> + Pdir = filename:join(get_value(priv_dir, Config),Group), + ok = filelib:ensure_path(Pdir), + [{pdir, Pdir},{zip, Group} | Config]; + false -> + {skip, "No zip program found"} + end; + _Zip -> + case un_z64(Group) =/= unzip orelse has_zip() of + true -> + Pdir = filename:join(get_value(pdir, Config),Group), + ok = filelib:ensure_path(Pdir), + [{pdir, Pdir},{unzip, Group} | Config]; + false -> + {skip, "No zip program found"} + end + end; + false -> + Config + end. end_per_group(_GroupName, Config) -> + cleanup_priv_dir(Config), Config. +init_per_testcase(TC, Config) -> + UsesZip = un_z64(get_value(zip, Config)) =:= zip orelse un_z64(get_value(unzip, Config)) =:= unzip, + HasZip = has_zip(), + ct:log("Free memory: ~w MByte~n", [memsize() div (1 bsl 20)]), + if UsesZip andalso not HasZip -> + {skip, "No zip command found"}; + true -> + PrivDir = filename:join(get_value(pdir, Config,get_value(priv_dir, Config)), TC), + ok = filelib:ensure_path(PrivDir), + [{pdir, PrivDir} | Config] + end. + + +end_per_testcase(_TC, Config) -> + cleanup_priv_dir(Config), + Config. %% Test creating, listing and extracting one file from an archive %% multiple times with different file sizes. Also check that the %% modification date of the extracted file has survived. borderline(Config) when is_list(Config) -> - RootDir = proplists:get_value(priv_dir, Config), + RootDir = get_value(priv_dir, Config), TempDir = filename:join(RootDir, "borderline"), - ok = file:make_dir(TempDir), Record = 512, Block = 20 * Record, @@ -200,7 +333,7 @@ next_random(X) -> %% Test the 'atomic' operations: zip/unzip/list_dir, on archives. %% Also test the 'cooked' option. atomic(Config) when is_list(Config) -> - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), + ok = file:set_cwd(get_value(priv_dir, Config)), DataFiles = data_files(), Names = [Name || {Name,_,_} <- DataFiles], io:format("Names: ~p", [Names]), @@ -223,47 +356,10 @@ atomic(Config) when is_list(Config) -> ok. -%% Test the openzip_open/2, openzip_get/1, openzip_get/2, openzip_close/1 -%% and openzip_list_dir/1 functions. -openzip_api(Config) when is_list(Config) -> - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), - DataFiles = data_files(), - Names = [Name || {Name, _, _} <- DataFiles], - io:format("Names: ~p", [Names]), - - %% Create a zip archive - - Zip = "zip.zip", - {ok, Zip} = zip:zip(Zip, Names, []), - - %% Open archive - {ok, OpenZip} = zip:openzip_open(Zip, [memory]), - - %% List dir - Names = names_from_list_dir(zip:openzip_list_dir(OpenZip)), - - %% Get a file - Name1 = hd(Names), - {ok, Data1} = file:read_file(Name1), - {ok, {Name1, Data1}} = zip:openzip_get(Name1, OpenZip), - - %% Get all files - FilesDatas = lists:map(fun(Name) -> {ok, B} = file:read_file(Name), - {Name, B} end, Names), - {ok, FilesDatas} = zip:openzip_get(OpenZip), - - %% Close - ok = zip:openzip_close(OpenZip), - - %% Clean up. - delete_files([Names]), - - ok. - %% Test the zip_open/2, zip_get/1, zip_get/2, zip_close/1, %% and zip_list_dir/1 functions. zip_api(Config) when is_list(Config) -> - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), + ok = file:set_cwd(get_value(priv_dir, Config)), DataFiles = data_files(), Names = [Name || {Name, _, _} <- DataFiles], io:format("Names: ~p", [Names]), @@ -336,8 +432,8 @@ spawned_zip_dead(ZipSrv) -> %% Test options for unzip, only cwd and file_list currently. unzip_options(Config) when is_list(Config) -> - DataDir = proplists:get_value(data_dir, Config), - PrivDir = proplists:get_value(priv_dir, Config), + DataDir = get_value(data_dir, Config), + PrivDir = get_value(priv_dir, Config), Long = filename:join(DataDir, "abc.zip"), %% create a temp directory @@ -358,14 +454,30 @@ unzip_options(Config) when is_list(Config) -> lists:foreach(fun(F)-> ok = file:delete(F) end, RetList), + %% Clean up and verify no more files. + 0 = delete_files([Subdir]), + + FList2 = ["abc.txt","quotes/rain.txt","wikipedia.txt","emptyFile"], + + %% Unzip a zip file in Subdir + {ok, RetList2} = zip:unzip(Long, [{cwd, Subdir},skip_directories]), + + %% Verify. + true = (length(RetList2) =:= 4), + lists:foreach(fun(F)-> {ok,B} = file:read_file(filename:join(DataDir, F)), + {ok,B} = file:read_file(filename:join(Subdir, F)) end, + FList2), + lists:foreach(fun(F)-> 1 = delete_files([F]) end, + RetList2), + %% Clean up and verify no more files. 0 = delete_files([Subdir]), ok. %% Test that unzip handles directory traversal exploit (OTP-13633) unzip_traversal_exploit(Config) -> - DataDir = proplists:get_value(data_dir, Config), - PrivDir = proplists:get_value(priv_dir, Config), + DataDir = get_value(data_dir, Config), + PrivDir = get_value(priv_dir, Config), ZipName = filename:join(DataDir, "exploit.zip"), %% $ zipinfo -1 test/zip_SUITE_data/exploit.zip @@ -410,25 +522,29 @@ unzip_traversal_exploit(Config) -> %% Test unzip a jar file (OTP-7382). unzip_jar(Config) when is_list(Config) -> - DataDir = proplists:get_value(data_dir, Config), - PrivDir = proplists:get_value(priv_dir, Config), + DataDir = get_value(data_dir, Config), + PrivDir = get_value(priv_dir, Config), JarFile = filename:join(DataDir, "test.jar"), %% create a temp directory Subdir = filename:join(PrivDir, "jartest"), ok = file:make_dir(Subdir), - ok = file:set_cwd(Subdir), FList = ["META-INF/MANIFEST.MF","test.txt"], - {ok, RetList} = zip:unzip(JarFile), + {ok, RetList} = zip:unzip(JarFile, [{cwd, Subdir}]), %% Verify. lists:foreach(fun(F)-> {ok,B} = file:read_file(filename:join(DataDir, F)), {ok,B} = file:read_file(filename:join(Subdir, F)) end, FList), - lists:foreach(fun(F)-> ok = file:delete(F) end, - RetList), + lists:foreach(fun(F)-> + case lists:last(F) =:= $/ of + true -> ok = file:del_dir(F); + false -> ok = file:delete(F) + end + end, + lists:reverse(RetList)), %% Clean up and verify no more files. 0 = delete_files([Subdir]), @@ -436,13 +552,13 @@ unzip_jar(Config) when is_list(Config) -> %% Test the options for unzip, only cwd currently. zip_options(Config) when is_list(Config) -> - PrivDir = proplists:get_value(priv_dir, Config), + PrivDir = get_value(priv_dir, Config), ok = file:set_cwd(PrivDir), DataFiles = data_files(), Names = [Name || {Name, _, _} <- DataFiles], %% Make sure cwd is not where we get the files - ok = file:set_cwd(proplists:get_value(data_dir, Config)), + ok = file:set_cwd(get_value(data_dir, Config)), %% Create a zip archive {ok, {_,Zip}} = @@ -474,10 +590,23 @@ zip_options(Config) when is_list(Config) -> %% Test the options for list_dir... one day. list_dir_options(Config) when is_list(Config) -> - ok. + DataDir = get_value(data_dir, Config), + Archive = filename:join(DataDir, "abc.zip"), + + {ok, + ["abc.txt", "quotes/rain.txt", "empty/", "wikipedia.txt", "emptyFile" ]} = + zip:list_dir(Archive,[names_only]), + {ok, + [#zip_comment{}, + #zip_file{ name = "abc.txt" }, + #zip_file{ name = "quotes/rain.txt" }, + #zip_file{ name = "wikipedia.txt" }, + #zip_file{ name = "emptyFile" } + ]} = zip:list_dir(Archive,[skip_directories]), + ok. %% convert zip_info as returned from list_dir to a list of names names_from_list_dir({ok, Info}) -> @@ -526,15 +655,15 @@ create_files([]) -> %% Try zip:unzip/1 on some corrupted zip files. bad_zip(Config) when is_list(Config) -> - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), + ok = file:set_cwd(get_value(priv_dir, Config)), try_bad("bad_crc", {bad_crc, "abc.txt"}, Config), try_bad("bad_central_directory", bad_central_directory, Config), try_bad("bad_file_header", bad_file_header, Config), try_bad("bad_eocd", bad_eocd, Config), try_bad("enoent", enoent, Config), GetNotFound = fun(A) -> - {ok, O} = zip:openzip_open(A, []), - zip:openzip_get("not_here", O) + {ok, O} = zip:zip_open(A, []), + zip:zip_get("not_here", O) end, try_bad("abc", file_not_found, GetNotFound, Config), ok. @@ -546,7 +675,7 @@ try_bad(N, R, Config) -> try_bad(Name0, Reason, What, Config) -> %% Intentionally no macros here. - DataDir = proplists:get_value(data_dir, Config), + DataDir = get_value(data_dir, Config), Name = Name0 ++ ".zip", io:format("~nTrying ~s", [Name]), Full = filename:join(DataDir, Name), @@ -561,10 +690,11 @@ try_bad(Name0, Reason, What, Config) -> %% Test extracting to binary with memory option. unzip_to_binary(Config) when is_list(Config) -> - DataDir = proplists:get_value(data_dir, Config), - PrivDir = proplists:get_value(priv_dir, Config), + DataDir = get_value(data_dir, Config), + PrivDir = get_value(priv_dir, Config), WorkDir = filename:join(PrivDir, "unzip_to_binary"), _ = file:make_dir(WorkDir), + _ = file:make_dir(filename:join(DataDir, "empty")), ok = file:set_cwd(WorkDir), Long = filename:join(DataDir, "abc.zip"), @@ -573,7 +703,16 @@ unzip_to_binary(Config) when is_list(Config) -> {ok, FBList} = zip:unzip(Long, [memory]), %% Verify. - lists:foreach(fun({F,B}) -> {ok,B}=file:read_file(filename:join(DataDir, F)) + lists:foreach(fun({F,B}) -> + Filename = filename:join(DataDir, F), + case lists:last(F) =:= $/ of + true -> + <<>> = B, + {ok, #file_info{ type = directory}} = + file:read_file_info(Filename); + false -> + {ok,B}=file:read_file(filename:join(DataDir, F)) + end end, FBList), %% Make sure no files created in cwd @@ -583,8 +722,8 @@ unzip_to_binary(Config) when is_list(Config) -> %% Test compressing to binary with memory option. zip_to_binary(Config) when is_list(Config) -> - DataDir = proplists:get_value(data_dir, Config), - PrivDir = proplists:get_value(priv_dir, Config), + DataDir = get_value(data_dir, Config), + PrivDir = get_value(priv_dir, Config), WorkDir = filename:join(PrivDir, "zip_to_binary"), _ = file:make_dir(WorkDir), @@ -636,21 +775,23 @@ aliases(Config) when is_list(Config) -> %% Test extracting a zip archive from a binary. unzip_from_binary(Config) when is_list(Config) -> - DataDir = proplists:get_value(data_dir, Config), - PrivDir = proplists:get_value(priv_dir, Config), + DataDir = get_value(data_dir, Config), + PrivDir = get_value(priv_dir, Config), ExtractDir = filename:join(PrivDir, "extract_from_binary"), - ok = file:make_dir(ExtractDir), Archive = filename:join(ExtractDir, "abc.zip"), + + ok = file:make_dir(ExtractDir), {ok, _Size} = file:copy(filename:join(DataDir, "abc.zip"), Archive), FileName = "abc.txt", Quote = "quotes/rain.txt", Wikipedia = "wikipedia.txt", EmptyFile = "emptyFile", + EmptyDir = "empty/", file:set_cwd(ExtractDir), %% Read a zip file into a binary and extract from the binary. {ok, Bin} = file:read_file(Archive), - {ok, [FileName,Quote,Wikipedia,EmptyFile]} = zip:unzip(Bin), + {ok, [FileName,Quote,EmptyDir,Wikipedia,EmptyFile]} = zip:unzip(Bin), %% Verify. DestFilename = filename:join(ExtractDir, "abc.txt"), @@ -661,8 +802,31 @@ unzip_from_binary(Config) when is_list(Config) -> {ok, QuoteData} = file:read_file(filename:join(DataDir, Quote)), {ok, QuoteData} = file:read_file(DestQuote), + %% Don't be in ExtractDir when we delete it + ok = file:set_cwd(PrivDir), + %% Clean up. delete_files([DestFilename, DestQuote, Archive, ExtractDir]), + + ok = file:make_dir(ExtractDir), + file:set_cwd(ExtractDir), + + %% Read a zip file into a binary and extract from the binary with skip_directories + {ok, [FileName,Quote,Wikipedia,EmptyFile]} + = zip:unzip(Bin, [skip_directories]), + + %% Verify. + DestFilename = filename:join(ExtractDir, "abc.txt"), + {ok, Data} = file:read_file(filename:join(DataDir, FileName)), + {ok, Data} = file:read_file(DestFilename), + + DestQuote = filename:join([ExtractDir, "quotes", "rain.txt"]), + {ok, QuoteData} = file:read_file(filename:join(DataDir, Quote)), + {ok, QuoteData} = file:read_file(DestQuote), + + %% Clean up. + delete_files([DestFilename, DestQuote, ExtractDir]), + ok. %% oac_files() -> @@ -705,7 +869,7 @@ do_delete_files([Item|Rest], Cnt) -> %% Test control of which files that should be compressed. compress_control(Config) when is_list(Config) -> - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), + ok = file:set_cwd(get_value(priv_dir, Config)), Dir = "compress_control", Files = [ {Dir, dir, $d}, @@ -765,11 +929,11 @@ test_compress_control(Dir, Files, ZipOptions, Expected) -> create_files(Files), {ok, Zip} = zip:create(Zip, [Dir], ZipOptions), - {ok, OpenZip} = zip:openzip_open(Zip, [memory]), - {ok,[#zip_comment{comment = ""} | ZipList]} = zip:openzip_list_dir(OpenZip), + {ok, OpenZip} = zip:zip_open(Zip, [memory]), + {ok,[#zip_comment{comment = ""} | ZipList]} = zip:zip_list_dir(OpenZip), io:format("compress_control: -> ~p -> ~p\n -> ~pn", [Expected, ZipOptions, ZipList]), verify_compression(Files, ZipList, OpenZip, ZipOptions, Expected), - ok = zip:openzip_close(OpenZip), + ok = zip:zip_close(OpenZip), %% Cleanup delete_files([Zip]), @@ -783,7 +947,7 @@ verify_compression([{Name, Kind, _Filler} | Files], ZipList, OpenZip, ZipOptions dir -> {Name ++ "/", 0}; _ -> - {ok, {Name, Bin}} = zip:openzip_get(Name, OpenZip), + {ok, {Name, Bin}} = zip:zip_get(Name, OpenZip), {Name, size(Bin)} end, {Name2, {value, ZipFile}} = {Name2, lists:keysearch(Name2, #zip_file.name, ZipList)}, @@ -836,17 +1000,17 @@ extensions([], Old) -> Old. foldl(Config) -> - PrivDir = proplists:get_value(priv_dir, Config), + PrivDir = get_value(priv_dir, Config), File = filename:join([PrivDir, "foldl.zip"]), FooBin = <<"FOO">>, BarBin = <<"BAR">>, Files = [{"foo", FooBin}, {"bar", BarBin}], - {ok, {File, Bin}} = zip:create(File, Files, [memory]), + {ok, {File, Bin}} = zip:create(File, Files, [memory,{extra,[]}]), ZipFun = fun(N, I, B, Acc) -> [{N, B(), I()} | Acc] end, {ok, FileSpec} = zip:foldl(ZipFun, [], {File, Bin}), [{"bar", BarBin, #file_info{}}, {"foo", FooBin, #file_info{}}] = FileSpec, - {ok, {File, Bin}} = zip:create(File, lists:reverse(FileSpec), [memory]), + {ok, {File, Bin}} = zip:create(File, lists:reverse(FileSpec), [memory,{extra,[]}]), {foo_bin, FooBin} = try zip:foldl(fun("foo", _, B, _) -> throw(B()); (_, _, _, Acc) -> Acc end, [], {File, Bin}) @@ -867,8 +1031,8 @@ foldl(Config) -> ok. fd_leak(Config) -> - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), - DataDir = proplists:get_value(data_dir, Config), + ok = file:set_cwd(get_value(priv_dir, Config)), + DataDir = get_value(data_dir, Config), Name = filename:join(DataDir, "bad_file_header.zip"), BadExtract = fun() -> {error,bad_file_header} = zip:extract(Name), @@ -902,8 +1066,8 @@ unicode(Config) -> latin1 -> {comment, "Native name encoding is Latin-1; skipping all tests"}; utf8 -> - DataDir = proplists:get_value(data_dir, Config), - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), + DataDir = get_value(data_dir, Config), + ok = file:set_cwd(get_value(priv_dir, Config)), test_file_comment(DataDir), test_archive_comment(DataDir), test_bad_comment(DataDir), @@ -996,7 +1160,7 @@ test_zip1() -> has_zip() andalso zip_is_unicode_aware(). has_zip() -> - os:find_executable("zip") =/= false. + os:find_executable("zip") =/= false andalso element(1, os:type()) =:= unix. zip_is_unicode_aware() -> S = os:cmd("zip -v | grep 'UNICODE_SUPPORT'"), @@ -1034,7 +1198,7 @@ test_latin1_archive(DataDir) -> test_zip_dir(Config) when is_list(Config) -> case {os:find_executable("unzip"), os:type()} of {UnzipPath, {unix,_}} when is_list(UnzipPath)-> - DataDir = proplists:get_value(data_dir, Config), + DataDir = get_value(data_dir, Config), Dir = filename:join([DataDir, "test-zip", "dir-1"]), TestZipOutputDir = filename:join(DataDir, "test-zip-output"), TestZipOutput = filename:join(TestZipOutputDir, "test.zip"), @@ -1063,3 +1227,554 @@ explicit_file_info(_Config) -> {"seconds", <<>>, FileInfo#file_info{mtime=315532800}}], {ok, _} = zip:zip("", Files, [memory]), ok. + +mode(Config) -> + + PrivDir = get_value(pdir, Config), + ExtractDir = filename:join(PrivDir, "extract"), + Archive = filename:join(PrivDir, "archive.zip"), + + Executable = filename:join(PrivDir,"exec"), + file:write_file(Executable, "aaa"), + {ok, ExecFI } = file:read_file_info(Executable), + ok = file:write_file_info(Executable, ExecFI#file_info{ mode = 8#111 bor 8#400 }), + {ok, #file_info{ mode = OrigExecMode }} = file:read_file_info(Executable), + + Directory = filename:join(PrivDir,"dir"), + ok = file:make_dir(Directory), + {ok, DirFI } = file:read_file_info(Executable), + ok = file:write_file_info(Directory, DirFI#file_info{ mode = 8#111 bor 8#400 }), + {ok, #file_info{ mode = OrigDirMode }} = file:read_file_info(Directory), + + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "-r", ["dir","exec"], [{cwd, PrivDir},{extra,[extended_timestamp]}])), + + OrigExecMode777 = OrigExecMode band 8#777, + OrigDirMode777 = OrigDirMode band 8#777, + + ?assertMatch( + {ok, [#zip_comment{}, + #zip_file{ name = "dir/", info = #file_info{ mode = OrigDirMode777 }}, + #zip_file{ name = "exec", info = #file_info{ mode = OrigExecMode777 }} ]}, + zip:list_dir(Archive)), + + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, ["dir/","exec"]}, unzip(Config, Archive, [{cwd,ExtractDir}])), + + case un_z64(get_value(unzip, Config)) =/= unemzip of + true -> + {ok,#file_info{ mode = ExecMode }} = + file:read_file_info(filename:join(ExtractDir,"exec")), + ?assertEqual(ExecMode band 8#777, OrigExecMode777), + + {ok,#file_info{ mode = DirMode }} = + file:read_file_info(filename:join(ExtractDir,"dir")), + ?assertEqual(DirMode band 8#777, OrigDirMode777); + false -> + %% emzip does not support mode + ok + end, + + ok. + +%% Test that zip64 local and central headers are respected when unzipping. +%% The fields in the header that can be 64-bit are: +%% * compressed size +%% * uncompressed size +%% * relative offset +%% * starting disk +%% +%% As we do not support using multiple disks, we do not test starting disks +zip64_central_headers() -> [{timetrap, {minutes, 60}}]. +zip64_central_headers(Config) -> + + PrivDir = get_value(pdir, Config), + Archive = filename:join(PrivDir, "../archive.zip"), + + %% Check that ../../large.txt exists and is of correct size + {ok, #file_info{ size = 1 bsl 32 } } = + file:read_file_info(filename:join(PrivDir, "../../large.txt")), + + %% We very carefully create an archive that should contain all + %% different header combinations. + %% - uncomp.txt: uncomp size > 4GB + %% - uncomp.comp.zip: uncomp and comp size > 4GB + %% - offset.txt: offset > 4GB + %% - uncomp.offset.txt: uncomp size and offset > 4GB + %% - uncomp.comp.offset.zip: uncomp and comp size and offset > 4GB + %% + %% The archive will be roughly 8 GBs large + + ok = file:make_link(filename:join(PrivDir, "../../large.txt"), + filename:join(PrivDir, "uncomp.txt")), + ok = file:make_link(filename:join(PrivDir, "../../large.txt"), + filename:join(PrivDir, "uncomp.comp.zip")), + ok = file:make_link(filename:join(PrivDir, "../../medium.txt"), + filename:join(PrivDir, "offset.txt")), + ok = file:make_link(filename:join(PrivDir, "../../large.txt"), + filename:join(PrivDir, "uncomp.offset.txt")), + ok = file:make_link(filename:join(PrivDir, "../../large.txt"), + filename:join(PrivDir, "uncomp.comp.offset.zip")), + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "-1", + ["uncomp.txt","uncomp.comp.zip","offset.txt", + "uncomp.offset.txt","uncomp.comp.offset.zip"], + [{cwd, PrivDir}])), + + %% Check that list archive works + {ok, [#zip_comment{}, + #zip_file{ name = "uncomp.txt", + info = #file_info{ size = 1 bsl 32 } }, + #zip_file{ name = "uncomp.comp.zip", + comp_size = 1 bsl 32, + info = #file_info{ size = 1 bsl 32 } }, + #zip_file{ name = "offset.txt", + info = #file_info{ size = 4 bsl 20 } }, + #zip_file{ name = "uncomp.offset.txt", + info = #file_info{ size = 1 bsl 32 } }, + #zip_file{ name = "uncomp.comp.offset.zip", + comp_size = 1 bsl 32, + info = #file_info{ size = 1 bsl 32 } } + ]} = + zip:list_dir(Archive), + ok. + +unzip64_central_headers() -> [{timetrap, {minutes, 60}}]. +unzip64_central_headers(Config) -> + + PrivDir = get_value(pdir, Config), + ExtractDir = filename:join(PrivDir, "extract"), + Archive = filename:join(PrivDir, "../../archive.zip"), + Large4GB = filename:join(get_value(priv_dir, Config),"large.txt"), + Medium4MB = filename:join(get_value(priv_dir, Config), "medium.txt"), + + %% Test that extraction of each file works + lists:map( + fun F({Name, Compare}) -> + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, [Name]}, + unzip(Config, Archive, [{cwd, ExtractDir},{file_list,[Name]}])), + cmp(Compare, filename:join(ExtractDir,Name)), + file:del_dir_r(ExtractDir); + F(Name) -> + F({Name, Large4GB}) + end, ["uncomp.txt","uncomp.comp.zip",{"offset.txt",Medium4MB}, + "uncomp.offset.txt","uncomp.comp.offset.zip"]), + + ok. + +%% Test that zip64 end of central directory are respected when unzipping. +%% The fields in the header that can be 64-bit are: +%% * total number of files > 2 bytes +%% * size of central directory > 4 bytes (cannot test as it requires an archive with 8 million files) +%% * offset of central directory > 4 bytes (implicitly tested when testing large relative location of header) +%% +%% Fields that we don't test as we don't support multiple disks +%% * number of disk where end of central directory is > 2 bytes +%% * number of disk to find central directory > 2 bytes +%% * number central directory entries on this disk > 2 bytes +zip64_central_directory(Config) -> + + PrivDir = get_value(pdir, Config), + Dir = filename:join(PrivDir, "files"), + ExtractDir = filename:join(PrivDir, "extract"), + + Archive = filename:join(PrivDir, "archive.zip"), + + %% To test when total number of files > 65535, we create an archive with 66000 entries + ok = file:make_dir(Dir), + lists:foreach( + fun(I) -> + ok = file:write_file(filename:join(Dir, integer_to_list(I)++".txt"),<<0:8>>) + end, lists:seq(0, 65600)), + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "-1 -r", ["files"], [{cwd, PrivDir}])), + + {ok, Files} = zip:list_dir(Archive), + ?assertEqual(65603, length(Files)), + + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, ["files/1.txt","files/65599.txt"]}, + unzip(Config, Archive, [{cwd, ExtractDir},{file_list,["files/1.txt", + "files/65599.txt"]}])), + cmp(filename:join(ExtractDir,"files/1.txt"), + filename:join(ExtractDir,"files/65599.txt")), + + ok. + +%% Test basic timestamps, the atime and mtime should be the original +%% mtime of the file +basic_timestamp(Config) -> + PrivDir = get_value(pdir, Config), + Archive = filename:join(PrivDir, "archive.zip"), + ExtractDir = filename:join(PrivDir, "extract"), + Testfile = filename:join(PrivDir, "testfile.txt"), + + ok = file:write_file(Testfile, "abc"), + {ok, OndiskFI = #file_info{ mtime = Mtime }} = + file:read_file_info(Testfile), + + %% Sleep a bit to let the timestamp progress + timer:sleep(1000), + + %% Create an archive without extended timestamps + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "-X", ["testfile.txt"], [{cwd, PrivDir}, {extra, []}])), + + {ok, [#zip_comment{}, + #zip_file{ info = ZipFI = #file_info{ mtime = ZMtime }} ]} = + zip:list_dir(Archive), + + ct:log("on disk: ~p",[OndiskFI]), + ct:log("in zip : ~p",[ZipFI]), + ct:log("zipinfo:~n~ts",[os:cmd("zipinfo -v "++Archive)]), + + %% Timestamp in archive is when entry was added to archive + %% Need to add 2 to ZMtime as the dos time in zip archives + %% are in precise. + ?assert(calendar:datetime_to_gregorian_seconds(Mtime) =< + calendar:datetime_to_gregorian_seconds(ZMtime) + 1), + + %% Sleep a bit to let the timestamp progress + timer:sleep(1000), + + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, ["testfile.txt"]}, + unzip(Config, Archive, [{cwd,ExtractDir}])), + + {ok, UnzipFI = #file_info{ atime = UnZAtime, + mtime = UnZMtime, + ctime = UnZCtime + }} = + file:read_file_info(filename:join(ExtractDir, "testfile.txt"),[raw]), + + + ct:log("extract: ~p",[UnzipFI]), + + UnzipMode = un_z64(get_value(unzip, Config)), + + if UnzipMode =/= unemzip -> + ?assertEqual(ZMtime, UnZMtime), + + %% When using unzip, the atime is sometimes set to ctime for unknown reasons... so we cannot test it + %% ?assertEqual(UnZAtime, UnZMtime), + ?assert(UnZAtime =:= UnZMtime orelse UnZAtime =:= UnZCtime), + + %% On windows the ctime and mtime are the same so + %% we cannot compare them. + [?assert(UnZMtime < UnZCtime) || os:type() =/= {win32,nt}]; + UnzipMode =:= unemzip -> + %% emzip does not support timestamps + ok + end, + + ok. + +%% Test extended timestamps, the atime and ctime in the archive are +%% the atime and ctime when the file is added to the archive. +extended_timestamp(Config) -> + + PrivDir = get_value(pdir, Config), + Archive = filename:join(PrivDir, "archive.zip"), + ExtractDir = filename:join(PrivDir, "extract"), + Testfile = filename:join(PrivDir, "testfile.txt"), + + ok = file:write_file(Testfile, "abc"), + {ok, OndiskFI = #file_info{ mtime = Mtime }} = + file:read_file_info(Testfile), + + %% Sleep a bit to let the timestamp progress + timer:sleep(1000), + + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "", ["testfile.txt"], [{cwd, PrivDir}])), + + %% list_dir only reads the central directory header and thus only + %% the mtime will be correct here + {ok, [#zip_comment{}, + #zip_file{ info = ZipFI = #file_info{ mtime = ZMtime}} ]} = + zip:list_dir(Archive), + + ct:log("on disk: ~p",[OndiskFI]), + ct:log("in zip : ~p",[ZipFI]), + ct:log("zipinfo:~n~ts",[os:cmd("zipinfo -v "++Archive)]), + + ?assertEqual(Mtime, ZMtime), + + %% Sleep a bit to let the timestamp progress + timer:sleep(1000), + + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, ["testfile.txt"]}, + unzip(Config, Archive, [{cwd,ExtractDir}])), + + {ok, UnzipFI = #file_info{ atime = UnZAtime, + mtime = UnZMtime, + ctime = UnZCtime + }} = + file:read_file_info(filename:join(ExtractDir, "testfile.txt"),[raw]), + + ct:log("extract: ~p",[UnzipFI]), + + UnzipMode = un_z64(get_value(unzip, Config)), + + if UnzipMode =/= unemzip -> + ?assertEqual(ZMtime, UnZMtime), + + %% When using unzip, the atime is sometimes set to ctime for unknown reasons... so we cannot test it + %% ?assertEqual(UnZAtime, UnZMtime), + ?assert(UnZAtime =:= UnZMtime orelse UnZAtime =:= UnZCtime), + + %% On windows the ctime and mtime are the same so + %% we cannot compare them. + [?assert(UnZMtime < UnZCtime) || os:type() =/= {win32,nt}]; + UnzipMode =:= unemzip -> + %% emzip does not support timestamps + ok + end, + + ok. + +uid_gid(Config) -> + + PrivDir = get_value(pdir, Config), + ExtractDir = filename:join(PrivDir, "extract"), + Archive = filename:join(PrivDir, "archive.zip"), + Testfile = filename:join(PrivDir, "testfile.txt"), + + ok = file:write_file(Testfile, "abc"), + {ok, OndiskFI = #file_info{ gid = GID, uid = UID }} = + file:read_file_info(Testfile), + + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "", ["testfile.txt"], [{cwd, PrivDir}])), + + {ok, [#zip_comment{}, + #zip_file{ info = ZipFI = #file_info{ gid = ZGID, uid = ZUID }} ]} = + zip:list_dir(Archive,[{extra, [uid_gid]}]), + + ct:log("on disk: ~p",[OndiskFI]), + ct:log("in zip : ~p",[ZipFI]), + + ?assertEqual(UID, ZUID), + ?assertEqual(GID, ZGID), + + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, ["testfile.txt"]}, + unzip(Config, Archive, [{cwd, ExtractDir},{extra,[uid_gid]}])), + + {ok,#file_info{ gid = ExZGID, uid = ExZUID }} = + file:read_file_info(filename:join(ExtractDir,"testfile.txt")), + + case un_z64(get_value(unzip, Config)) =/= unemzip of + true -> + ?assertEqual(UID, ExZUID), + ?assertEqual(GID, ExZGID); + _ -> + %% emzip does not support uid_gid + ok + end, + + ok. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Generic zip interface +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +zip(Config, Archive, ZipOpts, Filelist, Opts) when is_list(Config) -> + zip(get_value(zip, Config), + Archive, ZipOpts, Filelist, Opts); +zip(z64_zip, Archive, ZipOpts, Filelist, Opts) -> + zip(zip, Archive, ZipOpts, Filelist, Opts); +zip(zip, Archive, ZipOpts, Filelist, Opts) -> + cmd("cd "++get_value(cwd, Opts)++" && " + "zip "++ZipOpts++" "++Archive++" "++lists:join($ ,Filelist)), + {ok, Archive}; +zip(z64_ezip, Archive, _ZipOpts, Filelist, Opts) -> + zip(ezip, Archive, _ZipOpts, Filelist, Opts); +zip(ezip, Archive, _ZipOpts, Filelist, Opts) -> + ct:log("Creating zip:zip(~p,~n~p,~n~p)",[Archive, Filelist, Opts]), + zip:zip(Archive, Filelist, Opts); +zip(z64_emzip, Archive, _ZipOpts, Filelist, Opts) -> + %% Run in peer node so that memory issues don't crash test node + {ok, Peer, Node} = ?CT_PEER(#{ args => emzip_peer_args() }), + try + erpc:call( + Node, + fun() -> + ?MODULE:zip(emzip, Archive, _ZipOpts, Filelist, Opts) + end) + after + catch peer:stop(Peer) + end; +zip(emzip, Archive, _ZipOpts, Filelist, Opts) -> + ct:log("Creating emzip ~ts",[Archive]), + Cwd = get_value(cwd, Opts), + + + %% For this not to use a huge amount of memory we re-use + %% the binary for files that are the same size as those are the same file. + %% This cuts memory usage from ~16GB to ~4GB. + + {Files,_Cache} = + lists:mapfoldl( + fun F(Fn, Cache) -> + AbsFn = filename:join(Cwd, Fn), + {ok, Fi} = file:read_file_info(AbsFn), + CacheKey = {Fi#file_info.type, Fi#file_info.size}, + {SubDirFiles, NewCache} = + if Fi#file_info.type == directory -> + {ok, Files} = file:list_dir(AbsFn), + lists:mapfoldl(F, Cache#{ CacheKey => <<>> }, + [filename:join(Fn, DirFn) || DirFn <- Files]); + Fi#file_info.type == regular -> + {[], + case maps:find(CacheKey, Cache) of + {ok, _} -> Cache; + error -> + {ok, Data} = read_file( + file:open(AbsFn, [read, raw, binary]), + Fi#file_info.size), + Cache#{ CacheKey => Data } + end} + end, + {[{Fn, maps:get(CacheKey, NewCache), Fi}|SubDirFiles], NewCache} + end, #{}, Filelist), + zip:zip(Archive, lists:flatten(Files), proplists:delete(cwd,Opts)). + +%% Special read_file that works on windows on > 4 GB files +read_file({ok, D}, Size) -> + Bin = iolist_to_binary(read_file(D, Size)), + erlang:garbage_collect(), %% Do a GC to get rid of all intermediate binaries + {ok, Bin}; +read_file({error, _} = E, _Size) -> + E; +read_file(eof = E, _Size) -> + E; +read_file(D, 0) -> + file:close(D), + []; +read_file(D, Size) -> + {ok, B} = file:read(D, min(1 bsl 30, Size)), + [B | read_file(D, Size - byte_size(B))]. + +unzip(Config, Archive, Opts) when is_list(Config) -> + unzip(get_value(unzip, Config), Archive, Opts); +unzip(z64_unzip, Archive, Opts) -> + unzip(unzip, Archive, Opts); +unzip(unzip, Archive, Opts) -> + UidGid = [" -X " || lists:member(uid_gid, get_value(extra, Opts, []))], + Files = lists:join($ , get_value(file_list, Opts, [])), + Res = cmd("cd "++get_value(cwd, Opts)++" && " + "unzip "++UidGid++" "++Archive++" "++Files), + {ok, lists:sort( + lists:flatmap( + fun(Ln) -> + case re:run(Ln, ~B'\s+[a-z]+: ([^\s]+)', [{capture,all_but_first,list},unicode]) of + nomatch -> []; + {match,Match} -> Match + end + end,string:split(Res,"\n",all)))}; +unzip(z64_unezip, Archive, Opts) -> + unzip(unezip, Archive, Opts); +unzip(unezip, Archive, Opts) -> + Cwd = get_value(cwd, Opts) ++ "/", + {ok, Files} = zip:unzip(Archive, Opts), + {ok, lists:sort([F -- Cwd || F <- Files])}; +unzip(z64_unemzip, Archive, Opts) -> + %% Run in peer node so that memory issues don't crash test node + {ok, Peer, Node} = ?CT_PEER(#{ args => emzip_peer_args() }), + try + erpc:call( + Node, + fun() -> + unzip(unemzip, Archive, Opts) + end) + after + catch peer:stop(Peer) + end; +unzip(unemzip, Archive, Opts) -> + Cwd = get_value(cwd, Opts) ++ "/", + + {ok, Files} = zip:unzip(Archive, [memory | Opts]), + {ok, lists:sort( + [begin + case lists:last(F) of + $/ -> + filelib:ensure_path(F); + _ -> + filelib:ensure_dir(F), + file:write_file(F, B) + end, + F -- Cwd + end || {F, B} <- Files])}. + +emzip_peer_args() -> + 8 = erlang:system_info(wordsize),%% Supercarrier only supported on 64-bit + ["+MMscs",integer_to_list(?EMZIP64_MEM_USAGE div (1024 * 1024))]. + +cmp(Source, Target) -> + {ok, SrcInfo} = file:read_file_info(Source), + {ok, TgtInfo} = file:read_file_info(Target), + ?assertEqual(SrcInfo#file_info.size, TgtInfo#file_info.size), + ?assertEqual(SrcInfo#file_info.mode, TgtInfo#file_info.mode), + + {ok, Src} = file:open(Source, [read, binary]), + {ok, Tgt} = file:open(Target, [read, binary]), + + cmp(Src, Tgt, 0), + + file:close(Src), + file:close(Tgt). + +%% Check if first 100 MB are the same +cmp(Src, Tgt, Pos) when Pos < 100 bsl 20 -> + erlang:garbage_collect(), + case {file:read(Src, 20 bsl 20), file:read(Tgt, 20 bsl 20)} of + {{ok, Data}, {ok, Data}} -> + cmp(Src, Tgt, Pos + 20 bsl 20); + {E, E} -> + ok + end; +cmp(_Src, _Tgt, _) -> + ok. + +cmd(Cmd) -> + Res = os:cmd(Cmd), + ct:log("Cmd: ~ts~nRes: ~ts~n",[Cmd, Res]), + Res. + +disc_free(Path) -> + Data = disksup:get_disk_data(), + + %% What partitions could Data be mounted on? + Partitions = + [D || {P, _Tot, _Perc}=D <- Data, + lists:prefix(filename:nativename(P), filename:nativename(Path))], + + %% Sorting in descending order places the partition with the most specific + %% path first. + case lists:sort(fun erlang:'>='/2, Partitions) of + [{_,Tot, Perc} | _] -> round(Tot * (1-(Perc/100))); + [] -> error + end. + +memsize() -> + case proplists:get_value(available_memory, memsup:get_system_memory_data()) of + undefined -> + {Tot,_Used,_} = memsup:get_memory_data(), + Tot; + Available -> + Available + end.