diff --git a/lib/hex/http.ex b/lib/hex/http.ex index 5f9a6b01..7e8cd89a 100644 --- a/lib/hex/http.ex +++ b/lib/hex/http.ex @@ -28,6 +28,66 @@ defmodule Hex.HTTP do @impl :mix_hex_http def request(method, url, headers, body, adapter_config) when is_map(adapter_config) do + {method, url, request, http_opts, timeout, profile} = + prepare_request(method, url, headers, body, adapter_config) + + Hex.Shell.debug("Hex.HTTP.request(#{inspect(method)}, #{inspect(url)})") + + result = + retry(method, request, http_opts, @request_retries, profile, fn request, http_opts -> + redirect(request, http_opts, @request_redirects, fn request, http_opts -> + timeout(request, http_opts, timeout, fn request, http_opts -> + :httpc.request(method, request, http_opts, [body_format: :binary], profile) + |> handle_response(method, url) + end) + end) + end) + + # Convert to hex_core expected format + case result do + {:ok, status, headers, body} -> + # Convert headers to map with binary keys/values for hex_core + headers = Map.new(headers, fn {k, v} -> {to_string(k), to_string(v)} end) + {:ok, {status, headers, body}} + + {:error, reason} -> + {:error, reason} + end + end + + @impl :mix_hex_http + def request_to_file(method, url, headers, body, filename, adapter_config) + when is_map(adapter_config) do + {method, url, request, http_opts, timeout, profile} = + prepare_request(method, url, headers, body, adapter_config) + + Hex.Shell.debug("Hex.HTTP.request_to_file(#{inspect(method)}, #{inspect(url)})") + + filename_charlist = String.to_charlist(filename) + + result = + retry(method, request, http_opts, @request_retries, profile, fn request, http_opts -> + redirect(request, http_opts, @request_redirects, fn request, http_opts -> + timeout(request, http_opts, timeout, fn request, http_opts -> + :httpc.request(method, request, http_opts, [{:stream, filename_charlist}], profile) + |> handle_response_to_file(method, url) + end) + end) + end) + + # Convert to hex_core expected format + case result do 
+ {:ok, status, headers} -> + # Convert headers to map with binary keys/values for hex_core + headers = Map.new(headers, fn {k, v} -> {to_string(k), to_string(v)} end) + {:ok, {status, headers}} + + {:error, reason} -> + {:error, reason} + end + end + + defp prepare_request(method, url, headers, body, adapter_config) do # Convert method to atom if it's not already method = if is_binary(method), do: String.to_atom(method), else: method # Convert URL to string if it's binary @@ -36,8 +96,6 @@ defmodule Hex.HTTP do # Convert headers from map to our format headers = if is_map(headers), do: headers, else: Map.new(headers) - Hex.Shell.debug("Hex.HTTP.request(#{inspect(method)}, #{inspect(url)})") - headers = add_basic_auth_via_netrc(headers, url) timeout = @@ -61,30 +119,30 @@ defmodule Hex.HTTP do end http_opts = build_http_opts(url, timeout) - opts = [body_format: :binary] request = build_request(url, headers, body) profile = Hex.State.fetch!(:httpc_profile) - result = - retry(method, request, http_opts, @request_retries, profile, fn request, http_opts -> - redirect(request, http_opts, @request_redirects, fn request, http_opts -> - timeout(request, http_opts, timeout, fn request, http_opts -> - :httpc.request(method, request, http_opts, opts, profile) - |> handle_response(method, url) - end) - end) - end) + {method, url, request, http_opts, timeout, profile} + end - # Convert to hex_core expected format - case result do - {:ok, status, headers, body} -> - # Convert headers to map with binary keys/values for hex_core - headers = Map.new(headers, fn {k, v} -> {to_string(k), to_string(v)} end) - {:ok, {status, headers, body}} + defp handle_response_to_file({:ok, :saved_to_file}, method, url) do + Hex.Shell.debug("Hex.HTTP.request_to_file(#{inspect(method)}, #{inspect(url)}) => 200") + {:ok, 200, %{}} + end - {:error, reason} -> - {:error, reason} - end + defp handle_response_to_file({:ok, {{_version, code, _reason}, headers, _body}}, method, url) do + 
Hex.Shell.debug("Hex.HTTP.request_to_file(#{inspect(method)}, #{inspect(url)}) => #{code}") + headers = Map.new(headers, &decode_header/1) + handle_hex_message(headers["x-hex-message"]) + {:ok, code, headers} + end + + defp handle_response_to_file({:error, term}, method, url) do + Hex.Shell.debug( + "Hex.HTTP.request_to_file(#{inspect(method)}, #{inspect(url)}) => #{inspect(term, limit: :infinity, pretty: true)}" + ) + + {:error, term} end defp fallback(:inet), do: :inet6 @@ -162,12 +220,7 @@ defmodule Hex.HTTP do {:ok, code, headers, body} -> case handle_redirect(code, headers) do {:ok, location} when times > 0 -> - ssl_opts = Hex.HTTP.SSL.ssl_opts(to_string(location)) - http_opts = Keyword.put(http_opts, :ssl, ssl_opts) - - request - |> update_request(location) - |> redirect(http_opts, times - 0, fun) + do_redirect(request, http_opts, location, times, fun) {:ok, _location} -> Mix.raise("Too many redirects") @@ -176,11 +229,32 @@ defmodule Hex.HTTP do {:ok, code, headers, body} end + {:ok, code, headers} -> + case handle_redirect(code, headers) do + {:ok, location} when times > 0 -> + do_redirect(request, http_opts, location, times, fun) + + {:ok, _location} -> + Mix.raise("Too many redirects") + + :error -> + {:ok, code, headers} + end + {:error, reason} -> {:error, reason} end end + defp do_redirect(request, http_opts, location, times, fun) do + ssl_opts = Hex.HTTP.SSL.ssl_opts(to_string(location)) + http_opts = Keyword.put(http_opts, :ssl, ssl_opts) + + request + |> update_request(location) + |> redirect(http_opts, times - 1, fun) + end + defp handle_redirect(code, headers) when code in [301, 302, 303, 307, 308] do if location = headers["location"] do diff --git a/lib/mix/tasks/hex.docs.ex b/lib/mix/tasks/hex.docs.ex index 08918629..555eff13 100644 --- a/lib/mix/tasks/hex.docs.ex +++ b/lib/mix/tasks/hex.docs.ex @@ -357,8 +357,7 @@ defmodule Mix.Tasks.Hex.Docs do defp extract_docs(target, target_dir) do File.mkdir_p!(target_dir) - fd = File.open!(target, [:read, 
:compressed]) - :ok = :mix_hex_erl_tar.extract({:file, fd}, [:compressed, cwd: Path.dirname(target)]) + :ok = :mix_hex_erl_tar.extract(target, [:compressed, cwd: Path.dirname(target)]) Hex.Shell.info("Docs fetched: #{target_dir}") end diff --git a/scripts/vendor_hex_core.sh b/scripts/vendor_hex_core.sh index 46039d08..af129430 100755 --- a/scripts/vendor_hex_core.sh +++ b/scripts/vendor_hex_core.sh @@ -27,7 +27,6 @@ filenames="hex_api_auth.erl \ hex_core.erl \ hex_erl_tar.erl \ hex_erl_tar.hrl \ - hex_filename.erl \ hex_http.erl \ hex_http_httpc.erl \ hex_licenses.erl \ diff --git a/src/mix_hex_api.erl b/src/mix_hex_api.erl index 95b164f4..d311fd4b 100644 --- a/src/mix_hex_api.erl +++ b/src/mix_hex_api.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Hex HTTP API diff --git a/src/mix_hex_api_auth.erl b/src/mix_hex_api_auth.erl index fb080f83..80712229 100644 --- a/src/mix_hex_api_auth.erl +++ b/src/mix_hex_api_auth.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Hex HTTP API - Authentication. diff --git a/src/mix_hex_api_key.erl b/src/mix_hex_api_key.erl index 9d255bbc..3e31239a 100644 --- a/src/mix_hex_api_key.erl +++ b/src/mix_hex_api_key.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Hex HTTP API - Keys. diff --git a/src/mix_hex_api_oauth.erl b/src/mix_hex_api_oauth.erl index f42475f5..21949b58 100644 --- a/src/mix_hex_api_oauth.erl +++ b/src/mix_hex_api_oauth.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Hex HTTP API - OAuth. 
diff --git a/src/mix_hex_api_organization.erl b/src/mix_hex_api_organization.erl index 331c73e9..07433fc9 100644 --- a/src/mix_hex_api_organization.erl +++ b/src/mix_hex_api_organization.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Hex HTTP API - Organizations. diff --git a/src/mix_hex_api_organization_member.erl b/src/mix_hex_api_organization_member.erl index 85cfef8a..35c5b17f 100644 --- a/src/mix_hex_api_organization_member.erl +++ b/src/mix_hex_api_organization_member.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Hex HTTP API - Organization Members. diff --git a/src/mix_hex_api_package.erl b/src/mix_hex_api_package.erl index 83b3fe47..0351e0ff 100644 --- a/src/mix_hex_api_package.erl +++ b/src/mix_hex_api_package.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Hex HTTP API - Packages. diff --git a/src/mix_hex_api_package_owner.erl b/src/mix_hex_api_package_owner.erl index 4d7e9888..eb52dafb 100644 --- a/src/mix_hex_api_package_owner.erl +++ b/src/mix_hex_api_package_owner.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Hex HTTP API - Package Owners. diff --git a/src/mix_hex_api_release.erl b/src/mix_hex_api_release.erl index f5f8fbf7..effbd617 100644 --- a/src/mix_hex_api_release.erl +++ b/src/mix_hex_api_release.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Hex HTTP API - Releases. 
diff --git a/src/mix_hex_api_short_url.erl b/src/mix_hex_api_short_url.erl index fbc93150..6aab9672 100644 --- a/src/mix_hex_api_short_url.erl +++ b/src/mix_hex_api_short_url.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Hex HTTP API - Short URLs. diff --git a/src/mix_hex_api_user.erl b/src/mix_hex_api_user.erl index 70f89e0e..dc5d3526 100644 --- a/src/mix_hex_api_user.erl +++ b/src/mix_hex_api_user.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Hex HTTP API - Users. diff --git a/src/mix_hex_core.erl b/src/mix_hex_core.erl index 7c1e2f61..18923c1c 100644 --- a/src/mix_hex_core.erl +++ b/src/mix_hex_core.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% `hex_core' entrypoint module. diff --git a/src/mix_hex_core.hrl b/src/mix_hex_core.hrl index 3cdfb818..8dbff979 100644 --- a/src/mix_hex_core.hrl +++ b/src/mix_hex_core.hrl @@ -1,3 +1,3 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually --define(HEX_CORE_VERSION, "0.12.2"). +-define(HEX_CORE_VERSION, "0.15.0"). 
diff --git a/src/mix_hex_erl_tar.erl b/src/mix_hex_erl_tar.erl index 27b8a408..e3db3e8c 100644 --- a/src/mix_hex_erl_tar.erl +++ b/src/mix_hex_erl_tar.erl @@ -1,22 +1,24 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually - -%% @private -%% Copied from https://github.com/erlang/otp/blob/OTP-20.0.1/lib/stdlib/src/erl_tar.erl -%% with modifications: -%% - Change module name to `mix_hex_erl_tar` -%% - Set tar mtimes to 0 and remove dependency on :os.system_time/1 -%% - Preserve modes when building tarball -%% - Do not crash if failing to write tar -%% - Allow setting file_info opts on :mix_hex_erl_tar.add -%% - Add safe_relative_path_links/2 to check directory traversal vulnerability when extracting files, -%% it differs from OTP's current fix (2020-02-04) in that it checks regular files instead of -%% symlink targets. This allows creating symlinks with relative path targets such as `../tmp/log` -%% - Remove ram_file usage (backported from OTP master) - +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually + +%% This file is a copy of erl_tar.erl from OTP with the following modifications: +%% 1. Module renamed from erl_tar to mix_hex_erl_tar +%% 2. -include changed from erl_tar.hrl to mix_hex_erl_tar.hrl +%% 3. -doc and -moduledoc attributes removed for OTP 24 compatibility +%% 4. safe_link_name/2 fixed to validate symlink targets relative to symlink's +%% parent directory instead of in isolation +%% 5. When extracting to disk (cwd option), stream file entries in chunks +%% instead of loading them fully into memory +%% 6. Default chunk_size to 65536 in add_opts instead of 0 with special case +%% 7. Use compressed instead of compressed_one for file:open for OTP 24 compat +%% 8. Added {max_size, N} extraction option for zip bomb protection +%% +%% OTP commit: 013041bd68c2547848e88963739edea7f0a1a90f %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1997-2017. All Rights Reserved. 
+%% SPDX-License-Identifier: Apache-2.0 +%% +%% Copyright Ericsson AB 1997-2025. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -44,18 +46,20 @@ %% http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html -module(mix_hex_erl_tar). + -export([init/3, create/2, create/3, extract/1, extract/2, table/1, table/2, t/1, tt/1, open/2, close/1, - add/3, add/4, add/5, + add/3, add/4, format_error/1]). -include_lib("kernel/include/file.hrl"). --include_lib("mix_hex_erl_tar.hrl"). +-include("mix_hex_erl_tar.hrl"). %% Converts the short error reason to a descriptive string. + -spec format_error(term()) -> string(). format_error(invalid_tar_checksum) -> "Checksum failed"; @@ -86,6 +90,8 @@ format_error({invalid_gnu_0_1_sparsemap, Format}) -> lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format])); format_error(unsafe_path) -> "The path points above the current working directory"; +format_error(too_big) -> + "Extraction size exceeds the configured max_size limit"; format_error({Name,Reason}) -> lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)])); format_error(Atom) when is_atom(Atom) -> @@ -94,17 +100,21 @@ format_error(Term) -> lists:flatten(io_lib:format("~tp", [Term])). %% Initializes a new reader given a custom file handle and I/O wrappers --spec init(handle(), write | read, file_op()) -> {ok, reader()} | {error, badarg}. -init(Handle, AccessMode, Fun) when is_function(Fun, 2) -> - Reader = #reader{handle=Handle,access=AccessMode,func=Fun}, + + +-spec init(UserData :: user_data(), write | read, file_op()) -> + {ok, tar_descriptor()} | {error, badarg}. 
+init(UserData, AccessMode, Fun) when is_function(Fun, 2) -> + Reader = #reader{handle=UserData,access=AccessMode,func=Fun}, {ok, Pos, Reader2} = do_position(Reader, {cur, 0}), {ok, Reader2#reader{pos=Pos}}; -init(_Handle, _AccessMode, _Fun) -> +init(_UserData, _AccessMode, _Fun) -> {error, badarg}. %%%================================================================ %% Extracts all files from the tar file Name. --spec extract(open_handle()) -> ok | {error, term()}. + +-spec extract(Open :: open_type()) -> ok | {error, term()}. extract(Name) -> extract(Name, []). @@ -117,10 +127,11 @@ extract(Name) -> %% - {files, ListOfFilesToExtract}: Only extract ListOfFilesToExtract %% - verbose: Prints verbose information about the extraction, %% - {cwd, AbsoluteDir}: Sets the current working directory for the extraction --spec extract(open_handle(), [extract_opt()]) -> - ok - | {ok, [{string(), binary()}]} - | {error, term()}. + +-spec extract(Open :: open_type(), []) -> + ok | {error, term()}; + (Open :: open_type(), [extract_opt(), ...]) -> + ok | {ok, [{string(), binary()}]} | {error, term()}. extract({binary, Bin}, Opts) when is_list(Opts) -> do_extract({binary, Bin}, Opts); extract({file, Fd}, Opts) when is_list(Opts) -> @@ -132,9 +143,80 @@ extract(Name, Opts) when is_list(Name); is_binary(Name), is_list(Opts) -> do_extract(Handle, Opts) when is_list(Opts) -> Opts2 = extract_opts(Opts), - Acc = if Opts2#read_opts.output =:= memory -> []; true -> ok end, - foldl_read(Handle, fun extract1/4, Acc, Opts2). + case maybe_inflate_with_limit(Handle, Opts2) of + {error, _} = Err -> + Err; + {ok, Handle2, Opts3} -> + Acc0 = if Opts3#read_opts.output =:= memory -> []; true -> ok end, + Acc = case Opts3#read_opts.max_size of + infinity -> Acc0; + _ -> {size_tracked, 0, Acc0} + end, + foldl_read(Handle2, fun extract1/4, Acc, Opts3) + end. 
+maybe_inflate_with_limit({binary, Bin}, #read_opts{max_size=MaxSize}=Opts) + when is_integer(MaxSize), is_binary(Bin) -> + case lists:member(compressed, Opts#read_opts.open_mode) of + true -> + case inflate_with_limit(Bin, MaxSize) of + {ok, Inflated} -> + OpenMode = Opts#read_opts.open_mode -- [compressed], + {ok, {binary, Inflated}, Opts#read_opts{open_mode=OpenMode}}; + {error, too_big} -> + {error, too_big} + end; + false -> + {ok, {binary, Bin}, Opts} + end; +maybe_inflate_with_limit(Handle, Opts) -> + {ok, Handle, Opts}. + +inflate_with_limit(Bin, MaxSize) -> + Z = zlib:open(), + try + zlib:inflateInit(Z, 31, cut), + inflate_with_limit_loop(Z, Bin, MaxSize, 0, []) + catch + _:_ -> {ok, Bin} + after + zlib:close(Z) + end. + +inflate_with_limit_loop(Z, Bin, MaxSize, Total, Acc) -> + case zlib:safeInflate(Z, Bin) of + {finished, Chunks} -> + Size = iolist_size(Chunks), + NewTotal = Total + Size, + if NewTotal > MaxSize -> {error, too_big}; + true -> {ok, iolist_to_binary(lists:reverse(Acc, Chunks))} + end; + {continue, Chunks} -> + Size = iolist_size(Chunks), + NewTotal = Total + Size, + if NewTotal > MaxSize -> {error, too_big}; + true -> inflate_with_limit_loop(Z, <<>>, MaxSize, NewTotal, [Chunks|Acc]) + end + end. 
+ +extract1(eof, Reader, _, {size_tracked, _, Acc}) when is_list(Acc) -> + {ok, {ok, lists:reverse(Acc)}, Reader}; +extract1(eof, Reader, _, {size_tracked, _, leading_slash}) -> + error_logger:info_msg("erl_tar: removed leading '/' from member names\n"), + {ok, ok, Reader}; +extract1(eof, Reader, _, {size_tracked, _, Acc}) -> + {ok, Acc, Reader}; +extract1(#tar_header{size=Size}=Header, Reader0, Opts, + {size_tracked, Total, InnerAcc}) -> + NewTotal = Total + Size, + case NewTotal > Opts#read_opts.max_size of + true -> throw({error, too_big}); + false -> ok + end, + case extract1(Header, Reader0, Opts, InnerAcc) of + {ok, NewInnerAcc, Reader1} -> + {ok, {size_tracked, NewTotal, NewInnerAcc}, Reader1} + end; extract1(eof, Reader, _, Acc) when is_list(Acc) -> {ok, {ok, lists:reverse(Acc)}, Reader}; extract1(eof, Reader, _, leading_slash) -> @@ -145,12 +227,18 @@ extract1(eof, Reader, _, Acc) -> extract1(#tar_header{name=Name,size=Size}=Header, Reader0, Opts, Acc0) -> case check_extract(Name, Opts) of true -> - case do_read(Reader0, Size) of - {ok, Bin, Reader1} -> - Acc = extract2(Header, Bin, Opts, Acc0), - {ok, Acc, Reader1}; - {error, _} = Err -> - throw(Err) + case Opts#read_opts.output of + memory -> + case do_read(Reader0, Size) of + {ok, Bin, Reader1} -> + Acc = extract2(Header, Bin, Opts, Acc0), + {ok, Acc, Reader1}; + {error, _} = Err -> + throw(Err) + end; + file -> + Reader1 = extract_to_file(Header, Reader0, Opts), + {ok, Acc0, Reader1} end; false -> {ok, Acc0, skip_file(Reader0)} @@ -171,6 +259,79 @@ extract2(Header, Bin, Opts, Acc) -> throw(Err) end. 
+extract_to_file(#tar_header{name=Name0}=Header, Reader0, Opts) -> + case typeflag(Header#tar_header.typeflag) of + regular -> + Name1 = make_safe_path(Name0, Opts), + case stream_to_file(Name1, Reader0, Opts) of + {ok, Reader1} -> + read_verbose(Opts, "x ~ts~n", [Name0]), + _ = set_extracted_file_info(Name1, Header), + Reader1; + {error, _} = Err -> + throw(Err) + end; + _ -> + Reader1 = skip_file(Reader0), + _ = write_extracted_element(Header, <<>>, Opts), + Reader1 + end. + +stream_to_file(Name, Reader0, Opts) -> + Write = + case Opts#read_opts.keep_old_files of + true -> + case file:read_file_info(Name) of + {ok, _} -> false; + _ -> true + end; + false -> true + end, + case Write of + true -> + ChunkSize = Opts#read_opts.chunk_size, + case open_output_file(Name) of + {ok, Fd} -> + try + stream_to_file_loop(Fd, Reader0, ChunkSize) + after + file:close(Fd) + end; + {error, _} = Err -> + Err + end; + false -> + {ok, skip_file(Reader0)} + end. + +open_output_file(Name) -> + case file:open(Name, [write, raw, binary]) of + {ok, _} = Ok -> + Ok; + {error, enoent} -> + ok = make_dirs(Name, file), + file:open(Name, [write, raw, binary]); + {error, _} = Err -> + Err + end. + +stream_to_file_loop(_Fd, #reg_file_reader{num_bytes=0}=Reader, _ChunkSize) -> + {ok, Reader}; +stream_to_file_loop(_Fd, #sparse_file_reader{num_bytes=0}=Reader, _ChunkSize) -> + {ok, Reader}; +stream_to_file_loop(Fd, Reader, ChunkSize) -> + case do_read(Reader, ChunkSize) of + {ok, Bin, Reader1} -> + case file:write(Fd, Bin) of + ok -> + stream_to_file_loop(Fd, Reader1, ChunkSize); + {error, _} = Err -> + Err + end; + {error, _} = Err -> + Err + end. + %% Checks if the file Name should be extracted. check_extract(_, #read_opts{files=all}) -> true; @@ -180,7 +341,6 @@ check_extract(Name, #read_opts{files=Files}) -> %%%================================================================ %% The following table functions produce a list of information about %% the files contained in the archive. 
--type filename() :: string(). -type typeflag() :: regular | link | symlink | char | block | directory | fifo | reserved | unknown. @@ -188,23 +348,25 @@ check_extract(Name, #read_opts{files=Files}) -> -type uid() :: non_neg_integer(). -type gid() :: non_neg_integer(). --type tar_entry() :: {filename(), - typeflag(), - non_neg_integer(), - tar_time(), - mode(), - uid(), - gid()}. +-type tar_entry() :: {Name :: name_in_archive(), + Type :: typeflag(), + Size :: non_neg_integer(), + MTime :: tar_time(), + Mode :: mode(), + Uid :: uid(), + Gid :: gid()}. %% Returns a list of names of the files in the tar file Name. --spec table(open_handle()) -> {ok, [string()]} | {error, term()}. + +-spec table(Open :: open_type()) -> {ok, [name_in_archive()]} | {error, term()}. table(Name) -> table(Name, []). %% Returns a list of names of the files in the tar file Name. %% Options accepted: compressed, verbose, cooked. --spec table(open_handle(), [compressed | verbose | cooked]) -> - {ok, [tar_entry()]} | {error, term()}. + +-spec table(Open :: open_type(), [compressed | verbose | cooked]) -> + {ok, [name_in_archive() | tar_entry()]} | {error, term()}. table(Name, Opts) when is_list(Opts) -> foldl_read(Name, fun table1/4, [], table_opts(Opts)). @@ -244,6 +406,7 @@ typeflag(_) -> unknown. %% meant to be invoked from the shell. %% Prints each filename in the archive + -spec t(file:filename()) -> ok | {error, term()}. t(Name) when is_list(Name); is_binary(Name) -> case table(Name) of @@ -254,7 +417,8 @@ t(Name) when is_list(Name); is_binary(Name) -> end. %% Prints verbose information about each file in the archive --spec tt(open_handle()) -> ok | {error, term()}. + +-spec tt(open_type()) -> ok | {error, term()}. tt(Name) -> case table(Name, [verbose]) of {ok, List} -> @@ -316,11 +480,12 @@ month(12) -> "Dec". 
%%%================================================================ %% The open function with friends is to keep the file and binary api of this module --type open_handle() :: file:filename() +-type open_type() :: file:filename_all() | {binary, binary()} - | {file, term()}. --spec open(open_handle(), [write | compressed | cooked]) -> - {ok, reader()} | {error, term()}. + | {file, file:io_device()}. + +-spec open(Open :: open_type(), [write | compressed | cooked]) -> + {ok, tar_descriptor()} | {error, term()}. open({binary, Bin}, Mode) when is_binary(Bin) -> do_open({binary, Bin}, Mode); open({file, Fd}, Mode) -> @@ -336,32 +501,47 @@ do_open(Name, Mode) when is_list(Mode) -> {error, {Name, Reason}} end. -open1({binary,Bin0}, read, _Raw, Opts) when is_binary(Bin0) -> +open1({binary,Bin0}=Handle, read, _Raw, Opts) when is_binary(Bin0) -> Bin = case lists:member(compressed, Opts) of true -> + %% emulate file:open with Modes = [compressed_one ...] + Z = zlib:open(), + zlib:inflateInit(Z, 31, cut), try - zlib:gunzip(Bin0) + IoList = zlib:inflate(Z, Bin0), + zlib:inflateEnd(Z), + iolist_to_binary(IoList) catch - _:_ -> Bin0 + _:_ -> Bin0 + after + zlib:close(Z) end; false -> Bin0 end, + case file:open(Bin, [ram,binary,read]) of {ok,File} -> {ok, #reader{handle=File,access=read,func=fun file_op/2}}; - Error -> - Error + {error, Reason} -> + {error, {Handle, Reason}} end; -open1({file, Fd}, read, _Raw, _Opts) -> - Reader = #reader{handle=Fd,access=read,func=fun file_op/2}, - case do_position(Reader, {cur, 0}) of - {ok, Pos, Reader2} -> - {ok, Reader2#reader{pos=Pos}}; - {error, _} = Err -> - Err +open1({file, Fd}=Handle, read, [raw], Opts) -> + case not lists:member(compressed, Opts) of + true -> + Reader = #reader{handle=Fd,access=read,func=fun file_op/2}, + case do_position(Reader, {cur, 0}) of + {ok, Pos, Reader2} -> + {ok, Reader2#reader{pos=Pos}}; + {error, Reason} -> + {error, {Handle, Reason}} + end; + false -> + {error, {Handle, {incompatible_option, compressed}}} 
end; -open1(Name, Access, Raw, Opts) when is_list(Name) or is_binary(Name) -> +open1({file, _Fd}=Handle, read, [], _Opts) -> + {error, {Handle, {incompatible_option, cooked}}}; +open1(Name, Access, Raw, Opts) when is_list(Name); is_binary(Name) -> case file:open(Name, Raw ++ [binary, Access|Opts]) of {ok, File} -> {ok, #reader{handle=File,access=Access,func=fun file_op/2}}; @@ -381,7 +561,7 @@ open_mode([read|Rest], false, Raw, Opts) -> open_mode([write|Rest], false, Raw, Opts) -> open_mode(Rest, write, Raw, Opts); open_mode([compressed|Rest], Access, Raw, Opts) -> - open_mode(Rest, Access, Raw, [compressed|Opts]); + open_mode(Rest, Access, Raw, [compressed,read_ahead|Opts]); open_mode([cooked|Rest], Access, _Raw, Opts) -> open_mode(Rest, Access, [], Opts); open_mode([], Access, Raw, Opts) -> @@ -399,7 +579,8 @@ file_op(close, Fd) -> file:close(Fd). %% Closes a tar archive. --spec close(reader()) -> ok | {error, term()}. + +-spec close(TarDescriptor :: tar_descriptor()) -> ok | {error, term()}. close(#reader{access=read}=Reader) -> ok = do_close(Reader); close(#reader{access=write}=Reader) -> @@ -420,16 +601,18 @@ pad_file(#reader{pos=Pos}=Reader) -> %% Creation/modification of tar archives %% Creates a tar file Name containing the given files. --spec create(file:filename(), filelist()) -> ok | {error, {string(), term()}}. + +-spec create(file:filename_all(), filelist()) -> ok | {error, {string(), term()}}. create(Name, FileList) when is_list(Name); is_binary(Name) -> create(Name, FileList, []). %% Creates a tar archive Name containing the given files. %% Accepted options: verbose, compressed, cooked --spec create(file:filename(), filelist(), [create_opt()]) -> + +-spec create(file:filename_all(), filelist(), [create_opt()]) -> ok | {error, term()} | {error, {string(), term()}}. 
create(Name, FileList, Options) when is_list(Name); is_binary(Name) -> - Mode = lists:filter(fun(X) -> (X=:=compressed) or (X=:=cooked) + Mode = lists:filter(fun(X) -> X =:= compressed orelse X =:= cooked end, Options), case open(Name, [write|Mode]) of {ok, TarFile} -> @@ -458,49 +641,44 @@ do_create(TarFile, [Name|Rest], Opts) -> end. %% Adds a file to a tape archive. --type add_type() :: string() - | {string(), string()} - | {string(), binary()}. --spec add(reader(), add_type(), [add_opt()]) -> ok | {error, term()}. + +-spec add(TarDescriptor, Name, Options) -> ok | {error, term()} when + TarDescriptor :: tar_descriptor(), + Name :: name_in_archive() | {name_in_archive(), file:filename_all()}, + Options :: [add_opt()]. add(Reader, {NameInArchive, Name}, Opts) when is_list(NameInArchive), is_list(Name) -> - do_add(Reader, Name, NameInArchive, undefined, Opts); + do_add(Reader, Name, NameInArchive, Opts); add(Reader, {NameInArchive, Bin}, Opts) when is_list(NameInArchive), is_binary(Bin) -> - do_add(Reader, Bin, NameInArchive, undefined, Opts); -add(Reader, {NameInArchive, Bin, Mode}, Opts) - when is_list(NameInArchive), is_binary(Bin), is_integer(Mode) -> - do_add(Reader, Bin, NameInArchive, Mode, Opts); + do_add(Reader, Bin, NameInArchive, Opts); add(Reader, Name, Opts) when is_list(Name) -> - do_add(Reader, Name, Name, undefined, Opts). + do_add(Reader, Name, Name, Opts). --spec add(reader(), string() | binary(), string(), [add_opt()]) -> - ok | {error, term()}. +-spec add(TarDescriptor, Filename, NameInArchive, Options) -> + ok | {error, term()} when + TarDescriptor :: tar_descriptor(), + Filename :: file:filename_all(), + NameInArchive :: name_in_archive(), + Options :: [add_opt()]. add(Reader, NameOrBin, NameInArchive, Options) when is_list(NameOrBin); is_binary(NameOrBin), is_list(NameInArchive), is_list(Options) -> - do_add(Reader, NameOrBin, NameInArchive, undefined, Options). 
- --spec add(reader(), string() | binary(), string(), integer(), [add_opt()]) -> - ok | {error, term()}. -add(Reader, NameOrBin, NameInArchive, Mode, Options) - when is_list(NameOrBin); is_binary(NameOrBin), - is_list(NameInArchive), is_integer(Mode), is_list(Options) -> - do_add(Reader, NameOrBin, NameInArchive, Mode, Options). + do_add(Reader, NameOrBin, NameInArchive, Options). -do_add(#reader{access=write}=Reader, Name, NameInArchive, Mode, Options) +do_add(#reader{access=write}=Reader, Name, NameInArchive, Options) when is_list(NameInArchive), is_list(Options) -> - RF = fun(F) -> apply_file_info_opts(Options, file:read_link_info(F, [{time, posix}])) end, + RF = apply_file_info_opts_fun(Options, read_link_info), Opts = #add_opts{read_info=RF}, - add1(Reader, Name, NameInArchive, Mode, add_opts(Options, Options, Opts)); -do_add(#reader{access=read},_,_,_,_) -> + add1(Reader, Name, NameInArchive, add_opts(Options, Options, Opts)); +do_add(#reader{access=read},_,_,_) -> {error, eacces}; -do_add(Reader,_,_,_,_) -> +do_add(Reader,_,_,_) -> {error, {badarg, Reader}}. 
add_opts([dereference|T], AllOptions, Opts) -> - RF = fun(F) -> apply_file_info_opts(AllOptions, file:read_file_info(F, [{time, posix}])) end, + RF = apply_file_info_opts_fun(AllOptions, read_file_info), add_opts(T, AllOptions, Opts#add_opts{read_info=RF}); add_opts([verbose|T], AllOptions, Opts) -> add_opts(T, AllOptions, Opts#add_opts{verbose=true}); @@ -512,6 +690,8 @@ add_opts([{mtime,Value}|T], AllOptions, Opts) -> add_opts(T, AllOptions, Opts#add_opts{mtime=Value}); add_opts([{ctime,Value}|T], AllOptions, Opts) -> add_opts(T, AllOptions, Opts#add_opts{ctime=Value}); +add_opts([{mode,Value}|T], AllOptions, Opts) -> + add_opts(T, AllOptions, Opts#add_opts{mode=Value}); add_opts([{uid,Value}|T], AllOptions, Opts) -> add_opts(T, AllOptions, Opts#add_opts{uid=Value}); add_opts([{gid,Value}|T], AllOptions, Opts) -> @@ -532,6 +712,8 @@ do_apply_file_info_opts([{mtime,Value}|T], FileInfo) -> do_apply_file_info_opts(T, FileInfo#file_info{mtime=Value}); do_apply_file_info_opts([{ctime,Value}|T], FileInfo) -> do_apply_file_info_opts(T, FileInfo#file_info{ctime=Value}); +do_apply_file_info_opts([{mode,Value}|T], FileInfo) -> + do_apply_file_info_opts(T, FileInfo#file_info{mode=Value}); do_apply_file_info_opts([{uid,Value}|T], FileInfo) -> do_apply_file_info_opts(T, FileInfo#file_info{uid=Value}); do_apply_file_info_opts([{gid,Value}|T], FileInfo) -> @@ -541,7 +723,12 @@ do_apply_file_info_opts([_|T], FileInfo) -> do_apply_file_info_opts([], FileInfo) -> FileInfo. -add1(#reader{}=Reader, Name, NameInArchive, undefined, #add_opts{read_info=ReadInfo}=Opts) +apply_file_info_opts_fun(Options, InfoFunction) -> + fun(F) -> + apply_file_info_opts(Options, file:InfoFunction(F, [{time, posix}])) + end. 
+ +add1(#reader{}=Reader, Name, NameInArchive, #add_opts{read_info=ReadInfo}=Opts) when is_list(Name) -> Res = case ReadInfo(Name) of {error, Reason0} -> @@ -572,9 +759,9 @@ add1(#reader{}=Reader, Name, NameInArchive, undefined, #add_opts{read_info=ReadI {ok, _Reader} -> ok; {error, _Reason} = Err -> Err end; -add1(Reader, Bin, NameInArchive, Mode, Opts) when is_binary(Bin) -> +add1(Reader, Bin, NameInArchive, Opts) when is_binary(Bin) -> add_verbose(Opts, "a ~ts~n", [NameInArchive]), - Now = 0, + Now = os:system_time(seconds), Header = #tar_header{ name = NameInArchive, size = byte_size(Bin), @@ -584,7 +771,7 @@ add1(Reader, Bin, NameInArchive, Mode, Opts) when is_binary(Bin) -> ctime = add_opts_time(Opts#add_opts.ctime, Now), uid = Opts#add_opts.uid, gid = Opts#add_opts.gid, - mode = default_mode(Mode, 8#100644)}, + mode = Opts#add_opts.mode}, {ok, Reader2} = add_header(Reader, Header, Opts), Padding = skip_padding(byte_size(Bin)), Data = [Bin, <<0:Padding/unit:8>>], @@ -593,12 +780,9 @@ add1(Reader, Bin, NameInArchive, Mode, Opts) when is_binary(Bin) -> {error, Reason} -> {error, {NameInArchive, Reason}} end. -add_opts_time(undefined, _Now) -> 0; +add_opts_time(undefined, Now) -> Now; add_opts_time(Time, _Now) -> Time. -default_mode(undefined, Mode) -> Mode; -default_mode(Mode, _) -> Mode. 
- add_directory(Reader, DirName, NameInArchive, Info, Opts) -> case file:list_dir(DirName) of {ok, []} -> @@ -822,7 +1006,7 @@ split_ustar_path(Path) -> false; true -> PathBin = binary:list_to_bin(Path), - case binary:split(PathBin, [<<$/>>], [global, trim_all]) of + case filename:split(PathBin) of [Part] when byte_size(Part) >= ?V7_NAME_LEN -> false; Parts -> @@ -1029,11 +1213,14 @@ do_get_format({error, _} = Err, _Bin) -> do_get_format(#header_v7{}=V7, Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE -> Checksum = parse_octal(V7#header_v7.checksum), - Chk1 = compute_checksum(Bin), - Chk2 = compute_signed_checksum(Bin), - if Checksum =/= Chk1 andalso Checksum =/= Chk2 -> + IsBadChecksum = case compute_checksum(Bin) of + Checksum -> false; + _ -> compute_signed_checksum(Bin) =/= Checksum + end, + case IsBadChecksum of + true -> ?FORMAT_UNKNOWN; - true -> + false -> %% guess magic Ustar = to_ustar(V7, Bin), Star = to_star(V7, Bin), @@ -1172,8 +1359,8 @@ validate_sparse_entries([#sparse_entry{}=Entry|Rest], RealSize, I, LastOffset) - validate_sparse_entries(Rest, RealSize, I+1, Offset+NumBytes). --spec parse_sparse_map(header_gnu(), reader_type()) -> - {[sparse_entry()], reader_type()}. +-spec parse_sparse_map(header_gnu(), descriptor_type()) -> + {[sparse_entry()], descriptor_type()}. parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) when Sparse#sparse_array.is_extended -> parse_sparse_map(Sparse, Reader, []); @@ -1213,6 +1400,9 @@ compute_signed_checksum(< checksum(Bin, 0). + +checksum(<>, Sum) -> + checksum(Rest, Sum+A+B+C+D); checksum(<>, Sum) -> checksum(Rest, Sum+A); checksum(<<>>, Sum) -> Sum. @@ -1261,39 +1451,40 @@ parse_numeric(<> = Bin) -> parse_octal(Bin) end. -parse_octal(Bin) when is_binary(Bin) -> +parse_octal(<>) -> %% skip leading/trailing zero bytes and spaces - do_parse_octal(Bin, <<>>). 
-do_parse_octal(<<>>, <<>>) -> - 0; -do_parse_octal(<<>>, Acc) -> - case io_lib:fread("~8u", binary:bin_to_list(Acc)) of - {error, _} -> throw({error, invalid_tar_checksum}); - {ok, [Octal], []} -> Octal; - {ok, _, _} -> throw({error, invalid_tar_checksum}) - end; -do_parse_octal(<<$\s,Rest/binary>>, Acc) -> + do_parse_octal(Bin, 0). + +do_parse_octal(<<$\s, Rest/binary>>, Acc) -> do_parse_octal(Rest, Acc); do_parse_octal(<<0, Rest/binary>>, Acc) -> do_parse_octal(Rest, Acc); do_parse_octal(<>, Acc) -> - do_parse_octal(Rest, <>). + Digit = C - $0, + case Digit band 7 of + Digit -> + do_parse_octal(Rest, Acc bsl 3 bor Digit); + _ -> + throw({error, invalid_tar_checksum}) + end; +do_parse_octal(<<>>, Acc) -> + Acc. parse_string(Bin) when is_binary(Bin) -> - do_parse_string(Bin, <<>>). -do_parse_string(<<>>, Acc) -> - case unicode:characters_to_list(Acc) of + N = strlen(Bin, 0), + <> = Bin, + case unicode:characters_to_list(Prefix) of Str when is_list(Str) -> Str; {incomplete, _Str, _Rest} -> - binary:bin_to_list(Acc); + binary_to_list(Bin); {error, _Str, _Rest} -> throw({error, {bad_header, invalid_string}}) - end; -do_parse_string(<<0, _/binary>>, Acc) -> - do_parse_string(<<>>, Acc); -do_parse_string(<>, Acc) -> - do_parse_string(Rest, <>). + end. + +strlen(<<>>, N) -> N; +strlen(<<0, _/binary>>, N) -> N; +strlen(<<_, Rest/binary>>, N) -> strlen(Rest, N + 1). convert_header(Bin, #reader{pos=Pos}=Reader) when byte_size(Bin) =:= ?BLOCK_SIZE, (Pos rem ?BLOCK_SIZE) =:= 0 -> @@ -1318,10 +1509,12 @@ convert_header(_Bin, _Reader) -> %% If the file is a directory, a slash is appended to the name. fileinfo_to_header(Name, #file_info{}=Fi, Link) when is_list(Name) -> BaseHeader = #tar_header{name=Name, - mtime=0, - atime=0, - ctime=0, + mtime=Fi#file_info.mtime, + atime=Fi#file_info.atime, + ctime=Fi#file_info.ctime, mode=Fi#file_info.mode, + uid=Fi#file_info.uid, + gid=Fi#file_info.gid, typeflag=?TYPE_REGULAR}, do_fileinfo_to_header(BaseHeader, Fi, Link). 
@@ -1539,9 +1732,9 @@ do_parse_pax(Reader, Bin, Headers) -> parse_pax_record(Bin) when is_binary(Bin) -> case binary:split(Bin, [<<$\n>>]) of [Record, Residual] -> - case [X || X <- binary:split(Record, [<<$\s>>], [global]), X =/= <<>>] of + case binary:split(Record, [<<$\s>>], [trim_all]) of [_Len, Record1] -> - case [X || X <- binary:split(Record1, [<<$=>>], [global]), X =/= <<>>] of + case binary:split(Record1, [<<$=>>], [trim_all]) of [AttrName, AttrValue] -> {AttrName, AttrValue, Residual}; _Other -> @@ -1641,7 +1834,8 @@ write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) -> create_extracted_dir(Name1, Opts); symlink -> read_verbose(Opts, "x ~ts~n", [Name0]), - create_symlink(Name1, Header#tar_header.linkname, Opts); + LinkName = safe_link_name(Header, Opts), + create_symlink(Name1, LinkName, Opts); Device when Device =:= char orelse Device =:= block -> %% char/block devices will be created as empty files %% and then have their major/minor device set later @@ -1661,51 +1855,20 @@ write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) -> make_safe_path([$/|Path], Opts) -> make_safe_path(Path, Opts); -make_safe_path(Path, #read_opts{cwd=Cwd}) -> - case safe_relative_path_links(Path, Cwd) of - unsafe -> - throw({error,{Path,unsafe_path}}); - SafePath -> - filename:absname(SafePath, Cwd) +make_safe_path(Path0, #read_opts{cwd=Cwd}) -> + case filelib:safe_relative_path(Path0, Cwd) of + unsafe -> throw({error,{Path0,unsafe_path}}); + Path -> filename:absname(Path, Cwd) end. -safe_relative_path_links(Path, Cwd) -> - case filename:pathtype(Path) of - relative -> safe_relative_path_links(filename:split(Path), Cwd, [], ""); - _ -> unsafe +safe_link_name(#tar_header{name=Name,linkname=Path0},#read_opts{cwd=Cwd} ) -> + ParentDir = filename:dirname(Name), + ResolvedTarget = filename:join(ParentDir, Path0), + case filelib:safe_relative_path(ResolvedTarget, Cwd) of + unsafe -> throw({error,{Path0,unsafe_symlink}}); + _Path -> Path0 end. 
-safe_relative_path_links([], _Cwd, _PrevLinks, Acc) -> - Acc; - -safe_relative_path_links([Segment | Segments], Cwd, PrevLinks, Acc) -> - AccSegment = join(Acc, Segment), - - case mix_hex_filename:safe_relative_path(AccSegment) of - unsafe -> - unsafe; - - SafeAccSegment -> - case file:read_link(join(Cwd, SafeAccSegment)) of - {ok, LinkPath} -> - case lists:member(LinkPath, PrevLinks) of - true -> - unsafe; - false -> - case safe_relative_path_links(filename:split(LinkPath), Cwd, [LinkPath | PrevLinks], Acc) of - unsafe -> unsafe; - NewAcc -> safe_relative_path_links(Segments, Cwd, [], NewAcc) - end - end; - - {error, _} -> - safe_relative_path_links(Segments, Cwd, PrevLinks, SafeAccSegment) - end - end. - -join([], Path) -> Path; -join(Left, Right) -> filename:join(Left, Right). - create_regular(Name, NameInArchive, Bin, Opts) -> case write_extracted_file(Name, Bin, Opts) of not_written -> @@ -1823,9 +1986,6 @@ do_write(#reader{handle=Handle,func=Fun}=Reader0, Data) Err end. -do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=0}=Opts) - when is_function(Fun, 2) -> - do_copy(Reader, Source, Opts#add_opts{chunk_size=65536}); do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=ChunkSize}) when is_function(Fun, 2) -> case file:open(Source, [read, binary]) of @@ -1966,7 +2126,7 @@ read_sparse_hole(#sparse_file_reader{pos=Pos}=Reader, Offset, Len) -> num_bytes=NumBytes, pos=Pos+N2}}. --spec do_close(reader()) -> ok | {error, term()}. +-spec do_close(tar_descriptor()) -> ok | {error, term()}. do_close(#reader{handle=Handle,func=Fun}) when is_function(Fun,2) -> Fun(close,Handle). 
@@ -1999,6 +2159,10 @@ extract_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) -> extract_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]}); extract_opts([verbose|Rest], Opts) -> extract_opts(Rest, Opts#read_opts{verbose=true}); +extract_opts([{chunks,N}|Rest], Opts) -> + extract_opts(Rest, Opts#read_opts{chunk_size=N}); +extract_opts([{max_size,N}|Rest], Opts) -> + extract_opts(Rest, Opts#read_opts{max_size=N}); extract_opts([Other|Rest], Opts) -> extract_opts(Rest, read_opts([Other], Opts)); extract_opts([], Opts) -> diff --git a/src/mix_hex_erl_tar.hrl b/src/mix_hex_erl_tar.hrl index 468656be..f25d9340 100644 --- a/src/mix_hex_erl_tar.hrl +++ b/src/mix_hex_erl_tar.hrl @@ -1,11 +1,19 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually - -% Copied from https://github.com/erlang/otp/blob/OTP-20.0.1/lib/stdlib/src/erl_tar.hrl - +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually + +%% This file is a copy of erl_tar.hrl from OTP with the following modifications: +%% 1. Added chunk_size field to #read_opts{} for streaming extraction to disk +%% 2. Added {chunks, pos_integer()} to extract_opt() type +%% 3. Default chunk_size to 65536 in #add_opts{} instead of 0 +%% 4. Added max_size field to #read_opts{} for zip bomb protection +%% 5. Added {max_size, pos_integer() | infinity} to extract_opt() type +%% +%% OTP commit: 013041bd68c2547848e88963739edea7f0a1a90f %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2017. All Rights Reserved. +%% SPDX-License-Identifier: Apache-2.0 +%% +%% Copyright Ericsson AB 1997-2025. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -23,14 +31,15 @@ %% Options used when adding files to a tar archive. -record(add_opts, { - read_info, %% Fun to use for read file/link info. - chunk_size = 0 :: integer(), %% For file reading when sending to sftp. 
0=do not chunk - verbose = false :: boolean(), %% Verbose on/off. - atime = undefined :: undefined | integer(), - mtime = undefined :: undefined | integer(), - ctime = undefined :: undefined | integer(), - uid = 0 :: integer(), - gid = 0 :: integer()}). + read_info, %% Fun to use for read file/link info. + chunk_size = 65536, %% Chunk size for reading files. + verbose = false, %% Verbose on/off. + atime = undefined, + mtime = undefined, + ctime = undefined, + mode = 8#100644, + uid = 0, + gid = 0}). -type add_opts() :: #add_opts{}. %% Options used when reading a tar archive. @@ -40,21 +49,27 @@ files = all, %% Set of files to extract (or all) output = file :: 'file' | 'memory', open_mode = [], %% Open mode options. - verbose = false :: boolean()}). %% Verbose on/off. + verbose = false :: boolean(), %% Verbose on/off. + chunk_size = 65536, %% Chunk size for streaming to disk. + max_size = infinity :: pos_integer() | 'infinity'}). -type read_opts() :: #read_opts{}. --type add_opt() :: dereference - | verbose - | {chunks, pos_integer()} - | {atime, integer()} - | {mtime, integer()} - | {ctime, integer()} - | {uid, integer()} - | {gid, integer()}. +-type add_opt() :: dereference | + verbose | + {chunks, pos_integer()} | + {atime, non_neg_integer()} | + {mtime, non_neg_integer()} | + {ctime, non_neg_integer()} | + {mode, non_neg_integer()} | + {uid, non_neg_integer()} | + {gid, non_neg_integer()}. +-type name_in_archive() :: string(). -type extract_opt() :: {cwd, string()} | - {files, [string()]} | + {files, [name_in_archive()]} | + {chunks, pos_integer()} | + {max_size, pos_integer() | infinity} | compressed | cooked | memory | @@ -67,21 +82,20 @@ verbose. -type filelist() :: [file:filename() | - {string(), binary()} | - {string(), file:filename()}]. + {name_in_archive(), file:filename_all()}]. -type tar_time() :: non_neg_integer(). %% The tar header, once fully parsed. 
-record(tar_header, { - name = "" :: string(), %% name of header file entry + name = "" :: name_in_archive(), %% name of header file entry mode = 8#100644 :: non_neg_integer(), %% permission and mode bits uid = 0 :: non_neg_integer(), %% user id of owner gid = 0 :: non_neg_integer(), %% group id of owner size = 0 :: non_neg_integer(), %% length in bytes mtime :: tar_time(), %% modified time typeflag :: char(), %% type of header entry - linkname = "" :: string(), %% target name of link + linkname = "" :: name_in_archive(), %% target name of link uname = "" :: string(), %% user name of owner gname = "" :: string(), %% group name of owner devmajor = 0 :: non_neg_integer(), %% major number of character or block device @@ -162,16 +176,18 @@ %% The overall tar reader, it holds the low-level file handle, %% its access, position, and the I/O primitives wrapper. -record(reader, { - handle :: file:io_device() | term(), + handle :: user_data(), access :: read | write | ram, pos = 0 :: non_neg_integer(), func :: file_op() }). --type reader() :: #reader{}. +-opaque tar_descriptor() :: #reader{}. +-export_type([tar_descriptor/0]). + %% A reader for a regular file within the tar archive, %% It tracks its current state relative to that file. -record(reg_file_reader, { - handle :: reader(), + handle :: tar_descriptor(), num_bytes = 0, pos = 0, size = 0 @@ -180,7 +196,7 @@ %% A reader for a sparse file within the tar archive, %% It tracks its current state relative to that file. -record(sparse_file_reader, { - handle :: reader(), + handle :: tar_descriptor(), num_bytes = 0, %% bytes remaining pos = 0, %% pos size = 0, %% total size of file @@ -189,13 +205,13 @@ -type sparse_file_reader() :: #sparse_file_reader{}. %% Types for the readers --type reader_type() :: reader() | reg_file_reader() | sparse_file_reader(). --type handle() :: file:io_device() | term(). +-type descriptor_type() :: tar_descriptor() | reg_file_reader() | sparse_file_reader(). +-type user_data() :: term(). 
%% Type for the I/O primitive wrapper function -type file_op() :: fun((write | close | read2 | position, - {handle(), iodata()} | handle() | {handle(), non_neg_integer()} - | {handle(), non_neg_integer()}) -> + {user_data(), iodata()} | user_data() | {user_data(), non_neg_integer()} + | {user_data(), non_neg_integer()}) -> ok | eof | {ok, string() | binary()} | {ok, non_neg_integer()} | {error, term()}). diff --git a/src/mix_hex_filename.erl b/src/mix_hex_filename.erl deleted file mode 100644 index e087409e..00000000 --- a/src/mix_hex_filename.erl +++ /dev/null @@ -1,60 +0,0 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually - -% @private -% Excerpt from https://github.com/erlang/otp/blob/OTP-20.0.1/lib/stdlib/src/filename.erl#L761-L788 -% with modifications for changing local function calls to remote function calls -% to the `filename` module, for the functions `pathtype/1`, `split/1`, and `join/1` -% -% safe_relative_path/1 was not present in earlier OTP releases. - -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 1997-2017. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% - --module(mix_hex_filename). --export([safe_relative_path/1]). - -safe_relative_path(Path) -> - case filename:pathtype(Path) of - relative -> - Cs0 = filename:split(Path), - safe_relative_path_1(Cs0, []); - _ -> - unsafe - end. - -safe_relative_path_1(["." 
| T], Acc) -> - safe_relative_path_1(T, Acc); -safe_relative_path_1([<<".">> | T], Acc) -> - safe_relative_path_1(T, Acc); -safe_relative_path_1([".." | T], Acc) -> - climb(T, Acc); -safe_relative_path_1([<<"..">> | T], Acc) -> - climb(T, Acc); -safe_relative_path_1([H | T], Acc) -> - safe_relative_path_1(T, [H | Acc]); -safe_relative_path_1([], []) -> - []; -safe_relative_path_1([], Acc) -> - filename:join(lists:reverse(Acc)). - -climb(_, []) -> - unsafe; -climb(T, [_ | Acc]) -> - safe_relative_path_1(T, Acc). diff --git a/src/mix_hex_http.erl b/src/mix_hex_http.erl index 18c5a41b..0ef191f9 100644 --- a/src/mix_hex_http.erl +++ b/src/mix_hex_http.erl @@ -1,9 +1,9 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% HTTP contract. -module(mix_hex_http). --export([request/5]). +-export([request/5, request_to_file/6]). -ifdef(TEST). -export([user_agent/1]). -endif. @@ -23,26 +23,31 @@ {ok, {status(), headers(), binary()}} | {error, term()}. +-callback request_to_file( + method(), URI :: binary(), headers(), body(), file:name_all(), adapter_config() +) -> + {ok, {status(), headers()}} | {error, term()}. + -spec request(mix_hex_core:config(), method(), URI :: binary(), headers(), body()) -> {ok, {status(), headers(), binary()}} | {error, term()}. 
request(Config, Method, URI, Headers, Body) when is_binary(URI) and is_map(Headers) -> - {Adapter, AdapterConfig} = - case maps:get(http_adapter, Config, {mix_hex_http_httpc, #{}}) of - {Adapter0, AdapterConfig0} -> - {Adapter0, AdapterConfig0}; - %% TODO: remove in v0.9 - Adapter0 when is_atom(Adapter0) -> - AdapterConfig0 = maps:get(http_adapter_config, Config, #{}), - io:format( - "[mix_hex_http] setting #{http_adapter => Module, http_adapter_config => Map} " - "is deprecated in favour of #{http_adapter => {Module, Map}}~n" - ), - {Adapter0, AdapterConfig0} - end, + {Adapter, AdapterConfig} = adapter(Config), UserAgentFragment = maps:get(http_user_agent_fragment, Config), Headers2 = put_new(<<"user-agent">>, user_agent(UserAgentFragment), Headers), Adapter:request(Method, URI, Headers2, Body, AdapterConfig). +-spec request_to_file( + mix_hex_core:config(), method(), URI :: binary(), headers(), body(), file:name_all() +) -> + {ok, {status(), headers()}} | {error, term()}. +request_to_file(Config, Method, URI, Headers, Body, Filename) when + is_binary(URI) and is_map(Headers) +-> + {Adapter, AdapterConfig} = adapter(Config), + UserAgentFragment = maps:get(http_user_agent_fragment, Config), + Headers2 = put_new(<<"user-agent">>, user_agent(UserAgentFragment), Headers), + Adapter:request_to_file(Method, URI, Headers2, Body, Filename, AdapterConfig). 
+ %% @private user_agent(UserAgentFragment) -> OTPRelease = erlang:system_info(otp_release), @@ -54,6 +59,21 @@ user_agent(UserAgentFragment) -> %% Internal functions %%==================================================================== +%% @private +adapter(Config) -> + case maps:get(http_adapter, Config, {mix_hex_http_httpc, #{}}) of + {Adapter, AdapterConfig} -> + {Adapter, AdapterConfig}; + %% TODO: remove in v0.9 + Adapter when is_atom(Adapter) -> + AdapterConfig = maps:get(http_adapter_config, Config, #{}), + io:format( + "[mix_hex_http] setting #{http_adapter => Module, http_adapter_config => Map} " + "is deprecated in favour of #{http_adapter => {Module, Map}}~n" + ), + {Adapter, AdapterConfig} + end. + %% @private put_new(Key, Value, Map) -> case maps:find(Key, Map) of diff --git a/src/mix_hex_http_httpc.erl b/src/mix_hex_http_httpc.erl index 766dc82a..62ac35de 100644 --- a/src/mix_hex_http_httpc.erl +++ b/src/mix_hex_http_httpc.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% httpc-based implementation of {@link mix_hex_http} contract. @@ -14,7 +14,7 @@ -module(mix_hex_http_httpc). -behaviour(mix_hex_http). --export([request/5]). +-export([request/5, request_to_file/6]). %%==================================================================== %% API functions @@ -22,45 +22,7 @@ request(Method, URI, ReqHeaders, Body, AdapterConfig) when is_binary(URI) -> Profile = maps:get(profile, AdapterConfig, default), - HTTPOptions0 = maps:get(http_options, AdapterConfig, []), - - HTTPS = - case URI of - <<"https", _/binary>> -> true; - _ -> false - end, - SSLOpts0 = proplists:get_value(ssl, HTTPOptions0), - - HTTPOptions = - if - HTTPS == true andalso SSLOpts0 == undefined -> - %% Add safe defaults if possible. 
- try - [ - {ssl, [ - {verify, verify_peer}, - {cacerts, public_key:cacerts_get()}, - {depth, 3}, - {customize_hostname_check, [ - {match_fun, public_key:pkix_verify_hostname_match_fun(https)} - ]} - ]} - | HTTPOptions0 - ] - catch - _:_ -> - io:format( - "[mix_hex_http_httpc] using default ssl options which are insecure.~n" - "Configure your adapter with: " - "{mix_hex_http_httpc, #{http_options => [{ssl, SslOpts}]}}~n" - "or upgrade Erlang/OTP to OTP-25 or later.~n" - ), - HTTPOptions0 - end; - true -> - HTTPOptions0 - end, - + HTTPOptions = http_options(URI, AdapterConfig), Request = build_request(URI, ReqHeaders, Body), case httpc:request(Method, Request, HTTPOptions, [{body_format, binary}], Profile) of {ok, {{_, StatusCode, _}, RespHeaders, RespBody}} -> @@ -70,10 +32,71 @@ request(Method, URI, ReqHeaders, Body, AdapterConfig) when is_binary(URI) -> {error, Reason} end. +request_to_file(Method, URI, ReqHeaders, Body, Filename, AdapterConfig) when is_binary(URI) -> + Profile = maps:get(profile, AdapterConfig, default), + HTTPOptions = http_options(URI, AdapterConfig), + Request = build_request(URI, ReqHeaders, Body), + case + httpc:request( + Method, + Request, + HTTPOptions, + [{stream, unicode:characters_to_list(Filename)}], + Profile + ) + of + {ok, saved_to_file} -> + {ok, {200, #{}}}; + {ok, {{_, StatusCode, _}, RespHeaders, _RespBody}} -> + RespHeaders2 = load_headers(RespHeaders), + {ok, {StatusCode, RespHeaders2}}; + {error, Reason} -> + {error, Reason} + end. 
+ %%==================================================================== %% Internal functions %%==================================================================== +%% @private +http_options(URI, AdapterConfig) -> + HTTPOptions0 = maps:get(http_options, AdapterConfig, []), + + HTTPS = + case URI of + <<"https", _/binary>> -> true; + _ -> false + end, + SSLOpts0 = proplists:get_value(ssl, HTTPOptions0), + + if + HTTPS == true andalso SSLOpts0 == undefined -> + try + [ + {ssl, [ + {verify, verify_peer}, + {cacerts, public_key:cacerts_get()}, + {depth, 3}, + {customize_hostname_check, [ + {match_fun, public_key:pkix_verify_hostname_match_fun(https)} + ]} + ]} + | HTTPOptions0 + ] + catch + _:_ -> + io:format( + "[mix_hex_http_httpc] using default ssl options which are insecure.~n" + "Configure your adapter with: " + "{mix_hex_http_httpc, #{http_options => [{ssl, SslOpts}]}}~n" + "or upgrade Erlang/OTP to OTP-25 or later.~n" + ), + HTTPOptions0 + end; + true -> + HTTPOptions0 + end. + %% @private build_request(URI, ReqHeaders, Body) -> build_request2(binary_to_list(URI), dump_headers(ReqHeaders), Body). diff --git a/src/mix_hex_licenses.erl b/src/mix_hex_licenses.erl index 86602c06..192f9091 100644 --- a/src/mix_hex_licenses.erl +++ b/src/mix_hex_licenses.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Hex Licenses. 
diff --git a/src/mix_hex_pb_names.erl b/src/mix_hex_pb_names.erl index c1701f0a..75aa3055 100644 --- a/src/mix_hex_pb_names.erl +++ b/src/mix_hex_pb_names.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% -*- coding: utf-8 -*- %% % this file is @generated diff --git a/src/mix_hex_pb_package.erl b/src/mix_hex_pb_package.erl index 60270770..bb8e0574 100644 --- a/src/mix_hex_pb_package.erl +++ b/src/mix_hex_pb_package.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% -*- coding: utf-8 -*- %% % this file is @generated diff --git a/src/mix_hex_pb_signed.erl b/src/mix_hex_pb_signed.erl index 1898890d..0a1ecefa 100644 --- a/src/mix_hex_pb_signed.erl +++ b/src/mix_hex_pb_signed.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% -*- coding: utf-8 -*- %% % this file is @generated diff --git a/src/mix_hex_pb_versions.erl b/src/mix_hex_pb_versions.erl index 3ea51dc4..e9649979 100644 --- a/src/mix_hex_pb_versions.erl +++ b/src/mix_hex_pb_versions.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% -*- coding: utf-8 -*- %% % this file is @generated diff --git a/src/mix_hex_registry.erl b/src/mix_hex_registry.erl index e6b01233..b361e9ab 100644 --- a/src/mix_hex_registry.erl +++ b/src/mix_hex_registry.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Functions for encoding and decoding Hex registries. 
diff --git a/src/mix_hex_repo.erl b/src/mix_hex_repo.erl index 9ab6703c..aadf5374 100644 --- a/src/mix_hex_repo.erl +++ b/src/mix_hex_repo.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Repo API. @@ -8,7 +8,9 @@ get_versions/1, get_package/2, get_tarball/3, + get_tarball_to_file/4, get_docs/3, + get_docs_to_file/4, get_public_key/1, get_hex_installs/1 ]). @@ -93,7 +95,7 @@ get_package(Config, Name) when is_binary(Name) and is_map(Config) -> %% %% ``` %% > {ok, {200, _, Tarball}} = mix_hex_repo:get_tarball(mix_hex_core:default_config(), <<"package1">>, <<"1.0.0">>), -%% > {ok, #{metadata := Metadata}} = mix_hex_tarball:unpack(Tarball, memory). +%% > {ok, #{metadata := Metadata}} = mix_hex_tarball:unpack(Tarball, "/tmp/package"). %% ''' %% @end get_tarball(Config, Name, Version) -> @@ -106,6 +108,26 @@ get_tarball(Config, Name, Version) -> Other end. +%% @doc +%% Gets tarball from the repository and writes it to a file. +%% +%% Examples: +%% +%% ``` +%% > {ok, {200, _}} = mix_hex_repo:get_tarball_to_file(mix_hex_core:default_config(), <<"package1">>, <<"1.0.0">>, "/tmp/package.tar"), +%% > {ok, #{metadata := Metadata}} = mix_hex_tarball:unpack({file, "/tmp/package.tar"}, "/tmp/package"). +%% ''' +%% @end +get_tarball_to_file(Config, Name, Version, Filename) -> + ReqHeaders = make_headers(Config), + + case get_to_file(Config, tarball_url(Config, Name, Version), ReqHeaders, Filename) of + {ok, {200, RespHeaders}} -> + {ok, {200, RespHeaders}}; + Other -> + Other + end. + %% @doc %% Gets docs tarball from the repository. 
%% @@ -113,8 +135,8 @@ get_tarball(Config, Name, Version) -> %% %% ``` %% > {ok, {200, _, Docs}} = mix_hex_repo:get_docs(mix_hex_core:default_config(), <<"package1">>, <<"1.0.0">>), -%% > mix_hex_tarball:unpack_docs(Docs, memory) -%% {ok, [{"index.html", <<"">>}, ...]} +%% > mix_hex_tarball:unpack_docs(Docs, "/tmp/docs") +%% ok %% ''' get_docs(Config, Name, Version) -> ReqHeaders = make_headers(Config), @@ -126,6 +148,25 @@ get_docs(Config, Name, Version) -> Other end. +%% @doc +%% Gets docs tarball from the repository and writes it to a file. +%% +%% Examples: +%% +%% ``` +%% > {ok, {200, _}} = mix_hex_repo:get_docs_to_file(mix_hex_core:default_config(), <<"package1">>, <<"1.0.0">>, "/tmp/docs.tar.gz"), +%% > ok = mix_hex_tarball:unpack_docs({file, "/tmp/docs.tar.gz"}, "/tmp/docs"). +%% ''' +get_docs_to_file(Config, Name, Version, Filename) -> + ReqHeaders = make_headers(Config), + + case get_to_file(Config, docs_url(Config, Name, Version), ReqHeaders, Filename) of + {ok, {200, RespHeaders}} -> + {ok, {200, RespHeaders}}; + Other -> + Other + end. + %% @doc %% Gets the public key from the repository. %% @@ -175,6 +216,10 @@ get_hex_installs(Config) -> get(Config, URI, Headers) -> mix_hex_http:request(Config, get, URI, Headers, undefined). +%% @private +get_to_file(Config, URI, Headers, Filename) -> + mix_hex_http:request_to_file(Config, get, URI, Headers, undefined, Filename). + %% @private get_protobuf(Config, Path, Decoder) -> PublicKey = maps:get(repo_public_key, Config), diff --git a/src/mix_hex_safe_binary_to_term.erl b/src/mix_hex_safe_binary_to_term.erl index 5b771a03..1e7e308f 100644 --- a/src/mix_hex_safe_binary_to_term.erl +++ b/src/mix_hex_safe_binary_to_term.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @hidden %% Safe deserialization of Erlang terms from binary. 
diff --git a/src/mix_hex_tarball.erl b/src/mix_hex_tarball.erl index e552fc2c..2379e0fb 100644 --- a/src/mix_hex_tarball.erl +++ b/src/mix_hex_tarball.erl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %% @doc %% Functions for creating and unpacking Hex tarballs. @@ -15,6 +15,10 @@ -export([do_decode_metadata/1, gzip/1, normalize_requirements/1]). -endif. -define(VERSION, <<"3">>). +-define(HASH_CHUNK_SIZE, 65536). +-define(MAX_VERSION_SIZE, 32). +-define(MAX_CHECKSUM_SIZE, 128). +-define(MAX_METADATA_SIZE, 128 * 1024). -define(BUILD_TOOL_FILES, [ {<<"mix.exs">>, <<"mix">>}, {<<"rebar.config">>, <<"rebar3">>}, @@ -25,7 +29,7 @@ -include_lib("kernel/include/file.hrl"). -type checksum() :: binary(). --type contents() :: #{filename() => binary()}. +-type contents() :: [{filename(), binary()}]. -type filename() :: string(). -type files() :: [{filename(), filename() | binary()}]. -type metadata() :: map(). 
@@ -66,37 +70,41 @@ create(Metadata, Files, Config) -> } = Config, MetadataBinary = encode_metadata(Metadata), - ContentsTarball = create_memory_tarball(Files), - ContentsTarballCompressed = gzip(ContentsTarball), - InnerChecksum = inner_checksum(?VERSION, MetadataBinary, ContentsTarballCompressed), - InnerChecksumBase16 = encode_base16(InnerChecksum), - TarballMaxSize = maps:get(tarball_max_size, Config), - TarballMaxUncompressedSize = maps:get(tarball_max_uncompressed_size, Config), - - OuterFiles = [ - {"VERSION", ?VERSION}, - {"CHECKSUM", InnerChecksumBase16}, - {"metadata.config", MetadataBinary}, - {"contents.tar.gz", ContentsTarballCompressed} - ], - case valid_size(ContentsTarball, TarballMaxUncompressedSize) of + case valid_size(MetadataBinary, ?MAX_METADATA_SIZE) of + false -> + {error, {tarball, {file_too_big, "metadata.config"}}}; true -> - Tarball = create_memory_tarball(OuterFiles), - OuterChecksum = checksum(Tarball), + ContentsTarball = create_memory_tarball(Files), + ContentsTarballCompressed = gzip(ContentsTarball), + InnerChecksum = inner_checksum(?VERSION, MetadataBinary, ContentsTarballCompressed), + InnerChecksumBase16 = encode_base16(InnerChecksum), + + OuterFiles = [ + {"VERSION", ?VERSION}, + {"CHECKSUM", InnerChecksumBase16}, + {"metadata.config", MetadataBinary}, + {"contents.tar.gz", ContentsTarballCompressed} + ], - case valid_size(Tarball, TarballMaxSize) of + case valid_size(ContentsTarball, TarballMaxUncompressedSize) of true -> - {ok, #{ - tarball => Tarball, - outer_checksum => OuterChecksum, - inner_checksum => InnerChecksum - }}; + Tarball = create_memory_tarball(OuterFiles), + OuterChecksum = checksum(Tarball), + + case valid_size(Tarball, TarballMaxSize) of + true -> + {ok, #{ + tarball => Tarball, + outer_checksum => OuterChecksum, + inner_checksum => InnerChecksum + }}; + false -> + {error, {tarball, {too_big_compressed, TarballMaxSize}}} + end; false -> - {error, {tarball, {too_big_compressed, TarballMaxSize}}} - end; - 
false -> - {error, {tarball, {too_big_uncompressed, TarballMaxUncompressedSize}}} + {error, {tarball, {too_big_uncompressed, TarballMaxUncompressedSize}}} + end end. -spec create(metadata(), files()) -> @@ -153,6 +161,16 @@ create_docs(Files) -> %% Remember to verify the outer tarball checksum against the registry checksum %% returned from `mix_hex_repo:get_package(Config, Package)'. %% +%% The first argument is the tarball, either as a binary or `{file, Path}' +%% to read from a file on disk. Using `{file, Path}' avoids loading the +%% tarball into memory. +%% +%% The second argument controls the output: +%% +%% - `memory' - unpack contents into memory and return them +%% - `none' - only extract metadata and checksums, skip contents +%% - A path string - extract contents to the given directory +%% %% Examples: %% %% ``` @@ -161,12 +179,16 @@ create_docs(Files) -> %% contents => [{"src/foo.erl",<<"-module(foo).">>}], %% metadata => #{<<"name">> => <<"foo">>, ...}}} %% +%% > mix_hex_tarball:unpack(Tarball, none). +%% {ok,#{outer_checksum => <<...>>, +%% metadata => #{<<"name">> => <<"foo">>, ...}}} +%% %% > mix_hex_tarball:unpack(Tarball, "path/to/unpack"). %% {ok,#{outer_checksum => <<...>>, %% metadata => #{<<"name">> => <<"foo">>, ...}}} %% ''' -spec unpack - (tarball(), memory, mix_hex_core:config()) -> + (tarball() | {file, filename()}, memory, mix_hex_core:config()) -> {ok, #{ outer_checksum => checksum(), inner_checksum => checksum(), @@ -174,27 +196,65 @@ create_docs(Files) -> contents => contents() }} | {error, term()}; - (tarball(), filename(), mix_hex_core:config()) -> + (tarball() | {file, filename()}, none, mix_hex_core:config()) -> + {ok, #{ + outer_checksum => checksum(), + inner_checksum => checksum(), + metadata => metadata() + }} + | {error, term()}; + (tarball() | {file, filename()}, filename(), mix_hex_core:config()) -> {ok, #{ outer_checksum => checksum(), inner_checksum => checksum(), metadata => metadata() }} | {error, term()}. 
-unpack(Tarball, Output, Config) -> - case valid_size(Tarball, maps:get(tarball_max_size, Config)) of +unpack(Input, memory, Config) -> + case check_input_size(Input, Config) of true -> - case mix_hex_erl_tar:extract({binary, Tarball}, [memory]) of + OuterChecksum = outer_checksum(Input), + Source = tar_source(Input), + case mix_hex_erl_tar:extract(Source, [memory]) of {ok, []} -> {error, {tarball, empty}}; {ok, FileList} -> - OuterChecksum = crypto:hash(sha256, Tarball), - do_unpack(maps:from_list(FileList), OuterChecksum, Output); + case validate_outer_file_sizes(maps:from_list(FileList)) of + {ok, Files} -> + do_unpack(Files, OuterChecksum, memory, Config); + {error, _} = Error -> + Error + end; {error, Reason} -> {error, {tarball, Reason}} end; false -> {error, {tarball, too_big}} + end; +unpack(Input, Output, Config) -> + case check_input_size(Input, Config) of + true -> + OuterChecksum = outer_checksum(Input), + Source = tar_source(Input), + TmpDir = tmp_path(), + ok = file:make_dir(TmpDir), + try + case mix_hex_erl_tar:extract(Source, [{cwd, TmpDir}]) of + ok -> + case read_outer_files(TmpDir) of + {ok, Files} -> + do_unpack(Files, OuterChecksum, Output, Config); + {error, _} = Error -> + Error + end; + {error, Reason} -> + {error, {tarball, Reason}} + end + after + remove_dir(TmpDir) + end; + false -> + {error, {tarball, too_big}} end. 
%% @doc @@ -202,7 +262,7 @@ unpack(Tarball, Output, Config) -> %% %% @see unpack/3 -spec unpack - (tarball(), memory) -> + (tarball() | {file, filename()}, memory) -> {ok, #{ outer_checksum => checksum(), inner_checksum => checksum(), @@ -210,7 +270,14 @@ unpack(Tarball, Output, Config) -> contents => contents() }} | {error, term()}; - (tarball(), filename()) -> + (tarball() | {file, filename()}, none) -> + {ok, #{ + outer_checksum => checksum(), + inner_checksum => checksum(), + metadata => metadata() + }} + | {error, term()}; + (tarball() | {file, filename()}, filename()) -> {ok, #{ outer_checksum => checksum(), inner_checksum => checksum(), @@ -223,6 +290,10 @@ unpack(Tarball, Output) -> %% @doc %% Unpacks a documentation tarball. %% +%% The first argument is the tarball, either as a binary or `{file, Path}' +%% to read from a file on disk. Using `{file, Path}' avoids loading the +%% tarball into memory. +%% %% Examples: %% %% ``` @@ -233,21 +304,23 @@ unpack(Tarball, Output) -> %% ok %% ''' -spec unpack_docs - (tarball(), memory, mix_hex_core:config()) -> {ok, contents()} | {error, term()}; - (tarball(), filename(), mix_hex_core:config()) -> ok | {error, term()}. -unpack_docs(Tarball, Output, Config) -> - case valid_size(Tarball, maps:get(docs_tarball_max_size, Config)) of + (tarball() | {file, filename()}, memory, mix_hex_core:config()) -> + {ok, contents()} | {error, term()}; + (tarball() | {file, filename()}, filename(), mix_hex_core:config()) -> ok | {error, term()}. +unpack_docs(Input, Output, Config) -> + case check_docs_input_size(Input, Config) of true -> - unpack_tarball(Tarball, Output); + MaxSize = maps:get(docs_tarball_max_uncompressed_size, Config), + unpack_tarball(tar_source(Input), Output, MaxSize); false -> {error, {tarball, too_big}} end. -spec unpack_docs - (tarball(), memory) -> {ok, contents()} | {error, term()}; - (tarball(), filename()) -> ok | {error, term()}. 
-unpack_docs(Tarball, Output) -> - unpack_docs(Tarball, Output, mix_hex_core:default_config()). + (tarball() | {file, filename()}, memory) -> {ok, contents()} | {error, term()}; + (tarball() | {file, filename()}, filename()) -> ok | {error, term()}. +unpack_docs(Input, Output) -> + unpack_docs(Input, Output, mix_hex_core:default_config()). %% @doc %% Returns base16-encoded representation of checksum. @@ -258,6 +331,8 @@ format_checksum(Checksum) -> %% @doc %% Converts an error reason term to a human-readable error message string. -spec format_error(term()) -> string(). +format_error({tarball, {file_too_big, Name}}) -> + io_lib:format("file too big: ~s", [Name]); format_error({tarball, empty}) -> "empty tarball"; format_error({tarball, {too_big_uncompressed, Size}}) -> @@ -296,6 +371,12 @@ format_byte_size(Size) -> %%==================================================================== %% @private +inner_checksum(Version, MetadataBinary, {path, ContentsPath}) -> + HashState0 = crypto:hash_init(sha256), + HashState1 = crypto:hash_update(HashState0, Version), + HashState2 = crypto:hash_update(HashState1, MetadataBinary), + HashState3 = stream_file_hash(HashState2, ContentsPath), + crypto:hash_final(HashState3); inner_checksum(Version, MetadataBinary, ContentsBinary) -> Blob = <<Version/binary, MetadataBinary/binary, ContentsBinary/binary>>, crypto:hash(sha256, Blob). @@ -304,6 +385,26 @@ inner_checksum(Version, MetadataBinary, ContentsBinary) -> checksum(ContentsBinary) when is_binary(ContentsBinary) -> crypto:hash(sha256, ContentsBinary). +%% @private +tar_source({file, Path}) -> Path; +tar_source(Tarball) -> {binary, Tarball}. + +%% @private +outer_checksum({file, Path}) -> file_checksum(Path); +outer_checksum(Tarball) -> crypto:hash(sha256, Tarball). + +%% @private +check_input_size({file, Path}, Config) -> + valid_file_size(Path, maps:get(tarball_max_size, Config)); +check_input_size(Tarball, Config) -> + valid_size(Tarball, maps:get(tarball_max_size, Config)). 
+ +%% @private +check_docs_input_size({file, Path}, Config) -> + valid_file_size(Path, maps:get(docs_tarball_max_size, Config)); +check_docs_input_size(Tarball, Config) -> + valid_size(Tarball, maps:get(docs_tarball_max_size, Config)). + %% @private encode_metadata(Meta) -> Data = lists:map( @@ -316,14 +417,15 @@ encode_metadata(Meta) -> iolist_to_binary(Data). %% @private -do_unpack(Files, OuterChecksum, Output) -> +do_unpack(Files, OuterChecksum, Output, Config) -> State = #{ inner_checksum => undefined, outer_checksum => OuterChecksum, contents => undefined, files => Files, metadata => undefined, - output => Output + output => Output, + config => Config }, State1 = check_files(State), State2 = check_version(State1), @@ -339,33 +441,61 @@ finish_unpack(#{ files := Files, inner_checksum := InnerChecksum, outer_checksum := OuterChecksum, - output := Output + output := Output, + config := Config }) -> _ = maps:get("VERSION", Files), - ContentsBinary = maps:get("contents.tar.gz", Files), + Contents = maps:get("contents.tar.gz", Files), + MaxUncompressedSize = maps:get(tarball_max_uncompressed_size, Config), + + Result = #{ + inner_checksum => InnerChecksum, + outer_checksum => OuterChecksum, + metadata => Metadata + }, case Output of - memory -> ok; - _ -> filelib:ensure_dir(filename:join(Output, "*")) - end, + none -> + {ok, Result}; + memory -> + case unpack_contents(Contents, memory, MaxUncompressedSize) of + {ok, UnpackedContents} -> + {ok, Result#{contents => UnpackedContents}}; + {error, Reason} -> + {error, {inner_tarball, Reason}} + end; + _ -> + filelib:ensure_dir(filename:join(Output, "*")), + case unpack_contents(Contents, Output, MaxUncompressedSize) of + ok -> + [ + try_updating_mtime(filename:join(Output, P)) + || P <- filelib:wildcard("**", Output) + ], + copy_metadata_config(Output, maps:get("metadata.config", Files)), + {ok, Result}; + {error, Reason} -> + {error, {inner_tarball, Reason}} + end + end. 
- case unpack_tarball(ContentsBinary, Output) of - ok -> - copy_metadata_config(Output, maps:get("metadata.config", Files)), - {ok, #{ - inner_checksum => InnerChecksum, - outer_checksum => OuterChecksum, - metadata => Metadata - }}; - {ok, Contents} -> - {ok, #{ - inner_checksum => InnerChecksum, - outer_checksum => OuterChecksum, - metadata => Metadata, - contents => Contents - }}; - {error, Reason} -> - {error, {inner_tarball, Reason}} +%% @private +unpack_contents(Contents, Output, MaxSize) -> + Opts = + case Output of + memory -> [memory, compressed]; + _ -> [{cwd, Output}, compressed] + end, + Source = + case Contents of + {path, ContentsPath} -> ContentsPath; + ContentsBinary -> {binary, ContentsBinary} + end, + case mix_hex_erl_tar:extract(Source, [{max_size, MaxSize} | Opts]) of + {error, too_big} -> + {error, {too_big_uncompressed, MaxSize}}; + Other -> + Other end. %% @private @@ -403,8 +533,8 @@ check_inner_checksum(#{files := Files} = State) -> Version = maps:get("VERSION", Files), MetadataBinary = maps:get("metadata.config", Files), - ContentsBinary = maps:get("contents.tar.gz", Files), - ActualChecksum = inner_checksum(Version, MetadataBinary, ContentsBinary), + Contents = maps:get("contents.tar.gz", Files), + ActualChecksum = inner_checksum(Version, MetadataBinary, Contents), if byte_size(ExpectedChecksum) /= 32 -> @@ -502,17 +632,24 @@ guess_build_tools(Metadata) -> %%==================================================================== %% @private -unpack_tarball(ContentsBinary, memory) -> - mix_hex_erl_tar:extract({binary, ContentsBinary}, [memory, compressed]); -unpack_tarball(ContentsBinary, Output) -> +unpack_tarball(Source, memory, MaxSize) -> + case mix_hex_erl_tar:extract(Source, [memory, compressed, {max_size, MaxSize}]) of + {error, too_big} -> + {error, {tarball, {too_big_uncompressed, MaxSize}}}; + Other -> + Other + end; +unpack_tarball(Source, Output, MaxSize) -> filelib:ensure_dir(filename:join(Output, "*")), - case 
mix_hex_erl_tar:extract({binary, ContentsBinary}, [{cwd, Output}, compressed]) of + case mix_hex_erl_tar:extract(Source, [{cwd, Output}, compressed, {max_size, MaxSize}]) of ok -> [ try_updating_mtime(filename:join(Output, Path)) || Path <- filelib:wildcard("**", Output) ], ok; + {error, too_big} -> + {error, {tarball, {too_big_uncompressed, MaxSize}}}; Other -> Other end. @@ -567,7 +704,7 @@ add_file(Tar, {Filename, AbsFilename}) when is_list(Filename), is_list(AbsFilena _ -> Mode = FileInfo#file_info.mode, {ok, Contents} = file:read_file(AbsFilename), - ok = mix_hex_erl_tar:add(Tar, Contents, Filename, Mode, tar_opts()) + ok = mix_hex_erl_tar:add(Tar, Contents, Filename, [{mode, Mode} | tar_opts()]) end. %% @private @@ -624,6 +761,115 @@ valid_size(Binary, infinity) when is_binary(Binary) -> valid_size(Binary, Limit) when is_binary(Binary) and is_integer(Limit) -> byte_size(Binary) =< Limit. +%% @private +valid_file_size(_Path, infinity) -> + true; +valid_file_size(Path, Limit) when is_integer(Limit) -> + case file:read_file_info(Path) of + {ok, #file_info{size = Size}} -> Size =< Limit; + {error, _} -> false + end. + +%% @private +file_checksum(Path) -> + {ok, Fd} = file:open(Path, [read, raw, binary]), + try + file_checksum_loop(Fd, crypto:hash_init(sha256)) + after + file:close(Fd) + end. + +%% @private +file_checksum_loop(Fd, HashState) -> + case file:read(Fd, ?HASH_CHUNK_SIZE) of + {ok, Data} -> file_checksum_loop(Fd, crypto:hash_update(HashState, Data)); + eof -> crypto:hash_final(HashState) + end. + +%% @private +stream_file_hash(HashState, Path) -> + {ok, Fd} = file:open(Path, [read, raw, binary]), + try + stream_file_hash_loop(Fd, HashState) + after + file:close(Fd) + end. + +%% @private +stream_file_hash_loop(Fd, HashState) -> + case file:read(Fd, ?HASH_CHUNK_SIZE) of + {ok, Data} -> stream_file_hash_loop(Fd, crypto:hash_update(HashState, Data)); + eof -> HashState + end. + +%% @private +%% Reads outer tar files from a directory after extraction. 
+%% Small files (VERSION, CHECKSUM, metadata.config) are read into memory. +%% contents.tar.gz is referenced by path. +read_outer_files(Dir) -> + RequiredFiles = ["VERSION", "CHECKSUM", "metadata.config", "contents.tar.gz"], + case read_outer_files(Dir, RequiredFiles, #{}) of + {ok, Files} -> + validate_outer_file_sizes(Files); + {error, _} = Error -> + Error + end. + +read_outer_files(_Dir, [], Acc) -> + {ok, Acc}; +read_outer_files(Dir, ["contents.tar.gz" | Rest], Acc) -> + Path = filename:join(Dir, "contents.tar.gz"), + case filelib:is_regular(Path) of + true -> + read_outer_files(Dir, Rest, Acc#{"contents.tar.gz" => {path, Path}}); + false -> + {error, {tarball, {missing_files, ["contents.tar.gz"]}}} + end; +read_outer_files(Dir, [Name | Rest], Acc) -> + Path = filename:join(Dir, Name), + case file:read_file(Path) of + {ok, Data} -> + read_outer_files(Dir, Rest, Acc#{Name => Data}); + {error, _} -> + {error, {tarball, {missing_files, [Name]}}} + end. + +%% @private +validate_outer_file_sizes(Files) -> + case byte_size(maps:get("VERSION", Files, <<>>)) > ?MAX_VERSION_SIZE of + true -> + {error, {tarball, {file_too_big, "VERSION"}}}; + false -> + case byte_size(maps:get("CHECKSUM", Files, <<>>)) > ?MAX_CHECKSUM_SIZE of + true -> + {error, {tarball, {file_too_big, "CHECKSUM"}}}; + false -> + case byte_size(maps:get("metadata.config", Files, <<>>)) > ?MAX_METADATA_SIZE of + true -> {error, {tarball, {file_too_big, "metadata.config"}}}; + false -> {ok, Files} + end + end + end. + +%% @private +remove_dir(Dir) -> + case file:list_dir(Dir) of + {ok, Entries} -> + lists:foreach( + fun(Entry) -> + Path = filename:join(Dir, Entry), + case filelib:is_dir(Path) of + true -> remove_dir(Path); + false -> file:delete(Path) + end + end, + Entries + ), + file:del_dir(Dir); + {error, _} -> + ok + end. 
+ %% @private binarify(Binary) when is_binary(Binary) -> Binary; binarify(Number) when is_number(Number) -> Number; diff --git a/src/mix_safe_erl_term.xrl b/src/mix_safe_erl_term.xrl index 620d183a..cc6854b5 100644 --- a/src/mix_safe_erl_term.xrl +++ b/src/mix_safe_erl_term.xrl @@ -1,4 +1,4 @@ -%% Vendored from hex_core v0.12.2 (c4db9f6), do not edit manually +%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually %%% Author : Robert Virding %%% Purpose : Token definitions for Erlang. diff --git a/test/hex/http_test.exs b/test/hex/http_test.exs index a5d2d2af..68ce240d 100644 --- a/test/hex/http_test.exs +++ b/test/hex/http_test.exs @@ -172,4 +172,27 @@ defmodule Hex.HTTPTest do assert status == 401 assert response_body == "unauthorized" end + + test "request_to_file downloads to file", %{bypass: bypass} do + in_tmp(fn -> + Bypass.expect(bypass, fn conn -> + Plug.Conn.resp(conn, 200, "file content") + end) + + filename = Path.join(File.cwd!(), "downloaded.txt") + + {:ok, {status, _headers}} = + Hex.HTTP.request_to_file( + :get, + "http://localhost:#{bypass.port}/file", + %{}, + nil, + filename, + %{} + ) + + assert status == 200 + assert File.read!(filename) == "file content" + end) + end end