Skip to content

Commit bf47d17

Browse files
committed
Vendor updated hex_core tarball files
1 parent 64b9f92 commit bf47d17

2 files changed

Lines changed: 293 additions & 32 deletions

File tree

src/mix_hex_tarball.erl

Lines changed: 289 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually
1+
%% Vendored from hex_core v0.15.0 (4a6a1e9), do not edit manually
22

33
%% @doc
44
%% Functions for creating and unpacking Hex tarballs.
@@ -12,13 +12,14 @@
1212
format_error/1
1313
]).
1414
-ifdef(TEST).
15-
-export([do_decode_metadata/1, gzip/1, normalize_requirements/1]).
15+
-export([do_decode_metadata/1, do_decode_metadata/2, gzip/1, normalize_requirements/1]).
1616
-endif.
1717
-define(VERSION, <<"3">>).
1818
-define(HASH_CHUNK_SIZE, 65536).
1919
-define(MAX_VERSION_SIZE, 32).
2020
-define(MAX_CHECKSUM_SIZE, 128).
21-
-define(MAX_METADATA_SIZE, 128 * 1024).
21+
-define(MAX_METADATA_SIZE, 1024 * 1024).
22+
-define(METADATA_CHUNK_SIZE, 4096).
2223
-define(BUILD_TOOL_FILES, [
2324
{<<"mix.exs">>, <<"mix">>},
2425
{<<"rebar.config">>, <<"rebar3">>},
@@ -80,7 +81,9 @@ create(Metadata, Files, Config) ->
8081
{ok, ValidatedFiles} ->
8182
ContentsTarball = create_memory_tarball(ValidatedFiles),
8283
ContentsTarballCompressed = gzip(ContentsTarball),
83-
InnerChecksum = inner_checksum(?VERSION, MetadataBinary, ContentsTarballCompressed),
84+
InnerChecksum = inner_checksum(
85+
?VERSION, MetadataBinary, ContentsTarballCompressed
86+
),
8487
InnerChecksumBase16 = encode_base16(InnerChecksum),
8588

8689
OuterFiles = [
@@ -568,41 +571,292 @@ check_inner_checksum(#{files := Files} = State) ->
568571
%% @private
569572
decode_metadata({error, _} = Error) ->
570573
Error;
571-
decode_metadata(#{files := #{"metadata.config" := Binary}} = State) when is_binary(Binary) ->
572-
case do_decode_metadata(Binary) of
574+
decode_metadata(#{files := #{"metadata.config" := Binary}, config := Config} = State) when
575+
is_binary(Binary)
576+
->
577+
Fields = maps:get(metadata_fields, Config, all),
578+
case do_decode_metadata(Binary, Fields) of
573579
#{} = Metadata -> maps:put(metadata, normalize_metadata(Metadata), State);
574580
Other -> Other
575581
end.
576582

583+
-ifdef(TEST).
577584
%% @private
578-
do_decode_metadata(Binary) when is_binary(Binary) ->
579-
{ok, String} = characters_to_list(Binary),
585+
do_decode_metadata(Binary) ->
586+
do_decode_metadata(Binary, all).
587+
-endif.
580588

581-
case mix_safe_erl_term:string(String) of
582-
{ok, Tokens, _Line} ->
583-
try
584-
Terms = mix_safe_erl_term:terms(Tokens),
585-
maps:from_list(Terms)
586-
catch
587-
error:function_clause ->
588-
{error, {metadata, invalid_terms}};
589-
error:badarg ->
590-
{error, {metadata, not_key_value}}
589+
%% @private
590+
do_decode_metadata(Binary, all) when is_binary(Binary) ->
591+
case decode_metadata_chunked(utf8, Binary, <<>>, [], "", []) of
592+
latin1_fallback ->
593+
decode_metadata_chunked(latin1, Binary, <<>>, [], "", []);
594+
Other ->
595+
Other
596+
end;
597+
do_decode_metadata(Binary, Fields) when is_binary(Binary), is_list(Fields) ->
598+
case decode_metadata_streaming(utf8, Binary, <<>>, [], "", [], Fields, start) of
599+
latin1_fallback ->
600+
decode_metadata_streaming(latin1, Binary, <<>>, [], "", [], Fields, start);
601+
Other ->
602+
Other
603+
end.
604+
605+
%% @private
606+
%% Streams the metadata.config binary through mix_safe_erl_term:tokens/2 in
607+
%% small chunks so we never materialize the whole binary as a char list.
608+
%% Each accepted dot-terminated form is parsed and accumulated immediately,
609+
%% keeping peak memory at roughly one chunk + one term's tokens + AST.
610+
decode_metadata_chunked(Encoding, Binary, IncTail, Cont, Chars, Acc) ->
611+
case Chars of
612+
[] when Binary =:= <<>>, IncTail =:= <<>> ->
613+
flush_metadata_eof(Cont, Acc);
614+
[] when Binary =:= <<>>, Encoding =:= utf8 ->
615+
%% Trailing bytes that can never form a complete UTF-8 sequence —
616+
%% restart the whole decode in latin1 mode rather than spin.
617+
latin1_fallback;
618+
[] ->
619+
case decode_metadata_chunk(Encoding, Binary, IncTail) of
620+
{ok, NewChars, NewBinary, NewTail} ->
621+
feed_metadata(Encoding, Cont, NewChars, NewBinary, NewTail, Acc);
622+
latin1_fallback ->
623+
latin1_fallback
624+
end;
625+
_ ->
626+
feed_metadata(Encoding, Cont, Chars, Binary, IncTail, Acc)
627+
end.
628+
629+
%% @private
630+
feed_metadata(Encoding, Cont, Chars, Binary, IncTail, Acc) ->
631+
case mix_safe_erl_term:tokens(Cont, Chars) of
632+
{more, NewCont} ->
633+
decode_metadata_chunked(Encoding, Binary, IncTail, NewCont, "", Acc);
634+
{done, {ok, Tokens, _}, RestChars} ->
635+
case parse_metadata_term(Tokens) of
636+
{ok, Term} ->
637+
decode_metadata_chunked(
638+
Encoding, Binary, IncTail, [], normalize_rest_chars(RestChars), [Term | Acc]
639+
);
640+
{error, _} = Err ->
641+
Err
591642
end;
592-
{error, {_Line, mix_safe_erl_term, Reason}, _Line2} ->
643+
{done, {eof, _}, _} ->
644+
finalize_metadata(Acc);
645+
{done, {error, {_, mix_safe_erl_term, Reason}, _}, _} ->
646+
{error, {metadata, Reason}}
647+
end.
648+
649+
%% @private
650+
flush_metadata_eof([], Acc) ->
651+
finalize_metadata(Acc);
652+
flush_metadata_eof(Cont, Acc) ->
653+
case mix_safe_erl_term:tokens(Cont, eof) of
654+
{done, {eof, _}, _} ->
655+
finalize_metadata(Acc);
656+
{done, {ok, _Tokens, _}, _} ->
657+
{error, {metadata, invalid_terms}};
658+
{done, {error, {_, mix_safe_erl_term, Reason}, _}, _} ->
593659
{error, {metadata, Reason}}
594660
end.
595661

596662
%% @private
597-
characters_to_list(Binary) ->
598-
case unicode:characters_to_list(Binary) of
599-
List when is_list(List) ->
600-
{ok, List};
663+
finalize_metadata([]) ->
664+
{error, {metadata, invalid_terms}};
665+
finalize_metadata(Acc) ->
666+
try maps:from_list(lists:reverse(Acc)) of
667+
Map -> Map
668+
catch
669+
error:badarg -> {error, {metadata, not_key_value}}
670+
end.
671+
672+
%% @private
673+
parse_metadata_term(Tokens) ->
674+
case erl_parse:parse_term(Tokens) of
675+
{ok, Term} -> {ok, Term};
676+
{error, _} -> {error, {metadata, invalid_terms}}
677+
end.
678+
679+
%% @private
680+
decode_metadata_chunk(utf8, Binary, IncTail) ->
681+
{Chunk, Rest} = take_metadata_chunk(Binary),
682+
Combined =
683+
case IncTail of
684+
<<>> -> Chunk;
685+
_ -> <<IncTail/binary, Chunk/binary>>
686+
end,
687+
case unicode:characters_to_list(Combined, utf8) of
688+
L when is_list(L) ->
689+
{ok, L, Rest, <<>>};
690+
{incomplete, L, NewTail} ->
691+
{ok, L, Rest, NewTail};
601692
{error, _, _} ->
602-
case unicode:characters_to_list(Binary, latin1) of
603-
List when is_list(List) -> {ok, List};
604-
Other -> Other
605-
end
693+
latin1_fallback
694+
end;
695+
decode_metadata_chunk(latin1, Binary, _IncTail) ->
696+
{Chunk, Rest} = take_metadata_chunk(Binary),
697+
{ok, binary_to_list(Chunk), Rest, <<>>}.
698+
699+
%% @private
700+
take_metadata_chunk(Binary) when byte_size(Binary) > ?METADATA_CHUNK_SIZE ->
701+
<<Chunk:(?METADATA_CHUNK_SIZE)/binary, Rest/binary>> = Binary,
702+
{Chunk, Rest};
703+
take_metadata_chunk(Binary) ->
704+
{Binary, <<>>}.
705+
706+
%% @private
707+
normalize_rest_chars(eof) -> "";
708+
normalize_rest_chars(L) when is_list(L) -> L.
709+
710+
%% @private
711+
%% Streams the metadata.config binary through mix_safe_erl_term:token/2 one token
712+
%% at a time. Forms whose key is in Fields are accumulated and parsed; forms
713+
%% whose key is not in Fields are discarded with only a depth counter held in
714+
%% state, so peak memory stays bounded regardless of the unwanted form's size.
715+
decode_metadata_streaming(Encoding, Binary, IncTail, Cont, Chars, Acc, Fields, State) ->
716+
case Chars of
717+
[] when Binary =:= <<>>, IncTail =:= <<>> ->
718+
flush_metadata_streaming_eof(Cont, Acc, Fields, State);
719+
[] when Binary =:= <<>>, Encoding =:= utf8 ->
720+
latin1_fallback;
721+
[] ->
722+
case decode_metadata_chunk(Encoding, Binary, IncTail) of
723+
{ok, NewChars, NewBinary, NewTail} ->
724+
feed_metadata_streaming(
725+
Encoding, Cont, NewChars, NewBinary, NewTail, Acc, Fields, State
726+
);
727+
latin1_fallback ->
728+
latin1_fallback
729+
end;
730+
_ ->
731+
feed_metadata_streaming(Encoding, Cont, Chars, Binary, IncTail, Acc, Fields, State)
732+
end.
733+
734+
%% @private
735+
feed_metadata_streaming(Encoding, Cont, Chars, Binary, IncTail, Acc, Fields, State) ->
736+
case mix_safe_erl_term:token(Cont, Chars) of
737+
{more, NewCont} ->
738+
decode_metadata_streaming(Encoding, Binary, IncTail, NewCont, "", Acc, Fields, State);
739+
{done, {ok, Token, _}, RestChars} ->
740+
case advance_metadata_state(State, Acc, Fields, Token) of
741+
{next, NewState, NewAcc} ->
742+
decode_metadata_streaming(
743+
Encoding,
744+
Binary,
745+
IncTail,
746+
[],
747+
normalize_rest_chars(RestChars),
748+
NewAcc,
749+
Fields,
750+
NewState
751+
);
752+
{error, _} = Err ->
753+
Err
754+
end;
755+
{done, {eof, _}, _} ->
756+
finalize_metadata_streaming(Acc, State);
757+
{done, {error, {_, mix_safe_erl_term, Reason}, _}, _} ->
758+
{error, {metadata, Reason}}
759+
end.
760+
761+
%% @private
762+
flush_metadata_streaming_eof([], Acc, _Fields, State) ->
763+
finalize_metadata_streaming(Acc, State);
764+
flush_metadata_streaming_eof(Cont, Acc, Fields, State) ->
765+
case mix_safe_erl_term:token(Cont, eof) of
766+
{done, {ok, Token, _}, _} ->
767+
case advance_metadata_state(State, Acc, Fields, Token) of
768+
{next, NewState, NewAcc} ->
769+
flush_metadata_streaming_eof([], NewAcc, Fields, NewState);
770+
{error, _} = Err ->
771+
Err
772+
end;
773+
{done, {eof, _}, _} ->
774+
finalize_metadata_streaming(Acc, State);
775+
{done, {error, {_, mix_safe_erl_term, Reason}, _}, _} ->
776+
{error, {metadata, Reason}}
777+
end.
778+
779+
%% @private
780+
finalize_metadata_streaming(Acc, start) ->
781+
finalize_metadata(Acc);
782+
finalize_metadata_streaming([], between) ->
783+
#{};
784+
finalize_metadata_streaming(Acc, between) ->
785+
finalize_metadata(Acc);
786+
finalize_metadata_streaming(_Acc, _State) ->
787+
{error, {metadata, invalid_terms}}.
788+
789+
%% @private
790+
%% State machine for streaming the metadata.config schema. Forms are required
791+
%% to be `{<<"key">>, value}.` — anything else is rejected as invalid.
792+
%%
793+
%% States: start | between | {after_open, Prefix} | {after_left_binary, Prefix}
794+
%% | {after_key, KeyChars, Prefix} | {after_right_binary, KeyChars, Prefix}
795+
%% | {accumulate, Prefix, Depth} | {skip, Depth}
796+
%%
797+
%% `start` is the initial position; `between` is the position after a form has
798+
%% been completed. Distinguishing them lets empty input return the same
799+
%% invalid_terms error as the non-streaming path while a stream that
800+
%% successfully skipped every form returns an empty map.
801+
advance_metadata_state(Open, Acc, _Fields, {'{', _} = T) when Open =:= start; Open =:= between ->
802+
{next, {after_open, [T]}, Acc};
803+
advance_metadata_state({after_open, Prefix}, Acc, _Fields, {'<<', _} = T) ->
804+
{next, {after_left_binary, [T | Prefix]}, Acc};
805+
advance_metadata_state({after_left_binary, Prefix}, Acc, _Fields, {string, _, KeyChars} = T) ->
806+
{next, {after_key, KeyChars, [T | Prefix]}, Acc};
807+
advance_metadata_state({after_key, KeyChars, Prefix}, Acc, _Fields, {'>>', _} = T) ->
808+
{next, {after_right_binary, KeyChars, [T | Prefix]}, Acc};
809+
advance_metadata_state({after_right_binary, KeyChars, Prefix}, Acc, Fields, {',', _} = T) ->
810+
case extract_metadata_key(KeyChars) of
811+
{ok, Key} ->
812+
case lists:member(Key, Fields) of
813+
true -> {next, {accumulate, [T | Prefix], 1}, Acc};
814+
false -> {next, {skip, 1}, Acc}
815+
end;
816+
error ->
817+
{error, {metadata, not_key_value}}
818+
end;
819+
advance_metadata_state({accumulate, Prefix, 0}, Acc, _Fields, {dot, _} = T) ->
820+
Tokens = lists:reverse([T | Prefix]),
821+
case parse_metadata_term(Tokens) of
822+
{ok, Term} -> {next, between, [Term | Acc]};
823+
{error, _} = Err -> Err
824+
end;
825+
advance_metadata_state({accumulate, _, _}, _Acc, _Fields, {dot, _}) ->
826+
{error, {metadata, invalid_terms}};
827+
advance_metadata_state({accumulate, Prefix, Depth}, Acc, _Fields, {Open, _} = T) when
828+
Open =:= '{'; Open =:= '['
829+
->
830+
{next, {accumulate, [T | Prefix], Depth + 1}, Acc};
831+
advance_metadata_state({accumulate, Prefix, Depth}, Acc, _Fields, {Close, _} = T) when
832+
Close =:= '}'; Close =:= ']'
833+
->
834+
{next, {accumulate, [T | Prefix], Depth - 1}, Acc};
835+
advance_metadata_state({accumulate, Prefix, Depth}, Acc, _Fields, T) ->
836+
{next, {accumulate, [T | Prefix], Depth}, Acc};
837+
advance_metadata_state({skip, 0}, Acc, _Fields, {dot, _}) ->
838+
{next, between, Acc};
839+
advance_metadata_state({skip, _}, _Acc, _Fields, {dot, _}) ->
840+
{error, {metadata, invalid_terms}};
841+
advance_metadata_state({skip, Depth}, Acc, _Fields, {Open, _}) when
842+
Open =:= '{'; Open =:= '['
843+
->
844+
{next, {skip, Depth + 1}, Acc};
845+
advance_metadata_state({skip, Depth}, Acc, _Fields, {Close, _}) when
846+
Close =:= '}'; Close =:= ']'
847+
->
848+
{next, {skip, Depth - 1}, Acc};
849+
advance_metadata_state({skip, Depth}, Acc, _Fields, _Token) ->
850+
{next, {skip, Depth}, Acc};
851+
advance_metadata_state(_State, _Acc, _Fields, _Token) ->
852+
{error, {metadata, not_key_value}}.
853+
854+
%% @private
855+
extract_metadata_key(KeyChars) ->
856+
try list_to_binary(KeyChars) of
857+
Key -> {ok, Key}
858+
catch
859+
error:badarg -> error
606860
end.
607861

608862
%% @private
@@ -663,14 +917,18 @@ validate_create_files([File | Rest], FilesRoot, Acc) ->
663917
{error, _} = Error -> Error
664918
end.
665919

666-
validate_create_file({Filename, Contents}, _FilesRoot) when is_list(Filename), is_binary(Contents) ->
920+
validate_create_file({Filename, Contents}, _FilesRoot) when
921+
is_list(Filename), is_binary(Contents)
922+
->
667923
case validate_archive_path(Filename) of
668924
ok -> {ok, {Filename, Contents}};
669925
{error, _} = Error -> Error
670926
end;
671927
validate_create_file(Filename, FilesRoot) when is_list(Filename) ->
672928
validate_create_file({Filename, Filename}, FilesRoot);
673-
validate_create_file({Filename, AbsFilename}, FilesRoot) when is_list(Filename), is_list(AbsFilename) ->
929+
validate_create_file({Filename, AbsFilename}, FilesRoot) when
930+
is_list(Filename), is_list(AbsFilename)
931+
->
674932
case validate_archive_path(Filename) of
675933
ok -> validate_source_file(Filename, AbsFilename, FilesRoot);
676934
{error, _} = Error -> Error
@@ -709,7 +967,8 @@ validate_source_file_root(ArchiveName, SourcePath, FilesRoot) ->
709967
{ok, LinkTarget} = file:read_link(DiskPath),
710968
ResolvedTarget = archive_join(archive_dirname(ArchiveName), LinkTarget),
711969
case safe_relative_archive_path(ResolvedTarget) of
712-
false -> {error, {tarball, {unsafe_symlink, ArchiveName, LinkTarget}}};
970+
false ->
971+
{error, {tarball, {unsafe_symlink, ArchiveName, LinkTarget}}};
713972
true ->
714973
case validate_source_root(ArchiveName, SourcePath, Root) of
715974
ok -> {ok, {ArchiveName, DiskPath}};

src/mix_safe_erl_term.xrl

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually
1+
%% Vendored from hex_core v0.15.0 (4a6a1e9), do not edit manually
22

33
%%% Author : Robert Virding
44
%%% Purpose : Token definitions for Erlang.
@@ -19,7 +19,9 @@ Rules.
1919
{D}+ : {token, {integer, TokenLine, list_to_integer(TokenChars)}}.
2020
[\#\[\]}{,+-] : {token, {list_to_atom(TokenChars), TokenLine}}.
2121
(<<|>>|=>) : {token, {list_to_atom(TokenChars), TokenLine}}.
22-
\. : {token, {dot, TokenLine}}.
22+
% end_token (not token) lets mix_hex_tarball stream-decode metadata.config
23+
% one form at a time via mix_safe_erl_term:tokens/2.
24+
\. : {end_token, {dot, TokenLine}}.
2325
/ : {token, {'/', TokenLine}}.
2426
{WS}+ : skip_token.
2527

0 commit comments

Comments
 (0)