|
1 | | -%% Vendored from hex_core v0.15.0 (90f9f59), do not edit manually |
| 1 | +%% Vendored from hex_core v0.15.0 (4a6a1e9), do not edit manually |
2 | 2 |
|
3 | 3 | %% @doc |
4 | 4 | %% Functions for creating and unpacking Hex tarballs. |
|
12 | 12 | format_error/1 |
13 | 13 | ]). |
14 | 14 | -ifdef(TEST). |
15 | | --export([do_decode_metadata/1, gzip/1, normalize_requirements/1]). |
| 15 | +-export([do_decode_metadata/1, do_decode_metadata/2, gzip/1, normalize_requirements/1]). |
16 | 16 | -endif. |
17 | 17 | -define(VERSION, <<"3">>). |
18 | 18 | -define(HASH_CHUNK_SIZE, 65536). |
19 | 19 | -define(MAX_VERSION_SIZE, 32). |
20 | 20 | -define(MAX_CHECKSUM_SIZE, 128). |
21 | | --define(MAX_METADATA_SIZE, 128 * 1024). |
| 21 | +-define(MAX_METADATA_SIZE, 1024 * 1024). |
| 22 | +-define(METADATA_CHUNK_SIZE, 4096). |
22 | 23 | -define(BUILD_TOOL_FILES, [ |
23 | 24 | {<<"mix.exs">>, <<"mix">>}, |
24 | 25 | {<<"rebar.config">>, <<"rebar3">>}, |
@@ -80,7 +81,9 @@ create(Metadata, Files, Config) -> |
80 | 81 | {ok, ValidatedFiles} -> |
81 | 82 | ContentsTarball = create_memory_tarball(ValidatedFiles), |
82 | 83 | ContentsTarballCompressed = gzip(ContentsTarball), |
83 | | - InnerChecksum = inner_checksum(?VERSION, MetadataBinary, ContentsTarballCompressed), |
| 84 | + InnerChecksum = inner_checksum( |
| 85 | + ?VERSION, MetadataBinary, ContentsTarballCompressed |
| 86 | + ), |
84 | 87 | InnerChecksumBase16 = encode_base16(InnerChecksum), |
85 | 88 |
|
86 | 89 | OuterFiles = [ |
@@ -568,41 +571,292 @@ check_inner_checksum(#{files := Files} = State) -> |
%% @private
%% Decodes the "metadata.config" entry of the unpacked files and stores the
%% normalized result under the `metadata' key of the state map.
%%
%% An `{error, _}' tuple handed in from an earlier step is passed through
%% untouched. The optional `metadata_fields' entry of the config (default
%% `all') is forwarded to the decoder. Whatever the decoder returns that is
%% not a map is handed back to the caller unchanged.
decode_metadata({error, _} = Failure) ->
    Failure;
decode_metadata(#{files := #{"metadata.config" := Raw}, config := Config} = State) when
    is_binary(Raw)
->
    Wanted = maps:get(metadata_fields, Config, all),
    case do_decode_metadata(Raw, Wanted) of
        Decoded when is_map(Decoded) ->
            State#{metadata => normalize_metadata(Decoded)};
        NotAMap ->
            NotAMap
    end.
576 | 582 |
|
-ifdef(TEST).
%% @private
%% Test-only convenience wrapper: decodes the metadata binary with the
%% field selector fixed to `all'.
do_decode_metadata(MetadataBinary) ->
    do_decode_metadata(MetadataBinary, all).
-endif.
580 | 588 |
|
581 | | - case mix_safe_erl_term:string(String) of |
582 | | - {ok, Tokens, _Line} -> |
583 | | - try |
584 | | - Terms = mix_safe_erl_term:terms(Tokens), |
585 | | - maps:from_list(Terms) |
586 | | - catch |
587 | | - error:function_clause -> |
588 | | - {error, {metadata, invalid_terms}}; |
589 | | - error:badarg -> |
590 | | - {error, {metadata, not_key_value}} |
%% @private
%% Decodes a metadata.config binary.
%%
%% With `all' the chunked decoder is used; with a list of keys the streaming
%% decoder is used and receives that list. Both decoders are first run in
%% utf8 mode; if that run answers `latin1_fallback' the whole decode is
%% repeated from the beginning in latin1 mode.
do_decode_metadata(Binary, all) when is_binary(Binary) ->
    decode_metadata_with_fallback(fun(Encoding) ->
        decode_metadata_chunked(Encoding, Binary, <<>>, [], "", [])
    end);
do_decode_metadata(Binary, Fields) when is_binary(Binary), is_list(Fields) ->
    decode_metadata_with_fallback(fun(Encoding) ->
        decode_metadata_streaming(Encoding, Binary, <<>>, [], "", [], Fields, start)
    end).

%% @private
%% Runs Decode in utf8 mode and, only when it asks for it, once more in
%% latin1 mode. The result of the second run is returned as-is.
decode_metadata_with_fallback(Decode) ->
    case Decode(utf8) of
        latin1_fallback -> Decode(latin1);
        Result -> Result
    end.
| 604 | + |
%% @private
%% Drives the chunked decode of metadata.config: the binary is fed to
%% mix_safe_erl_term:tokens/2 one chunk at a time instead of being converted
%% to a character list in one go, and every completed form is parsed and
%% pushed onto Acc right away.
%%
%% Chars is the not-yet-scanned character list, IncTail the bytes of an
%% incomplete UTF-8 sequence carried over from the previous chunk, Cont the
%% scanner continuation ([] when no form is in progress).
%%
%% Clause order matters:
%%   1. nothing left at all       -> flush the scanner with eof;
%%   2. utf8, only a tail is left -> those bytes can never complete, so ask
%%                                   the caller to restart in latin1;
%%   3. no pending characters     -> decode the next chunk and scan it;
%%   4. pending characters        -> scan them first.
decode_metadata_chunked(_Encoding, <<>>, <<>>, Cont, [], Acc) ->
    flush_metadata_eof(Cont, Acc);
decode_metadata_chunked(utf8, <<>>, _IncTail, _Cont, [], _Acc) ->
    latin1_fallback;
decode_metadata_chunked(Encoding, Binary, IncTail, Cont, [], Acc) ->
    case decode_metadata_chunk(Encoding, Binary, IncTail) of
        latin1_fallback ->
            latin1_fallback;
        {ok, Decoded, Remaining, NextTail} ->
            feed_metadata(Encoding, Cont, Decoded, Remaining, NextTail, Acc)
    end;
decode_metadata_chunked(Encoding, Binary, IncTail, Cont, Chars, Acc) ->
    feed_metadata(Encoding, Cont, Chars, Binary, IncTail, Acc).
| 628 | + |
%% @private
%% Hands Chars to the scanner and acts on its answer:
%%   * a complete form   -> parse it, push the term onto Acc and continue
%%                          with whatever characters the scanner left over;
%%   * `more'            -> keep the continuation and fetch more input;
%%   * eof               -> build the final result from Acc;
%%   * scanner error     -> `{error, {metadata, Reason}}'.
%% A form that does not parse aborts the decode with the parser's error.
feed_metadata(Encoding, Cont, Chars, Binary, IncTail, Acc) ->
    case mix_safe_erl_term:tokens(Cont, Chars) of
        {done, {ok, Tokens, _EndLine}, Leftover} ->
            case parse_metadata_term(Tokens) of
                {error, _} = Failure ->
                    Failure;
                {ok, Term} ->
                    Pending = normalize_rest_chars(Leftover),
                    decode_metadata_chunked(
                        Encoding, Binary, IncTail, [], Pending, [Term | Acc]
                    )
            end;
        {more, NextCont} ->
            decode_metadata_chunked(Encoding, Binary, IncTail, NextCont, "", Acc);
        {done, {eof, _}, _} ->
            finalize_metadata(Acc);
        {done, {error, {_, mix_safe_erl_term, Reason}, _}, _} ->
            {error, {metadata, Reason}}
    end.
| 648 | + |
%% @private
%% Called once all input is consumed. With no scan in progress (Cont =:= [])
%% the accumulated terms are finalized directly. Otherwise the scanner is
%% told about eof: tokens that surface at this point belong to a form that
%% was never terminated, which is reported as `invalid_terms'.
flush_metadata_eof([], Acc) ->
    finalize_metadata(Acc);
flush_metadata_eof(Cont, Acc) ->
    case mix_safe_erl_term:tokens(Cont, eof) of
        {done, {ok, _UnterminatedTokens, _}, _} ->
            {error, {metadata, invalid_terms}};
        {done, {error, {_, mix_safe_erl_term, Reason}, _}, _} ->
            {error, {metadata, Reason}};
        {done, {eof, _}, _} ->
            finalize_metadata(Acc)
    end.
595 | 661 |
|
%% @private
%% Builds the metadata map from the accumulated terms.
%%
%% Acc is reversed before being handed to maps:from_list/1. An empty Acc
%% yields `{error, {metadata, invalid_terms}}'; a `badarg' raised while
%% building the map yields `{error, {metadata, not_key_value}}'.
finalize_metadata([]) ->
    {error, {metadata, invalid_terms}};
finalize_metadata(Acc) ->
    try
        maps:from_list(lists:reverse(Acc))
    catch
        error:badarg ->
            {error, {metadata, not_key_value}}
    end.
| 671 | + |
%% @private
%% Parses the tokens of one form with erl_parse:parse_term/1. A successful
%% parse is returned as `{ok, Term}'; any parser error is collapsed into
%% `{error, {metadata, invalid_terms}}'.
parse_metadata_term(Tokens) ->
    case erl_parse:parse_term(Tokens) of
        {error, _ErrorInfo} -> {error, {metadata, invalid_terms}};
        {ok, _Term} = Parsed -> Parsed
    end.
| 678 | + |
%% @private
%% Takes the next chunk off Binary and converts it to a character list.
%%
%% Returns `{ok, Chars, RestOfBinary, IncompleteTail}', or `latin1_fallback'
%% when the bytes are not valid UTF-8.
%%
%% latin1: every byte is one character, so no tail is ever carried over.
%% utf8:   the tail left over from the previous chunk is put in front of the
%%         new chunk; a sequence cut off at the chunk boundary is returned as
%%         the new tail.
decode_metadata_chunk(latin1, Binary, _IncTail) ->
    {Chunk, Rest} = take_metadata_chunk(Binary),
    {ok, binary_to_list(Chunk), Rest, <<>>};
decode_metadata_chunk(utf8, Binary, IncTail) ->
    {Chunk, Rest} = take_metadata_chunk(Binary),
    Bytes =
        case IncTail of
            <<>> -> Chunk;
            _ -> <<IncTail/binary, Chunk/binary>>
        end,
    case unicode:characters_to_list(Bytes, utf8) of
        {error, _, _} ->
            latin1_fallback;
        {incomplete, Chars, CutOff} ->
            {ok, Chars, Rest, CutOff};
        Chars when is_list(Chars) ->
            {ok, Chars, Rest, <<>>}
    end.
| 698 | + |
%% @private
%% Splits off at most ?METADATA_CHUNK_SIZE bytes from the front of Binary and
%% returns `{Chunk, Rest}'. A binary that is not larger than one chunk is
%% returned whole with an empty rest.
take_metadata_chunk(Binary) when byte_size(Binary) > ?METADATA_CHUNK_SIZE ->
    split_binary(Binary, ?METADATA_CHUNK_SIZE);
take_metadata_chunk(Binary) ->
    {Binary, <<>>}.
| 705 | + |
%% @private
%% Maps the scanner's left-over input to a character list: the atom `eof'
%% becomes the empty list, a list is returned unchanged.
normalize_rest_chars(eof) -> [];
normalize_rest_chars(Chars) when is_list(Chars) -> Chars.
| 709 | + |
%% @private
%% Drives the field-filtered decode of metadata.config: the binary is fed to
%% mix_safe_erl_term:token/2 chunk by chunk and the scanner is asked for one
%% token at a time. Forms whose key is in Fields have their tokens collected
%% and parsed; for all other forms only a nesting depth is kept in State.
%%
%% Clause order matters:
%%   1. nothing left at all       -> flush the scanner with eof;
%%   2. utf8, only a tail is left -> ask the caller to restart in latin1;
%%   3. no pending characters     -> decode the next chunk and scan it;
%%   4. pending characters        -> scan them first.
decode_metadata_streaming(_Encoding, <<>>, <<>>, Cont, [], Acc, Fields, State) ->
    flush_metadata_streaming_eof(Cont, Acc, Fields, State);
decode_metadata_streaming(utf8, <<>>, _IncTail, _Cont, [], _Acc, _Fields, _State) ->
    latin1_fallback;
decode_metadata_streaming(Encoding, Binary, IncTail, Cont, [], Acc, Fields, State) ->
    case decode_metadata_chunk(Encoding, Binary, IncTail) of
        latin1_fallback ->
            latin1_fallback;
        {ok, Decoded, Remaining, NextTail} ->
            feed_metadata_streaming(
                Encoding, Cont, Decoded, Remaining, NextTail, Acc, Fields, State
            )
    end;
decode_metadata_streaming(Encoding, Binary, IncTail, Cont, Chars, Acc, Fields, State) ->
    feed_metadata_streaming(Encoding, Cont, Chars, Binary, IncTail, Acc, Fields, State).
| 733 | + |
%% @private
%% Asks the scanner for the next single token and acts on its answer:
%%   * a token        -> advance the state machine; on success continue with
%%                       the characters the scanner left over, on failure
%%                       return the state machine's error;
%%   * `more'         -> keep the continuation and fetch more input;
%%   * eof            -> build the final result from Acc and State;
%%   * scanner error  -> `{error, {metadata, Reason}}'.
feed_metadata_streaming(Encoding, Cont, Chars, Binary, IncTail, Acc, Fields, State) ->
    case mix_safe_erl_term:token(Cont, Chars) of
        {done, {ok, Token, _EndLine}, Leftover} ->
            case advance_metadata_state(State, Acc, Fields, Token) of
                {error, _} = Failure ->
                    Failure;
                {next, NextState, NextAcc} ->
                    Pending = normalize_rest_chars(Leftover),
                    decode_metadata_streaming(
                        Encoding, Binary, IncTail, [], Pending, NextAcc, Fields, NextState
                    )
            end;
        {more, NextCont} ->
            decode_metadata_streaming(
                Encoding, Binary, IncTail, NextCont, "", Acc, Fields, State
            );
        {done, {eof, _}, _} ->
            finalize_metadata_streaming(Acc, State);
        {done, {error, {_, mix_safe_erl_term, Reason}, _}, _} ->
            {error, {metadata, Reason}}
    end.
| 760 | + |
%% @private
%% Called once all input is consumed. With no scan in progress (Cont =:= [])
%% the result is finalized directly. Otherwise the scanner is told about eof;
%% a last token it may still produce is run through the state machine before
%% finalizing.
flush_metadata_streaming_eof([], Acc, _Fields, State) ->
    finalize_metadata_streaming(Acc, State);
flush_metadata_streaming_eof(Cont, Acc, Fields, State) ->
    case mix_safe_erl_term:token(Cont, eof) of
        {done, {eof, _}, _} ->
            finalize_metadata_streaming(Acc, State);
        {done, {error, {_, mix_safe_erl_term, Reason}, _}, _} ->
            {error, {metadata, Reason}};
        {done, {ok, LastToken, _}, _} ->
            case advance_metadata_state(State, Acc, Fields, LastToken) of
                {error, _} = Failure ->
                    Failure;
                {next, FinalState, FinalAcc} ->
                    finalize_metadata_streaming(FinalAcc, FinalState)
            end
    end.
| 778 | + |
%% @private
%% Produces the final result of a streaming decode from the collected terms
%% and the state the machine stopped in.
%%
%%   * `between' with nothing collected: at least one form was completed and
%%     none was selected -> empty map.
%%   * `start' or `between' otherwise: defer to finalize_metadata/1 (which
%%     reports an empty Acc as an error).
%%   * any other state: input ended in the middle of a form -> invalid_terms.
finalize_metadata_streaming([], between) ->
    #{};
finalize_metadata_streaming(Acc, State) when State =:= start; State =:= between ->
    finalize_metadata(Acc);
finalize_metadata_streaming(_Acc, _MidForm) ->
    {error, {metadata, invalid_terms}}.
| 788 | + |
%% @private
%% State machine for streaming the metadata.config schema. Forms are required
%% to be `{<<"key">>, value}.` — anything else is rejected as invalid.
%%
%% States: start | between | {after_open, Prefix} | {after_left_binary, Prefix}
%%       | {after_key, KeyChars, Prefix} | {after_right_binary, KeyChars, Prefix}
%%       | {accumulate, Prefix, Depth} | {skip, Depth}
%%
%% `start` is the initial position; `between` is the position after a form has
%% been completed. Distinguishing them lets empty input return the same
%% invalid_terms error as the non-streaming path while a stream that
%% successfully skipped every form returns an empty map.
%%
%% `Prefix` holds the tokens of the current form, newest first. `Depth` counts
%% unclosed `{` / `[` and starts at 1 once the comma after the key has been
%% seen, accounting for the form's own opening brace.
%%
%% Once Depth is back at 0 the form's tuple is closed, so the only token
%% accepted there is the terminating dot. Any other token — a stray closing
%% bracket that would drive the counter negative, or trailing tokens after the
%% closing brace — is rejected with invalid_terms. Without that check a
%% skipped form such as `{<<"k">>, 1}}{.` or `{<<"k">>, 1} junk.` would be
%% accepted silently, because skipped forms are never handed to the parser.
%%
%% Skipped forms are still only checked for nesting depth: bracket kinds are
%% not matched against each other and the value itself is not validated.
%%
%% Returns `{next, NewState, NewAcc}` or `{error, {metadata, Reason}}`.
advance_metadata_state(Open, Acc, _Fields, {'{', _} = T) when Open =:= start; Open =:= between ->
    {next, {after_open, [T]}, Acc};
advance_metadata_state({after_open, Prefix}, Acc, _Fields, {'<<', _} = T) ->
    {next, {after_left_binary, [T | Prefix]}, Acc};
advance_metadata_state({after_left_binary, Prefix}, Acc, _Fields, {string, _, KeyChars} = T) ->
    {next, {after_key, KeyChars, [T | Prefix]}, Acc};
advance_metadata_state({after_key, KeyChars, Prefix}, Acc, _Fields, {'>>', _} = T) ->
    {next, {after_right_binary, KeyChars, [T | Prefix]}, Acc};
advance_metadata_state({after_right_binary, KeyChars, Prefix}, Acc, Fields, {',', _} = T) ->
    case extract_metadata_key(KeyChars) of
        {ok, Key} ->
            %% Decide here, before any value token is seen, whether the form
            %% is collected for parsing or merely skipped.
            case lists:member(Key, Fields) of
                true -> {next, {accumulate, [T | Prefix], 1}, Acc};
                false -> {next, {skip, 1}, Acc}
            end;
        error ->
            {error, {metadata, not_key_value}}
    end;
advance_metadata_state({accumulate, Prefix, 0}, Acc, _Fields, {dot, _} = T) ->
    Tokens = lists:reverse([T | Prefix]),
    case parse_metadata_term(Tokens) of
        {ok, Term} -> {next, between, [Term | Acc]};
        {error, _} = Err -> Err
    end;
advance_metadata_state({accumulate, _Prefix, 0}, _Acc, _Fields, _Token) ->
    %% The form's tuple is already closed; only the dot may follow.
    {error, {metadata, invalid_terms}};
advance_metadata_state({accumulate, _, _}, _Acc, _Fields, {dot, _}) ->
    %% Dot while brackets are still open.
    {error, {metadata, invalid_terms}};
advance_metadata_state({accumulate, Prefix, Depth}, Acc, _Fields, {Open, _} = T) when
    Open =:= '{'; Open =:= '['
->
    {next, {accumulate, [T | Prefix], Depth + 1}, Acc};
advance_metadata_state({accumulate, Prefix, Depth}, Acc, _Fields, {Close, _} = T) when
    Close =:= '}'; Close =:= ']'
->
    %% Depth >= 1 here (depth 0 is handled above), so it never goes negative.
    {next, {accumulate, [T | Prefix], Depth - 1}, Acc};
advance_metadata_state({accumulate, Prefix, Depth}, Acc, _Fields, T) ->
    {next, {accumulate, [T | Prefix], Depth}, Acc};
advance_metadata_state({skip, 0}, Acc, _Fields, {dot, _}) ->
    {next, between, Acc};
advance_metadata_state({skip, 0}, _Acc, _Fields, _Token) ->
    %% The form's tuple is already closed; only the dot may follow.
    {error, {metadata, invalid_terms}};
advance_metadata_state({skip, _}, _Acc, _Fields, {dot, _}) ->
    %% Dot while brackets are still open.
    {error, {metadata, invalid_terms}};
advance_metadata_state({skip, Depth}, Acc, _Fields, {Open, _}) when
    Open =:= '{'; Open =:= '['
->
    {next, {skip, Depth + 1}, Acc};
advance_metadata_state({skip, Depth}, Acc, _Fields, {Close, _}) when
    Close =:= '}'; Close =:= ']'
->
    %% Depth >= 1 here (depth 0 is handled above), so it never goes negative.
    {next, {skip, Depth - 1}, Acc};
advance_metadata_state({skip, Depth}, Acc, _Fields, _Token) ->
    {next, {skip, Depth}, Acc};
advance_metadata_state(_State, _Acc, _Fields, _Token) ->
    %% Any token that does not fit the `{<<"key">>, ` prefix lands here.
    {error, {metadata, not_key_value}}.
| 853 | + |
%% @private
%% Converts the characters of a key string token into a binary. Returns
%% `{ok, Key}', or `error' when list_to_binary/1 rejects the characters with
%% `badarg'.
extract_metadata_key(KeyChars) ->
    try
        {ok, list_to_binary(KeyChars)}
    catch
        error:badarg -> error
    end.
607 | 861 |
|
608 | 862 | %% @private |
@@ -663,14 +917,18 @@ validate_create_files([File | Rest], FilesRoot, Acc) -> |
663 | 917 | {error, _} = Error -> Error |
664 | 918 | end. |
665 | 919 |
|
666 | | -validate_create_file({Filename, Contents}, _FilesRoot) when is_list(Filename), is_binary(Contents) -> |
| 920 | +validate_create_file({Filename, Contents}, _FilesRoot) when |
| 921 | + is_list(Filename), is_binary(Contents) |
| 922 | +-> |
667 | 923 | case validate_archive_path(Filename) of |
668 | 924 | ok -> {ok, {Filename, Contents}}; |
669 | 925 | {error, _} = Error -> Error |
670 | 926 | end; |
671 | 927 | validate_create_file(Filename, FilesRoot) when is_list(Filename) -> |
672 | 928 | validate_create_file({Filename, Filename}, FilesRoot); |
673 | | -validate_create_file({Filename, AbsFilename}, FilesRoot) when is_list(Filename), is_list(AbsFilename) -> |
| 929 | +validate_create_file({Filename, AbsFilename}, FilesRoot) when |
| 930 | + is_list(Filename), is_list(AbsFilename) |
| 931 | +-> |
674 | 932 | case validate_archive_path(Filename) of |
675 | 933 | ok -> validate_source_file(Filename, AbsFilename, FilesRoot); |
676 | 934 | {error, _} = Error -> Error |
@@ -709,7 +967,8 @@ validate_source_file_root(ArchiveName, SourcePath, FilesRoot) -> |
709 | 967 | {ok, LinkTarget} = file:read_link(DiskPath), |
710 | 968 | ResolvedTarget = archive_join(archive_dirname(ArchiveName), LinkTarget), |
711 | 969 | case safe_relative_archive_path(ResolvedTarget) of |
712 | | - false -> {error, {tarball, {unsafe_symlink, ArchiveName, LinkTarget}}}; |
| 970 | + false -> |
| 971 | + {error, {tarball, {unsafe_symlink, ArchiveName, LinkTarget}}}; |
713 | 972 | true -> |
714 | 973 | case validate_source_root(ArchiveName, SourcePath, Root) of |
715 | 974 | ok -> {ok, {ArchiveName, DiskPath}}; |
|
0 commit comments