Skip to content

Commit

Permalink
Lift restrictions for matching of binaries and maps
Browse files Browse the repository at this point in the history
There has always been an implementation limitation for matching
of binaries (for technical reasons). For example:

    foo(Bin) ->
        <<A:8>> = <<X:4,Y:4>> = Bin,
        {A,X,Y}.

This would fail to compile with the following message:

    t.erl:5:5: binary patterns cannot be matched in parallel using '='
    %    5|     <<A:8>> = <<X:4,Y:4>> = Bin,
    %     |     ^

This commit lifts this restriction, making the example legal.

A restriction for map matching is also lifted, but before we can
describe that, we'll need a digression to talk about the `=` operator.

The `=` operator can be used for two similar but slightly differently
purposes.

When used in a pattern, for example in a function head, both the
left-hand and right-hand side operands must be patterns:

    Pattern1 = Pattern2

For example:

    bar(#{a := A} = #{b := B}) -> {A, B}.

The following example will not compile because the right-hand side
is not a pattern but an expression:

    wrong(#{a := A} = #{b => B}) -> {A, B}.

    t.erl:4:23: illegal pattern
    %    4| wrong(#{a := A} = #{b => B}) -> {A, B}.
    %     |                       ^

Used in this context, the `=` operator does not imply that the two
patterns are matched in any particular order. Attempting to use a
variable matched out on the left-hand side on the right-hand side, or
vice versa, will fail:

    also_wrong1(#{B := A} = #{b := B}) -> {A,B}.
    also_wrong2(#{a := A} = #{A := B}) -> {A,B}.

    t.erl:6:15: variable 'B' is unbound
    %    6| also_wrong1(#{B := A} = #{b := B}) -> {A,B}.
    %     |               ^

    t.erl:7:27: variable 'A' is unbound
    %    7| also_wrong2(#{a := A} = #{A := B}) -> {A,B}.
    %     |                           ^

The other way to use `=` is in a function body. Used in this way,
the right-hand side must be an expression:

    Pattern = Expression

For example:

    foobar(Value) ->
        #{a := A} = #{a => Value},
        A.

Used in this context, the right-hand side of `=` must **not** be a pattern:

    illegal_foobar(Value) ->
        #{a := A} = #{a := Value},
        A.

    t.erl:18:21: only association operators '=>' are allowed in map construction
    %   18|     #{a := A} = #{a := Value},
    %     |                     ^

When used in a body context, the value of the `=` operator is the
value of its right-hand side operand. When multiple `=` operators are
combined, they are evaluted from right to left. That means that any
number of patterns can be matched at once:

    Pattern1 = Pattern2 = ... = PatternN = Expr

Given that there is a well-defined evaluation order, one would expect
that the following example would be legal:

    baz(M) ->
        #{K := V} = #{k := K} = M,
        V.

It is not. In Erlang/OTP 25 or earlier, the compilation fails with the
following message:

    t.erl:28:7: variable 'K' is unbound
    %   28|     #{K := V} = #{k := K} = M,
    %     |       ^

That restriction is now lifted, making the example legal.

Closes erlang#6348
  • Loading branch information
bjorng committed Nov 10, 2022
1 parent eb2dc69 commit 01fb1f2
Show file tree
Hide file tree
Showing 9 changed files with 509 additions and 193 deletions.
115 changes: 98 additions & 17 deletions lib/compiler/src/v3_core.erl
Original file line number Diff line number Diff line change
Expand Up @@ -1050,6 +1050,8 @@ letify_aliases(P, E) ->

sanitize({match,L,P1,P2}) ->
{tuple,L,[sanitize(P1),sanitize(P2)]};
sanitize({sequential_match,L,P1,P2}) ->
{tuple,L,[sanitize(P1),sanitize(P2)]};
sanitize({cons,L,H,T}) ->
{cons,L,sanitize(H),sanitize(T)};
sanitize({tuple,L,Ps0}) ->
Expand Down Expand Up @@ -2025,10 +2027,10 @@ is_safe(_) -> false.
%% fold_match(MatchExpr, Pat) -> {MatchPat,Expr}.
%% Fold nested matches into one match with aliased patterns.

fold_match({match,L,P0,E0}, P) ->
{P1,E1} = fold_match(E0, P),
{{match,L,P0,P1},E1};
fold_match(E, P) -> {P,E}.
fold_match({match, L, P, E}, E0) ->
fold_match(E, {sequential_match, L, P, E0});
fold_match(E, E0) ->
{E0, E}.

%% pattern(Pattern, State) -> {CorePat,[PreExp],State}.
%% Transform a pattern by removing line numbers. We also normalise
Expand All @@ -2055,9 +2057,58 @@ pattern({bin,L,Ps}, St0) ->
{Segments,St} = pat_bin(Ps, St0),
{#ibinary{anno=#a{anno=lineno_anno(L, St)},segments=Segments},St};
pattern({match,_,P1,P2}, St) ->
%% Handle aliased patterns in a clause. Example:
%%
%% f({a,b} = {A,B}) -> . . .
%%
%% The `=` operator does not have any defined order in which the
%% two patterns are matched. Therefore, this example can safely be
%% rewritten like so:
%%
%% f({a=A,b=B}) -> . . .
%%
%% Aliased patterns that are illegal, such as:
%%
%% f(#{Key := Value} = {key := Key}) -> . . .
%%
%% have already been rejected by erl_lint.
%%
{Cp1,St1} = pattern(P1, St),
{Cp2,St2} = pattern(P2, St1),
{pat_alias(Cp1, Cp2),St2};
pattern({sequential_match,_,P1,P2}, St) ->
%% Handle sequential matching in a function body. Example:
%%
%% f(Map) ->
%% #{Key := Value} = {key := Key} = Map,
%% Map.
%%
%% In a function body, the patterns are matched one at a time to
%% the expression, going from right to left.
%%
{Cp1,St1} = pattern(P1, St),
{Cp2,St2} = pattern(P2, St1),

P = case {Cp1,Cp2} of
{#c_var{},_} ->
pat_alias(Cp1, Cp2);
{_,#c_var{}} ->
pat_alias(Cp1, Cp2);
{_,_} ->
case Cp2 of
#c_cons{anno=[sequential_match]} ->
ok;
_ ->
%% Reject pattern aliases that obviously cannot match.
_ = pat_alias(Cp1, Cp2),
ok
end,

%% Set up sequential matching of P1 and P2.
#c_cons{anno=[sequential_match],hd=Cp1,tl=Cp2}
end,

{P,St2};
%% Evaluate compile-time expressions.
pattern({op,_,'++',{nil,_},R}, St) ->
pattern(R, St);
Expand Down Expand Up @@ -2199,9 +2250,17 @@ pat_alias(P1, #c_var{}=Var) ->
pat_alias(P1, #c_alias{pat=P2}=Alias) ->
Alias#c_alias{pat=pat_alias(P1, P2)};

pat_alias(#ibinary{segments=[]}=P, #ibinary{segments=[]}) ->
P;
pat_alias(#ibinary{segments=[_|_]=Segs1}=P, #ibinary{segments=[S0|Segs2]}) ->
%% Handle aliases of binary patterns in a clause. Example:
%% f(<<A:8,B:8>> = <<C:16>>) -> . . .
#ibitstr{anno=#a{anno=Anno}=A} = S0,
S = S0#ibitstr{anno=A#a{anno=[sequential_match|Anno]}},
P#ibinary{segments=Segs1++[S|Segs2]};

pat_alias(P1, P2) ->
%% Aliases between binaries are not allowed, so the only
%% legal patterns that remain are data patterns.
%% The only legal patterns that remain are data patterns.
case cerl:is_data(P1) andalso cerl:is_data(P2) of
false -> throw(nomatch);
true -> ok
Expand Down Expand Up @@ -2803,9 +2862,9 @@ uexpr_list(Les0, Ks, St0) ->
%% upattern(Pat, [KnownVar], State) ->
%% {Pat,[GuardTest],[NewVar],[UsedVar],State}.

upattern(#c_var{name='_'}, _, St0) ->
upattern(#c_var{anno=Anno,name='_'}, _, St0) ->
{New,St1} = new_var_name(St0),
{#c_var{name=New},[],[New],[],St1};
{#c_var{anno=Anno,name=New},[],[New],[],St1};
upattern(#c_var{name=V}=Var, Ks, St0) ->
case is_element(V, known_get(Ks)) of
true ->
Expand Down Expand Up @@ -3008,9 +3067,10 @@ ren_pat(#ibinary{segments=Es0}=P, Ks, {Isub,Osub0}, St0) ->
{Es,_Isub,Osub,St} = ren_pat_bin(Es0, Ks, Isub, Osub0, St0),
{P#ibinary{segments=Es},{Isub,Osub},St};
ren_pat(P, Ks0, {_,_}=Subs0, St0) ->
Anno = cerl:get_ann(P),
Es0 = cerl:data_es(P),
{Es,Subs,St} = ren_pats(Es0, Ks0, Subs0, St0),
{cerl:make_data(cerl:data_type(P), Es),Subs,St}.
{cerl:ann_make_data(Anno, cerl:data_type(P), Es),Subs,St}.

ren_pat_bin([#ibitstr{val=Val0,size=Sz0}=E|Es0], Ks, Isub0, Osub0, St0) ->
Sz = ren_get_subst(Sz0, Isub0),
Expand Down Expand Up @@ -3635,9 +3695,14 @@ split_pat(#c_binary{segments=Segs0}=Bin, St0) ->
case split_bin_segments(Segs0, Vars, St0, []) of
none ->
none;
{TailVar,Wrap,Bef,Aft,St} ->
{size_var,TailVar,Wrap,Bef,Aft,St} ->
BefBin = Bin#c_binary{segments=Bef},
{BefBin,{split,[TailVar],Wrap,Bin#c_binary{segments=Aft},nil},St}
{BefBin,{split,[TailVar],Wrap,Bin#c_binary{segments=Aft},nil},St};
{sequential_match,Bef,Aft,St1} ->
{BinVar,St} = new_var(St1),
BefBin = #c_alias{var=BinVar,pat=Bin#c_binary{segments=Bef}},
Wrap = fun(Body) -> Body end,
{BefBin,{split,[BinVar],Wrap,Bin#c_binary{segments=Aft},nil},St}
end;
split_pat(#c_map{es=Es}=Map, St) ->
split_map_pat(Es, Map, St, []);
Expand All @@ -3652,6 +3717,12 @@ split_pat(#c_alias{pat=Pat}=Alias0, St0) ->
Alias = Alias0#c_alias{pat=Var},
{Alias,{split,[Var],Ps,Split},St}
end;
split_pat(#c_cons{anno=[sequential_match],hd=Cons1,tl=Cons2}, St0) ->
%% Handle sequential matching of all types of patterns.
{Var,St} = new_var(St0),
BefCons = #c_alias{var=Var,pat=Cons1},
Wrap = fun(Body) -> Body end,
{BefCons,{split,[Var],Wrap,Cons2,nil},St};
split_pat(Data, St0) ->
Type = cerl:data_type(Data),
Es = cerl:data_es(Data),
Expand Down Expand Up @@ -3719,7 +3790,19 @@ split_data([E|Es0], Type, St0, Acc) ->
end;
split_data([], _, _, _) -> none.

split_bin_segments([#c_bitstr{val=Val,size=Size}=S0|Segs], Vars0, St0, Acc) ->
split_bin_segments([#c_bitstr{anno=Anno0}=S0|Segs], Vars, St, Acc) ->
case member(sequential_match, Anno0) of
true ->
Anno = Anno0 -- [sequential_match],
S = S0#c_bitstr{anno=Anno},
{sequential_match,reverse(Acc),[S|Segs],St};
false ->
split_bin_segments_1(S0, Segs, Vars, St, Acc)
end;
split_bin_segments(_, _, _, _) ->
none.

split_bin_segments_1(#c_bitstr{val=Val,size=Size}=S0, Segs, Vars0, St0, Acc) ->
Vars = case Val of
#c_var{name=V} -> gb_sets:add(V, Vars0);
_ -> Vars0
Expand All @@ -3736,7 +3819,7 @@ split_bin_segments([#c_bitstr{val=Val,size=Size}=S0|Segs], Vars0, St0, Acc) ->
%% in the same pattern.
{TailVar,Tail,St} = split_tail_seg(S0, Segs, St0),
Wrap = fun(Body) -> Body end,
{TailVar,Wrap,reverse(Acc, [Tail]),[S0|Segs],St};
{size_var,TailVar,Wrap,reverse(Acc, [Tail]),[S0|Segs],St};
false ->
split_bin_segments(Segs, Vars, St0, [S0|Acc])
end;
Expand All @@ -3748,10 +3831,8 @@ split_bin_segments([#c_bitstr{val=Val,size=Size}=S0|Segs], Vars0, St0, Acc) ->
{SizeVar,St2} = new_var(St1),
S = S0#c_bitstr{size=SizeVar},
{Wrap,St3} = split_wrap(SizeVar, Size, St2),
{TailVar,Wrap,reverse(Acc, [Tail]),[S|Segs],St3}
end;
split_bin_segments(_, _, _, _) ->
none.
{size_var,TailVar,Wrap,reverse(Acc, [Tail]),[S|Segs],St3}
end.

split_tail_seg(#c_bitstr{anno=A}=S, Segs, St0) ->
{TailVar,St} = new_var(St0),
Expand Down
118 changes: 116 additions & 2 deletions lib/compiler/test/bs_match_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@
combine_empty_segments/1,hangs_forever/1,
bs_saved_position_units/1,empty_matches/1,
trim_bs_start_match_resume/1,
gh_6410/1]).
gh_6410/1,
binary_aliases/1]).

-export([coverage_id/1,coverage_external_ignore/2]).

Expand Down Expand Up @@ -91,7 +92,8 @@ groups() ->
many_clauses,combine_empty_segments,hangs_forever,
bs_saved_position_units,empty_matches,
trim_bs_start_match_resume,
gh_6410]}].
gh_6410,
binary_aliases]}].

init_per_suite(Config) ->
test_lib:recompile(?MODULE),
Expand Down Expand Up @@ -2670,6 +2672,118 @@ do_gh_6410(X) ->
X
end).

%% GH-6348/OTP-18297: Allow aliases for binaries.
-record(ba_foo, {a,b,c}).

binary_aliases(_Config) ->
F1 = fun(<<A:8>> = <<B:8>>) -> {A,B} end,
{42,42} = F1(id(<<42>>)),
{99,99} = F1(id(<<99>>)),

F2 = fun(#ba_foo{a = <<X:8>>} = #ba_foo{a = <<Y:8>>}) -> {X,Y} end,
{255,255} = F2(id(#ba_foo{a = <<-1>>})),
{107,107} = F2(id(#ba_foo{a = <<107>>})),

F3 = fun(#ba_foo{a = <<X:8>>} = #ba_foo{a = <<Y:4,Z:4>>}) -> {X,Y,Z} end,
{255,15,15} = F3(id(#ba_foo{a = <<-1>>})),
{16#5c,16#5,16#c} = F3(id(#ba_foo{a = <<16#5c>>})),

F4 = fun([<<A:8>> = {C,D} = <<B:8>>]) ->
{A,B,C,D};
(L) ->
lists:sum(L)
end,
6 = F4(id([1,2,3])),

F5 = fun(Val) ->
<<A:8>> = X = <<B:8>> = Val,
{A,B,X}
end,
{42,42,<<42>>} = F5(id(<<42>>)),

F6 = fun(X, Y) ->
<<A:8>> = <<X:4,Y:4>>,
A
end,
16#7c = F6(16#7, 16#c),
16#ed = F6(16#e, 16#d),

F7 = fun(Val) ->
(<<A:8>> = X) = (<<B:8>> = <<A:4,B:4>>) = Val,
{A,B,X}
end,
{0,0,<<0>>} = F7(id(<<0>>)),
{'EXIT',{{badmatch,<<1>>},_}} = catch F7(<<1>>),

F8 = fun(Val) ->
(<<A:8>> = X) = (Y = <<B:8>>) = Val,
{A,B,X,Y}
end,
{253,253,<<253>>,<<253>>} = F8(id(<<253>>)),

F9 = fun(Val) ->
(Z = <<A:8>> = X) = (Y = <<B:8>> = W) = Val,
{A,B,X,Y,Z,W}
end,
{201,201,<<201>>,<<201>>,<<201>>,<<201>>} = F9(id(<<201>>)),

F10 = fun(X) ->
<<>> = (<<>> = X)
end,
<<>> = F10(id(<<>>)),
{'EXIT',{{badmatch,42},_}} = catch F10(id(42)),

F11 = fun(Bin) ->
<<A:8/bits,B:24/bits>> = <<C:16,D:16>> = <<E:8,F:8,G:8,H:8>> = Bin,
{A,B,C,D,E,F,G,H}
end,
{<<0>>,<<0,0,0>>, 0,0, 0,0,0,0} = F11(id(<<0:32>>)),
{<<16#ab>>,<<16#cdef57:24>>, 16#abcd,16#ef57, 16#ab,16#cd,16#ef,16#57} =
F11(id(<<16#abcdef57:32>>)),

F12 = fun(#{key := <<X:8>>} = #{key := <<Y:8>>}) -> {X,Y} end,
{255,255} = F12(id(#{key => <<-1>>})),
{209,209} = F12(id(#{key => <<209>>})),

F13 = fun(Bin) ->
<<_:8,A:Size>> = <<_:8,B:Size/bits>> = <<Size:8,_/bits>> = Bin,
{Size,A,B}
end,
{0,0,<<>>} = F13(id(<<0>>)),
{1,1,<<1:1>>} = F13(id(<<1,1:1>>)),
{8,42,<<42>>} = F13(id(<<8,42>>)),

F14 = fun(Bin) ->
[<<_:Y>> | _] = [_ | Y] = id(Bin),
ok
end,
ok = F14([<<>>|0]),
ok = F14([<<-1:32>>|32]),
{'EXIT',{{badmatch,[<<0:16>>|0]},_}} = catch F14([<<0:16>>|0]),
{'EXIT',{{badmatch,[<<0:16>>|atom]},_}} = catch F14([<<0:16>>|atom]),

F15 = fun(Bin) ->
{<<_:Y>>, _} = {_, Y} = id(Bin),
Y
end,
0 = F15({<<>>, 0}),
32 = F15({<<-1:32>>, 32}),
{'EXIT',{{badmatch,{<<0:16>>,0}},_}} = catch F15({<<0:16>>, 0}),
{'EXIT',{{badmatch,{<<0:16>>,atom}},_}} = catch F15({<<0:16>>, atom}),

F16 = fun(Bin) ->
[{<<_:Y>>, _}] = [{_, Y}] = id(Bin),
Y
end,
0 = F16([{<<>>, 0}]),
32 = F16([{<<-1:32>>, 32}]),
{'EXIT',{{badmatch,[{<<0:16>>,0}]},_}} = catch F16([{<<0:16>>, 0}]),
{'EXIT',{{badmatch,[{<<0:16>>,atom}]},_}} = catch F16([{<<0:16>>, atom}]),

ok.


%%% Utilities.

id(I) -> I.

35 changes: 33 additions & 2 deletions lib/compiler/test/map_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@
%% miscellaneous
t_conflicting_destinations/1,
t_cse_assoc/1,
shared_key_tuples/1
shared_key_tuples/1,
map_aliases/1
]).

-define(badmap(V, F, Args), {'EXIT', {{badmap,V}, [{maps,F,Args,_}|_]}}).
Expand Down Expand Up @@ -163,7 +164,8 @@ all() ->
%% miscellaneous
t_conflicting_destinations,
t_cse_assoc,
shared_key_tuples
shared_key_tuples,
map_aliases
].

groups() -> [].
Expand Down Expand Up @@ -2567,6 +2569,35 @@ shared_key_tuples(_Config) ->
decimal(Int) ->
#{type => decimal, int => Int, exp => 0}.

%% GH-6348/OTP-18297: Extend parallel matching of maps.
map_aliases(_Config) ->
F1 = fun(M) ->
#{K := V} = #{k := {a,K}} = M,
V
end,
value = F1(id(#{k => {a,key}, key => value})),

F2 = fun(#{} = #{}) -> ok end,
ok = F2(id(#{})),
ok = F2(id(#{key => whatever})),

F3 = fun(#{a := V} = #{}) -> V end,
{a,b,c} = F3(id(#{a => {a,b,c}})),

F4 = fun(Map) ->
[#{Key := Value} | _] = [_ | Key] = id(Map),
Value
end,
bar = F4([#{foo => bar} | foo]),

F5 = fun(Map) ->
{#{Key := Value}, _} = {_, Key} = id(Map),
Value
end,
light = F5({#{frotz => light}, frotz}),

ok.

%% aux

rand_terms(0) -> [];
Expand Down
Loading

0 comments on commit 01fb1f2

Please sign in to comment.