Skip to content

Commit

Permalink
Lift restrictions for matching of binaries and maps
Browse files Browse the repository at this point in the history
There has always been an implementation limitation for matching
of binaries (for technical reasons). For example:

    foo(Bin) ->
        <<A:8>> = <<X:4,Y:4>> = Bin,
        {A,X,Y}.

This would fail to compile with the following message:

    t.erl:5:5: binary patterns cannot be matched in parallel using '='
    %    5|     <<A:8>> = <<X:4,Y:4>> = Bin,
    %     |     ^

This commit lifts this restriction, making the example legal.

A restriction for map matching is also lifted, but before we can
describe that, we'll need a digression to talk about the `=` operator.

The `=` operator can be used for two similar but slightly different
purposes.

When used in a pattern in a clause, for example in a function head,
both the left-hand and right-hand side operands must be patterns:

    Pattern1 = Pattern2

For example:

    bar(#{a := A} = #{b := B}) -> {A, B}.

The following example will not compile because the right-hand side
is not a pattern but an expression:

    wrong(#{a := A} = #{b => B}) -> {A, B}.

    t.erl:4:23: illegal pattern
    %    4| wrong(#{a := A} = #{b => B}) -> {A, B}.
    %     |                       ^

Used in this context, the `=` operator does not imply that the two
patterns are matched in any particular order. Attempting to use a
variable matched out on the left-hand side on the right-hand side, or
vice versa, will fail:

    also_wrong1(#{B := A} = #{b := B}) -> {A,B}.
    also_wrong2(#{a := A} = #{A := B}) -> {A,B}.

    t.erl:6:15: variable 'B' is unbound
    %    6| also_wrong1(#{B := A} = #{b := B}) -> {A,B}.
    %     |               ^

    t.erl:7:27: variable 'A' is unbound
    %    7| also_wrong2(#{a := A} = #{A := B}) -> {A,B}.
    %     |                           ^

The other way to use `=` is in a function body. Used in this way,
the right-hand side must be an expression:

    Pattern = Expression

For example:

    foobar(Value) ->
        #{a := A} = #{a => Value},
        A.

Used in this context, the right-hand side of `=` must **not** be a pattern:

    illegal_foobar(Value) ->
        #{a := A} = #{a := Value},
        A.

    t.erl:18:21: only association operators '=>' are allowed in map construction
    %   18|     #{a := A} = #{a := Value},
    %     |                     ^

When used in a body context, the value of the `=` operator is the
value of its right-hand side operand. When multiple `=` operators are
combined, they are evaluated from right to left. That means that any
number of patterns can be matched at once:

    Pattern1 = Pattern2 = ... = PatternN = Expr

which is equivalent to:

    Var = Expr
    PatternN = Var
       .
       .
       .
    Pattern2 = Var
    Pattern1 = Var

Given that there is a well-defined evaluation order from right to
left, one would expect that the following example would be legal:

    baz(M) ->
        #{K := V} = #{k := K} = M,
        V.

It is not. In Erlang/OTP 25 or earlier, the compilation fails with the
following message:

    t.erl:28:7: variable 'K' is unbound
    %   28|     #{K := V} = #{k := K} = M,
    %     |       ^

That restriction is now lifted, making the example legal.

Closes erlang#6348
Closes erlang#6444
Closes erlang#6467
  • Loading branch information
bjorng committed Nov 21, 2022
1 parent 058e356 commit 2ffc98f
Show file tree
Hide file tree
Showing 10 changed files with 593 additions and 201 deletions.
27 changes: 19 additions & 8 deletions lib/compiler/src/beam_ssa_opt.erl
Original file line number Diff line number Diff line change
Expand Up @@ -1940,14 +1940,15 @@ coalesce_skips_is(_, _, _) ->
%%% Short-cutting binary matching instructions.
%%%

ssa_opt_bsm_shortcut({#opt_st{ssa=Linear}=St, FuncDb}) ->
Positions = bsm_positions(Linear, #{}),
ssa_opt_bsm_shortcut({#opt_st{ssa=Linear0}=St, FuncDb}) ->
Positions = bsm_positions(Linear0, #{}),
case map_size(Positions) of
0 ->
%% No binary matching instructions.
{St, FuncDb};
_ ->
{St#opt_st{ssa=bsm_shortcut(Linear, Positions)}, FuncDb}
Linear = bsm_shortcut(Linear0, Positions),
ssa_opt_live({St#opt_st{ssa=Linear}, FuncDb})
end.

bsm_positions([{L,#b_blk{is=Is,last=Last}}|Bs], PosMap0) ->
Expand Down Expand Up @@ -1988,20 +1989,30 @@ bsm_update_bits([_,_,_,#b_literal{val=Sz},#b_literal{val=U}], Bits)
Bits + Sz*U;
bsm_update_bits(_, Bits) -> Bits.

bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap) ->
bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap0) ->
case {Is,Last0} of
{[#b_set{op=bs_match,dst=New,args=[_,Old|_]},
#b_set{op={succeeded,guard},dst=Bool,args=[New]}],
#b_br{bool=Bool,fail=Fail}} ->
case PosMap of
#{Old:=Bits,Fail:={TailBits,NextFail}} when Bits > TailBits ->
case PosMap0 of
#{Old := Bits,Fail := {TailBits,NextFail}} when Bits > TailBits ->
Last = Last0#b_br{fail=NextFail},
[{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap)];
[{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap0)];
#{} ->
[{L,Blk}|bsm_shortcut(Bs, PosMap0)]
end;
{[#b_set{op=bs_test_tail,dst=Bool,args=[Old,#b_literal{val=TailBits}]}],
#b_br{bool=Bool,succ=Succ}} ->
case PosMap0 of
#{{bs_test_tail,Old,L} := TailBits} ->
Last = beam_ssa:normalize(Last0#b_br{fail=Succ}),
[{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap0)];
#{} ->
PosMap = PosMap0#{{bs_test_tail,Old,Succ} => TailBits},
[{L,Blk}|bsm_shortcut(Bs, PosMap)]
end;
{_,_} ->
[{L,Blk}|bsm_shortcut(Bs, PosMap)]
[{L,Blk}|bsm_shortcut(Bs, PosMap0)]
end;
bsm_shortcut([], _PosMap) -> [].

Expand Down
113 changes: 96 additions & 17 deletions lib/compiler/src/v3_core.erl
Original file line number Diff line number Diff line change
Expand Up @@ -1050,6 +1050,8 @@ letify_aliases(P, E) ->

sanitize({match,L,P1,P2}) ->
{tuple,L,[sanitize(P1),sanitize(P2)]};
sanitize({sequential_match,L,P1,P2}) ->
{tuple,L,[sanitize(P1),sanitize(P2)]};
sanitize({cons,L,H,T}) ->
{cons,L,sanitize(H),sanitize(T)};
sanitize({tuple,L,Ps0}) ->
Expand Down Expand Up @@ -2025,10 +2027,10 @@ is_safe(_) -> false.
%% fold_match(MatchExpr, Pat) -> {MatchPat,Expr}.
%% Fold nested matches into one match with aliased patterns.

fold_match({match,L,P0,E0}, P) ->
{P1,E1} = fold_match(E0, P),
{{match,L,P0,P1},E1};
fold_match(E, P) -> {P,E}.
fold_match({match, L, P, E}, E0) ->
fold_match(E, {sequential_match, L, P, E0});
fold_match(E, E0) ->
{E0, E}.

%% pattern(Pattern, State) -> {CorePat,[PreExp],State}.
%% Transform a pattern by removing line numbers. We also normalise
Expand All @@ -2055,9 +2057,56 @@ pattern({bin,L,Ps}, St0) ->
{Segments,St} = pat_bin(Ps, St0),
{#ibinary{anno=#a{anno=lineno_anno(L, St)},segments=Segments},St};
pattern({match,_,P1,P2}, St) ->
%% Handle aliased patterns in a clause. Example:
%%
%% f({a,b} = {A,B}) -> . . .
%%
%% The `=` operator does not have any defined order in which the
%% two patterns are matched. Therefore, this example can safely be
%% rewritten like so:
%%
%% f({a=A,b=B}) -> . . .
%%
%% Aliased patterns that are illegal, such as:
%%
%% f(#{Key := Value} = #{key := Key}) -> . . .
%%
%% have already been rejected by erl_lint.
%%
{Cp1,St1} = pattern(P1, St),
{Cp2,St2} = pattern(P2, St1),
{pat_alias(Cp1, Cp2),St2};
pattern({sequential_match,_,P1,P2}, St) ->
%% Handle sequential matching in a function body. Example:
%%
%% f(Map) ->
%% #{Key := Value} = #{key := Key} = Map,
%% Value.
%%
%% In a function body, the patterns are matched one at a time to
%% the expression, going from right to left, making the example
%% equivalent to:
%%
%% f(Map) ->
%% #{key := Key} = Map,
%% #{Key := Value} = Map,
%% Value.
%%
{Cp1,St1} = pattern(P1, St),
{Cp2,St2} = pattern(P2, St1),

case Cp2 of
#c_cons{anno=[sequential_match]} ->
ok;
_ ->
%% Reject pattern aliases that obviously cannot match.
_ = pat_alias(Cp1, Cp2),
ok
end,

%% Set up sequential matching of P1 and P2.
P = #c_cons{anno=[sequential_match],hd=Cp1,tl=Cp2},
{P,St2};
%% Evaluate compile-time expressions.
pattern({op,_,'++',{nil,_},R}, St) ->
pattern(R, St);
Expand Down Expand Up @@ -2199,9 +2248,17 @@ pat_alias(P1, #c_var{}=Var) ->
pat_alias(P1, #c_alias{pat=P2}=Alias) ->
Alias#c_alias{pat=pat_alias(P1, P2)};

pat_alias(#ibinary{segments=[]}=P, #ibinary{segments=[]}) ->
P;
pat_alias(#ibinary{segments=[_|_]=Segs1}=P, #ibinary{segments=[S0|Segs2]}) ->
%% Handle aliases of binary patterns in a clause. Example:
%% f(<<A:8,B:8>> = <<C:16>>) -> . . .
#ibitstr{anno=#a{anno=Anno}=A} = S0,
S = S0#ibitstr{anno=A#a{anno=[sequential_match|Anno]}},
P#ibinary{segments=Segs1++[S|Segs2]};

pat_alias(P1, P2) ->
%% Aliases between binaries are not allowed, so the only
%% legal patterns that remain are data patterns.
%% The only legal patterns that remain are data patterns.
case cerl:is_data(P1) andalso cerl:is_data(P2) of
false -> throw(nomatch);
true -> ok
Expand Down Expand Up @@ -2803,9 +2860,9 @@ uexpr_list(Les0, Ks, St0) ->
%% upattern(Pat, [KnownVar], State) ->
%% {Pat,[GuardTest],[NewVar],[UsedVar],State}.

upattern(#c_var{name='_'}, _, St0) ->
upattern(#c_var{anno=Anno,name='_'}, _, St0) ->
{New,St1} = new_var_name(St0),
{#c_var{name=New},[],[New],[],St1};
{#c_var{anno=Anno,name=New},[],[New],[],St1};
upattern(#c_var{name=V}=Var, Ks, St0) ->
case is_element(V, known_get(Ks)) of
true ->
Expand Down Expand Up @@ -3008,9 +3065,10 @@ ren_pat(#ibinary{segments=Es0}=P, Ks, {Isub,Osub0}, St0) ->
{Es,_Isub,Osub,St} = ren_pat_bin(Es0, Ks, Isub, Osub0, St0),
{P#ibinary{segments=Es},{Isub,Osub},St};
ren_pat(P, Ks0, {_,_}=Subs0, St0) ->
Anno = cerl:get_ann(P),
Es0 = cerl:data_es(P),
{Es,Subs,St} = ren_pats(Es0, Ks0, Subs0, St0),
{cerl:make_data(cerl:data_type(P), Es),Subs,St}.
{cerl:ann_make_data(Anno, cerl:data_type(P), Es),Subs,St}.

ren_pat_bin([#ibitstr{val=Val0,size=Sz0}=E|Es0], Ks, Isub0, Osub0, St0) ->
Sz = ren_get_subst(Sz0, Isub0),
Expand Down Expand Up @@ -3635,9 +3693,14 @@ split_pat(#c_binary{segments=Segs0}=Bin, St0) ->
case split_bin_segments(Segs0, Vars, St0, []) of
none ->
none;
{TailVar,Wrap,Bef,Aft,St} ->
{size_var,TailVar,Wrap,Bef,Aft,St} ->
BefBin = Bin#c_binary{segments=Bef},
{BefBin,{split,[TailVar],Wrap,Bin#c_binary{segments=Aft},nil},St}
{BefBin,{split,[TailVar],Wrap,Bin#c_binary{segments=Aft},nil},St};
{sequential_match,Bef,Aft,St1} ->
{BinVar,St} = new_var(St1),
BefBin = #c_alias{var=BinVar,pat=Bin#c_binary{segments=Bef}},
Wrap = fun(Body) -> Body end,
{BefBin,{split,[BinVar],Wrap,Bin#c_binary{segments=Aft},nil},St}
end;
split_pat(#c_map{es=Es}=Map, St) ->
split_map_pat(Es, Map, St, []);
Expand All @@ -3652,6 +3715,12 @@ split_pat(#c_alias{pat=Pat}=Alias0, St0) ->
Alias = Alias0#c_alias{pat=Var},
{Alias,{split,[Var],Ps,Split},St}
end;
split_pat(#c_cons{anno=[sequential_match],hd=Cons1,tl=Cons2}, St0) ->
%% Handle sequential matching of all types of patterns.
{Var,St} = new_var(St0),
BefCons = #c_alias{var=Var,pat=Cons1},
Wrap = fun(Body) -> Body end,
{BefCons,{split,[Var],Wrap,Cons2,nil},St};
split_pat(Data, St0) ->
Type = cerl:data_type(Data),
Es = cerl:data_es(Data),
Expand Down Expand Up @@ -3719,7 +3788,19 @@ split_data([E|Es0], Type, St0, Acc) ->
end;
split_data([], _, _, _) -> none.

split_bin_segments([#c_bitstr{val=Val,size=Size}=S0|Segs], Vars0, St0, Acc) ->
split_bin_segments([#c_bitstr{anno=Anno0}=S0|Segs], Vars, St, Acc) ->
case member(sequential_match, Anno0) of
true ->
Anno = Anno0 -- [sequential_match],
S = S0#c_bitstr{anno=Anno},
{sequential_match,reverse(Acc),[S|Segs],St};
false ->
split_bin_segments_1(S0, Segs, Vars, St, Acc)
end;
split_bin_segments(_, _, _, _) ->
none.

split_bin_segments_1(#c_bitstr{val=Val,size=Size}=S0, Segs, Vars0, St0, Acc) ->
Vars = case Val of
#c_var{name=V} -> gb_sets:add(V, Vars0);
_ -> Vars0
Expand All @@ -3736,7 +3817,7 @@ split_bin_segments([#c_bitstr{val=Val,size=Size}=S0|Segs], Vars0, St0, Acc) ->
%% in the same pattern.
{TailVar,Tail,St} = split_tail_seg(S0, Segs, St0),
Wrap = fun(Body) -> Body end,
{TailVar,Wrap,reverse(Acc, [Tail]),[S0|Segs],St};
{size_var,TailVar,Wrap,reverse(Acc, [Tail]),[S0|Segs],St};
false ->
split_bin_segments(Segs, Vars, St0, [S0|Acc])
end;
Expand All @@ -3748,10 +3829,8 @@ split_bin_segments([#c_bitstr{val=Val,size=Size}=S0|Segs], Vars0, St0, Acc) ->
{SizeVar,St2} = new_var(St1),
S = S0#c_bitstr{size=SizeVar},
{Wrap,St3} = split_wrap(SizeVar, Size, St2),
{TailVar,Wrap,reverse(Acc, [Tail]),[S|Segs],St3}
end;
split_bin_segments(_, _, _, _) ->
none.
{size_var,TailVar,Wrap,reverse(Acc, [Tail]),[S|Segs],St3}
end.

split_tail_seg(#c_bitstr{anno=A}=S, Segs, St0) ->
{TailVar,St} = new_var(St0),
Expand Down
Loading

0 comments on commit 2ffc98f

Please sign in to comment.