Skip to content

Commit

Permalink
Lift restrictions for matching of binaries and maps
Browse files Browse the repository at this point in the history
There has always been an implementation limitation for matching
of binaries (for technical reasons). For example:

    foo(Bin) ->
        <<A:8>> = <<X:4,Y:4>> = Bin,
        {A,X,Y}.

This would fail to compile with the following message:

    t.erl:5:5: binary patterns cannot be matched in parallel using '='
    %    5|     <<A:8>> = <<X:4,Y:4>> = Bin,
    %     |     ^

This commit lifts this restriction, making the example legal.

A restriction for map matching is also lifted, but before we can
describe that, we'll need a digression to talk about the `=` operator.

The `=` operator can be used for two similar but slightly differently
purposes.

When used in a pattern in a clause, for example in a function head,
both the left-hand and right-hand side operands must be patterns:

    Pattern1 = Pattern2

For example:

    bar(#{a := A} = #{b := B}) -> {A, B}.

The following example will not compile because the right-hand side
is not a pattern but an expression:

    wrong(#{a := A} = #{b => B}) -> {A, B}.

    t.erl:4:23: illegal pattern
    %    4| wrong(#{a := A} = #{b => B}) -> {A, B}.
    %     |                       ^

Used in this context, the `=` operator does not imply that the two
patterns are matched in any particular order. Attempting to use a
variable matched out on the left-hand side on the right-hand side, or
vice versa, will fail:

    also_wrong1(#{B := A} = #{b := B}) -> {A,B}.
    also_wrong2(#{a := A} = #{A := B}) -> {A,B}.

    t.erl:6:15: variable 'B' is unbound
    %    6| also_wrong1(#{B := A} = #{b := B}) -> {A,B}.
    %     |               ^

    t.erl:7:27: variable 'A' is unbound
    %    7| also_wrong2(#{a := A} = #{A := B}) -> {A,B}.
    %     |                           ^

The other way to use `=` is in a function body. Used in this way,
the right-hand side must be an expression:

    Pattern = Expression

For example:

    foobar(Value) ->
        #{a := A} = #{a => Value},
        A.

Used in this context, the right-hand side of `=` must **not** be a pattern:

    illegal_foobar(Value) ->
        #{a := A} = #{a := Value},
        A.

    t.erl:18:21: only association operators '=>' are allowed in map construction
    %   18|     #{a := A} = #{a := Value},
    %     |                     ^

When used in a body context, the value of the `=` operator is the
value of its right-hand side operand. When multiple `=` operators are
combined, they are evaluted from right to left. That means that any
number of patterns can be matched at once:

    Pattern1 = Pattern2 = ... = PatternN = Expr

which is equivalent to:

    Var = Expr
    PatternN = Var
       .
       .
       .
    Pattern2 = Var
    Pattern1 = Var

Given that there is a well-defined evaluation order from right to
left, one would expect that the following example would be legal:

    baz(M) ->
        #{K := V} = #{k := K} = M,
        V.

It is not. In Erlang/OTP 25 or earlier, the compilation fails with the
following message:

    t.erl:28:7: variable 'K' is unbound
    %   28|     #{K := V} = #{k := K} = M,
    %     |       ^

That restriction is now lifted, making the example legal.

Closes erlang#6348
Closes erlang#6444
Closes erlang#6467
  • Loading branch information
bjorng committed Dec 13, 2022
1 parent 58b62e5 commit 6a9ebe6
Show file tree
Hide file tree
Showing 12 changed files with 1,060 additions and 321 deletions.
17 changes: 16 additions & 1 deletion lib/compiler/src/beam_ssa_codegen.erl
Original file line number Diff line number Diff line change
Expand Up @@ -2198,7 +2198,8 @@ bs_translate([I|Is0]) ->
[I|bs_translate(Is0)];
{Ctx,Fail0,First} ->
{Instrs0,Fail,Is} = bs_translate_collect(Is0, Ctx, Fail0, [First]),
Instrs = bs_eq_fixup(Instrs0),
Instrs1 = bs_seq_match_fixup(Instrs0),
Instrs = bs_eq_fixup(Instrs1),
[{bs_match,Fail,Ctx,{commands,Instrs}}|bs_translate(Is)]
end;
bs_translate([]) -> [].
Expand All @@ -2224,6 +2225,20 @@ bs_translate_fixup([{test_tail,Bits}|Is0]) ->
bs_translate_fixup(Is) ->
reverse(Is).

%% Fix up matching of multiple binaries in parallel. Example:
%% f(<<_:8>> = <<X:8>>) -> ...
bs_seq_match_fixup([{test_tail,Bits},{ensure_exactly,Bits}|Is]) ->
[{ensure_exactly,Bits}|bs_seq_match_fixup(Is)];
bs_seq_match_fixup([{test_tail,Bits0},{ensure_at_least,Bits1,Unit}|Is])
when Bits0 >= Bits1, Bits0 rem Unit =:= 0 ->
%% The tail test is at least as strict as the ensure_at_least test.
[{ensure_exactly,Bits0}|bs_seq_match_fixup(Is)];
bs_seq_match_fixup([{test_tail,Bits}|Is]) ->
[{ensure_exactly,Bits}|bs_seq_match_fixup(Is)];
bs_seq_match_fixup([I|Is]) ->
[I|bs_seq_match_fixup(Is)];
bs_seq_match_fixup([]) -> [].

bs_eq_fixup([{'=:=',nil,Bits,Value}|Is]) ->
EqInstrs = bs_eq_fixup_split(Bits, <<Value:Bits>>),
EqInstrs ++ bs_eq_fixup(Is);
Expand Down
95 changes: 64 additions & 31 deletions lib/compiler/src/beam_ssa_opt.erl
Original file line number Diff line number Diff line change
Expand Up @@ -1834,15 +1834,17 @@ trim_try_is([], _Killed) ->
%%% with bs_test_tail.
%%%

ssa_opt_bsm({#opt_st{ssa=Linear}=St, FuncDb}) ->
Extracted0 = bsm_extracted(Linear),
ssa_opt_bsm({#opt_st{ssa=Linear0}=St, FuncDb}) ->
Extracted0 = bsm_extracted(Linear0),
Extracted = sets:from_list(Extracted0, [{version, 2}]),
{St#opt_st{ssa=bsm_skip(Linear, Extracted)}, FuncDb}.
Linear1 = bsm_skip(Linear0, Extracted),
Linear = bsm_coalesce_skips(Linear1, #{}),
{St#opt_st{ssa=Linear}, FuncDb}.

bsm_skip([{L,#b_blk{is=Is0}=Blk}|Bs0], Extracted) ->
Bs = bsm_skip(Bs0, Extracted),
Is = bsm_skip_is(Is0, Extracted),
coalesce_skips({L,Blk#b_blk{is=Is}}, Bs);
[{L,Blk#b_blk{is=Is}}|Bs];
bsm_skip([], _) -> [].

bsm_skip_is([I0|Is], Extracted) ->
Expand Down Expand Up @@ -1878,76 +1880,91 @@ bsm_extracted([{_,#b_blk{is=Is}}|Bs]) ->
end;
bsm_extracted([]) -> [].

bsm_coalesce_skips([{L,Blk0}|Bs0], Renames0) ->
case coalesce_skips({L,Blk0}, Bs0, Renames0) of
not_possible ->
[{L,Blk0}|bsm_coalesce_skips(Bs0, Renames0)];
{Bs,Renames} ->
bsm_coalesce_skips(Bs, Renames)
end;
bsm_coalesce_skips([], _Renames) -> [].

coalesce_skips({L,#b_blk{is=[#b_set{op=bs_extract}=Extract|Is0],
last=Last0}=Blk0}, Bs0) ->
case coalesce_skips_is(Is0, Last0, Bs0) of
last=Last0}=Blk0}, Bs0, Renames0) ->
case coalesce_skips_is(Is0, Last0, Bs0, Renames0) of
not_possible ->
[{L,Blk0}|Bs0];
{Is,Last,Bs} ->
not_possible;
{Is,Last,Bs,Renames} ->
Blk = Blk0#b_blk{is=[Extract|Is],last=Last},
[{L,Blk}|Bs]
{[{L,Blk}|Bs],Renames}
end;
coalesce_skips({L,#b_blk{is=Is0,last=Last0}=Blk0}, Bs0) ->
case coalesce_skips_is(Is0, Last0, Bs0) of
coalesce_skips({L,#b_blk{is=Is0,last=Last0}=Blk0}, Bs0, Renames0) ->
case coalesce_skips_is(Is0, Last0, Bs0, Renames0) of
not_possible ->
[{L,Blk0}|Bs0];
{Is,Last,Bs} ->
not_possible;
{Is,Last,Bs,Renames} ->
Blk = Blk0#b_blk{is=Is,last=Last},
[{L,Blk}|Bs]
{[{L,Blk}|Bs],Renames}
end.

coalesce_skips_is([#b_set{op=bs_match,
args=[#b_literal{val=skip},
Ctx0,Type,Flags,
#b_literal{val=Size0},
#b_literal{val=Unit0}],
dst=Ctx}=Skip0,
dst=PrevCtx}=Skip0,
#b_set{op={succeeded,guard}}],
#b_br{succ=L2,fail=Fail}=Br0,
Bs0) when is_integer(Size0) ->
Bs0,
Renames0) when is_integer(Size0) ->
case Bs0 of
[{L2,#b_blk{is=[#b_set{op=bs_match,
dst=SkipDst,
args=[#b_literal{val=skip},Ctx,_,_,
args=[#b_literal{val=skip},PrevCtx,_,_,
#b_literal{val=Size1},
#b_literal{val=Unit1}]},
#b_set{op={succeeded,guard}}=Succeeded],
last=#b_br{fail=Fail}=Br}}|Bs] when is_integer(Size1) ->
OldCtx = maps:get(Ctx0, Renames0, Ctx0),
SkipBits = Size0 * Unit0 + Size1 * Unit1,
Skip = Skip0#b_set{dst=SkipDst,
args=[#b_literal{val=skip},Ctx0,
args=[#b_literal{val=skip},OldCtx,
Type,Flags,
#b_literal{val=SkipBits},
#b_literal{val=1}]},
Is = [Skip,Succeeded],
{Is,Br,Bs};
Renames = Renames0#{PrevCtx => Ctx0},
{Is,Br,Bs,Renames};
[{L2,#b_blk{is=[#b_set{op=bs_test_tail,
args=[Ctx,#b_literal{val=TailSkip}]}],
args=[PrevCtx,#b_literal{val=TailSkip}]}],
last=#b_br{succ=NextSucc,fail=Fail}}}|Bs] ->
OldCtx = maps:get(Ctx0, Renames0, Ctx0),
SkipBits = Size0 * Unit0,
TestTail = Skip0#b_set{op=bs_test_tail,
args=[Ctx0,#b_literal{val=SkipBits+TailSkip}]},
args=[OldCtx,#b_literal{val=SkipBits+TailSkip}]},
Br = Br0#b_br{bool=TestTail#b_set.dst,succ=NextSucc},
Is = [TestTail],
{Is,Br,Bs};
Renames = Renames0#{PrevCtx => Ctx0},
{Is,Br,Bs,Renames};
_ ->
not_possible
end;
coalesce_skips_is(_, _, _) ->
coalesce_skips_is(_, _, _, _) ->
not_possible.

%%%
%%% Short-cutting binary matching instructions.
%%%

ssa_opt_bsm_shortcut({#opt_st{ssa=Linear}=St, FuncDb}) ->
Positions = bsm_positions(Linear, #{}),
ssa_opt_bsm_shortcut({#opt_st{ssa=Linear0}=St, FuncDb}) ->
Positions = bsm_positions(Linear0, #{}),
case map_size(Positions) of
0 ->
%% No binary matching instructions.
{St, FuncDb};
_ ->
{St#opt_st{ssa=bsm_shortcut(Linear, Positions)}, FuncDb}
Linear = bsm_shortcut(Linear0, Positions),
ssa_opt_live({St#opt_st{ssa=Linear}, FuncDb})
end.

bsm_positions([{L,#b_blk{is=Is,last=Last}}|Bs], PosMap0) ->
Expand Down Expand Up @@ -1988,20 +2005,36 @@ bsm_update_bits([_,_,_,#b_literal{val=Sz},#b_literal{val=U}], Bits)
Bits + Sz*U;
bsm_update_bits(_, Bits) -> Bits.

bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap) ->
bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap0) ->
case {Is,Last0} of
{[#b_set{op=bs_match,dst=New,args=[_,Old|_]},
#b_set{op={succeeded,guard},dst=Bool,args=[New]}],
#b_br{bool=Bool,fail=Fail}} ->
case PosMap of
#{Old:=Bits,Fail:={TailBits,NextFail}} when Bits > TailBits ->
case PosMap0 of
#{Old := Bits,Fail := {TailBits,NextFail}} when Bits > TailBits ->
Last = Last0#b_br{fail=NextFail},
[{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap)];
[{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap0)];
#{} ->
[{L,Blk}|bsm_shortcut(Bs, PosMap0)]
end;
{[#b_set{op=bs_test_tail,dst=Bool,args=[Old,#b_literal{val=TailBits}]}],
#b_br{bool=Bool,succ=Succ,fail=Fail}} ->
case PosMap0 of
#{{bs_test_tail,Old,L} := ActualTailBits} ->
Last1 = if
TailBits =:= ActualTailBits ->
Last0#b_br{fail=Succ};
true ->
Last0#b_br{succ=Fail}
end,
Last = beam_ssa:normalize(Last1),
[{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap0)];
#{} ->
PosMap = PosMap0#{{bs_test_tail,Old,Succ} => TailBits},
[{L,Blk}|bsm_shortcut(Bs, PosMap)]
end;
{_,_} ->
[{L,Blk}|bsm_shortcut(Bs, PosMap)]
[{L,Blk}|bsm_shortcut(Bs, PosMap0)]
end;
bsm_shortcut([], _PosMap) -> [].

Expand Down
Loading

0 comments on commit 6a9ebe6

Please sign in to comment.