From bcd1cf92d3695cc7d30710fd394e5e0f9ec79c5e Mon Sep 17 00:00:00 2001 From: Daniel Frederico Lins Leite Date: Fri, 5 Jul 2024 21:59:07 +0100 Subject: [PATCH] `match` for string slices (#6202) ## Description This PR implements `match` for string slices including radix trie optimization and is a task of https://github.com/FuelLabs/sway/issues/5110. For example a simple `match` like ``` fn return_match_on_str_slice(param: str) -> u64 { match param { "get_a" => { 1u64 }, "get_a_b" => { 2u64 }, "get_b" => { 3u64 }, _ => { 1000u64 }, } } ``` will generate code following this logic: ``` let packed_string = "get_a_b" if str.len() == 5 if str[0..4] == "get_" at packed_string[0] if str[4..5] == "b" at packed_string[6] return branch 2 if str[4..5] == "a" at packed_string[4] return branch 0 return wildcard branch return wildcard branch if str.len() == 7 if str[0..7] == "get_a_b" at packed_string[0] return branch 1 return wildcard branch return wildcard branch ``` In logical terms, this boils down to checking the length and an `O(N)` check on the string, although the bytecode will be more complex because of all the branches. Another interesting optimization is the "packed string literal" that coalesces all "match arms string slices" into just one string. In the case above, given that one of the arms contains all the necessary strings for all other comparisons, we will create just one string literal, saving a lot of bytes in the data section. The section below describes how `rustc` deals with this desugaring. I think these choices make more sense to us for two reasons: 1 - Avoiding testing common prefixes multiple times will spend less gas in general (needs more testing); 2 - packing all strings will decrease the data section size. 
This is the bytecode generated in this case: ``` fn return_match_on_str_slice(param: str) -> u64 { match param { "get_a" => { 1u64 }, "get_a_b" => { 2u64 }, "get_b" => { 3u64 }, _ => { 1000u64 }, } } @ /home/xunilrj/github/sway/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/src/main.sw:22:1 0x0000017c PSHL 0xf ;; [149, 0, 0, 15] 0x00000180 PSHH 0x80000 ;; [150, 8, 0, 0] 0x00000184 MOVE R59 $sp ;; [26, 236, 80, 0] 0x00000188 CFEI 0x90 ;; [145, 0, 0, 144] 0x0000018c MOVE $writable R58 ;; [26, 67, 160, 0] 0x00000190 MOVE R19 R62 ;; [26, 79, 224, 0] match param { "get_a" => { 1u64 }, "get_a_b" => { 2u64 }, "get_b" => { 3u64 }, _ => { 1000u64 }, } @ /home/xunilrj/github/sway/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/src/main.sw:23:5 0x00000194 ADDI R17 R59 0x80 ;; 0x00000198 MOVI R18 0x10 ;; 0x0000019c MCP R17 $writable R18 ;; 0x000001a0 MOVI R17 0x7 ;; 0x7 = "get_a_b".len() @ :1:1 0x000001a4 LW $writable R59 0x11 ;; R59 + 0x11 = a.len() 0x000001a8 EQ $writable $writable R17 ;; a.len() == 0x7 0x000001ac JNZF $writable $zero 0x3c ;; if true jump to 2a0 
0x000001b0 MOVI R17 0x5 ;; we have two arms with length equals 0x5 0x000001b4 LW $writable R59 0x11 ;; R59 + 0x11 = a.len() 0x000001b8 EQ $writable $writable R17 ;; a.len() == 0x5 0x000001bc MOVI R17 0x3e8 ;; 0x3e8 = 1000 (wildcard return value) 0x000001c0 JNZF $writable $zero 0x1 ;; if true jump to 1c8 0x000001c4 JMPF $zero 0x35 ;; if false jump to 29c (will return R17) 0x000001c8 LW $writable R63 0x3 ;; R63 = start of data section, will load 13c 0x000001cc ADD $writable $writable $pc ;; $writable = 0x308 = packed strings 0x000001d0 ADDI R17 R59 0x20 ;; 0x000001d4 SW R59 $writable 0x4 ;; R59 + 0x4 = packed strings 0x000001d8 MOVI $writable 0x7 ;; 0x000001dc SW R59 $writable 0x5 ;; R59 + 0x5 = 0x7 0x000001e0 ADDI $writable R59 0x30 ;; 0x000001e4 MOVI R18 0x10 ;; 0x000001e8 MCP $writable R17 R18 ;; R59 + 0x30 = R59 + 0x20 0x000001ec MOVI R18 0x4 ;; 0x4 = "get_".len() 0x000001f0 LW $writable R59 0x10 ;; 0x000001f4 ADDI $writable $writable 0x0 ;; 0x000001f8 LW R17 R59 0x6 ;; R17 = a.ptr() 0x000001fc ADDI R17 R17 0x0 ;; 0x00000200 MEQ $writable $writable R17 R18 ;; a[0..4] = packed[0..4] 0x00000204 MOVI R17 0x3e8 ;; 0x3e8 = 1000 (wildcard return value) 0x00000208 JNZF $writable $zero 0x1 ;; if true jump to 210 0x0000020c JMPF $zero 0x23 ;; if false jump to 29c (will return R17) .... 
.data_section: 0x00000300 .bytes as hex ([]), len i0, as ascii "" 0x00000300 .word i18446744073709486084, as hex be bytes ([FF, FF, FF, FF, FF, FF, 00, 04]) 0x00000308 .bytes as hex ([67, 65, 74, 5F, 61, 5F, 62]), len i7, as ascii "get_a_b" 0x00000310 .word i500, as hex be bytes ([00, 00, 00, 00, 00, 00, 01, F4]) 0x00000318 .word i316, as hex be bytes ([00, 00, 00, 00, 00, 00, 01, 3C]) 0x00000320 .word i244, as hex be bytes ([00, 00, 00, 00, 00, 00, 00, F4]) 0x00000328 .word i176, as hex be bytes ([00, 00, 00, 00, 00, 00, 00, B0]) 0x00000330 .word i100, as hex be bytes ([00, 00, 00, 00, 00, 00, 00, 64]) ``` ## How `rustc` desugars `match` For comparison, this is the generated ASM with comments on how Rust tackles this. First, this is the function used: ``` #[inline(never)] fn f(a: &str) -> u64 { match a { "get_method" => 0, "get_tokens" => 1, "get_something_else" => 2, "get_tokens_2" => 3, "clear" => 4, "get_m" => 5, _ => 6, } } ``` This is the LLVM IR generated. There is a match on the length of each string slice arm. The valid range is (5, 18); everything outside of it falls into the wildcard match arm. This range will be important later. 
``` define internal fastcc noundef i64 @example::f::hdb860bcd6d383112(ptr noalias nocapture noundef nonnull readonly align 1 %a.0, i64 noundef %a.1) unnamed_addr { start: switch i64 %a.1, label %bb13 [ i64 10, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h510120b4d3581de7E.exit" i64 18, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h510120b4d3581de7E.exit30" i64 12, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h510120b4d3581de7E.exit35" i64 5, label %"_ZN73_$LT$$u5b$A$u5d$$u20$as$u20$core..slice..cmp..SlicePartialEq$LT$B$GT$$GT$5equal17h510120b4d3581de7E.exit40" ] ``` This is how `f` is called: ``` mov rbx, qword ptr [rsp + 32] mov r14, qword ptr [rsp + 40] mov rsi, qword ptr [rsp + 48] <- length of the string slice mov rdi, r14 <- ptr to string slice call _ZN4main1f17h126a5dfd4e318ebcE ``` This is the body of `f`. `ja .LBB8_12` jumps into a simple return, returning EAX as 6. It is the wildcard return value. The cleverness of this is that when `RSI` is smaller than 5, it will become negative (because of `add rsi, -5`), which is a huge value when interpreted as an unsigned integer, and will therefore also trigger `JA` (which stands for `Jump Above`, an unsigned comparison), effectively jumping when the slice length is outside of the expected range, which is (5, 18). After that, it uses a jump table based on the string length minus 5. Everywhere the string length is invalid, the jump address is `.LBB8_12`, still returning `EAX` as 6. 
``` _ZN4main1f17h126a5dfd4e318ebcE: .cfi_startproc mov eax, 6 add rsi, -5 cmp rsi, 13 ja .LBB8_12 lea rcx, [rip + .LJTI8_0] movsxd rdx, dword ptr [rcx + 4*rsi] add rdx, rcx jmp rdx ``` ``` .LBB8_12: ret ``` This is the jump table used: ``` .LJTI8_0: .long .LBB8_9-.LJTI8_0 .long .LBB8_12-.LJTI8_0 .long .LBB8_12-.LJTI8_0 .long .LBB8_12-.LJTI8_0 .long .LBB8_12-.LJTI8_0 .long .LBB8_2-.LJTI8_0 <- 5th entry is length = 10 (remember we add -5 to the length) .long .LBB8_12-.LJTI8_0 .long .LBB8_8-.LJTI8_0 .long .LBB8_12-.LJTI8_0 .long .LBB8_12-.LJTI8_0 .long .LBB8_12-.LJTI8_0 .long .LBB8_12-.LJTI8_0 .long .LBB8_12-.LJTI8_0 .long .LBB8_6-.LJTI8_0 ``` The interesting entry is entry 5, which has two strings: "get_method" and "get_tokens". Here we can see that `rustc` actually compares the complete string slice twice, even though the two strings share a common prefix. ``` .LBB8_2: movabs rcx, 7526752397670245735=6874656D5F746567="htem_teg" (inverted "get_meth") xor rcx, qword ptr [rdi] movzx edx, word ptr [rdi + 8] xor rdx, 25711=646F="do" (inverted "od") or rdx, rcx je .LBB8_3 movabs rcx, 7308057365947114855=656B6F745F746567="ekot_teg" (inverted "get_toke") xor rcx, qword ptr [rdi] movzx edx, word ptr [rdi + 8] xor rdx, 29550=736E="sn" (inverted "ns") or rdx, rcx je .LBB8_5 ``` ``` .LBB8_3: xor eax, eax <- returns 0 ret ``` ``` .LBB8_5: mov eax, 1 <- returns 1 ret ``` This is comparable to what `clang` is doing: https://github.com/rust-lang/rust/issues/61961 ## Code and Bytecode This PR also implements code printing when printing bytecode. For now this is only enabled for tests. 
It generates something like: ``` match param { "get_a" => { 1u64 }, "get_a_b" => { 2u64 }, "get_b" => { 3u64 }, _ => { 1000u64 }, } @ /home/xunilrj/github/sway/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/src/main.sw:23:5 0x00000194 ADDI R17 R59 0x80 ;; 0x00000198 MOVI R18 0x10 ;; 0x0000019c MCP R17 $writable R18 ;; 0x000001a0 MOVI R17 0x7 ;; 0x7 = "get_a_b".len() @ :1:1 0x000001a4 LW $writable R59 0x11 ;; R59 + 0x11 = a.len() 0x000001a8 EQ $writable $writable R17 ;; a.len() == 0x7 ``` As we can see, not great, but helpful nonetheless. We can (should?) improve this by better "carrying" spans in all transformations and lowerings. ## Checklist - [x] I have linked to any relevant issues. - [x] I have commented my code, particularly in hard-to-understand areas. - [ ] I have updated the documentation where relevant (API docs, the reference, and the Sway book). - [ ] If my change requires substantial documentation changes, I have [requested support from the DevRel team](https://github.com/FuelLabs/devrel-requests/issues/new/choose) - [ ] I have added tests that prove my fix is effective or that my feature works. - [ ] I have added (or requested a maintainer to add) the necessary `Breaking*` or `New Feature` labels where relevant. - [ ] I have done my best to ensure that my PR adheres to [the Fuel Labs Code Review Standards](https://github.com/FuelLabs/rfcs/blob/master/text/code-standards/external-contributors.md). - [ ] I have requested a review from the relevant team or maintainers. 
--------- Co-authored-by: Joshua Batty Co-authored-by: IGI-111 --- forc-pkg/src/manifest/build_profile.rs | 5 + forc-pkg/src/pkg.rs | 8 +- forc-plugins/forc-client/src/op/deploy.rs | 1 + forc-plugins/forc-client/src/op/run/mod.rs | 1 + forc/src/cli/commands/test.rs | 1 + forc/src/ops/forc_build.rs | 1 + forc/src/ops/forc_contract_id.rs | 1 + forc/src/ops/forc_predicate_root.rs | 1 + sway-core/src/asm_generation/finalized_asm.rs | 107 +++- .../allocated_abstract_instruction_set.rs | 8 +- .../src/asm_generation/fuel/functions.rs | 6 +- sway-core/src/build_config.rs | 7 +- .../match_expression/analysis/usefulness.rs | 2 +- .../typed/typed_match_expression.rs | 533 +++++++++++++++++- .../ast_node/expression/typed_expression.rs | 8 +- sway-core/src/type_system/info.rs | 4 +- sway-ir/src/error.rs | 29 +- sway-ir/src/printer.rs | 47 +- sway-ir/src/verify.rs | 61 +- test/src/e2e_vm_tests/harness.rs | 1 + .../Forc.lock | 6 +- .../Forc.toml | 2 +- .../json_abi_oracle.json | 0 .../json_abi_oracle_new_encoding.json | 4 +- .../src/main.sw | 30 +- .../test.toml | 4 +- 26 files changed, 805 insertions(+), 73 deletions(-) rename test/src/e2e_vm_tests/test_programs/should_pass/language/{match_expressions => match_expressions_all}/Forc.lock (56%) rename test/src/e2e_vm_tests/test_programs/should_pass/language/{match_expressions => match_expressions_all}/Forc.toml (88%) rename test/src/e2e_vm_tests/test_programs/should_pass/language/{match_expressions => match_expressions_all}/json_abi_oracle.json (100%) rename test/src/e2e_vm_tests/test_programs/should_pass/language/{match_expressions => match_expressions_all}/json_abi_oracle_new_encoding.json (88%) rename test/src/e2e_vm_tests/test_programs/should_pass/language/{match_expressions => match_expressions_all}/src/main.sw (64%) rename test/src/e2e_vm_tests/test_programs/should_pass/language/{match_expressions => match_expressions_all}/test.toml (57%) diff --git a/forc-pkg/src/manifest/build_profile.rs 
b/forc-pkg/src/manifest/build_profile.rs index 0922ba59124..def146b7295 100644 --- a/forc-pkg/src/manifest/build_profile.rs +++ b/forc-pkg/src/manifest/build_profile.rs @@ -24,6 +24,8 @@ pub struct BuildProfile { #[serde(default)] pub print_bytecode: bool, #[serde(default)] + pub print_bytecode_spans: bool, + #[serde(default)] pub terse: bool, #[serde(default)] pub time_phases: bool, @@ -57,6 +59,7 @@ impl BuildProfile { print_ir: PrintIr::default(), print_asm: PrintAsm::default(), print_bytecode: false, + print_bytecode_spans: false, terse: false, time_phases: false, metrics_outfile: None, @@ -80,6 +83,7 @@ impl BuildProfile { print_ir: PrintIr::default(), print_asm: PrintAsm::default(), print_bytecode: false, + print_bytecode_spans: false, terse: false, time_phases: false, metrics_outfile: None, @@ -152,6 +156,7 @@ mod tests { print_ir: PrintIr::r#final(), print_asm: PrintAsm::all(), print_bytecode: true, + print_bytecode_spans: false, terse: true, time_phases: true, metrics_outfile: Some("metrics_outfile".into()), diff --git a/forc-pkg/src/pkg.rs b/forc-pkg/src/pkg.rs index bf5baadbf6e..c887cc68720 100644 --- a/forc-pkg/src/pkg.rs +++ b/forc-pkg/src/pkg.rs @@ -263,6 +263,8 @@ pub struct PrintOpts { pub asm: PrintAsm, /// Print the bytecode. This is the final output of the compiler. pub bytecode: bool, + /// Print the original source code together with bytecode. + pub bytecode_spans: bool, /// Print the generated Sway IR (Intermediate Representation). pub ir: PrintIr, /// Output build errors and warnings in reverse order. 
@@ -1557,7 +1559,10 @@ pub fn sway_build_config( .with_print_dca_graph(build_profile.print_dca_graph.clone()) .with_print_dca_graph_url_format(build_profile.print_dca_graph_url_format.clone()) .with_print_asm(build_profile.print_asm) - .with_print_bytecode(build_profile.print_bytecode) + .with_print_bytecode( + build_profile.print_bytecode, + build_profile.print_bytecode_spans, + ) .with_print_ir(build_profile.print_ir.clone()) .with_include_tests(build_profile.include_tests) .with_time_phases(build_profile.time_phases) @@ -2087,6 +2092,7 @@ fn build_profile_from_opts( profile.print_ir |= print.ir.clone(); profile.print_asm |= print.asm; profile.print_bytecode |= print.bytecode; + profile.print_bytecode_spans |= print.bytecode_spans; profile.terse |= pkg.terse; profile.time_phases |= time_phases; if profile.metrics_outfile.is_none() { diff --git a/forc-plugins/forc-client/src/op/deploy.rs b/forc-plugins/forc-client/src/op/deploy.rs index bb7a95cc549..e8a33e9644a 100644 --- a/forc-plugins/forc-client/src/op/deploy.rs +++ b/forc-plugins/forc-client/src/op/deploy.rs @@ -355,6 +355,7 @@ fn build_opts_from_cmd(cmd: &cmd::Deploy) -> pkg::BuildOpts { dca_graph_url_format: cmd.print.dca_graph_url_format.clone(), asm: cmd.print.asm(), bytecode: cmd.print.bytecode, + bytecode_spans: false, ir: cmd.print.ir(), reverse_order: cmd.print.reverse_order, }, diff --git a/forc-plugins/forc-client/src/op/run/mod.rs b/forc-plugins/forc-client/src/op/run/mod.rs index 317afd80687..49047de3a37 100644 --- a/forc-plugins/forc-client/src/op/run/mod.rs +++ b/forc-plugins/forc-client/src/op/run/mod.rs @@ -231,6 +231,7 @@ fn build_opts_from_cmd(cmd: &cmd::Run) -> pkg::BuildOpts { dca_graph_url_format: cmd.print.dca_graph_url_format.clone(), asm: cmd.print.asm(), bytecode: cmd.print.bytecode, + bytecode_spans: false, ir: cmd.print.ir(), reverse_order: cmd.print.reverse_order, }, diff --git a/forc/src/cli/commands/test.rs b/forc/src/cli/commands/test.rs index 86387522029..34245c803df 100644 --- 
a/forc/src/cli/commands/test.rs +++ b/forc/src/cli/commands/test.rs @@ -242,6 +242,7 @@ fn opts_from_cmd(cmd: Command) -> forc_test::TestOpts { dca_graph_url_format: cmd.build.print.dca_graph_url_format.clone(), asm: cmd.build.print.asm(), bytecode: cmd.build.print.bytecode, + bytecode_spans: false, ir: cmd.build.print.ir(), reverse_order: cmd.build.print.reverse_order, }, diff --git a/forc/src/ops/forc_build.rs b/forc/src/ops/forc_build.rs index 35a03b611a7..91e316c37ba 100644 --- a/forc/src/ops/forc_build.rs +++ b/forc/src/ops/forc_build.rs @@ -26,6 +26,7 @@ fn opts_from_cmd(cmd: BuildCommand) -> pkg::BuildOpts { dca_graph_url_format: cmd.build.print.dca_graph_url_format.clone(), asm: cmd.build.print.asm(), bytecode: cmd.build.print.bytecode, + bytecode_spans: false, ir: cmd.build.print.ir(), reverse_order: cmd.build.print.reverse_order, }, diff --git a/forc/src/ops/forc_contract_id.rs b/forc/src/ops/forc_contract_id.rs index 3a6d975ac40..51e9a7cd708 100644 --- a/forc/src/ops/forc_contract_id.rs +++ b/forc/src/ops/forc_contract_id.rs @@ -61,6 +61,7 @@ fn build_opts_from_cmd(cmd: &ContractIdCommand) -> pkg::BuildOpts { dca_graph_url_format: cmd.print.dca_graph_url_format.clone(), asm: cmd.print.asm(), bytecode: cmd.print.bytecode, + bytecode_spans: false, ir: cmd.print.ir(), reverse_order: cmd.print.reverse_order, }, diff --git a/forc/src/ops/forc_predicate_root.rs b/forc/src/ops/forc_predicate_root.rs index febf7b96ca6..efca0f92bec 100644 --- a/forc/src/ops/forc_predicate_root.rs +++ b/forc/src/ops/forc_predicate_root.rs @@ -30,6 +30,7 @@ fn build_opts_from_cmd(cmd: PredicateRootCommand) -> pkg::BuildOpts { dca_graph_url_format: cmd.print.dca_graph_url_format.clone(), asm: cmd.print.asm(), bytecode: cmd.print.bytecode, + bytecode_spans: false, ir: cmd.print.ir(), reverse_order: cmd.print.reverse_order, }, diff --git a/sway-core/src/asm_generation/finalized_asm.rs b/sway-core/src/asm_generation/finalized_asm.rs index 2fc0fe4326d..c3304b4fd53 100644 --- 
a/sway-core/src/asm_generation/finalized_asm.rs +++ b/sway-core/src/asm_generation/finalized_asm.rs @@ -4,6 +4,7 @@ use super::{ fuel::{checks, data_section::DataSection}, ProgramABI, ProgramKind, }; +use crate::asm_generation::fuel::data_section::{DataId, Datum, Entry}; use crate::asm_lang::allocated_ops::{AllocatedOp, AllocatedOpcode}; use crate::decl_engine::DeclRefFunction; use crate::source_map::SourceMap; @@ -151,6 +152,13 @@ fn to_bytecode_mut( println!(";; --- START OF TARGET BYTECODE ---\n"); } + let mut last_span = None; + let mut indentation = if build_config.print_bytecode_spans { + 4 + } else { + 0 + }; + let mut half_word_ix = 0; let mut offset_from_instr_start = 0; for op in ops.iter() { @@ -165,7 +173,7 @@ fn to_bytecode_mut( match fuel_op { Either::Right(data) => { if build_config.print_bytecode { - print!("{:#010x} ", bytecode.len()); + print!("{}{:#010x} ", " ".repeat(indentation), bytecode.len()); println!( " ;; {:?}", data @@ -181,8 +189,45 @@ fn to_bytecode_mut( } Either::Left(instructions) => { for instruction in instructions { + // Print original source span only once + if build_config.print_bytecode_spans { + last_span = match (last_span, &span) { + (None, Some(span)) => { + indentation = 4; + let line_col = span.start_pos().line_col(); + println!( + "{} @ {}:{}:{}", + span.as_str(), + span.source_id() + .map(|source_id| source_engine.get_path(source_id)) + .map(|x| x.display().to_string()) + .unwrap_or("".to_string()), + line_col.line, + line_col.col + ); + Some(span.clone()) + } + (Some(last), Some(span)) if last != *span => { + indentation = 4; + let line_col = span.start_pos().line_col(); + println!( + "{} @ {}:{}:{}", + span.as_str(), + span.source_id() + .map(|source_id| source_engine.get_path(source_id)) + .map(|x| x.display().to_string()) + .unwrap_or("".to_string()), + line_col.line, + line_col.col + ); + Some(span.clone()) + } + (last, _) => last, + }; + } + if build_config.print_bytecode { - print!("{:#010x} ", bytecode.len()); + 
print!("{}{:#010x} ", " ".repeat(indentation), bytecode.len()); print_instruction(&instruction); } @@ -202,8 +247,64 @@ fn to_bytecode_mut( } } } + if build_config.print_bytecode { - println!("{}", data_section); + println!(".data_section:"); + + let offset = bytecode.len(); + + fn print_entry(indentation: usize, offset: usize, pair: &Entry) { + print!("{}{:#010x} ", " ".repeat(indentation), offset); + + match &pair.value { + Datum::Byte(w) => println!(".byte i{w}, as hex {w:02X}"), + Datum::Word(w) => { + println!(".word i{w}, as hex be bytes ({:02X?})", w.to_be_bytes()) + } + Datum::ByteArray(bs) => { + print!(".bytes as hex ({bs:02X?}), len i{}, as ascii \"", bs.len()); + + for b in bs { + print!( + "{}", + if *b == b' ' || b.is_ascii_graphic() { + *b as char + } else { + '.' + } + ); + } + println!("\""); + } + Datum::Slice(bs) => { + print!(".slice as hex ({bs:02X?}), len i{}, as ascii \"", bs.len()); + + for b in bs { + print!( + "{}", + if *b == b' ' || b.is_ascii_graphic() { + *b as char + } else { + '.' 
+ } + ); + } + println!("\""); + } + Datum::Collection(els) => { + println!(".collection"); + for e in els { + print_entry(indentation + 1, offset, e); + } + } + }; + } + + for (i, entry) in data_section.value_pairs.iter().enumerate() { + let entry_offset = data_section.data_id_to_offset(&DataId(i as u32)); + print_entry(indentation, offset + entry_offset, entry); + } + println!(";; --- END OF TARGET BYTECODE ---\n"); } diff --git a/sway-core/src/asm_generation/fuel/allocated_abstract_instruction_set.rs b/sway-core/src/asm_generation/fuel/allocated_abstract_instruction_set.rs index 38466cf3edc..bf0627de013 100644 --- a/sway-core/src/asm_generation/fuel/allocated_abstract_instruction_set.rs +++ b/sway-core/src/asm_generation/fuel/allocated_abstract_instruction_set.rs @@ -121,14 +121,14 @@ impl AllocatedAbstractInstructionSet { new_ops.push(AllocatedAbstractOp { opcode: Either::Left(AllocatedOpcode::PSHL(mask_l)), comment: "Save registers 16..40".into(), - owning_span: None, + owning_span: op.owning_span.clone(), }); } if mask_h.value != 0 { new_ops.push(AllocatedAbstractOp { opcode: Either::Left(AllocatedOpcode::PSHH(mask_h)), comment: "Save registers 40..64".into(), - owning_span: None, + owning_span: op.owning_span.clone(), }); } } @@ -147,14 +147,14 @@ impl AllocatedAbstractInstructionSet { new_ops.push(AllocatedAbstractOp { opcode: Either::Left(AllocatedOpcode::POPH(mask_h)), comment: "Restore registers 40..64".into(), - owning_span: None, + owning_span: op.owning_span.clone(), }); } if mask_l.value != 0 { new_ops.push(AllocatedAbstractOp { opcode: Either::Left(AllocatedOpcode::POPL(mask_l)), comment: "Restore registers 16..40".into(), - owning_span: None, + owning_span: op.owning_span.clone(), }); } } diff --git a/sway-core/src/asm_generation/fuel/functions.rs b/sway-core/src/asm_generation/fuel/functions.rs index fed3a7ed0a3..5939535c716 100644 --- a/sway-core/src/asm_generation/fuel/functions.rs +++ b/sway-core/src/asm_generation/fuel/functions.rs @@ -274,8 
+274,8 @@ impl<'ir, 'eng> FuelAsmBuilder<'ir, 'eng> { function.get_name(self.context) ); - self.cur_bytecode.push(match span { - Some(span) => Op::jump_label_comment(start_label, span, comment), + self.cur_bytecode.push(match &span { + Some(span) => Op::jump_label_comment(start_label, span.clone(), comment), None => Op::unowned_jump_label_comment(start_label, comment), }); @@ -285,7 +285,7 @@ impl<'ir, 'eng> FuelAsmBuilder<'ir, 'eng> { self.cur_bytecode.push(Op { opcode: Either::Right(OrganizationalOp::PushAll(start_label)), comment: "save all regs".to_owned(), - owning_span: None, + owning_span: span.clone(), }); } diff --git a/sway-core/src/build_config.rs b/sway-core/src/build_config.rs index b79f2112d66..ab1b3f4b78b 100644 --- a/sway-core/src/build_config.rs +++ b/sway-core/src/build_config.rs @@ -187,6 +187,7 @@ pub struct BuildConfig { pub(crate) print_dca_graph_url_format: Option, pub(crate) print_asm: PrintAsm, pub(crate) print_bytecode: bool, + pub(crate) print_bytecode_spans: bool, pub(crate) print_ir: PrintIr, pub(crate) include_tests: bool, pub(crate) optimization_level: OptLevel, @@ -234,6 +235,7 @@ impl BuildConfig { print_dca_graph_url_format: None, print_asm: PrintAsm::default(), print_bytecode: false, + print_bytecode_spans: false, print_ir: PrintIr::default(), include_tests: false, time_phases: false, @@ -264,9 +266,10 @@ impl BuildConfig { Self { print_asm, ..self } } - pub fn with_print_bytecode(self, a: bool) -> Self { + pub fn with_print_bytecode(self, bytecode: bool, bytecode_spans: bool) -> Self { Self { - print_bytecode: a, + print_bytecode: bytecode, + print_bytecode_spans: bytecode_spans, ..self } } diff --git a/sway-core/src/semantic_analysis/ast_node/expression/match_expression/analysis/usefulness.rs b/sway-core/src/semantic_analysis/ast_node/expression/match_expression/analysis/usefulness.rs index 0f240e6d15c..d403b2c0aef 100644 --- a/sway-core/src/semantic_analysis/ast_node/expression/match_expression/analysis/usefulness.rs +++ 
b/sway-core/src/semantic_analysis/ast_node/expression/match_expression/analysis/usefulness.rs @@ -183,7 +183,7 @@ use super::{ /// /// # Details /// -/// This algorithm checks is a match expression is exhaustive and if its match +/// This algorithm checks if a match expression is exhaustive and if its match /// arms are reachable by applying the above definitions of usefulness and /// witnesses. This algorithm sequentially creates a [WitnessReport] for every /// match arm by calling *U(P, q)*, where *P* is the [Matrix] of patterns seen diff --git a/sway-core/src/semantic_analysis/ast_node/expression/match_expression/typed/typed_match_expression.rs b/sway-core/src/semantic_analysis/ast_node/expression/match_expression/typed/typed_match_expression.rs index 9f415035b65..7abdfbc54a3 100644 --- a/sway-core/src/semantic_analysis/ast_node/expression/match_expression/typed/typed_match_expression.rs +++ b/sway-core/src/semantic_analysis/ast_node/expression/match_expression/typed/typed_match_expression.rs @@ -1,20 +1,53 @@ -use std::ops::ControlFlow; - -use sway_error::handler::{ErrorEmitted, Handler}; -use sway_types::{Span, Spanned}; - use crate::{ compiler_generated::INVALID_DESUGARED_MATCHED_EXPRESSION_SIGNAL, language::{ parsed::*, - ty::{self, TyExpression}, + ty::{ + self, TyAsmRegisterDeclaration, TyExpression, TyExpressionVariant, + TyIntrinsicFunctionKind, + }, + AsmOp, AsmRegister, }, semantic_analysis::{ ast_node::expression::typed_expression::instantiate_if_expression, expression::match_expression::typed::instantiate::Instantiate, TypeCheckContext, }, - CompileError, TypeId, + CompileError, TypeId, TypeInfo, }; +use std::{collections::BTreeMap, ops::ControlFlow}; +use sway_error::handler::{ErrorEmitted, Handler}; +use sway_types::{BaseIdent, Ident, Span, Spanned}; + +// Enable this to see a pseudo-code printed to understand what is being generated. 
+const RADIX_TREE_DEBUG: bool = true; + +#[derive(Default, Debug, Clone)] +struct TrieNode { + output: Option, + previous: Option, + next: BTreeMap, +} + +struct Trie { + nodes: Vec, +} + +fn revert(never_type_id: TypeId, u64_type_id: TypeId) -> TyExpression { + TyExpression { + expression: TyExpressionVariant::IntrinsicFunction(TyIntrinsicFunctionKind { + kind: sway_ast::Intrinsic::Revert, + arguments: vec![TyExpression { + expression: TyExpressionVariant::Literal(crate::language::Literal::U64(17)), + return_type: u64_type_id, + span: Span::dummy(), + }], + type_arguments: vec![], + span: Span::dummy(), + }), + return_type: never_type_id, + span: Span::dummy(), + } +} impl ty::TyMatchExpression { pub(crate) fn type_check( @@ -58,7 +91,7 @@ impl ty::TyMatchExpression { Ok((typed_exp, typed_scrutinees)) } - pub(crate) fn convert_to_typed_if_expression( + pub(crate) fn desugar( self, handler: &Handler, ctx: TypeCheckContext, @@ -76,14 +109,490 @@ impl ty::TyMatchExpression { ); } - let typed_if_exp = handler.scope(|handler| { - self.convert_to_typed_if_expression_inner(instantiate, ctx, handler) - })?; + let typed_if_exp = + handler.scope( + |handler| match &*ctx.engines().te().get(self.value_type_id) { + TypeInfo::StringSlice => self.desugar_to_radix_trie(ctx), + _ => self.desugar_to_typed_if_expression(instantiate, ctx, handler), + }, + )?; Ok(typed_if_exp) } - fn convert_to_typed_if_expression_inner( + fn desugar_to_radix_trie( + &self, + mut ctx: TypeCheckContext<'_>, + ) -> Result { + let never_type_id = ctx.engines.te().insert(ctx.engines, TypeInfo::Never, None); + + let u64_type_id = ctx.engines.te().insert( + ctx.engines, + TypeInfo::UnsignedInteger(sway_types::integer_bits::IntegerBits::SixtyFour), + None, + ); + + let bool_type_id = ctx + .engines + .te() + .insert(ctx.engines, TypeInfo::Boolean, None); + + let branch_return_type_id = self + .branches + .iter() + .map(|x| x.result.return_type) + .next() + .unwrap(); + + let matched_value = self + .branches 
+ .iter() + .flat_map(|x| match &x.condition.as_ref().map(|x| &x.expression) { + Some(TyExpressionVariant::FunctionApplication { arguments, .. }) => { + Some(&arguments[0].1) + } + _ => None, + }) + .next() + .unwrap(); + + // the block for the wildcard arm + let wildcard_return_expr = self + .branches + .iter() + .filter(|x| x.condition.is_none()) + .map(|x| x.result.clone()) + .next() + .unwrap_or_else(|| revert(never_type_id, u64_type_id)); + + // All the match string slices, ignoring the wildcard + let match_arms_string_slices = self + .branches + .iter() + .flat_map(|x| match &x.condition.as_ref().map(|x| &x.expression) { + Some(TyExpressionVariant::FunctionApplication { arguments, .. }) => { + match &arguments[1].1.expression { + TyExpressionVariant::Literal(crate::language::Literal::String(v)) => { + Some(v.as_str().to_string()) + } + _ => None, + } + } + _ => None, + }) + .collect::>(); + + // group match arms by size of the arm string slice + let match_arms_by_size = match_arms_string_slices.iter().enumerate().fold( + BTreeMap::>::new(), + |mut map, (i, item)| { + map.entry(item.len()).or_default().push((item.clone(), i)); + map + }, + ); + + // create and compress all tries. One per arm size + let tries = match_arms_by_size + .values() + .map(|branches| self.generate_radix_trie(branches).unwrap()) + .collect::>(); + + // Navigate all valid nodes and collect string pieces. + // Then pack them starting from the biggest. 
+ let mut string_pieces = tries + .iter() + .flat_map(|x| x.nodes.iter()) + .flat_map(|x| x.next.keys().cloned()) + .collect::>(); + string_pieces.sort_by(|l, r| l.len().cmp(&r.len()).reverse()); + let packed_strings = string_pieces + .into_iter() + .fold(String::new(), |mut pack, item| { + if !pack.contains(&item) { + pack.push_str(&item); + } + pack + }); + + if RADIX_TREE_DEBUG { + println!("let packed_string = {packed_strings:?}"); + } + + // Now create the outer expression checking the size of the string slice + let mut block = wildcard_return_expr.clone(); + + for ((k, _), trie) in match_arms_by_size.into_iter().zip(tries.into_iter()) { + if RADIX_TREE_DEBUG { + println!("if str.len() == {k}"); + } + + let expression = TyExpressionVariant::AsmExpression { + registers: vec![ + TyAsmRegisterDeclaration { + name: Ident::new_no_span("is_eq".into()), + initializer: None, + }, + TyAsmRegisterDeclaration { + name: Ident::new_no_span("slice".into()), + initializer: Some(matched_value.clone()), + }, + TyAsmRegisterDeclaration { + name: Ident::new_no_span("len".into()), + initializer: None, + }, + TyAsmRegisterDeclaration { + name: Ident::new_no_span("expected_len".into()), + initializer: Some(TyExpression { + expression: TyExpressionVariant::Literal( + crate::language::Literal::U64(k as u64), + ), + return_type: u64_type_id, + span: Span::dummy(), + }), + }, + ], + body: vec![ + AsmOp { + op_name: Ident::new_no_span("lw".into()), + op_args: vec![ + BaseIdent::new_no_span("len".into()), + BaseIdent::new_no_span("slice".into()), + ], + immediate: Some(BaseIdent::new_no_span("i1".into())), + span: Span::dummy(), + }, + AsmOp { + op_name: Ident::new_no_span("eq".into()), + op_args: vec![ + BaseIdent::new_no_span("is_eq".into()), + BaseIdent::new_no_span("len".into()), + BaseIdent::new_no_span("expected_len".into()), + ], + immediate: None, + span: Span::dummy(), + }, + ], + returns: Some(( + AsmRegister { + name: "is_eq".into(), + }, + Span::dummy(), + )), + 
whole_block_span: self.span.clone(), + }; + + let then_node = self + .generate_radix_tree_checks( + ctx.by_ref(), + matched_value, + u64_type_id, + branch_return_type_id, + wildcard_return_expr.clone(), + trie, + &packed_strings, + ) + .unwrap(); + + block = TyExpression { + expression: TyExpressionVariant::IfExp { + condition: Box::new(TyExpression { + expression, + return_type: bool_type_id, + span: self.span.clone(), + }), + then: Box::new(then_node), + r#else: Some(Box::new(block)), + }, + return_type: branch_return_type_id, + span: self.span.clone(), + }; + } + + if RADIX_TREE_DEBUG { + println!("return wildcard branch"); + } + + Ok(block) + } + + #[allow(clippy::too_many_arguments)] + fn generate_radix_trie(&self, branches: &[(String, usize)]) -> Result { + let mut nodes = vec![TrieNode::default()]; + + for (b, i) in branches.iter() { + let mut current = 0; + for c in b.chars() { + let c = c.to_string(); + if let Some(next) = nodes[current].next.get(&c) { + current = *next; + continue; + } + + let next = nodes.len(); + nodes[current].next.insert(c, next); + current = next; + nodes.push(TrieNode::default()); + } + + nodes[current].output = Some(*i); + } + + // compress trie + let mut q = vec![0]; + while let Some(i) = q.pop() { + let mut current = nodes[i].clone(); + if current.next.len() == 1 { + let edge = current.next.pop_first().unwrap(); + let mut next = nodes[edge.1].clone(); + if next.next.len() == 1 { + let next_edge = next.next.pop_first().unwrap(); + let compressed_key = format!("{}{}", edge.0, next_edge.0); + + nodes[i].next.clear(); + nodes[i].next.insert(compressed_key, next_edge.1); + nodes[i].output = next.output.take(); + + q.push(i); + } else { + nodes[edge.1].previous = Some(i); + q.push(edge.1); + } + } else { + for (_, v) in current.next.iter() { + nodes[*v].previous = Some(i); + q.push(*v); + } + } + } + + Ok(Trie { nodes }) + } + + #[allow(clippy::too_many_arguments)] + fn generate_radix_tree_checks( + &self, + ctx: TypeCheckContext<'_>, 
+ matched_value: &TyExpression, + u64_type_id: TypeId, + branch_return_type_id: TypeId, + wildcard_return_expr: TyExpression, + trie: Trie, + packed_strings: &str, + ) -> Result { + //generate code + let bool_type_id = ctx + .engines + .te() + .insert(ctx.engines, TypeInfo::Boolean, None); + + let string_slice_type_id = + ctx.engines + .te() + .insert(ctx.engines, TypeInfo::StringSlice, None); + + let packed_strings_expr = TyExpression { + expression: TyExpressionVariant::Literal(crate::language::Literal::String( + Span::from_string(packed_strings.to_string()), + )), + return_type: string_slice_type_id, + span: Span::dummy(), + }; + + let expr = self.generate_radrix_trie_code( + matched_value, + packed_strings, + &packed_strings_expr, + &trie.nodes, + 0, + 0, + bool_type_id, + u64_type_id, + branch_return_type_id, + 1, + wildcard_return_expr, + ); + + Ok(expr) + } + + #[allow(clippy::too_many_arguments)] + fn generate_radrix_trie_code( + &self, + matched_value: &TyExpression, + packed_strings: &str, + packed_strings_expr: &TyExpression, + nodes: &[TrieNode], + slice_pos: usize, + current_node_index: usize, + bool_type_id: TypeId, + u64_type_id: TypeId, + branch_return_type_id: TypeId, + depth: usize, + block_when_all_fail: TyExpression, + ) -> TyExpression { + let current = &nodes[current_node_index]; + + if let Some(output) = current.output { + assert!(current.next.is_empty()); + + if RADIX_TREE_DEBUG { + println!("{}return branch {:?}", " ".repeat(depth * 4), output); + } + + let branch = &self.branches[output]; + return branch.result.clone(); + } + + let mut block = block_when_all_fail.clone(); + + for (prefix, next_node_index) in current.next.iter().rev() { + let start = current_node_index; + let end = current_node_index + prefix.len(); + let eq_len: u64 = end as u64 - start as u64; + + let prefix_pos = packed_strings + .find(prefix) + .expect("prefix should be inside this string"); + + if RADIX_TREE_DEBUG { + println!( + "{}if str[{start}..{end}] == 
\"{prefix}\" at packed_string[{prefix_pos}]", + " ".repeat(depth * 4), + ); + } + + let then_node = self.generate_radrix_trie_code( + matched_value, + packed_strings, + packed_strings_expr, + nodes, + end, + *next_node_index, + bool_type_id, + u64_type_id, + branch_return_type_id, + depth + 1, + block_when_all_fail.clone(), + ); + + let prefix_pos = packed_strings + .find(prefix) + .expect("prefix should be inside this string"); + + let expression = TyExpressionVariant::AsmExpression { + registers: vec![ + TyAsmRegisterDeclaration { + name: Ident::new_no_span("slice".into()), + initializer: Some(matched_value.clone()), + }, + TyAsmRegisterDeclaration { + name: Ident::new_no_span("prefix".into()), + initializer: Some(packed_strings_expr.clone()), + }, + TyAsmRegisterDeclaration { + name: Ident::new_no_span("slice_ptr".into()), + initializer: None, + }, + TyAsmRegisterDeclaration { + name: Ident::new_no_span("prefix_ptr".into()), + initializer: None, + }, + TyAsmRegisterDeclaration { + name: Ident::new_no_span("len".into()), + initializer: Some(TyExpression { + expression: TyExpressionVariant::Literal( + crate::language::Literal::U64(eq_len), + ), + return_type: u64_type_id, + span: Span::dummy(), + }), + }, + TyAsmRegisterDeclaration { + name: Ident::new_no_span("is_eq".into()), + initializer: None, + }, + ], + body: vec![ + AsmOp { + op_name: Ident::new_no_span("lw".into()), + op_args: vec![ + BaseIdent::new_no_span("slice_ptr".into()), + BaseIdent::new_no_span("slice".into()), + ], + immediate: Some(BaseIdent::new_no_span("i0".into())), + span: Span::dummy(), + }, + AsmOp { + op_name: Ident::new_no_span("addi".into()), + op_args: vec![ + BaseIdent::new_no_span("slice_ptr".into()), + BaseIdent::new_no_span("slice_ptr".into()), + ], + immediate: Some(BaseIdent::new_no_span(format!("i{}", slice_pos))), + span: Span::dummy(), + }, + AsmOp { + op_name: Ident::new_no_span("lw".into()), + op_args: vec![ + BaseIdent::new_no_span("prefix_ptr".into()), + 
BaseIdent::new_no_span("prefix".into()), + ], + immediate: Some(BaseIdent::new_no_span("i0".into())), + span: Span::dummy(), + }, + AsmOp { + op_name: Ident::new_no_span("addi".into()), + op_args: vec![ + BaseIdent::new_no_span("prefix_ptr".into()), + BaseIdent::new_no_span("prefix_ptr".into()), + ], + immediate: Some(BaseIdent::new_no_span(format!("i{}", prefix_pos))), + span: Span::dummy(), + }, + AsmOp { + op_name: Ident::new_no_span("meq".into()), + op_args: vec![ + BaseIdent::new_no_span("is_eq".into()), + BaseIdent::new_no_span("slice_ptr".into()), + BaseIdent::new_no_span("prefix_ptr".into()), + BaseIdent::new_no_span("len".into()), + ], + immediate: None, + span: Span::dummy(), + }, + ], + returns: Some(( + AsmRegister { + name: "is_eq".into(), + }, + Span::dummy(), + )), + whole_block_span: Span::dummy(), + }; + + block = TyExpression { + expression: TyExpressionVariant::IfExp { + condition: Box::new(TyExpression { + expression, + return_type: bool_type_id, + span: Span::dummy(), + }), + then: Box::new(then_node), + r#else: Some(Box::new(block)), + }, + return_type: branch_return_type_id, + span: Span::dummy(), + }; + } + + if RADIX_TREE_DEBUG { + println!("{}return wildcard branch", " ".repeat(depth * 4),); + } + + block + } + + fn desugar_to_typed_if_expression( &self, instantiate: Instantiate, mut ctx: TypeCheckContext<'_>, diff --git a/sway-core/src/semantic_analysis/ast_node/expression/typed_expression.rs b/sway-core/src/semantic_analysis/ast_node/expression/typed_expression.rs index c9dcd6a7968..9ce246074f3 100644 --- a/sway-core/src/semantic_analysis/ast_node/expression/typed_expression.rs +++ b/sway-core/src/semantic_analysis/ast_node/expression/typed_expression.rs @@ -840,13 +840,13 @@ impl ty::TyExpression { } // desugar the typed match expression to a typed if expression - let typed_if_exp = typed_match_expression.convert_to_typed_if_expression(handler, ctx)?; + let desugared = typed_match_expression.desugar(handler, ctx)?; let match_exp = 
ty::TyExpression { - span: typed_if_exp.span.clone(), - return_type: typed_if_exp.return_type, + span: desugared.span.clone(), + return_type: desugared.return_type, expression: ty::TyExpressionVariant::MatchExp { - desugared: Box::new(typed_if_exp), + desugared: Box::new(desugared), scrutinees: typed_scrutinees, }, }; diff --git a/sway-core/src/type_system/info.rs b/sway-core/src/type_system/info.rs index 9eae7d6b9d1..6796bb67a69 100644 --- a/sway-core/src/type_system/info.rs +++ b/sway-core/src/type_system/info.rs @@ -1281,7 +1281,8 @@ impl TypeInfo { | TypeInfo::B256 | TypeInfo::UnknownGeneric { .. } | TypeInfo::Numeric - | TypeInfo::Never => Ok(()), + | TypeInfo::Never + | TypeInfo::StringSlice => Ok(()), TypeInfo::Alias { ty, .. } => { let ty = engines.te().get(ty.type_id); ty.expect_is_supported_in_match_expressions(handler, engines, span) @@ -1291,7 +1292,6 @@ impl TypeInfo { | TypeInfo::Ptr(..) | TypeInfo::Slice(..) | TypeInfo::StringArray(_) - | TypeInfo::StringSlice | TypeInfo::Array(_, _) => Err(handler.emit_err(CompileError::Unimplemented { feature: format!( "Matched value has type \"{}\". 
Matching on this type", diff --git a/sway-ir/src/error.rs b/sway-ir/src/error.rs index b2f44625793..80b0894fe16 100644 --- a/sway-ir/src/error.rs +++ b/sway-ir/src/error.rs @@ -33,8 +33,8 @@ pub enum IrError { VerifyConditionExprNotABool, VerifyContractCallBadTypes(String), VerifyGepElementTypeNonPointer, - VerifyGepFromNonPointer(String), - VerifyGepInconsistentTypes, + VerifyGepFromNonPointer(String, Option<Value>), + VerifyGepInconsistentTypes(String, Option<Value>), VerifyGepOnNonAggregate, VerifyGetNonExistentPointer, VerifyInsertElementOfIncorrectType, @@ -64,15 +64,26 @@ pub enum IrError { VerifyStateDestBadType(String), VerifyStateKeyBadType, VerifyStateKeyNonPointer(String), - VerifyStoreMismatchedTypes, + VerifyStoreMismatchedTypes(Option<Value>), VerifyStoreToNonPointer(String), VerifyUntypedValuePassedToFunction, } +impl IrError { + pub(crate) fn get_problematic_value(&self) -> Option<&Value> { + match self { + Self::VerifyGepFromNonPointer(_, v) => v.as_ref(), + Self::VerifyGepInconsistentTypes(_, v) => v.as_ref(), + Self::VerifyStoreMismatchedTypes(v) => v.as_ref(), + _ => None, + } + } +} impl std::error::Error for IrError {} use std::fmt; +use crate::Value; use itertools::Itertools; impl fmt::Display for IrError { @@ -194,10 +205,14 @@ impl fmt::Display for IrError { IrError::VerifyGepElementTypeNonPointer => { write!(f, "Verification failed: GEP on a non-pointer.") } - IrError::VerifyGepInconsistentTypes => { - write!(f, "Verification failed: Struct field type mismatch.") + IrError::VerifyGepInconsistentTypes(error, _) => { + write!( + f, + "Verification failed: Struct field type mismatch: ({}).", + error + ) } - IrError::VerifyGepFromNonPointer(ty) => { + IrError::VerifyGepFromNonPointer(ty, _) => { write!( f, "Verification failed: Struct access must be to a pointer value, not a {ty}." @@ -337,7 +352,7 @@ impl fmt::Display for IrError { "Verification failed: State access operation must be to a {ty} pointer." 
) } - IrError::VerifyStoreMismatchedTypes => { + IrError::VerifyStoreMismatchedTypes(_) => { write!( f, "Verification failed: Store value and pointer type mismatch." diff --git a/sway-ir/src/printer.rs b/sway-ir/src/printer.rs index f14286ccda9..19b90c92861 100644 --- a/sway-ir/src/printer.rs +++ b/sway-ir/src/printer.rs @@ -23,7 +23,7 @@ use crate::{ }; #[derive(Debug)] -enum Doc { +pub(crate) enum Doc { Empty, Space, Comma, @@ -42,7 +42,7 @@ enum Doc { } impl Doc { - fn text<S: Into<String>>(s: S) -> Self { + pub(crate) fn text<S: Into<String>>(s: S) -> Self { Doc::Text(s.into()) } @@ -50,7 +50,7 @@ impl Doc { Doc::Line(Box::new(doc)) } - fn text_line<S: Into<String>>(s: S) -> Self { + pub(crate) fn text_line<S: Into<String>>(s: S) -> Self { Doc::Line(Box::new(Doc::Text(s.into()))) } @@ -66,7 +66,7 @@ impl Doc { Doc::Parens(Box::new(Doc::list_sep(docs, Doc::Comma))) } - fn append(self, doc: Doc) -> Doc { + pub(crate) fn append(self, doc: Doc) -> Doc { match (&self, &doc) { (Doc::Empty, _) => doc, (_, Doc::Empty) => self, @@ -81,7 +81,7 @@ impl Doc { } } - fn build(self) -> String { + pub(crate) fn build(self) -> String { build_doc(self, 0) } } @@ -90,17 +90,32 @@ impl Doc { /// /// The output from this function must always be suitable for [crate::parser::parse]. 
pub fn to_string(context: &Context) -> String { + context_print(context, &|_, doc| doc) +} + +pub(crate) fn context_print(context: &Context, map_doc: &impl Fn(&Value, Doc) -> Doc) -> String { let mut md_namer = MetadataNamer::default(); context .modules .iter() .fold(Doc::Empty, |doc, (_, module)| { - doc.append(module_to_doc(context, &mut md_namer, module)) + doc.append(module_to_doc(context, &mut md_namer, module, map_doc)) }) .append(md_namer.to_doc(context)) .build() } +pub(crate) fn block_print( + context: &Context, + function: Function, + block: Block, + map_doc: &impl Fn(&Value, Doc) -> Doc, +) -> String { + let mut md_namer = MetadataNamer::default(); + let mut namer = Namer::new(function); + block_to_doc(context, &mut md_namer, &mut namer, &block, map_doc).build() +} + pub struct ModulePrinterResult; impl AnalysisResultT for ModulePrinterResult {} @@ -116,7 +131,8 @@ pub fn module_printer_pass( module_to_doc( context, &mut md_namer, - context.modules.get(module.0).unwrap() + context.modules.get(module.0).unwrap(), + &|_, doc| doc ) .append(md_namer.to_doc(context)) .build() @@ -132,7 +148,8 @@ pub fn module_print(context: &Context, _analyses: &AnalysisResults, module: Modu module_to_doc( context, &mut md_namer, - context.modules.get(module.0).unwrap() + context.modules.get(module.0).unwrap(), + &|_, doc| doc ) .append(md_namer.to_doc(context)) .build() @@ -148,7 +165,8 @@ pub fn function_print(context: &Context, function: Function) { context, &mut md_namer, &mut Namer::new(function), - context.functions.get(function.0).unwrap() + context.functions.get(function.0).unwrap(), + &|_, doc| doc ) .append(md_namer.to_doc(context)) .build() @@ -170,6 +188,7 @@ fn module_to_doc<'a>( context: &'a Context, md_namer: &mut MetadataNamer, module: &'a ModuleContent, + map_doc: &impl Fn(&Value, Doc) -> Doc, ) -> Doc { Doc::line(Doc::Text(format!( "{} {{", @@ -207,6 +226,7 @@ fn module_to_doc<'a>( md_namer, &mut Namer::new(*function), &context.functions[function.0], + 
map_doc, ) }) .collect(), @@ -268,6 +288,7 @@ fn function_to_doc<'a>( md_namer: &mut MetadataNamer, namer: &mut Namer, function: &'a FunctionContent, + map_doc: &impl Fn(&Value, Doc) -> Doc, ) -> Doc { let public = if function.is_public { "pub " } else { "" }; let entry = if function.is_entry { "entry " } else { "" }; @@ -373,7 +394,7 @@ fn function_to_doc<'a>( function .blocks .iter() - .map(|block| block_to_doc(context, md_namer, namer, block)) + .map(|block| block_to_doc(context, md_namer, namer, block, map_doc)) .collect(), Doc::line(Doc::Empty), ), @@ -389,6 +410,7 @@ fn block_to_doc( md_namer: &mut MetadataNamer, namer: &mut Namer, block: &Block, + map_doc: &impl Fn(&Value, Doc) -> Doc, ) -> Doc { let block_content = &context.blocks[block.0]; Doc::line( @@ -410,7 +432,10 @@ fn block_to_doc( .append(Doc::List( block .instruction_iter(context) - .map(|ins| instruction_to_doc(context, md_namer, namer, block, &ins)) + .map(|current_value| { + let doc = instruction_to_doc(context, md_namer, namer, block, &current_value); + (map_doc)(&current_value, doc) + }) .collect(), )) } diff --git a/sway-ir/src/verify.rs b/sway-ir/src/verify.rs index f39cb509a08..be2fcc77fc2 100644 --- a/sway-ir/src/verify.rs +++ b/sway-ir/src/verify.rs @@ -13,9 +13,10 @@ use crate::{ irtype::Type, local_var::LocalVar, metadata::{MetadataIndex, Metadatum}, + printer, value::{Value, ValueDatum}, AnalysisResult, AnalysisResultT, AnalysisResults, BinaryOpKind, Block, BlockArgument, - BranchToWithArgs, Module, Pass, PassMutability, ScopedPass, TypeOption, UnaryOpKind, + BranchToWithArgs, Doc, Module, Pass, PassMutability, ScopedPass, TypeOption, UnaryOpKind, }; pub struct ModuleVerifierResult; @@ -131,10 +132,29 @@ impl<'eng> Context<'eng> { } .verify_instructions(); - if r.is_err() { - println!("{}", self); - println!("{}", cur_function.get_name(self)); - println!("{}", cur_block.get_label(self)); + // Help to understand the verification failure + // If the error knows the problematic value, prints 
everything with the error highlighted, + // if not, print only the block to help pinpoint the issue + if let Err(error) = &r { + println!( + "Verification failed at {}::{}", + cur_function.get_name(self), + cur_block.get_label(self) + ); + + let block = if let Some(problematic_value) = error.get_problematic_value() { + printer::context_print(self, &|current_value: &Value, doc: Doc| { + if *current_value == *problematic_value { + doc.append(Doc::text_line(format!("\x1b[0;31m^ {}\x1b[0m", error))) + } else { + doc + } + }) + } else { + printer::block_print(self, cur_function, cur_block, &|_, doc| doc) + }; + + println!("{}", block); } r?; @@ -303,7 +323,7 @@ impl<'a, 'eng> InstructionVerifier<'a, 'eng> { base, elem_ptr_ty, indices, - } => self.verify_get_elem_ptr(base, elem_ptr_ty, indices)?, + } => self.verify_get_elem_ptr(&ins, base, elem_ptr_ty, indices)?, InstOp::GetLocal(local_var) => self.verify_get_local(local_var)?, InstOp::GetConfig(_, name) => self.verify_get_config(self.cur_module, name)?, InstOp::IntToPtr(value, ty) => self.verify_int_to_ptr(value, ty)?, @@ -323,7 +343,7 @@ impl<'a, 'eng> InstructionVerifier<'a, 'eng> { InstOp::Store { dst_val_ptr, stored_val, - } => self.verify_store(dst_val_ptr, stored_val)?, + } => self.verify_store(&ins, dst_val_ptr, stored_val)?, }; // Verify the instruction metadata too. 
@@ -743,13 +763,15 @@ impl<'a, 'eng> InstructionVerifier<'a, 'eng> { fn verify_get_elem_ptr( &self, + ins: &Value, base: &Value, elem_ptr_ty: &Type, indices: &[Value], ) -> Result<(), IrError> { use crate::constant::ConstantValue; - let base_ty = self.get_ptr_type(base, IrError::VerifyGepFromNonPointer)?; + let base_ty = + self.get_ptr_type(base, |s| IrError::VerifyGepFromNonPointer(s, Some(*ins)))?; if !base_ty.is_aggregate(self.context) { return Err(IrError::VerifyGepOnNonAggregate); } @@ -759,7 +781,10 @@ impl<'a, 'eng> InstructionVerifier<'a, 'eng> { }; if indices.is_empty() { - return Err(IrError::VerifyGepInconsistentTypes); + return Err(IrError::VerifyGepInconsistentTypes( + "Empty Indices".into(), + Some(*base), + )); } // Fetch the field type from the vector of Values. If the value is a constant int then @@ -781,7 +806,14 @@ impl<'a, 'eng> InstructionVerifier<'a, 'eng> { }); if self.opt_ty_not_eq(&Some(elem_inner_ty), &index_ty) { - return Err(IrError::VerifyGepInconsistentTypes); + return Err(IrError::VerifyGepInconsistentTypes( + format!( + "Element type \"{}\" versus index type {:?}", + elem_inner_ty.as_string(self.context), + index_ty.map(|x| x.as_string(self.context)) + ), + Some(*ins), + )); } Ok(()) @@ -1033,11 +1065,16 @@ impl<'a, 'eng> InstructionVerifier<'a, 'eng> { } } - fn verify_store(&self, dst_val: &Value, stored_val: &Value) -> Result<(), IrError> { + fn verify_store( + &self, + ins: &Value, + dst_val: &Value, + stored_val: &Value, + ) -> Result<(), IrError> { let dst_ty = self.get_ptr_type(dst_val, IrError::VerifyStoreToNonPointer)?; let stored_ty = stored_val.get_type(self.context); if self.opt_ty_not_eq(&Some(dst_ty), &stored_ty) { - Err(IrError::VerifyStoreMismatchedTypes) + Err(IrError::VerifyStoreMismatchedTypes(Some(*ins))) } else { Ok(()) } diff --git a/test/src/e2e_vm_tests/harness.rs b/test/src/e2e_vm_tests/harness.rs index 72ca391a3d1..cb839dc2608 100644 --- a/test/src/e2e_vm_tests/harness.rs +++ 
b/test/src/e2e_vm_tests/harness.rs @@ -278,6 +278,7 @@ pub(crate) async fn compile_to_bytes(file_name: &str, run_config: &RunConfig) -> dca_graph_url_format: None, asm: run_config.print_asm, bytecode: run_config.print_bytecode, + bytecode_spans: run_config.print_bytecode, ir: run_config.print_ir.clone(), reverse_order: false, }, diff --git a/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/Forc.lock b/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/Forc.lock similarity index 56% rename from test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/Forc.lock rename to test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/Forc.lock index ce884974e55..3a555a8fe55 100644 --- a/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/Forc.lock +++ b/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/Forc.lock @@ -1,9 +1,9 @@ [[package]] name = "core" -source = "path+from-root-C27B2C742213E5AD" +source = "path+from-root-0B4C76FF57814684" [[package]] -name = "match_expressions" +name = "match_expressions_all" source = "member" dependencies = [ "core", @@ -12,5 +12,5 @@ dependencies = [ [[package]] name = "std" -source = "path+from-root-C27B2C742213E5AD" +source = "path+from-root-0B4C76FF57814684" dependencies = ["core"] diff --git a/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/Forc.toml b/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/Forc.toml similarity index 88% rename from test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/Forc.toml rename to test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/Forc.toml index eaec31a928c..6d1dea0b55d 100644 --- a/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/Forc.toml +++ 
b/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/Forc.toml @@ -1,7 +1,7 @@ [project] authors = ["Fuel Labs "] license = "Apache-2.0" -name = "match_expressions" +name = "match_expressions_all" entry = "main.sw" [dependencies] diff --git a/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/json_abi_oracle.json b/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/json_abi_oracle.json similarity index 100% rename from test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/json_abi_oracle.json rename to test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/json_abi_oracle.json diff --git a/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/json_abi_oracle_new_encoding.json b/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/json_abi_oracle_new_encoding.json similarity index 88% rename from test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/json_abi_oracle_new_encoding.json rename to test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/json_abi_oracle_new_encoding.json index 068da3305ab..e60dda965d4 100644 --- a/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/json_abi_oracle_new_encoding.json +++ b/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/json_abi_oracle_new_encoding.json @@ -17,8 +17,8 @@ "messagesTypes": [], "types": [ { - "components": null, - "type": "u64", + "components": [], + "type": "()", "typeId": 0, "typeParameters": null } diff --git a/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/src/main.sw b/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/src/main.sw similarity index 64% rename from test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/src/main.sw rename to 
test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/src/main.sw index 689aaffc9b0..0a493b8bc16 100644 --- a/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/src/main.sw +++ b/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/src/main.sw @@ -18,7 +18,17 @@ fn inc_i(ref mut i: u64) -> Struct { Struct { x: 21, y: 21, z: 1 } } -fn main() -> u64 { +#[inline(never)] +fn return_match_on_str_slice(param: str) -> u64 { + match param { + "get_a" => { 1u64 }, + "get_a_b" => { 2u64 }, + "get_b" => { 3u64 }, + _ => { 1000u64 }, + } +} + +fn main() { let x = match 8 { 7 => { 4 }, 9 => { 5 }, @@ -86,8 +96,22 @@ fn main() -> u64 { }; assert(i == 11); - match 42 { + let r = match 42 { 0 => { 24 }, foo => { foo }, - } + }; + assert(r == 42); + + // string slice + assert(return_match_on_str_slice("") == 1000); + assert(return_match_on_str_slice("g") == 1000); + assert(return_match_on_str_slice("ge") == 1000); + assert(return_match_on_str_slice("get") == 1000); + assert(return_match_on_str_slice("get_") == 1000); + assert(return_match_on_str_slice("get_a") == 1); + assert(return_match_on_str_slice("get_a_") == 1000); + assert(return_match_on_str_slice("get_a_b") == 2); + assert(return_match_on_str_slice("get_b") == 3); + assert(return_match_on_str_slice("get_c") == 1000); } + diff --git a/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/test.toml b/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/test.toml similarity index 57% rename from test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/test.toml rename to test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/test.toml index 057e34e97ef..8fb3859ce4c 100644 --- a/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions/test.toml +++ b/test/src/e2e_vm_tests/test_programs/should_pass/language/match_expressions_all/test.toml @@ -1,4 
+1,4 @@ category = "run" -expected_result = { action = "return", value = 42 } -expected_result_new_encoding = { action = "return_data", value = "000000000000002A" } +expected_result = { action = "return", value = 0 } +expected_result_new_encoding = { action = "return_data", value = "" } validate_abi = true