From aa7ce78a7bcede545b41422a3f35ed393475ee30 Mon Sep 17 00:00:00 2001 From: Selwyn Gomes Date: Mon, 20 Jun 2022 08:08:09 -0700 Subject: [PATCH] Providing support for Alveo boards (#552) Providing support for Alveo boards (#552) Co-authored-by: Sioni Summers Co-authored-by: Gabriele-bot Co-authored-by: Selwyn96 --- MANIFEST.in | 1 + example-models | 2 +- .../vivado_accelerator/supported_boards.json | 30 +- .../vivado_accelerator_backend.py | 55 ++- .../vivado_accelerator_config.py | 23 +- .../krnl_rtl_src/krnl_rtl_axi_read_master.sv | 280 ++++++++++++ .../krnl_rtl_src/krnl_rtl_axi_write_master.sv | 276 ++++++++++++ .../krnl_rtl_src/krnl_rtl_control_s_axi.v | 422 ++++++++++++++++++ .../alveo/krnl_rtl_src/krnl_rtl_counter.sv | 88 ++++ .../alveo/krnl_rtl_src/krnl_rtl_int.sv | 415 +++++++++++++++++ .../alveo/krnl_rtl_src/myproject_kernel.v | 170 +++++++ .../alveo/python_drivers/axi_stream_driver.py | 108 +++++ .../alveo/tcl_scripts/axi_stream_design.tcl | 109 +++++ hls4ml/writer/vivado_accelerator_writer.py | 14 +- test/hls4ml-keras-test.sh | 3 + test/keras-models.txt | 3 +- test/keras-to-hls.sh | 21 +- 17 files changed, 1998 insertions(+), 22 deletions(-) create mode 100644 hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_axi_read_master.sv create mode 100644 hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_axi_write_master.sv create mode 100644 hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_control_s_axi.v create mode 100644 hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_counter.sv create mode 100644 hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_int.sv create mode 100644 hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/myproject_kernel.v create mode 100644 hls4ml/templates/vivado_accelerator/alveo/python_drivers/axi_stream_driver.py create mode 100644 hls4ml/templates/vivado_accelerator/alveo/tcl_scripts/axi_stream_design.tcl diff --git a/MANIFEST.in b/MANIFEST.in index 
47a636c11a..a3482f0847 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,3 +4,4 @@ graft example-prjs graft example-models graft test recursive-include hls4ml/templates * +include hls4ml/backends/vivado_accelerator/supported_boards.json diff --git a/example-models b/example-models index 0d4cc7277e..ff74f73dbc 160000 --- a/example-models +++ b/example-models @@ -1 +1 @@ -Subproject commit 0d4cc7277eac9bb9020e3d73a992dc15dbdcce4e +Subproject commit ff74f73dbc253d1aa7de1603ee10ede551919548 diff --git a/hls4ml/backends/vivado_accelerator/supported_boards.json b/hls4ml/backends/vivado_accelerator/supported_boards.json index 34d676d9cf..1279ec22d0 100644 --- a/hls4ml/backends/vivado_accelerator/supported_boards.json +++ b/hls4ml/backends/vivado_accelerator/supported_boards.json @@ -10,5 +10,33 @@ "tcl_scripts": { "axi_stream": "axi_stream_design.tcl"}, "python_drivers": {"axi_stream": "axi_stream_driver.py"}, "c_drivers": {} + }, + "alveo-u50": { + "part": "xcu50-fsvh2104-2-e", + "tcl_scripts": {"axi_stream": "axi_stream_design.tcl"}, + "python_drivers": {"axi_stream": "axi_stream_driver.py"}, + "krnl_rtl_srcs": {"axi_stream": "krnl_rtl_src"}, + "c_drivers": {} + }, + "alveo-u250": { + "part": "xcu250-figd2104-2L-e", + "tcl_scripts": {"axi_stream": "axi_stream_design.tcl"}, + "python_drivers": {"axi_stream": "axi_stream_driver.py"}, + "krnl_rtl_srcs": {"axi_stream": "krnl_rtl_src"}, + "c_drivers": {} + }, + "alveo-u200": { + "part": "xcu200-fsgd2104-2-e", + "tcl_scripts": {"axi_stream": "axi_stream_design.tcl"}, + "python_drivers": {"axi_stream": "axi_stream_driver.py"}, + "krnl_rtl_srcs": {"axi_stream": "krnl_rtl_src"}, + "c_drivers": {} + }, + "alveo-u280": { + "part": "xcu280-fsvh2892-2L-e", + "tcl_scripts": {"axi_stream": "axi_stream_design.tcl"}, + "python_drivers": {"axi_stream": "axi_stream_driver.py"}, + "krnl_rtl_srcs": {"axi_stream": "krnl_rtl_src"}, + "c_drivers": {} } -} \ No newline at end of file +} diff --git 
a/hls4ml/backends/vivado_accelerator/vivado_accelerator_backend.py b/hls4ml/backends/vivado_accelerator/vivado_accelerator_backend.py index 3f33900f95..63b83659f3 100644 --- a/hls4ml/backends/vivado_accelerator/vivado_accelerator_backend.py +++ b/hls4ml/backends/vivado_accelerator/vivado_accelerator_backend.py @@ -12,20 +12,55 @@ def __init__(self): def build(self, model, reset=False, csim=True, synth=True, cosim=False, validation=False, export=False, vsynth=False, bitfile=False): # run the VivadoBackend build report = super().build(model, reset=reset, csim=csim, synth=synth, cosim=cosim, validation=validation, export=export, vsynth=vsynth) + # Get Config to view Board and Platform + from hls4ml.backends import VivadoAcceleratorConfig + vivado_accelerator_config=VivadoAcceleratorConfig(model.config, model.get_input_variables(),model.get_output_variables()) # now make a bitfile if bitfile: - curr_dir = os.getcwd() - os.chdir(model.config.get_output_dir()) - try: - os.system('vivado -mode batch -source design.tcl') - except: - print("Something went wrong, check the Vivado logs") - os.chdir(curr_dir) + if(vivado_accelerator_config.get_board().startswith('alveo')): + self.make_xclbin(model,vivado_accelerator_config.get_platform()) + else: + curr_dir = os.getcwd() + os.chdir(model.config.get_output_dir()) + try: + os.system('vivado -mode batch -source design.tcl') + except: + print("Something went wrong, check the Vivado logs") + os.chdir(curr_dir) return parse_vivado_report(model.config.get_output_dir()) + def make_xclbin(self,model, platform='xilinx_u250_xdma_201830_2'): + """ + + Parameters + ---------- + - model : compiled and built hls_model. + - platform : development Target Platform, must be installed first. On the host machine is required only the + deployment target platform, both can be found on the Getting Started section of the Alveo card. 
+ """ + curr_dir = os.getcwd() + abs_path_dir=os.path.abspath(model.config.get_output_dir()) + os.chdir(abs_path_dir) + os.makedirs('xo_files', exist_ok=True) + try: + os.system('vivado -mode batch -source design.tcl') + except: + print("Something went wrong, check the Vivado logs") + project_name=model.config.get_project_name() + ip_repo_path = abs_path_dir + '/'+project_name+'_prj'+'/solution1/impl/ip' + os.makedirs('xclbin_files', exist_ok=True) + os.chdir(abs_path_dir + '/xclbin_files') + # TODO Add other platforms + vitis_cmd = "v++ -t hw --platform " + platform + " --link ../xo_files/"+project_name+"_kernel.xo -o'"+project_name+"_kernel.xclbin' --user_ip_repo_paths " + ip_repo_path + try: + os.system(vitis_cmd) + except: + print("Something went wrong, check the Vitis/Vivado logs") + os.chdir(curr_dir) + def create_initial_config(self, board='pynq-z2', part=None, clock_period=5, io_type='io_parallel', interface='axi_stream', - driver='python', input_type='float', output_type='float'): + driver='python', input_type='float', output_type='float',platform='xilinx_u250_xdma_201830_2'): ''' Create initial accelerator config with default parameters Args: @@ -42,6 +77,7 @@ def create_initial_config(self, board='pynq-z2', part=None, clock_period=5, io_t will round the number of bits used to the next power-of-2 value. output_type: the wrapper output precision. Can be `float` or an `ap_type`. Note: VivadoAcceleratorBackend will round the number of bits used to the next power-of-2 value. 
+ platform: development target platform Returns: populated config @@ -57,6 +93,9 @@ def create_initial_config(self, board='pynq-z2', part=None, clock_period=5, io_t config['AcceleratorConfig']['Precision']['Output'] = {} config['AcceleratorConfig']['Precision']['Input'] = input_type # float, double or ap_fixed config['AcceleratorConfig']['Precision']['Output'] = output_type # float, double or ap_fixed + if board.startswith('alveo'): + config['AcceleratorConfig']['Platform'] = platform + return config def _register_flows(self): diff --git a/hls4ml/backends/vivado_accelerator/vivado_accelerator_config.py b/hls4ml/backends/vivado_accelerator/vivado_accelerator_config.py index 31828e5cd6..f9c7848ef2 100644 --- a/hls4ml/backends/vivado_accelerator/vivado_accelerator_config.py +++ b/hls4ml/backends/vivado_accelerator/vivado_accelerator_config.py @@ -47,6 +47,7 @@ def __init__(self, config, model_inputs, model_outputs): 'float') # float, double or ap_fixed self.output_type = self.config['AcceleratorConfig']['Precision'].get('Output', 'float') # float, double or ap_fixed + self.platform= self.config['AcceleratorConfig'].get('Platform', 'xilinx_u250_xdma_201830_2') # Get platform folder name assert len( model_inputs) == 1, "Only models with one input tensor are currently supported by VivadoAcceleratorBackend" @@ -118,14 +119,28 @@ def get_driver(self): def get_board(self): return self.board + def get_platform(self): + return self.platform + + def get_clock_period(self): + return self.clock_period + def get_driver_path(self): - return '../templates/vivado_accelerator/' + self.board + '/' + self.driver + '_drivers/' + \ + if self.board.startswith('alveo'): + return '../templates/vivado_accelerator/' + 'alveo/' + self.driver + '_drivers/' + \ + self.get_driver_file() + else: + return '../templates/vivado_accelerator/' + self.board + '/' + self.driver + '_drivers/' + \ self.get_driver_file() def get_driver_file(self): driver_ext = '.py' if self.driver == 'python' else '.h' 
return self.interface + '_driver' + driver_ext + def get_krnl_rtl_src_dir(self): + return '../templates/vivado_accelerator/' + 'alveo/' + '/krnl_rtl_src' + + def get_input_type(self): return self.input_type @@ -140,4 +155,8 @@ def get_tcl_file_path(self): tcl_script = tcl_scripts.get(self.interface, None) if tcl_script is None: raise Exception('No tcl script definition available for the desired interface in supported_board.json') - return '../templates/vivado_accelerator/' + self.board + '/tcl_scripts/' + tcl_script + if self.board.startswith('alveo'): + return '../templates/vivado_accelerator/' + 'alveo/' + '/tcl_scripts/' + tcl_script + else: + return '../templates/vivado_accelerator/' + self.board + '/tcl_scripts/' + tcl_script + diff --git a/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_axi_read_master.sv b/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_axi_read_master.sv new file mode 100644 index 0000000000..a82dfc5a74 --- /dev/null +++ b/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_axi_read_master.sv @@ -0,0 +1,280 @@ +/** +* Copyright (C) 2019-2021 Xilinx, Inc +* +* Licensed under the Apache License, Version 2.0 (the "License"). You may +* not use this file except in compliance with the License. A copy of the +* License is located at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +* License for the specific language governing permissions and limitations +* under the License. +*/ + +/////////////////////////////////////////////////////////////////////////////// +// Description: This is a multi-threaded AXI4 read master. Each channel will +// issue commands on a different IDs. As a result data may arrive out of +// order. 
The amount of data requested is equal to the ctrl_length variable. +// Prog full is set and sampled such that the FIFO will never overflow. Thus +// rready can be always asserted for better timing. +/////////////////////////////////////////////////////////////////////////////// + +`default_nettype none + +module krnl_rtl_axi_read_master #( + parameter integer C_ID_WIDTH = 0, // Must be >= $clog2(C_NUM_CHANNELS) + parameter integer C_ADDR_WIDTH = 64, + parameter integer C_DATA_WIDTH = 32, + parameter integer C_NUM_CHANNELS = 1, // Only 2 tested. + parameter integer C_LENGTH_WIDTH = 32, + parameter integer C_BURST_LEN = 256, // Max AXI burst length for read commands + parameter integer C_LOG_BURST_LEN = 8, + parameter integer C_MAX_OUTSTANDING = 3 +) +( + // System signals + input wire aclk, + input wire areset, + // Control signals + input wire ctrl_start, + output wire ctrl_done, + input wire [C_NUM_CHANNELS-1:0][C_ADDR_WIDTH-1:0] ctrl_offset, + input wire [C_LENGTH_WIDTH-1:0] ctrl_length, + input wire [C_NUM_CHANNELS-1:0] ctrl_prog_full, + // AXI4 master interface + output wire arvalid, + input wire arready, + output wire [C_ADDR_WIDTH-1:0] araddr, + output wire [C_ID_WIDTH-1:0] arid, + output wire [7:0] arlen, + output wire [2:0] arsize, + input wire rvalid, + output wire rready, + input wire [C_DATA_WIDTH - 1:0] rdata, + input wire rlast, + input wire [C_ID_WIDTH - 1:0] rid, + input wire [1:0] rresp, + // AXI4-Stream master interface, 1 interface per channel. 
+ output wire [C_NUM_CHANNELS-1:0] m_tvalid, + input wire [C_NUM_CHANNELS-1:0] m_tready, + output wire [C_NUM_CHANNELS-1:0][C_DATA_WIDTH-1:0] m_tdata, + output wire [C_NUM_CHANNELS-1:0] m_tlast +); + +timeunit 1ps; +timeprecision 1ps; + +/////////////////////////////////////////////////////////////////////////////// +// Local Parameters +/////////////////////////////////////////////////////////////////////////////// +localparam integer LP_MAX_OUTSTANDING_CNTR_WIDTH = $clog2(C_MAX_OUTSTANDING+1); +localparam integer LP_TRANSACTION_CNTR_WIDTH = C_LENGTH_WIDTH-C_LOG_BURST_LEN; + +/////////////////////////////////////////////////////////////////////////////// +// Variables +/////////////////////////////////////////////////////////////////////////////// +// Control logic +logic [C_NUM_CHANNELS-1:0] done = '0; +logic [LP_TRANSACTION_CNTR_WIDTH-1:0] num_full_bursts; +logic num_partial_bursts; +logic start = 1'b0; +logic [LP_TRANSACTION_CNTR_WIDTH-1:0] num_transactions; +logic has_partial_burst; +logic [C_LOG_BURST_LEN-1:0] final_burst_len; +logic single_transaction; +logic ar_idle = 1'b1; +logic ar_done; +// AXI Read Address Channel +logic fifo_stall; +logic arxfer; +logic arvalid_r = 1'b0; +logic [C_NUM_CHANNELS-1:0][C_ADDR_WIDTH-1:0] addr; +logic [C_ID_WIDTH-1:0] id = {C_ID_WIDTH{1'b1}}; +logic [LP_TRANSACTION_CNTR_WIDTH-1:0] ar_transactions_to_go; +logic ar_final_transaction; +logic [C_NUM_CHANNELS-1:0] incr_ar_to_r_cnt; +logic [C_NUM_CHANNELS-1:0] decr_ar_to_r_cnt; +logic [C_NUM_CHANNELS-1:0] stall_ar; +logic [C_NUM_CHANNELS-1:0][LP_MAX_OUTSTANDING_CNTR_WIDTH-1:0] outstanding_vacancy_count; +// AXI Data Channel +logic [C_NUM_CHANNELS-1:0] tvalid; +logic [C_NUM_CHANNELS-1:0][C_DATA_WIDTH-1:0] tdata; +logic [C_NUM_CHANNELS-1:0] tlast; +logic rxfer; +logic [C_NUM_CHANNELS-1:0] decr_r_transaction_cntr; +logic [C_NUM_CHANNELS-1:0][LP_TRANSACTION_CNTR_WIDTH-1:0] r_transactions_to_go; +logic [C_NUM_CHANNELS-1:0] r_final_transaction; 
+/////////////////////////////////////////////////////////////////////////////// +// Control Logic +/////////////////////////////////////////////////////////////////////////////// + +always @(posedge aclk) begin + for (int i = 0; i < C_NUM_CHANNELS; i++) begin + done[i] <= rxfer & rlast & (rid == i) & r_final_transaction[i] ? 1'b1 : + ctrl_done ? 1'b0 : done[i]; + end +end +assign ctrl_done = &done; + +// Determine how many full burst to issue and if there are any partial bursts. +assign num_full_bursts = ctrl_length[C_LOG_BURST_LEN+:C_LENGTH_WIDTH-C_LOG_BURST_LEN]; +assign num_partial_bursts = ctrl_length[0+:C_LOG_BURST_LEN] ? 1'b1 : 1'b0; + +always @(posedge aclk) begin + start <= ctrl_start; + num_transactions <= (num_partial_bursts == 1'b0) ? num_full_bursts - 1'b1 : num_full_bursts; + has_partial_burst <= num_partial_bursts; + final_burst_len <= ctrl_length[0+:C_LOG_BURST_LEN] - 1'b1; +end + +// Special case if there is only 1 AXI transaction. +assign single_transaction = (num_transactions == {LP_TRANSACTION_CNTR_WIDTH{1'b0}}) ? 1'b1 : 1'b0; + +/////////////////////////////////////////////////////////////////////////////// +// AXI Read Address Channel +/////////////////////////////////////////////////////////////////////////////// +assign arvalid = arvalid_r; +assign araddr = addr[id]; +assign arlen = ar_final_transaction || (start & single_transaction) ? final_burst_len : C_BURST_LEN - 1; +assign arsize = $clog2((C_DATA_WIDTH/8)); +assign arid = id; + +assign arxfer = arvalid & arready; +assign fifo_stall = ctrl_prog_full[id]; + +always @(posedge aclk) begin + if (areset) begin + arvalid_r <= 1'b0; + end + else begin + arvalid_r <= ~ar_idle & ~stall_ar[id] & ~arvalid_r & ~fifo_stall ? 1'b1 : + arready ? 1'b0 : arvalid_r; + end +end + +// When ar_idle, there are no transactions to issue. +always @(posedge aclk) begin + if (areset) begin + ar_idle <= 1'b1; + end + else begin + ar_idle <= start ? 1'b0 : + ar_done ? 
1'b1 : + ar_idle; + end +end + +// each channel is assigned a different id. The transactions are interleaved. +always @(posedge aclk) begin + if (start) begin + id <= {C_ID_WIDTH{1'b1}}; + end + else begin + id <= arxfer ? id - 1'b1 : id; + end +end + + +// Increment to next address after each transaction is issued. +always @(posedge aclk) begin + for (int i = 0; i < C_NUM_CHANNELS; i++) begin + addr[i] <= ctrl_start ? ctrl_offset[i] : + arxfer && (id == i) ? addr[i] + C_BURST_LEN*C_DATA_WIDTH/8 : + addr[i]; + end +end + +// Counts down the number of transactions to send. +krnl_rtl_counter #( + .C_WIDTH ( LP_TRANSACTION_CNTR_WIDTH ) , + .C_INIT ( {LP_TRANSACTION_CNTR_WIDTH{1'b0}} ) +) +inst_ar_transaction_cntr ( + .clk ( aclk ) , + .clken ( 1'b1 ) , + .rst ( areset ) , + .load ( start ) , + .incr ( 1'b0 ) , + .decr ( arxfer && id == '0 ) , + .load_value ( num_transactions ) , + .count ( ar_transactions_to_go ) , + .is_zero ( ar_final_transaction ) +); + +assign ar_done = ar_final_transaction && arxfer && id == 1'b0; + +always_comb begin + for (int i = 0; i < C_NUM_CHANNELS; i++) begin + incr_ar_to_r_cnt[i] = rxfer & rlast & (rid == i); + decr_ar_to_r_cnt[i] = arxfer & (arid == i); + end +end + +// Keeps track of the number of outstanding transactions. Stalls +// when the value is reached so that the FIFO won't overflow. 
+krnl_rtl_counter #( + .C_WIDTH ( LP_MAX_OUTSTANDING_CNTR_WIDTH ) , + .C_INIT ( C_MAX_OUTSTANDING[0+:LP_MAX_OUTSTANDING_CNTR_WIDTH] ) +) +inst_ar_to_r_transaction_cntr[C_NUM_CHANNELS-1:0] ( + .clk ( aclk ) , + .clken ( 1'b1 ) , + .rst ( areset ) , + .load ( 1'b0 ) , + .incr ( incr_ar_to_r_cnt ) , + .decr ( decr_ar_to_r_cnt ) , + .load_value ( {LP_MAX_OUTSTANDING_CNTR_WIDTH{1'b0}} ) , + .count ( outstanding_vacancy_count ) , + .is_zero ( stall_ar ) +); + +/////////////////////////////////////////////////////////////////////////////// +// AXI Read Channel +/////////////////////////////////////////////////////////////////////////////// +assign m_tvalid = tvalid; +assign m_tdata = tdata; +assign m_tlast = tlast; + +always_comb begin + for (int i = 0; i < C_NUM_CHANNELS; i++) begin + tvalid[i] = rvalid && (rid == i); + tdata[i] = rdata; + tlast[i] = rlast; + end +end + +// rready can remain high for optimal timing because ar transactions are not issued +// unless there is enough space in the FIFO. 
+assign rready = 1'b1; +assign rxfer = rready & rvalid; + +always_comb begin + for (int i = 0; i < C_NUM_CHANNELS; i++) begin + decr_r_transaction_cntr[i] = rxfer & rlast & (rid == i); + end +end +krnl_rtl_counter #( + .C_WIDTH ( LP_TRANSACTION_CNTR_WIDTH ) , + .C_INIT ( {LP_TRANSACTION_CNTR_WIDTH{1'b0}} ) +) +inst_r_transaction_cntr[C_NUM_CHANNELS-1:0] ( + .clk ( aclk ) , + .clken ( 1'b1 ) , + .rst ( areset ) , + .load ( start ) , + .incr ( 1'b0 ) , + .decr ( decr_r_transaction_cntr ) , + .load_value ( num_transactions ) , + .count ( r_transactions_to_go ) , + .is_zero ( r_final_transaction ) +); + + +endmodule : krnl_rtl_axi_read_master + +`default_nettype wire + + diff --git a/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_axi_write_master.sv b/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_axi_write_master.sv new file mode 100644 index 0000000000..ab41386db0 --- /dev/null +++ b/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_axi_write_master.sv @@ -0,0 +1,276 @@ +/** +* Copyright (C) 2019-2021 Xilinx, Inc +* +* Licensed under the Apache License, Version 2.0 (the "License"). You may +* not use this file except in compliance with the License. A copy of the +* License is located at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +* License for the specific language governing permissions and limitations +* under the License. +*/ + +//////////////////////////////////////////////////////////// +// Description: AXI4 Write Master. Takes a stream of data in, +// appends address information and sends it out. 
+`default_nettype none + +module krnl_rtl_axi_write_master #( + parameter integer C_ADDR_WIDTH = 64, + parameter integer C_DATA_WIDTH = 32, + parameter integer C_MAX_LENGTH_WIDTH = 32, + parameter integer C_BURST_LEN = 256, + parameter integer C_LOG_BURST_LEN = 8 +) +( + // Control interface + input wire ctrl_start, + input wire [C_ADDR_WIDTH-1:0] ctrl_offset, + input wire [C_MAX_LENGTH_WIDTH-1:0] ctrl_length, + output wire ctrl_done, + + // AXI4-Stream interface + input wire s_tvalid, + input wire [C_DATA_WIDTH-1:0] s_tdata, + output wire s_tready, + + // AXI Interface + input wire aclk, + input wire areset, + + output wire [C_ADDR_WIDTH-1:0] awaddr, + output wire [7:0] awlen, + output wire [2:0] awsize, + output wire awvalid, + input wire awready, + + output wire [C_DATA_WIDTH-1:0] wdata, + output wire [C_DATA_WIDTH/8-1:0] wstrb, + output wire wlast, + output wire wvalid, + input wire wready, + + input wire [1:0] bresp, + input wire bvalid, + output wire bready +); + +timeunit 1ps; +timeprecision 1ps; + +///////////////////////////////////////////////////////////////////////////// +// Local Parameters +///////////////////////////////////////////////////////////////////////////// +localparam integer LP_LOG_MAX_W_TO_AW = 8; // Allow up to 256 outstanding w to aw transactions +localparam integer LP_TRANSACTION_CNTR_WIDTH = C_MAX_LENGTH_WIDTH-C_LOG_BURST_LEN; + +///////////////////////////////////////////////////////////////////////////// +// Variables +///////////////////////////////////////////////////////////////////////////// +logic [LP_TRANSACTION_CNTR_WIDTH-1:0] num_full_bursts; +logic num_partial_bursts; +logic start = 1'b0; +logic [LP_TRANSACTION_CNTR_WIDTH-1:0] num_transactions; +logic has_partial_burst; +logic [C_LOG_BURST_LEN-1:0] final_burst_len; +logic single_transaction; + +logic wxfer; // Unregistered write data transfer +logic wfirst = 1'b1; +logic load_burst_cntr; +logic [C_LOG_BURST_LEN-1:0] wxfers_to_go; // Used for simulation debug +logic 
[LP_TRANSACTION_CNTR_WIDTH-1:0] w_transactions_to_go; +logic w_final_transaction; +logic w_almost_final_transaction = 1'b0; + +logic awxfer; +logic awvalid_r = 1'b0; +logic [C_ADDR_WIDTH-1:0] addr; +logic wfirst_d1 = 1'b0; +logic wfirst_pulse = 1'b0; +logic [LP_LOG_MAX_W_TO_AW-1:0] dbg_w_to_aw_outstanding; +logic idle_aw; +logic [LP_TRANSACTION_CNTR_WIDTH-1:0] aw_transactions_to_go; +logic aw_final_transaction; + +wire bxfer; +logic [LP_TRANSACTION_CNTR_WIDTH-1:0] b_transactions_to_go; +logic b_final_transaction; + +///////////////////////////////////////////////////////////////////////////// +// Control logic +///////////////////////////////////////////////////////////////////////////// +// Count the number of transfers and assert done when the last bvalid is received. +assign num_full_bursts = ctrl_length[C_LOG_BURST_LEN+:C_MAX_LENGTH_WIDTH-C_LOG_BURST_LEN]; +assign num_partial_bursts = ctrl_length[0+:C_LOG_BURST_LEN] ? 1'b1 : 1'b0; + +always @(posedge aclk) begin + start <= ctrl_start; + num_transactions <= (num_partial_bursts == 1'b0) ? num_full_bursts - 1'b1 : num_full_bursts; + has_partial_burst <= num_partial_bursts; + final_burst_len <= ctrl_length[0+:C_LOG_BURST_LEN] - 1'b1; +end + +assign ctrl_done = bxfer & b_final_transaction; +assign single_transaction = (num_transactions == {LP_TRANSACTION_CNTR_WIDTH{1'b0}}) ? 1'b1 : 1'b0; + +///////////////////////////////////////////////////////////////////////////// +// AXI Write Data Channel +///////////////////////////////////////////////////////////////////////////// +assign wvalid = s_tvalid; +assign wdata = s_tdata; +assign wstrb = {(C_DATA_WIDTH/8){1'b1}}; +assign s_tready = wready; + +assign wxfer = wvalid & wready; + +always @(posedge aclk) begin + if (areset) begin + wfirst <= 1'b1; + end + else begin + wfirst <= wxfer ? 
wlast : wfirst; + end +end + +// Load burst counter with partial burst if on final transaction or if there is only 1 transaction +assign load_burst_cntr = (wxfer & wlast & w_almost_final_transaction) || (start & single_transaction); + +krnl_rtl_counter #( + .C_WIDTH ( C_LOG_BURST_LEN ) , + .C_INIT ( {C_LOG_BURST_LEN{1'b1}} ) +) +inst_burst_cntr ( + .clk ( aclk ) , + .clken ( 1'b1 ) , + .rst ( areset ) , + .load ( load_burst_cntr ) , + .incr ( 1'b0 ) , + .decr ( wxfer ) , + .load_value ( final_burst_len ) , + .count ( wxfers_to_go ) , + .is_zero ( wlast ) +); + +krnl_rtl_counter #( + .C_WIDTH ( LP_TRANSACTION_CNTR_WIDTH ) , + .C_INIT ( {LP_TRANSACTION_CNTR_WIDTH{1'b0}} ) +) +inst_w_transaction_cntr ( + .clk ( aclk ) , + .clken ( 1'b1 ) , + .rst ( areset ) , + .load ( start ) , + .incr ( 1'b0 ) , + .decr ( wxfer & wlast ) , + .load_value ( num_transactions ) , + .count ( w_transactions_to_go ) , + .is_zero ( w_final_transaction ) +); + +always @(posedge aclk) begin + w_almost_final_transaction <= (w_transactions_to_go == 1) ? 1'b1 : 1'b0; +end + +///////////////////////////////////////////////////////////////////////////// +// AXI Write Address Channel +///////////////////////////////////////////////////////////////////////////// +// The address channel samples the data channel and send out transactions when +// first beat of wdata is asserted. This ensures that address requests are not +// sent without data on the way. + +assign awvalid = awvalid_r; +assign awxfer = awvalid & awready; + +always @(posedge aclk) begin + if (areset) begin + awvalid_r <= 1'b0; + end + else begin + awvalid_r <= ~idle_aw & ~awvalid_r ? 1'b1 : + awready ? 1'b0 : + awvalid_r; + end +end + +assign awaddr = addr; + +always @(posedge aclk) begin + addr <= ctrl_start ? ctrl_offset : + awxfer ? addr + C_BURST_LEN*C_DATA_WIDTH/8 : + addr; +end + +assign awlen = aw_final_transaction || (start & single_transaction) ? 
final_burst_len : C_BURST_LEN - 1; +assign awsize = $clog2((C_DATA_WIDTH/8)); + +krnl_rtl_counter #( + .C_WIDTH (LP_LOG_MAX_W_TO_AW), + .C_INIT ({LP_LOG_MAX_W_TO_AW{1'b0}}) +) +inst_w_to_aw_cntr ( + .clk ( aclk ) , + .clken ( 1'b1 ) , + .rst ( areset ) , + .load ( 1'b0 ) , + .incr ( wfirst_pulse ) , + .decr ( awxfer ) , + .load_value ( ) , + .count ( dbg_w_to_aw_outstanding ) , + .is_zero ( idle_aw ) +); + +always @(posedge aclk) begin + wfirst_d1 <= wvalid & wfirst; +end + +always @(posedge aclk) begin + wfirst_pulse <= wvalid & wfirst & ~wfirst_d1; +end + +krnl_rtl_counter #( + .C_WIDTH ( LP_TRANSACTION_CNTR_WIDTH ) , + .C_INIT ( {LP_TRANSACTION_CNTR_WIDTH{1'b0}} ) +) +inst_aw_transaction_cntr ( + .clk ( aclk ) , + .clken ( 1'b1 ) , + .rst ( areset ) , + .load ( start ) , + .incr ( 1'b0 ) , + .decr ( awxfer ) , + .load_value ( num_transactions ) , + .count ( aw_transactions_to_go ) , + .is_zero ( aw_final_transaction ) +); + +///////////////////////////////////////////////////////////////////////////// +// AXI Write Response Channel +///////////////////////////////////////////////////////////////////////////// + +assign bready = 1'b1; +assign bxfer = bready & bvalid; + +krnl_rtl_counter #( + .C_WIDTH ( LP_TRANSACTION_CNTR_WIDTH ) , + .C_INIT ( {LP_TRANSACTION_CNTR_WIDTH{1'b0}} ) +) +inst_b_transaction_cntr ( + .clk ( aclk ) , + .clken ( 1'b1 ) , + .rst ( areset ) , + .load ( start ) , + .incr ( 1'b0 ) , + .decr ( bxfer ) , + .load_value ( num_transactions ) , + .count ( b_transactions_to_go ) , + .is_zero ( b_final_transaction ) +); + +endmodule : krnl_rtl_axi_write_master + +`default_nettype wire diff --git a/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_control_s_axi.v b/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_control_s_axi.v new file mode 100644 index 0000000000..c4a76ef0c3 --- /dev/null +++ b/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_control_s_axi.v @@ -0,0 +1,422 @@ +/** +* Copyright (C) 
2019-2021 Xilinx, Inc
+*
+* Licensed under the Apache License, Version 2.0 (the "License"). You may
+* not use this file except in compliance with the License. A copy of the
+* License is located at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+* License for the specific language governing permissions and limitations
+* under the License.
+*/
+
+`timescale 1ns/1ps
+module krnl_rtl_control_s_axi // AXI4-Lite register file: ap_ctrl handshake, interrupts, two 64-bit pointers, two 32-bit lengths
+#(parameter
+    C_S_AXI_ADDR_WIDTH = 6,
+    C_S_AXI_DATA_WIDTH = 32
+)(
+    // axi4 lite slave signals
+    input wire ACLK,
+    input wire ARESET, // synchronous, active high
+    input wire ACLK_EN, // gates every register update in this module
+    input wire [C_S_AXI_ADDR_WIDTH-1:0] AWADDR,
+    input wire AWVALID,
+    output wire AWREADY,
+    input wire [C_S_AXI_DATA_WIDTH-1:0] WDATA,
+    input wire [C_S_AXI_DATA_WIDTH/8-1:0] WSTRB,
+    input wire WVALID,
+    output wire WREADY,
+    output wire [1:0] BRESP,
+    output wire BVALID,
+    input wire BREADY,
+    input wire [C_S_AXI_ADDR_WIDTH-1:0] ARADDR,
+    input wire ARVALID,
+    output wire ARREADY,
+    output wire [C_S_AXI_DATA_WIDTH-1:0] RDATA,
+    output wire [1:0] RRESP,
+    output wire RVALID,
+    input wire RREADY,
+    output wire interrupt,
+    // user signals
+    output wire ap_start,
+    input wire ap_done,
+    input wire ap_ready,
+    input wire ap_idle,
+    output wire [63:0] fifo_in,
+    output wire [63:0] fifo_out,
+    output wire [31:0] length_r_in,
+    output wire [31:0] length_r_out
+);
+//------------------------Address Info-------------------
+// 0x00 : Control signals
+// bit 0 - ap_start (Read/Write/COH)
+// bit 1 - ap_done (Read/COR)
+// bit 2 - ap_idle (Read)
+// bit 3 - ap_ready (Read)
+// bit 7 - auto_restart (Read/Write)
+// others - reserved
+// 0x04 : Global Interrupt Enable Register
+// bit 0 - Global Interrupt Enable (Read/Write)
+// others - reserved
+// 0x08 : IP Interrupt Enable Register (Read/Write)
+// bit 0 - Channel 0 (ap_done)
+// bit 1 - Channel 1 (ap_ready)
+// others - reserved
+// 0x0c : IP Interrupt Status Register (Read/TOW)
+// bit 0 - Channel 0 (ap_done)
+// bit 1 - Channel 1 (ap_ready)
+// others - reserved
+// 0x10 : Data signal of fifo_in
+// bit 31~0 - fifo_in[31:0] (Read/Write)
+// 0x14 : Data signal of fifo_in
+// bit 31~0 - fifo_in[63:32] (Read/Write)
+// 0x18 : reserved
+// 0x1c : Data signal of fifo_out
+// bit 31~0 - fifo_out[31:0] (Read/Write)
+// 0x20 : Data signal of fifo_out
+// bit 31~0 - fifo_out[63:32] (Read/Write)
+// 0x24 : reserved
+// 0x28 : Data signal of length_r_in
+// bit 31~0 - length_r_in[31:0] (Read/Write)
+// 0x2c : reserved
+// 0x30 : Data signal of length_r_out
+// bit 31~0 - length_r_out[31:0] (Read/Write)
+// 0x34 : reserved
+// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
+
+//------------------------Parameter----------------------
+localparam
+    ADDR_AP_CTRL = 6'h00,
+    ADDR_GIE = 6'h04,
+    ADDR_IER = 6'h08,
+    ADDR_ISR = 6'h0c,
+    ADDR_FIFO_IN_DATA_0 = 6'h10,
+    ADDR_FIFO_IN_DATA_1 = 6'h14,
+    ADDR_FIFO_IN_CTRL = 6'h18,
+    ADDR_FIFO_OUT_DATA_0 = 6'h1c,
+    ADDR_FIFO_OUT_DATA_1 = 6'h20,
+    ADDR_FIFO_OUT_CTRL = 6'h24,
+    ADDR_LENGTH_R_IN_DATA_0 = 6'h28,
+    ADDR_LENGTH_R_IN_CTRL = 6'h2c,
+    ADDR_LENGTH_R_OUT_DATA_0 = 6'h30,
+    ADDR_LENGTH_R_OUT_CTRL = 6'h34,
+    WRIDLE = 2'd0,
+    WRDATA = 2'd1,
+    WRRESP = 2'd2,
+    RDIDLE = 2'd0,
+    RDDATA = 2'd1,
+    ADDR_BITS = 6;
+
+//------------------------Local signal-------------------
+    reg [1:0] wstate = WRIDLE;
+    reg [1:0] wnext;
+    reg [ADDR_BITS-1:0] waddr;
+    wire [31:0] wmask;
+    wire aw_hs;
+    wire w_hs;
+    reg [1:0] rstate = RDIDLE;
+    reg [1:0] rnext;
+    reg [31:0] rdata;
+    wire ar_hs;
+    wire [ADDR_BITS-1:0] raddr;
+    // internal registers
+    wire int_ap_idle;
+    wire int_ap_ready;
+    reg int_ap_done = 1'b0;
+    reg int_ap_start = 1'b0;
+    reg int_auto_restart = 1'b0;
+    reg int_gie = 1'b0; // single bit: initialiser width now matches the register
+    reg [1:0] int_ier = 2'b0;
+    reg [1:0] int_isr = 2'b0;
+    reg [63:0] int_fifo_in = 64'b0;
+    reg [63:0] int_fifo_out = 64'b0;
+    reg [31:0] int_length_r_in = 32'b0; // 32 bits like length_r_in; only [31:0] is ever reset, written or read
+    reg [31:0] int_length_r_out = 32'b0;
+
+//------------------------Instantiation------------------
+
+//------------------------AXI write fsm------------------
+assign AWREADY = (~ARESET) & (wstate == WRIDLE);
+assign WREADY = (wstate == WRDATA);
+assign BRESP = 2'b00; // OKAY
+assign BVALID = (wstate == WRRESP);
+assign wmask = { {8{WSTRB[3]}}, {8{WSTRB[2]}}, {8{WSTRB[1]}}, {8{WSTRB[0]}} }; // byte strobes expanded to a bit mask
+assign aw_hs = AWVALID & AWREADY;
+assign w_hs = WVALID & WREADY;
+
+// wstate
+always @(posedge ACLK) begin
+    if (ARESET)
+        wstate <= WRIDLE;
+    else if (ACLK_EN)
+        wstate <= wnext;
+end
+
+// wnext
+always @(*) begin
+    case (wstate)
+        WRIDLE:
+            if (AWVALID)
+                wnext = WRDATA;
+            else
+                wnext = WRIDLE;
+        WRDATA:
+            if (WVALID)
+                wnext = WRRESP;
+            else
+                wnext = WRDATA;
+        WRRESP:
+            if (BREADY)
+                wnext = WRIDLE;
+            else
+                wnext = WRRESP;
+        default:
+            wnext = WRIDLE;
+    endcase
+end
+
+// waddr
+always @(posedge ACLK) begin
+    if (ACLK_EN) begin
+        if (aw_hs)
+            waddr <= AWADDR[ADDR_BITS-1:0];
+    end
+end
+
+//------------------------AXI read fsm-------------------
+assign ARREADY = (~ARESET) && (rstate == RDIDLE);
+assign RDATA = rdata;
+assign RRESP = 2'b00; // OKAY
+assign RVALID = (rstate == RDDATA);
+assign ar_hs = ARVALID & ARREADY;
+assign raddr = ARADDR[ADDR_BITS-1:0];
+
+// rstate
+always @(posedge ACLK) begin
+    if (ARESET)
+        rstate <= RDIDLE;
+    else if (ACLK_EN)
+        rstate <= rnext;
+end
+
+// rnext
+always @(*) begin
+    case (rstate)
+        RDIDLE:
+            if (ARVALID)
+                rnext = RDDATA;
+            else
+                rnext = RDIDLE;
+        RDDATA:
+            if (RREADY & RVALID)
+                rnext = RDIDLE;
+            else
+                rnext = RDDATA;
+        default:
+            rnext = RDIDLE;
+    endcase
+end
+
+// rdata
+always @(posedge ACLK) begin
+    if (ACLK_EN) begin
+        if (ar_hs) begin
+            rdata <= 1'b0; // default: unmapped addresses and unused bits read back as zero
+            case (raddr)
+                ADDR_AP_CTRL: begin
+                    rdata[0] <= int_ap_start;
+                    rdata[1] <= int_ap_done;
+                    rdata[2] <= int_ap_idle;
+                    rdata[3] <= int_ap_ready;
+                    rdata[7] <= int_auto_restart;
+                end
+                ADDR_GIE: begin
+                    rdata <= int_gie;
+                end
+                ADDR_IER: begin
+                    rdata <= int_ier;
+                end
+                ADDR_ISR: begin
+                    rdata <= int_isr;
+                end
+                ADDR_FIFO_IN_DATA_0: begin
+                    rdata <= int_fifo_in[31:0];
+                end
+                ADDR_FIFO_IN_DATA_1: begin
+                    rdata <= int_fifo_in[63:32];
+                end
+                ADDR_FIFO_OUT_DATA_0: begin
+                    rdata <= int_fifo_out[31:0];
+                end
+                ADDR_FIFO_OUT_DATA_1: begin
+                    rdata <= int_fifo_out[63:32];
+                end
+                ADDR_LENGTH_R_IN_DATA_0: begin
+                    rdata <= int_length_r_in[31:0];
+                end
+                ADDR_LENGTH_R_OUT_DATA_0: begin
+                    rdata <= int_length_r_out[31:0];
+                end
+            endcase
+        end
+    end
+end
+
+
+//------------------------Register logic-----------------
+assign interrupt = int_gie & (|int_isr);
+assign ap_start = int_ap_start;
+assign int_ap_idle = ap_idle;
+assign int_ap_ready = ap_ready;
+assign fifo_in = int_fifo_in;
+assign fifo_out = int_fifo_out;
+assign length_r_in = int_length_r_in;
+assign length_r_out = int_length_r_out;
+// int_ap_start
+always @(posedge ACLK) begin
+    if (ARESET)
+        int_ap_start <= 1'b0;
+    else if (ACLK_EN) begin
+        if (w_hs && waddr == ADDR_AP_CTRL && WSTRB[0] && WDATA[0])
+            int_ap_start <= 1'b1;
+        else if (int_ap_ready)
+            int_ap_start <= int_auto_restart; // clear on handshake/auto restart
+    end
+end
+
+// int_ap_done
+always @(posedge ACLK) begin
+    if (ARESET)
+        int_ap_done <= 1'b0;
+    else if (ACLK_EN) begin
+        if (ap_done)
+            int_ap_done <= 1'b1;
+        else if (ar_hs && raddr == ADDR_AP_CTRL)
+            int_ap_done <= 1'b0; // clear on read
+    end
+end
+
+// int_auto_restart
+always @(posedge ACLK) begin
+    if (ARESET)
+        int_auto_restart <= 1'b0;
+    else if (ACLK_EN) begin
+        if (w_hs && waddr == ADDR_AP_CTRL && WSTRB[0])
+            int_auto_restart <= WDATA[7];
+    end
+end
+
+// int_gie
+always @(posedge ACLK) begin
+    if (ARESET)
+        int_gie <= 1'b0;
+    else if (ACLK_EN) begin
+        if (w_hs && waddr == ADDR_GIE && WSTRB[0])
+            int_gie <= WDATA[0];
+    end
+end
+
+// int_ier
+always @(posedge ACLK) begin
+    if (ARESET)
+        int_ier <= 1'b0;
+    else if (ACLK_EN) begin
+        if (w_hs && waddr == ADDR_IER && WSTRB[0])
+            int_ier <= WDATA[1:0];
+    end
+end
+
+// int_isr[0]
+always @(posedge ACLK) begin
+    if (ARESET)
+        int_isr[0] <= 1'b0;
+    else if (ACLK_EN) begin
+        if (int_ier[0] & ap_done)
+            int_isr[0] <= 1'b1;
+        else if (w_hs && waddr == ADDR_ISR && WSTRB[0])
+            int_isr[0] <= int_isr[0] ^ WDATA[0]; // toggle on write
+    end
+end
+
+// int_isr[1]
+always @(posedge ACLK) begin
+    if (ARESET)
+        int_isr[1] <= 1'b0;
+    else if (ACLK_EN) begin
+        if (int_ier[1] & ap_ready)
+            int_isr[1] <= 1'b1;
+        else if (w_hs && waddr == ADDR_ISR && WSTRB[0])
+            int_isr[1] <= int_isr[1] ^ WDATA[1]; // toggle on write
+    end
+end
+
+// int_fifo_in[31:0]
+always @(posedge ACLK) begin
+    if (ARESET)
+        int_fifo_in[31:0] <= 0;
+    else if (ACLK_EN) begin
+        if (w_hs && waddr == ADDR_FIFO_IN_DATA_0)
+            int_fifo_in[31:0] <= (WDATA[31:0] & wmask) | (int_fifo_in[31:0] & ~wmask);
+    end
+end
+
+// int_fifo_in[63:32]
+always @(posedge ACLK) begin
+    if (ARESET)
+        int_fifo_in[63:32] <= 0;
+    else if (ACLK_EN) begin
+        if (w_hs && waddr == ADDR_FIFO_IN_DATA_1)
+            int_fifo_in[63:32] <= (WDATA[31:0] & wmask) | (int_fifo_in[63:32] & ~wmask);
+    end
+end
+
+// int_fifo_out[31:0]
+always @(posedge ACLK) begin
+    if (ARESET)
+        int_fifo_out[31:0] <= 0;
+    else if (ACLK_EN) begin
+        if (w_hs && waddr == ADDR_FIFO_OUT_DATA_0)
+            int_fifo_out[31:0] <= (WDATA[31:0] & wmask) | (int_fifo_out[31:0] & ~wmask);
+    end
+end
+
+// int_fifo_out[63:32]
+always @(posedge ACLK) begin
+    if (ARESET)
+        int_fifo_out[63:32] <= 0;
+    else if (ACLK_EN) begin
+        if (w_hs && waddr == ADDR_FIFO_OUT_DATA_1)
+            int_fifo_out[63:32] <= (WDATA[31:0] & wmask) | (int_fifo_out[63:32] & ~wmask);
+    end
+end
+
+// int_length_r_in[31:0]
+always @(posedge ACLK) begin
+    if (ARESET)
+        int_length_r_in[31:0] <= 0;
+    else if (ACLK_EN) begin
+        if (w_hs && waddr == ADDR_LENGTH_R_IN_DATA_0)
+            int_length_r_in[31:0] <= (WDATA[31:0] & wmask) | (int_length_r_in[31:0] & ~wmask);
+    end
+end
+
+
+// int_length_r_out[31:0]
+always @(posedge ACLK) begin
+    if (ARESET)
+        int_length_r_out[31:0] <= 0;
+    else if (ACLK_EN) begin
+        if (w_hs && waddr == ADDR_LENGTH_R_OUT_DATA_0)
+            int_length_r_out[31:0] <= (WDATA[31:0] & wmask) | (int_length_r_out[31:0] & ~wmask);
+    end
+end
+
+
+//------------------------Memory logic-------------------
+
+endmodule
diff --git a/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_counter.sv b/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_counter.sv
new file mode 100644
index 0000000000..631cde7b09
--- /dev/null
+++ b/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_counter.sv
@@ -0,0 +1,88 @@
+/**
+* Copyright (C) 2019-2021 Xilinx, Inc
+*
+* Licensed under the Apache License, Version 2.0 (the "License"). You may
+* not use this file except in compliance with the License. A copy of the
+* License is located at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+* License for the specific language governing permissions and limitations
+* under the License.
+*/
+
+//-----------------------------------------------------------------------------
+// Simple up/down counter with reset.
+//-----------------------------------------------------------------------------
+`default_nettype none
+`timescale 1ps/1ps
+module krnl_rtl_counter #( // loadable up/down counter with synchronous reset and a registered zero flag
+  parameter integer C_WIDTH = 4, // counter width in bits
+  parameter [C_WIDTH-1:0] C_INIT = {C_WIDTH{1'b0}} // value loaded at power-up and on rst
+)
+(
+  input wire clk,
+  input wire clken, // updates happen only while this is high (rst ignores it)
+  input wire rst, // synchronous, takes priority over everything else
+  input wire load, // load_value wins over incr/decr
+  input wire incr,
+  input wire decr,
+  input wire [C_WIDTH-1:0] load_value,
+  output wire [C_WIDTH-1:0] count,
+  output wire is_zero // registered alongside count_r, so it tracks count == 0 without a combinational compare
+);
+
+  localparam [C_WIDTH-1:0] LP_ZERO = {C_WIDTH{1'b0}};
+  localparam [C_WIDTH-1:0] LP_ONE = {{C_WIDTH-1{1'b0}},1'b1};
+  localparam [C_WIDTH-1:0] LP_MAX = {C_WIDTH{1'b1}};
+
+  reg [C_WIDTH-1:0] count_r = C_INIT;
+  reg is_zero_r = (C_INIT == LP_ZERO);
+
+  assign count = count_r;
+
+  always @(posedge clk) begin
+    if (rst) begin
+      count_r <= C_INIT;
+    end
+    else if (clken) begin
+      if (load) begin
+        count_r <= load_value;
+      end
+      else if (incr & ~decr) begin
+        count_r <= count_r + 1'b1; // wraps from LP_MAX to zero
+      end
+      else if (~incr & decr) begin
+        count_r <= count_r - 1'b1; // wraps from zero to LP_MAX
+      end
+      else // incr and decr both set or both clear: hold
+        count_r <= count_r;
+    end
+  end
+
+  assign is_zero = is_zero_r;
+
+  always @(posedge clk) begin
+    if (rst) begin
+      is_zero_r <= (C_INIT == LP_ZERO);
+    end
+    else if (clken) begin
+      if (load) begin
+        is_zero_r <= (load_value == LP_ZERO);
+      end
+      else begin
+        is_zero_r <= incr ^ decr ? (decr && (count_r == LP_ONE)) || (incr && (count_r == LP_MAX)) : is_zero_r; // next count is zero when stepping down from 1 or wrapping up from LP_MAX
+      end
+    end
+    else begin
+      is_zero_r <= is_zero_r;
+    end
+  end
+
+
+endmodule : krnl_rtl_counter
+`default_nettype wire
+
diff --git a/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_int.sv b/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_int.sv
new file mode 100644
index 0000000000..63581799cb
--- /dev/null
+++ b/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/krnl_rtl_int.sv
@@ -0,0 +1,415 @@
+/**
+* Copyright (C) 2019-2021 Xilinx, Inc
+*
+* Licensed under the Apache License, Version 2.0 (the "License").
You may +* not use this file except in compliance with the License. A copy of the +* License is located at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +* License for the specific language governing permissions and limitations +* under the License. +*/ + +/////////////////////////////////////////////////////////////////////////////// +// Description: This is a example of how to create an RTL Kernel. The function +// of this module is to add two 32-bit values and produce a result. The values +// are read from one AXI4 memory mapped master, processed and then written out. +// +// Data flow: axi_read_master->fifo->adder->fifo->axi_write_master +/////////////////////////////////////////////////////////////////////////////// + +// default_nettype of none prevents implicit wire declaration. +`default_nettype none +`timescale 1 ns / 1 ps + +module krnl_rtl_int #( + parameter integer C_S_AXI_CONTROL_DATA_WIDTH = 32, + parameter integer C_S_AXI_CONTROL_ADDR_WIDTH = 6, + parameter integer C_M_AXI_GMEM_ID_WIDTH = 1, + parameter integer C_M_AXI_GMEM_ADDR_WIDTH = 64, + parameter integer C_M_AXI_GMEM_DATA_WIDTH = 32 +) +( + // System signals + input wire ap_clk, + input wire ap_rst_n, + // AXI4 master interface + output wire m_axi_gmem_AWVALID, + input wire m_axi_gmem_AWREADY, + output wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_gmem_AWADDR, + output wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_gmem_AWID, + output wire [7:0] m_axi_gmem_AWLEN, + output wire [2:0] m_axi_gmem_AWSIZE, + // Tie-off AXI4 transaction options that are not being used. 
+ output wire [1:0] m_axi_gmem_AWBURST, + output wire [1:0] m_axi_gmem_AWLOCK, + output wire [3:0] m_axi_gmem_AWCACHE, + output wire [2:0] m_axi_gmem_AWPROT, + output wire [3:0] m_axi_gmem_AWQOS, + output wire [3:0] m_axi_gmem_AWREGION, + output wire m_axi_gmem_WVALID, + input wire m_axi_gmem_WREADY, + output wire [C_M_AXI_GMEM_DATA_WIDTH-1:0] m_axi_gmem_WDATA, + output wire [C_M_AXI_GMEM_DATA_WIDTH/8-1:0] m_axi_gmem_WSTRB, + output wire m_axi_gmem_WLAST, + output wire m_axi_gmem_ARVALID, + input wire m_axi_gmem_ARREADY, + output wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_gmem_ARADDR, + output wire [C_M_AXI_GMEM_ID_WIDTH-1:0] m_axi_gmem_ARID, + output wire [7:0] m_axi_gmem_ARLEN, + output wire [2:0] m_axi_gmem_ARSIZE, + output wire [1:0] m_axi_gmem_ARBURST, + output wire [1:0] m_axi_gmem_ARLOCK, + output wire [3:0] m_axi_gmem_ARCACHE, + output wire [2:0] m_axi_gmem_ARPROT, + output wire [3:0] m_axi_gmem_ARQOS, + output wire [3:0] m_axi_gmem_ARREGION, + input wire m_axi_gmem_RVALID, + output wire m_axi_gmem_RREADY, + input wire [C_M_AXI_GMEM_DATA_WIDTH - 1:0] m_axi_gmem_RDATA, + input wire m_axi_gmem_RLAST, + input wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_gmem_RID, + input wire [1:0] m_axi_gmem_RRESP, + input wire m_axi_gmem_BVALID, + output wire m_axi_gmem_BREADY, + input wire [1:0] m_axi_gmem_BRESP, + input wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_gmem_BID, + + // AXI4-Lite slave interface + input wire s_axi_control_AWVALID, + output wire s_axi_control_AWREADY, + input wire [C_S_AXI_CONTROL_ADDR_WIDTH-1:0] s_axi_control_AWADDR, + input wire s_axi_control_WVALID, + output wire s_axi_control_WREADY, + input wire [C_S_AXI_CONTROL_DATA_WIDTH-1:0] s_axi_control_WDATA, + input wire [C_S_AXI_CONTROL_DATA_WIDTH/8-1:0] s_axi_control_WSTRB, + input wire s_axi_control_ARVALID, + output wire s_axi_control_ARREADY, + input wire [C_S_AXI_CONTROL_ADDR_WIDTH-1:0] s_axi_control_ARADDR, + output wire s_axi_control_RVALID, + input wire s_axi_control_RREADY, + output wire 
[C_S_AXI_CONTROL_DATA_WIDTH-1:0] s_axi_control_RDATA, + output wire [1:0] s_axi_control_RRESP, + output wire s_axi_control_BVALID, + input wire s_axi_control_BREADY, + output wire [1:0] s_axi_control_BRESP, + output wire interrupt +); +/////////////////////////////////////////////////////////////////////////////// +// Local Parameters (constants) +/////////////////////////////////////////////////////////////////////////////// +localparam integer LP_NUM_READ_CHANNELS = 1; +localparam integer LP_LENGTH_WIDTH = 32; +localparam integer LP_DW_BYTES = C_M_AXI_GMEM_DATA_WIDTH/8; +localparam integer LP_AXI_BURST_LEN = 4096/LP_DW_BYTES < 256 ? 4096/LP_DW_BYTES : 256; +localparam integer LP_LOG_BURST_LEN = $clog2(LP_AXI_BURST_LEN); +localparam integer LP_RD_MAX_OUTSTANDING = 3; +localparam integer LP_RD_FIFO_DEPTH = LP_AXI_BURST_LEN*(LP_RD_MAX_OUTSTANDING + 1); +localparam integer LP_WR_FIFO_DEPTH = LP_AXI_BURST_LEN; + + +/////////////////////////////////////////////////////////////////////////////// +// Variables +/////////////////////////////////////////////////////////////////////////////// +logic areset = 1'b0; +logic ap_start; +logic ap_start_pulse; +logic ap_start_r; +logic ap_ready; +logic ap_done; +logic ap_idle = 1'b1; +logic [C_M_AXI_GMEM_ADDR_WIDTH-1:0] fifo_in; +logic [C_M_AXI_GMEM_ADDR_WIDTH-1:0] fifo_out; +logic [LP_LENGTH_WIDTH-1:0] length_r_in; +logic [LP_LENGTH_WIDTH-1:0] length_r_out; + +logic read_done; +logic [LP_NUM_READ_CHANNELS-1:0] rd_tvalid; +logic [LP_NUM_READ_CHANNELS-1:0] rd_tready_n; +logic [LP_NUM_READ_CHANNELS-1:0] [C_M_AXI_GMEM_DATA_WIDTH-1:0] rd_tdata; +logic [LP_NUM_READ_CHANNELS-1:0] rd_tlast; +logic [LP_NUM_READ_CHANNELS-1:0] ctrl_rd_fifo_prog_full; +logic [LP_NUM_READ_CHANNELS-1:0] rd_fifo_tvalid_n; +logic [LP_NUM_READ_CHANNELS-1:0] rd_fifo_tready; +logic [LP_NUM_READ_CHANNELS-1:0] [C_M_AXI_GMEM_DATA_WIDTH-1:0] rd_fifo_tdata; +logic [LP_NUM_READ_CHANNELS-1:0] rd_fifo_tlast; + +logic NN_inf_tvalid; +logic NN_inf_tready_n; +logic 
[C_M_AXI_GMEM_DATA_WIDTH-1:0] NN_inf_tdata; +logic wr_fifo_tvalid_n; +logic wr_fifo_tready; +logic [C_M_AXI_GMEM_DATA_WIDTH-1:0] wr_fifo_tdata; + +/////////////////////////////////////////////////////////////////////////////// +// RTL Logic +/////////////////////////////////////////////////////////////////////////////// +// Tie-off unused AXI protocol features +assign m_axi_gmem_AWID = {C_M_AXI_GMEM_ID_WIDTH{1'b0}}; +assign m_axi_gmem_AWBURST = 2'b01; +assign m_axi_gmem_AWLOCK = 2'b00; +assign m_axi_gmem_AWCACHE = 4'b0011; +assign m_axi_gmem_AWPROT = 3'b000; +assign m_axi_gmem_AWQOS = 4'b0000; +assign m_axi_gmem_AWREGION = 4'b0000; +assign m_axi_gmem_ARBURST = 2'b01; +assign m_axi_gmem_ARLOCK = 2'b00; +assign m_axi_gmem_ARCACHE = 4'b0011; +assign m_axi_gmem_ARPROT = 3'b000; +assign m_axi_gmem_ARQOS = 4'b0000; +assign m_axi_gmem_ARREGION = 4'b0000; + +// Register and invert reset signal for better timing. +always @(posedge ap_clk) begin + areset <= ~ap_rst_n; +end + +// create pulse when ap_start transitions to 1 +always @(posedge ap_clk) begin + begin + ap_start_r <= ap_start; + end +end + +assign ap_start_pulse = ap_start & ~ap_start_r; + +// ap_idle is asserted when done is asserted, it is de-asserted when ap_start_pulse +// is asserted +always @(posedge ap_clk) begin + if (areset) begin + ap_idle <= 1'b1; + end + else begin + ap_idle <= ap_done ? 1'b1 : + ap_start_pulse ? 
1'b0 : + ap_idle; + end +end + +assign ap_ready = ap_done; + +// AXI4-Lite slave +krnl_rtl_control_s_axi #( + .C_S_AXI_ADDR_WIDTH( C_S_AXI_CONTROL_ADDR_WIDTH ), + .C_S_AXI_DATA_WIDTH( C_S_AXI_CONTROL_DATA_WIDTH ) +) +inst_krnl_control_s_axi ( + .AWVALID ( s_axi_control_AWVALID ) , + .AWREADY ( s_axi_control_AWREADY ) , + .AWADDR ( s_axi_control_AWADDR ) , + .WVALID ( s_axi_control_WVALID ) , + .WREADY ( s_axi_control_WREADY ) , + .WDATA ( s_axi_control_WDATA ) , + .WSTRB ( s_axi_control_WSTRB ) , + .ARVALID ( s_axi_control_ARVALID ) , + .ARREADY ( s_axi_control_ARREADY ) , + .ARADDR ( s_axi_control_ARADDR ) , + .RVALID ( s_axi_control_RVALID ) , + .RREADY ( s_axi_control_RREADY ) , + .RDATA ( s_axi_control_RDATA ) , + .RRESP ( s_axi_control_RRESP ) , + .BVALID ( s_axi_control_BVALID ) , + .BREADY ( s_axi_control_BREADY ) , + .BRESP ( s_axi_control_BRESP ) , + .ACLK ( ap_clk ) , + .ARESET ( areset ) , + .ACLK_EN ( 1'b1 ) , + .ap_start ( ap_start ) , + .interrupt ( interrupt ) , + .ap_ready ( ap_ready ) , + .ap_done ( ap_done ) , + .ap_idle ( ap_idle ) , + .fifo_in ( fifo_in[0+:C_M_AXI_GMEM_ADDR_WIDTH] ) , + .fifo_out ( fifo_out[0+:C_M_AXI_GMEM_ADDR_WIDTH] ) , + .length_r_in ( length_r_in[0+:LP_LENGTH_WIDTH] ) , + .length_r_out ( length_r_out[0+:LP_LENGTH_WIDTH] ) +); + +// AXI4 Read Master +krnl_rtl_axi_read_master #( + .C_ADDR_WIDTH ( C_M_AXI_GMEM_ADDR_WIDTH ) , + .C_DATA_WIDTH ( C_M_AXI_GMEM_DATA_WIDTH ) , + .C_ID_WIDTH ( C_M_AXI_GMEM_ID_WIDTH ) , + .C_NUM_CHANNELS ( LP_NUM_READ_CHANNELS ) , + .C_LENGTH_WIDTH ( LP_LENGTH_WIDTH ) , + .C_BURST_LEN ( LP_AXI_BURST_LEN ) , + .C_LOG_BURST_LEN ( LP_LOG_BURST_LEN ) , + .C_MAX_OUTSTANDING ( LP_RD_MAX_OUTSTANDING ) +) +inst_axi_read_master ( + .aclk ( ap_clk ) , + .areset ( areset ) , + + .ctrl_start ( ap_start_pulse ) , + .ctrl_done ( read_done ) , + .ctrl_offset ( fifo_in ) , + .ctrl_length ( length_r_in ) , + .ctrl_prog_full ( ctrl_rd_fifo_prog_full ) , + + .arvalid ( m_axi_gmem_ARVALID ) , + .arready ( 
m_axi_gmem_ARREADY ) , + .araddr ( m_axi_gmem_ARADDR ) , + .arid ( m_axi_gmem_ARID ) , + .arlen ( m_axi_gmem_ARLEN ) , + .arsize ( m_axi_gmem_ARSIZE ) , + .rvalid ( m_axi_gmem_RVALID ) , + .rready ( m_axi_gmem_RREADY ) , + .rdata ( m_axi_gmem_RDATA ) , + .rlast ( m_axi_gmem_RLAST ) , + .rid ( m_axi_gmem_RID ) , + .rresp ( m_axi_gmem_RRESP ) , + + .m_tvalid ( rd_tvalid ) , + .m_tready ( ~rd_tready_n ) , + .m_tdata ( rd_tdata ) , + .m_tlast ( rd_tlast ) +); + +// xpm_fifo_sync: Synchronous FIFO +// Xilinx Parameterized Macro, Version 2016.4 +xpm_fifo_sync # ( + .FIFO_MEMORY_TYPE ("auto"), //string; "auto", "block", "distributed", or "ultra"; + .ECC_MODE ("no_ecc"), //string; "no_ecc" or "en_ecc"; + .FIFO_WRITE_DEPTH (LP_RD_FIFO_DEPTH), //positive integer + .WRITE_DATA_WIDTH (C_M_AXI_GMEM_DATA_WIDTH+1), //positive integer + .WR_DATA_COUNT_WIDTH ($clog2(LP_RD_FIFO_DEPTH)+1), //positive integer, Not used + .PROG_FULL_THRESH (LP_AXI_BURST_LEN-2), //positive integer + .FULL_RESET_VALUE (1), //positive integer; 0 or 1 + .READ_MODE ("fwft"), //string; "std" or "fwft"; + .FIFO_READ_LATENCY (1), //positive integer; + .READ_DATA_WIDTH (C_M_AXI_GMEM_DATA_WIDTH+1), //positive integer + .RD_DATA_COUNT_WIDTH ($clog2(LP_RD_FIFO_DEPTH)+1), //positive integer, not used + .PROG_EMPTY_THRESH (10), //positive integer, not used + .DOUT_RESET_VALUE ("0"), //string, don't care + .WAKEUP_TIME (0) //positive integer; 0 or 2; + +) inst_rd_xpm_fifo_sync[LP_NUM_READ_CHANNELS-1:0] ( + .sleep ( 1'b0 ) , + .rst ( areset ) , + .wr_clk ( ap_clk ) , + .wr_en ( rd_tvalid ) , + .din ( {rd_tlast,rd_tdata} ) , + .full ( rd_tready_n ) , + .prog_full ( ctrl_rd_fifo_prog_full) , + .wr_data_count ( ) , + .overflow ( ) , + .wr_rst_busy ( ) , + .rd_en ( rd_fifo_tready ) , + .dout ( {rd_fifo_tlast,rd_fifo_tdata} ) , + .empty ( rd_fifo_tvalid_n ) , + .prog_empty ( ) , + .rd_data_count ( ) , + .underflow ( ) , + .rd_rst_busy ( ) , + .injectsbiterr ( 1'b0 ) , + .injectdbiterr ( 1'b0 ) , + .sbiterr ( ) , + .dbiterr 
( ) + +); + +// NN inference +myproject_axi_0 #() +hls4ml_IP ( + .ap_clk ( ap_clk ) , + .ap_rst_n ( ap_rst_n ) , + + .in_r_TVALID ( ~rd_fifo_tvalid_n ) , + .in_r_TREADY ( rd_fifo_tready ) , + .in_r_TDATA ( rd_fifo_tdata ) , + .in_r_TLAST ( rd_fifo_tlast ) , + + .out_r_TVALID ( NN_inf_tvalid ) , + .out_r_TREADY ( ~NN_inf_tready_n ) , + .out_r_TDATA ( NN_inf_tdata ) +); + +// xpm_fifo_sync: Synchronous FIFO +// Xilinx Parameterized Macro, Version 2016.4 +xpm_fifo_sync # ( + .FIFO_MEMORY_TYPE ("auto"), //string; "auto", "block", "distributed", or "ultra"; + .ECC_MODE ("no_ecc"), //string; "no_ecc" or "en_ecc"; + .FIFO_WRITE_DEPTH (LP_WR_FIFO_DEPTH), //positive integer + .WRITE_DATA_WIDTH (C_M_AXI_GMEM_DATA_WIDTH), //positive integer + .WR_DATA_COUNT_WIDTH ($clog2(LP_WR_FIFO_DEPTH)), //positive integer, Not used + .PROG_FULL_THRESH (10), //positive integer, Not used + .FULL_RESET_VALUE (1), //positive integer; 0 or 1 + .READ_MODE ("fwft"), //string; "std" or "fwft"; + .FIFO_READ_LATENCY (1), //positive integer; + .READ_DATA_WIDTH (C_M_AXI_GMEM_DATA_WIDTH), //positive integer + .RD_DATA_COUNT_WIDTH ($clog2(LP_WR_FIFO_DEPTH)), //positive integer, not used + .PROG_EMPTY_THRESH (10), //positive integer, not used + .DOUT_RESET_VALUE ("0"), //string, don't care + .WAKEUP_TIME (0) //positive integer; 0 or 2; + +) inst_wr_xpm_fifo_sync ( + .sleep ( 1'b0 ) , + .rst ( areset ) , + .wr_clk ( ap_clk ) , + .wr_en ( NN_inf_tvalid ) , + .din ( NN_inf_tdata ) , + .full ( NN_inf_tready_n ) , + .prog_full ( ) , + .wr_data_count ( ) , + .overflow ( ) , + .wr_rst_busy ( ) , + .rd_en ( wr_fifo_tready ) , + .dout ( wr_fifo_tdata ) , + .empty ( wr_fifo_tvalid_n ) , + .prog_empty ( ) , + .rd_data_count ( ) , + .underflow ( ) , + .rd_rst_busy ( ) , + .injectsbiterr ( 1'b0 ) , + .injectdbiterr ( 1'b0 ) , + .sbiterr ( ) , + .dbiterr ( ) + +); + + +// AXI4 Write Master +krnl_rtl_axi_write_master #( + .C_ADDR_WIDTH ( C_M_AXI_GMEM_ADDR_WIDTH ) , + .C_DATA_WIDTH ( C_M_AXI_GMEM_DATA_WIDTH ) , + 
.C_MAX_LENGTH_WIDTH ( LP_LENGTH_WIDTH ) , + .C_BURST_LEN ( LP_AXI_BURST_LEN ) , + .C_LOG_BURST_LEN ( LP_LOG_BURST_LEN ) +) +inst_axi_write_master ( + .aclk ( ap_clk ) , + .areset ( areset ) , + + .ctrl_start ( ap_start_pulse ) , + .ctrl_offset ( fifo_out ) , + .ctrl_length ( length_r_out ) , + .ctrl_done ( ap_done ) , + + .awvalid ( m_axi_gmem_AWVALID ) , + .awready ( m_axi_gmem_AWREADY ) , + .awaddr ( m_axi_gmem_AWADDR ) , + .awlen ( m_axi_gmem_AWLEN ) , + .awsize ( m_axi_gmem_AWSIZE ) , + + .s_tvalid ( ~wr_fifo_tvalid_n ) , + .s_tready ( wr_fifo_tready ) , + .s_tdata ( wr_fifo_tdata ) , + + .wvalid ( m_axi_gmem_WVALID ) , + .wready ( m_axi_gmem_WREADY ) , + .wdata ( m_axi_gmem_WDATA ) , + .wstrb ( m_axi_gmem_WSTRB ) , + .wlast ( m_axi_gmem_WLAST ) , + + .bvalid ( m_axi_gmem_BVALID ) , + .bready ( m_axi_gmem_BREADY ) , + .bresp ( m_axi_gmem_BRESP ) +); + +endmodule : krnl_rtl_int + +`default_nettype wire diff --git a/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/myproject_kernel.v b/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/myproject_kernel.v new file mode 100644 index 0000000000..0d5dc71d62 --- /dev/null +++ b/hls4ml/templates/vivado_accelerator/alveo/krnl_rtl_src/myproject_kernel.v @@ -0,0 +1,170 @@ +/** +* Copyright (C) 2019-2021 Xilinx, Inc +* +* Licensed under the Apache License, Version 2.0 (the "License"). You may +* not use this file except in compliance with the License. A copy of the +* License is located at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +* License for the specific language governing permissions and limitations +* under the License. 
+*/ + +/////////////////////////////////////////////////////////////////////////////// +// Description: This is a wrapper of module "krnl_rtl_int" +/////////////////////////////////////////////////////////////////////////////// + +// default_nettype of none prevents implicit wire declaration. +`default_nettype none +`timescale 1 ns / 1 ps + +module krnl_rtl #( + parameter integer C_S_AXI_CONTROL_DATA_WIDTH = 32, + parameter integer C_S_AXI_CONTROL_ADDR_WIDTH = 6, + parameter integer C_M_AXI_GMEM_ID_WIDTH = 1, + parameter integer C_M_AXI_GMEM_ADDR_WIDTH = 64, + parameter integer C_M_AXI_GMEM_DATA_WIDTH = 32 +) +( + // System signals + input wire ap_clk, + input wire ap_rst_n, + // AXI4 master interface + output wire m_axi_gmem_AWVALID, + input wire m_axi_gmem_AWREADY, + output wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_gmem_AWADDR, + output wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_gmem_AWID, + output wire [7:0] m_axi_gmem_AWLEN, + output wire [2:0] m_axi_gmem_AWSIZE, + // Tie-off AXI4 transaction options that are not being used. 
+ output wire [1:0] m_axi_gmem_AWBURST, + output wire [1:0] m_axi_gmem_AWLOCK, + output wire [3:0] m_axi_gmem_AWCACHE, + output wire [2:0] m_axi_gmem_AWPROT, + output wire [3:0] m_axi_gmem_AWQOS, + output wire [3:0] m_axi_gmem_AWREGION, + output wire m_axi_gmem_WVALID, + input wire m_axi_gmem_WREADY, + output wire [C_M_AXI_GMEM_DATA_WIDTH-1:0] m_axi_gmem_WDATA, + output wire [C_M_AXI_GMEM_DATA_WIDTH/8-1:0] m_axi_gmem_WSTRB, + output wire m_axi_gmem_WLAST, + output wire m_axi_gmem_ARVALID, + input wire m_axi_gmem_ARREADY, + output wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_gmem_ARADDR, + output wire [C_M_AXI_GMEM_ID_WIDTH-1:0] m_axi_gmem_ARID, + output wire [7:0] m_axi_gmem_ARLEN, + output wire [2:0] m_axi_gmem_ARSIZE, + output wire [1:0] m_axi_gmem_ARBURST, + output wire [1:0] m_axi_gmem_ARLOCK, + output wire [3:0] m_axi_gmem_ARCACHE, + output wire [2:0] m_axi_gmem_ARPROT, + output wire [3:0] m_axi_gmem_ARQOS, + output wire [3:0] m_axi_gmem_ARREGION, + input wire m_axi_gmem_RVALID, + output wire m_axi_gmem_RREADY, + input wire [C_M_AXI_GMEM_DATA_WIDTH - 1:0] m_axi_gmem_RDATA, + input wire m_axi_gmem_RLAST, + input wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_gmem_RID, + input wire [1:0] m_axi_gmem_RRESP, + input wire m_axi_gmem_BVALID, + output wire m_axi_gmem_BREADY, + input wire [1:0] m_axi_gmem_BRESP, + input wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_gmem_BID, + + // AXI4-Lite slave interface + input wire s_axi_control_AWVALID, + output wire s_axi_control_AWREADY, + input wire [C_S_AXI_CONTROL_ADDR_WIDTH-1:0] s_axi_control_AWADDR, + input wire s_axi_control_WVALID, + output wire s_axi_control_WREADY, + input wire [C_S_AXI_CONTROL_DATA_WIDTH-1:0] s_axi_control_WDATA, + input wire [C_S_AXI_CONTROL_DATA_WIDTH/8-1:0] s_axi_control_WSTRB, + input wire s_axi_control_ARVALID, + output wire s_axi_control_ARREADY, + input wire [C_S_AXI_CONTROL_ADDR_WIDTH-1:0] s_axi_control_ARADDR, + output wire s_axi_control_RVALID, + input wire s_axi_control_RREADY, + output wire 
[C_S_AXI_CONTROL_DATA_WIDTH-1:0] s_axi_control_RDATA, + output wire [1:0] s_axi_control_RRESP, + output wire s_axi_control_BVALID, + input wire s_axi_control_BREADY, + output wire [1:0] s_axi_control_BRESP, + output wire interrupt +); + +krnl_rtl_int #( + .C_S_AXI_CONTROL_DATA_WIDTH ( C_S_AXI_CONTROL_DATA_WIDTH ), + .C_S_AXI_CONTROL_ADDR_WIDTH ( C_S_AXI_CONTROL_ADDR_WIDTH ), + .C_M_AXI_GMEM_ID_WIDTH ( C_M_AXI_GMEM_ID_WIDTH ), + .C_M_AXI_GMEM_ADDR_WIDTH ( C_M_AXI_GMEM_ADDR_WIDTH ), + .C_M_AXI_GMEM_DATA_WIDTH ( C_M_AXI_GMEM_DATA_WIDTH ) +) +inst_krnl_rtl_int ( + .ap_clk ( ap_clk ), + .ap_rst_n ( ap_rst_n ), + .m_axi_gmem_AWVALID ( m_axi_gmem_AWVALID ), + .m_axi_gmem_AWREADY ( m_axi_gmem_AWREADY ), + .m_axi_gmem_AWADDR ( m_axi_gmem_AWADDR ), + .m_axi_gmem_AWID ( m_axi_gmem_AWID ), + .m_axi_gmem_AWLEN ( m_axi_gmem_AWLEN ), + .m_axi_gmem_AWSIZE ( m_axi_gmem_AWSIZE ), + .m_axi_gmem_AWBURST ( m_axi_gmem_AWBURST ), + .m_axi_gmem_AWLOCK ( m_axi_gmem_AWLOCK ), + .m_axi_gmem_AWCACHE ( m_axi_gmem_AWCACHE ), + .m_axi_gmem_AWPROT ( m_axi_gmem_AWPROT ), + .m_axi_gmem_AWQOS ( m_axi_gmem_AWQOS ), + .m_axi_gmem_AWREGION ( m_axi_gmem_AWREGION ), + .m_axi_gmem_WVALID ( m_axi_gmem_WVALID ), + .m_axi_gmem_WREADY ( m_axi_gmem_WREADY ), + .m_axi_gmem_WDATA ( m_axi_gmem_WDATA ), + .m_axi_gmem_WSTRB ( m_axi_gmem_WSTRB ), + .m_axi_gmem_WLAST ( m_axi_gmem_WLAST ), + .m_axi_gmem_ARVALID ( m_axi_gmem_ARVALID ), + .m_axi_gmem_ARREADY ( m_axi_gmem_ARREADY ), + .m_axi_gmem_ARADDR ( m_axi_gmem_ARADDR ), + .m_axi_gmem_ARID ( m_axi_gmem_ARID ), + .m_axi_gmem_ARLEN ( m_axi_gmem_ARLEN ), + .m_axi_gmem_ARSIZE ( m_axi_gmem_ARSIZE ), + .m_axi_gmem_ARBURST ( m_axi_gmem_ARBURST ), + .m_axi_gmem_ARLOCK ( m_axi_gmem_ARLOCK ), + .m_axi_gmem_ARCACHE ( m_axi_gmem_ARCACHE ), + .m_axi_gmem_ARPROT ( m_axi_gmem_ARPROT ), + .m_axi_gmem_ARQOS ( m_axi_gmem_ARQOS ), + .m_axi_gmem_ARREGION ( m_axi_gmem_ARREGION ), + .m_axi_gmem_RVALID ( m_axi_gmem_RVALID ), + .m_axi_gmem_RREADY ( m_axi_gmem_RREADY ), + .m_axi_gmem_RDATA 
( m_axi_gmem_RDATA ), + .m_axi_gmem_RLAST ( m_axi_gmem_RLAST ), + .m_axi_gmem_RID ( m_axi_gmem_RID ), + .m_axi_gmem_RRESP ( m_axi_gmem_RRESP ), + .m_axi_gmem_BVALID ( m_axi_gmem_BVALID ), + .m_axi_gmem_BREADY ( m_axi_gmem_BREADY ), + .m_axi_gmem_BRESP ( m_axi_gmem_BRESP ), + .m_axi_gmem_BID ( m_axi_gmem_BID ), + .s_axi_control_AWVALID ( s_axi_control_AWVALID ), + .s_axi_control_AWREADY ( s_axi_control_AWREADY ), + .s_axi_control_AWADDR ( s_axi_control_AWADDR ), + .s_axi_control_WVALID ( s_axi_control_WVALID ), + .s_axi_control_WREADY ( s_axi_control_WREADY ), + .s_axi_control_WDATA ( s_axi_control_WDATA ), + .s_axi_control_WSTRB ( s_axi_control_WSTRB ), + .s_axi_control_ARVALID ( s_axi_control_ARVALID ), + .s_axi_control_ARREADY ( s_axi_control_ARREADY ), + .s_axi_control_ARADDR ( s_axi_control_ARADDR ), + .s_axi_control_RVALID ( s_axi_control_RVALID ), + .s_axi_control_RREADY ( s_axi_control_RREADY ), + .s_axi_control_RDATA ( s_axi_control_RDATA ), + .s_axi_control_RRESP ( s_axi_control_RRESP ), + .s_axi_control_BVALID ( s_axi_control_BVALID ), + .s_axi_control_BREADY ( s_axi_control_BREADY ), + .s_axi_control_BRESP ( s_axi_control_BRESP ), + .interrupt ( interrupt ) +); +endmodule : krnl_rtl + +`default_nettype wire + diff --git a/hls4ml/templates/vivado_accelerator/alveo/python_drivers/axi_stream_driver.py b/hls4ml/templates/vivado_accelerator/alveo/python_drivers/axi_stream_driver.py new file mode 100644 index 0000000000..2c220df3fe --- /dev/null +++ b/hls4ml/templates/vivado_accelerator/alveo/python_drivers/axi_stream_driver.py @@ -0,0 +1,108 @@ +from datetime import datetime + +import numpy as np +from pynq import Overlay +from pynq import allocate + + +class NeuralNetworkOverlay(Overlay): + def __init__(self, xclbin_name, dtbo=None, download=True, ignore_version=False, device=None): + + super().__init__(xclbin_name, dtbo=dtbo, download=download, ignore_version=ignore_version, device=device) + self.input_buffer=None + self.output_buffer=None + + def 
allocate_mem(self, X_shape, y_shape, dtype=np.float32, trg_in=None, trg_out=None): + """ + Buffer allocation in the card memory + Parameters + ---------- + X_shape : input buffer shape. + y_shape : output buffer shape. + dtype : the data type of the elements of the input/output vectors. + Note: it should be set depending on the interface of the accelerator; if it uses 'float' + types for the 'data' AXI-Stream field, 'np.float32' dtype is the correct one to use. + Instead if it uses 'ap_fixed', 'np.intA' is the correct one to use (note that A cannot + any integer value, but it can assume {..., 8, 16, 32, ...} values. Check `numpy` + doc for more info). + In this case the encoding/decoding has to be computed by the host machine. For example for + 'ap_fixed<16,6>' type the following 2 functions are the correct one to use for encode/decode + 'float' -> 'ap_fixed<16,6>': + ``` + def encode(xi): + return np.int16(round(xi * 2**10)) # note 2**10 = 2**(A-B) + def decode(yi): + return yi * 2**-10 + encode_v = np.vectorize(encode) # to apply them element-wise + decode_v = np.vectorize(decode) + ``` + trg_in : input buffer target memory. By default the v++ command + set it to HBM[0] for alveo-u50. + trg_out : output buffer target memory.By default the v++ command + set it to HBM[0] for alveo-u50. + + Assigns + ------- + input_buffer : input PYNQ buffer, must be allocated first and just once. + output_buffer : output PYNQ buffer, must be allocated first and just once. + input_buffer, output_buffer : input and output PYNQ buffers + + """ + self.input_buffer = allocate(shape=X_shape, dtype=dtype, target=trg_in ) + self.output_buffer = allocate(shape=y_shape, dtype=dtype, target=trg_out) + + def predict(self, X, y_shape, dtype=np.float32, debug=None, profile=False, encode=None, + decode=None): + """ + Obtain the predictions of the NN implemented in the FPGA. + Parameters: + - X : the input vector. Should be numpy ndarray. + - y_shape : the shape of the output vector. 
Needed by the accelerator to set the TLAST bit properly and + for sizing the output vector shape. + - dtype : the data type of the elements of the input/output vectors. + - debug : boolean, if set the function will print information about the data transfer status. + - profile : boolean. Set it to `True` to print the performance of the algorithm in terms of `inference/s`. + - encode/decode: function pointers. See the `dtype` section of `allocate_mem` for more information. Note that `decode` is accepted but is not applied to the returned array. + - return: an output array based on `np.ndarray` with a shape equal to `y_shape` and a `dtype` equal to + the namesake parameter. When `profile` is set, the tuple `(result, dts, rate)` is returned instead. + """ + self.allocate_mem(X_shape=X.shape, y_shape=y_shape, dtype=dtype) + if profile: + timea = datetime.now() + if encode is not None: + X = encode(X) + in_size = np.prod(X.shape) + out_size = np.prod(y_shape) + self.input_buffer[:] = X + self.input_buffer.sync_to_device() + if debug: + print("Send OK") + self.krnl_rtl_1.call(self.input_buffer, self.output_buffer, in_size, out_size) + if debug: + print("Kernel call OK") + self.output_buffer.sync_from_device() + if debug: + print("Recieve OK") + result = self.output_buffer.copy() + if profile: + timeb = datetime.now() + dts, rate = self._print_dt(timea, timeb, len(X)) + self.input_buffer.flush() + self.output_buffer.flush() + self.free() + return result, dts, rate + self.input_buffer.flush() + self.output_buffer.flush() + return result + + def free_overlay(self): + self.free() + + def _print_dt(self, timea, timeb, N): + dt = (timeb - timea) + dts = dt.seconds + dt.microseconds * 10 ** -6 + rate = N / dts + print("Classified {} samples in {} seconds ({} inferences / s)".format(N, dts, rate)) + print("Or {} us / inferences".format(1 / rate * 1e6)) + return dts, rate + diff --git a/hls4ml/templates/vivado_accelerator/alveo/tcl_scripts/axi_stream_design.tcl b/hls4ml/templates/vivado_accelerator/alveo/tcl_scripts/axi_stream_design.tcl new file mode 100644 index 0000000000..2970100e25 --- /dev/null +++ 
b/hls4ml/templates/vivado_accelerator/alveo/tcl_scripts/axi_stream_design.tcl @@ -0,0 +1,109 @@ +set tcldir [file dirname [info script]] +source [file join $tcldir project.tcl] + +create_project project_1 ${myproject}_vivado_accelerator -part ${part} -force + +set_property ip_repo_paths ${myproject}_prj [current_project] +update_ip_catalog + + +add_files -scan_for_includes {src/krnl_rtl_int.sv src/krnl_rtl_axi_read_master.sv src/krnl_rtl_counter.sv src/myproject_kernel.v src/krnl_rtl_axi_write_master.sv src/krnl_rtl_control_s_axi.v} +import_files {src/krnl_rtl_int.sv src/krnl_rtl_axi_read_master.sv src/krnl_rtl_counter.sv src/myproject_kernel.v src/krnl_rtl_axi_write_master.sv src/krnl_rtl_control_s_axi.v} + + + +create_ip -vlnv xilinx.com:hls:${myproject}_axi:1.0 -module_name myproject_axi_0 + + +ipx::package_project -root_dir hls4ml_IP -vendor fastmachinelearning.org -library hls4ml -taxonomy /UserIP -import_files -set_current false +ipx::unload_core hls4ml_IP/component.xml +ipx::edit_ip_in_project -upgrade true -name tmp_edit_project -directory hls4ml_IP hls4ml_IP/component.xml +ipx::associate_bus_interfaces -busif m_axi_gmem -clock ap_clk [ipx::current_core] +ipx::associate_bus_interfaces -busif s_axi_control -clock ap_clk [ipx::current_core] +ipx::add_bus_parameter FREQ_HZ [ipx::get_bus_interfaces ap_clk -of_objects [ipx::current_core]] + + + +set_property value_resolve_type user [ipx::get_bus_parameters -of [::ipx::get_bus_interfaces -of [ipx::current_core] *clk*] "FREQ_HZ"] + + + +ipx::add_register CTRL [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]] +ipx::add_register GIER [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]] +ipx::add_register IP_IER [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]] +ipx::add_register IP_ISR [ipx::get_address_blocks reg0 -of_objects 
[ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]] +ipx::add_register fifo_in [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]] +ipx::add_register fifo_out [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]] +ipx::add_register length_r_in [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]] +ipx::add_register length_r_out [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]] + + +# Commands to set the description, address offset and size + +# CTRL register properties +set_property Description "Control Signals" [ipx::get_registers CTRL -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Address_Offset 0x000 [ipx::get_registers CTRL -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Size 32 [ipx::get_registers CTRL -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] + +# GIER register properties +set_property Description "Global Interrupt Enable Register" [ipx::get_registers GIER -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Address_Offset 0x004 [ipx::get_registers GIER -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Size 32 [ipx::get_registers GIER -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] + +# IP_IER register properties +set_property Description "IP Interrupt Enable Register" [ipx::get_registers IP_IER 
-of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Address_Offset 0x008 [ipx::get_registers IP_IER -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Size 32 [ipx::get_registers IP_IER -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] + +# IP_ISR register properties +set_property Description "IP Interrupt Status Register" [ipx::get_registers IP_ISR -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Address_Offset 0x00C [ipx::get_registers IP_ISR -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Size 32 [ipx::get_registers IP_ISR -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] + +# fifo_in register properties +set_property Description "fifo_in pointer argument" [ipx::get_registers fifo_in -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Address_Offset 0x010 [ipx::get_registers fifo_in -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Size 64 [ipx::get_registers fifo_in -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] + +# fifo_out register properties +set_property Description "fifo_out pointer argument" [ipx::get_registers fifo_out -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Address_Offset 0x01C [ipx::get_registers fifo_out 
-of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Size 64 [ipx::get_registers fifo_out -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] + +# length_r_in register properties +set_property Description "length_r_in value" [ipx::get_registers length_r_in -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Address_Offset 0x028 [ipx::get_registers length_r_in -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Size 32 [ipx::get_registers length_r_in -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] + +# length_r_out register properties +set_property Description "length_r_out value" [ipx::get_registers length_r_out -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Address_Offset 0x030 [ipx::get_registers length_r_out -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +set_property Size 32 [ipx::get_registers length_r_out -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] + +ipx::add_register_parameter ASSOCIATED_BUSIF [ipx::get_registers fifo_in -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] +ipx::add_register_parameter ASSOCIATED_BUSIF [ipx::get_registers fifo_out -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]] + +# Commands to set m_axi_gmem as value in the register 
ASSOCIATED_BUSIF parameters +set_property Value m_axi_gmem [ipx::get_register_parameters ASSOCIATED_BUSIF -of_objects [ipx::get_registers fifo_in -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]]] +set_property Value m_axi_gmem [ipx::get_register_parameters ASSOCIATED_BUSIF -of_objects [ipx::get_registers fifo_out -of_objects [ipx::get_address_blocks reg0 -of_objects [ipx::get_memory_maps s_axi_control -of_objects [ipx::current_core]]]]] + +set core [ipx::current_core] + + +set_property xpm_libraries {XPM_CDC XPM_MEMORY XPM_FIFO} $core +set_property sdx_kernel true $core +set_property sdx_kernel_type rtl $core + + + +set_property core_revision 2 [ipx::current_core] +ipx::update_source_project_archive -component [ipx::current_core] +ipx::create_xgui_files [ipx::current_core] +ipx::update_checksums [ipx::current_core] +ipx::save_core [ipx::current_core] +ipx::check_integrity -quiet [ipx::current_core] +ipx::archive_core hls4ml_IP/fastmachinelearning.org_hls4ml_krnl_rtl_1.0.zip [ipx::current_core] +current_project project_1 + + +package_xo -force -xo_path xo_files/${myproject}_kernel.xo -kernel_name krnl_rtl -ip_directory hls4ml_IP diff --git a/hls4ml/writer/vivado_accelerator_writer.py b/hls4ml/writer/vivado_accelerator_writer.py index a873e8950c..353153c732 100644 --- a/hls4ml/writer/vivado_accelerator_writer.py +++ b/hls4ml/writer/vivado_accelerator_writer.py @@ -1,6 +1,6 @@ import os -from shutil import copyfile - +from shutil import copyfile, copytree +from distutils.dir_util import copy_tree from hls4ml.writer.vivado_writer import VivadoWriter class VivadoAcceleratorWriter(VivadoWriter): @@ -311,14 +311,22 @@ def write_wrapper_test(self, model): def write_board_script(self, model): ''' - Write the tcl scripts to create a Vivado IPI project for the VivadoAccelerator + Write the tcl scripts and kernel sources to create a Vivado IPI project for the VivadoAccelerator ''' filedir = 
os.path.dirname(os.path.abspath(__file__)) copyfile(os.path.join(filedir, self.vivado_accelerator_config.get_tcl_file_path()), '{}/design.tcl'.format(model.config.get_output_dir())) + # Generic alveo board + if self.vivado_accelerator_config.get_board().startswith('alveo'): + src_dir=os.path.join(filedir, self.vivado_accelerator_config.get_krnl_rtl_src_dir()) + dst_dir= os.path.abspath(model.config.get_output_dir())+'/src' + copy_tree(src_dir,dst_dir) f = open('{}/project.tcl'.format(model.config.get_output_dir()), 'w') f.write('variable myproject\n') f.write('set myproject "{}"\n'.format(model.config.get_project_name())) + if self.vivado_accelerator_config.get_board().startswith('alveo'): + f.write('variable part\n') + f.write('set part "{}"\n'.format(self.vivado_accelerator_config.get_part())) if self.vivado_accelerator_config.get_interface() == 'axi_stream': in_bit, out_bit = self.vivado_accelerator_config.get_io_bitwidth() f.write('set bit_width_hls_output {}\n'.format(in_bit)) diff --git a/test/hls4ml-keras-test.sh b/test/hls4ml-keras-test.sh index 09ce49053f..62c1bd20d8 100755 --- a/test/hls4ml-keras-test.sh +++ b/test/hls4ml-keras-test.sh @@ -11,6 +11,9 @@ VIVADO_VERSION=2020.1 # Alternatively, keras-to-hls script can be called, with the model name(s) specified, i.e.: #./keras-to-hls.sh KERAS_1layer KERAS_conv1d_small +./keras-to-hls.sh -b alveo-u250 -B VivadoAccelerator -x xcu250-figd2104-2L-e KERAS_3layer +./keras-to-hls.sh -b pynq-z2 -B VivadoAccelerator -x xc7z020clg400-1 KERAS_3layer +# KERAS_3layer b:pynq-z2 B:VivadoAccelerator x:xc7z020clg400-1 s:Resource # Build the projects generated by keras-to-hls script. # Remove parameter -s to disable synthesis. 
-p controls the number of parallel tasks diff --git a/test/keras-models.txt b/test/keras-models.txt index 439c770a0f..e087cb6f62 100644 --- a/test/keras-models.txt +++ b/test/keras-models.txt @@ -29,10 +29,11 @@ KERAS_3layer_batch_norm KERAS_3layer_binary_smaller KERAS_3layer_ternary_small +# Pynq backend KERAS_3layer b:pynq-z2 B:VivadoAccelerator x:xc7z020clg400-1 s:Resource - garnet_1layer x:xcku115-flvb2104-2-i y:garnet_1layer_config + # Resource strategy KERAS_3layer r:2 s:Resource qkeras_mnist_dense r:112 s:Resource diff --git a/test/keras-to-hls.sh b/test/keras-to-hls.sh index b11304c617..674c9c3f32 100755 --- a/test/keras-to-hls.sh +++ b/test/keras-to-hls.sh @@ -11,7 +11,7 @@ strategy="Latency" type="ap_fixed<16,6>" yml="" basedir=vivado_prj - +precision="float" sanitizer="[^A-Za-z0-9._]" function print_usage { @@ -47,9 +47,9 @@ function print_usage { echo " Prints this help message." } -while getopts ":x:b:B:c:sr:g:t:d:y:h" opt; do +while getopts ":x:b:B:c:sr:g:t:d:y:p:h" opt; do case "$opt" in - x) part=$OPTARG + x) part=$OPTARG ;; b) board=$OPTARG ;; @@ -69,6 +69,8 @@ while getopts ":x:b:B:c:sr:g:t:d:y:h" opt; do ;; y) yml=$OPTARG ;; + p) precision=$OPTARG + ;; h) print_usage exit @@ -109,7 +111,6 @@ do if [ ! 
-z "${yml}" ]; then hlscfg=`sed -ne '/HLSConfig/,$p' ../example-models/config-files/${yml}` fi - echo "KerasJson: ../example-models/keras/${name}.json" > ${file} echo "KerasH5: ../example-models/keras/${h5}.h5" >> ${file} echo "OutputDir: ${prjdir}" >> ${file} @@ -120,7 +121,6 @@ do echo "ClockPeriod: ${clock}" >> ${file} echo "" >> ${file} echo "IOType: ${io}" >> ${file} - if [ -z "${hlscfg}" ] then echo "HLSConfig:" >> ${file} @@ -131,7 +131,16 @@ do else echo "${hlscfg}" >> ${file} fi - + # Adding VivadoAccelerator config to file + if [ "${backend}" = "VivadoAccelerator" ]; + then + echo "AcceleratorConfig:" >> ${file} + echo " Board: ${board}" >> ${file} + echo " Precision:" >> ${file} + echo " Input: ${precision}" >> ${file} + echo " Output: ${precision}" >> ${file} + fi + ${pycmd} ../scripts/hls4ml convert -c ${file} || exit 1 rm ${file} rm -rf "${prjdir}"