Fail to Reorder Reduction Loops #489

Open
sqPoseidon opened this issue Mar 1, 2023 · 2 comments
@sqPoseidon

In the packed_conv2d_nchw function, there are four reduction loops: in_channel, kernel_h, kernel_w, and bitwidth. When I try to move the output channel loop into the reduction loops, I get the following error:

heterocl-mlir/hcl-dialect/llvm-project/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp:1496: 
unsigned int mlir::permuteLoops(llvm::MutableArrayRef<mlir::AffineForOp>, llvm::ArrayRef<unsigned int>): 
Assertion `false && "invalid permutation map"' failed.
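For context, my reading of MLIR's permuteLoops (a sketch of its contract, not verified against the HeteroCL lowering) is that it takes a band of perfectly nested loops plus a permutation map where entry i gives the new position of loop i, and the assertion fires when that map is not a bijection over 0..n-1 for the band it actually gathered. A minimal Python check of that condition (is_valid_perm_map is a hypothetical helper for illustration, not a HeteroCL or MLIR API):

# Sketch (assumption): the "invalid permutation map" assertion checks
# that the permutation map is a bijection over the loop positions.
def is_valid_perm_map(perm_map):
    return sorted(perm_map) == list(range(len(perm_map)))

# The reorder in the example below maps [nn, ff, yy, xx, rc, rx, ry]
# (positions 0..6) to [nn, yy, xx, rc, ff, rx, ry],
# i.e. perm_map = [0, 4, 1, 2, 3, 5, 6]:
print(is_valid_perm_map([0, 4, 1, 2, 3, 5, 6]))  # True: the map itself is well-formed

The requested permutation is itself well-formed, which suggests the problem is the band: if the loops are not perfectly nested and only a shorter band is gathered, the same indices no longer form a valid permutation for it.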

Here's the example:

import heterocl as hcl
import heterocl.op.bnn as bnn
import numpy as np


def test_bconv_popcnt():
    packing_factor = 8
    out_channel = 64
    strides = (1, 1)
    padding = (1, 1)
    in_channel = 8
    bitwidth = min(in_channel, packing_factor)
    in_dtype = hcl.Float()
    out_dtype = hcl.Float()
    in_shape = (1, in_channel, 3, 3) # n, c, h, w
    weight_shape = (out_channel, in_channel, 3, 3) # o, i, h, w
    out_shape = (1, out_channel, 3, 3)

    def conv(data, weight):
        data = hcl.compute(
            data.shape,
            lambda *args: hcl.select(data[args] > 0, 1, 0),
            name="data",
            dtype=hcl.UInt(1),
        )
        weight = hcl.compute(
            weight.shape,
            lambda *args: hcl.select(weight[args] > 0, 1, 0),
            name="weight",
            dtype=hcl.UInt(1),
        )
        # pack along channel dimension
        packed_data = hcl.pack(
            data,
            axis=1,
            factor=bitwidth,
            name="conv_packed",
            dtype=hcl.UInt(bitwidth),
        )
        packed_weight = hcl.pack(
            weight,
            axis=1,
            factor=bitwidth,
            name="conv_packed",
            dtype=hcl.UInt(bitwidth),
        )
        return bnn.packed_conv2d_nchw(
            packed_data,
            packed_weight,
            strides=strides,
            padding=padding,
            name="conv_conv2d",
            out_dtype=out_dtype,
        )

    data = hcl.placeholder(in_shape, "data", dtype=in_dtype)
    weight = hcl.placeholder(weight_shape, "weight", dtype=in_dtype)
    s = hcl.create_schedule([data, weight], conv)

    B = getattr(conv, "conv_conv2d")
    print("B.axis: ", B.axis) # nn, ff, yy, xx, conv_conv2d_rc, conv_conv2d_rx, conv_conv2d_ry
    # s[B].reorder(B.axis[0], B.axis[2], B.axis[1])
    s[B].reorder(B.axis[0], B.axis[2], B.axis[3], B.axis[4], B.axis[1], B.axis[5], B.axis[6]) # nn, yy, xx, conv_conv2d_rc, ff, conv_conv2d_rx, conv_conv2d_ry

    f = hcl.build(s)
    print(f.host_src)

    a_np = np.random.randint(0, 10, in_shape)
    b_np = np.random.randint(0, 10, weight_shape)

    hcl_a = hcl.asarray(a_np, dtype=in_dtype)
    hcl_b = hcl.asarray(b_np, dtype=in_dtype)
    hcl_c = hcl.asarray(np.zeros(out_shape), dtype=hcl.Float())

    f(hcl_a, hcl_b, hcl_c)

    n, c, h, w = in_shape
    o, _, kh, kw = weight_shape  # weight's in_channel equals c; avoid reusing "i", the batch index below
    # binarize a_np, b_np
    a_np = np.where(a_np > 0, 1, -1)
    b_np = np.where(b_np > 0, 1, -1)
    # pad a_np
    a_np = np.pad(a_np, ((0, 0), (0, 0), (1, 1), (1, 1)), 'constant')
    # calculate convolution
    baseline_output = np.zeros((n, o, h, w))
    for i in range(n):
        for j in range(o):
            for k in range(h):
                for l in range(w):
                    for m in range(c):
                        for p in range(kh):
                            for q in range(kw):
                                baseline_output[i][j][k][l] += a_np[i][m][k + p][l + q] * b_np[j][m][p][q]

    assert np.allclose(hcl_c.asnumpy(), baseline_output)

test_bconv_popcnt()
@zzzDavid
Collaborator

zzzDavid commented Mar 2, 2023

This seems like a limitation of mlir::permuteLoops. I will look into this and provide more detail.

@chhzh123
Member

chhzh123 commented Mar 2, 2023

It's actually a limitation on our side. Currently we initialize the reduction variable outside all the reduction loops, which makes the inner loop nest imperfect, so we cannot directly permute the reduction loops with the spatial loops.
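To make the limitation concrete, here is a hand-written sketch of the lowered loop structure (my assumption of roughly what the conv stage lowers to, not actual HeteroCL output); the accumulator initialization between the spatial and reduction loops is what makes the nest imperfect:

import numpy as np

# Sketch (assumption) of the lowered structure for the example above:
# packed_channels = in_channel // bitwidth = 1 in the reproducer.
out_channel, packed_channels, kh, kw = 64, 1, 3, 3
out = np.zeros((1, out_channel, 3, 3))

for nn in range(1):                      # ---- spatial band ----
    for ff in range(out_channel):
        for yy in range(3):
            for xx in range(3):
                acc = 0                  # reduction init splits the nest here
                for rc in range(packed_channels):  # ---- reduction band ----
                    for rx in range(kh):
                        for ry in range(kw):
                            acc += 1     # stands in for the popcount MAC
                out[nn, ff, yy, xx] = acc  # write-back also sits between the bands

Moving ff between rc and rx would first require sinking the acc = 0 and the write-back into the reduction band; until that is supported, reorders that stay within one band (e.g. only among the spatial axes, or only among B.axis[4], B.axis[5], B.axis[6]) should not hit the assertion.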
