Skip to content

Commit

Permalink
[RV64_DYNAREC] Added more MMX opcodes and some optimizations too (#1535)
Browse files Browse the repository at this point in the history
* [RV64_DYNAREC] Added 0F F7 MASKMOVQ opcode

* [RV64_DYNAREC] Added 0F 38 1C PABSB opcode

* [RV64_DYNAREC] Added 0F 38 1E PABSD opcode

* [RV64_DYNAREC] Added 0F 38 1D PABSW opcode

* [RV64_DYNAREC] Added 0F 63 PACKSSWB opcode

* [RV64_DYNAREC] Added 0F FC PADDB opcode

* [RV64_DYNAREC] Added 0F D4 PADDQ opcode

* [RV64_DYNAREC] Added 0F EC PADDSB opcode and optimized 66 0F EC PADDSB opcode

* [RV64_DYNAREC] Added 0F DC PADDUSB opcode and optimized 66 0F DC PADDUSB opcode
  • Loading branch information
xctan authored May 28, 2024
1 parent b79f86b commit 7c5bf62
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 21 deletions.
156 changes: 156 additions & 0 deletions src/dynarec/rv64/dynarec_rv64_0f.c
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,42 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
SB(x3, gback, gdoffset + i);
}
break;
case 0x1C:
INST_NAME("PABSB Gm,Em");
nextop = F8;
GETGM();
GETEM(x2, 0);
for (int i = 0; i < 8; ++i) {
LB(x4, wback, fixedaddress + i);
BGE(x4, xZR, 4 + 4);
NEG(x4, x4);
SB(x4, gback, gdoffset + i);
}
break;
case 0x1D:
INST_NAME("PABSW Gm,Em");
nextop = F8;
GETGM();
GETEM(x2, 0);
for (int i = 0; i < 4; ++i) {
LH(x4, wback, fixedaddress + i * 2);
BGE(x4, xZR, 4 + 4);
NEG(x4, x4);
SH(x4, gback, gdoffset + i * 2);
}
break;
case 0x1E:
INST_NAME("PABSD Gm,Em");
nextop = F8;
GETGM();
GETEM(x2, 0);
for (int i = 0; i < 2; ++i) {
LW(x4, wback, fixedaddress + i * 4);
BGE(x4, xZR, 4 + 4);
NEG(x4, x4);
SW(x4, gback, gdoffset + i * 4);
}
break;
case 0xC8 ... 0xCD:
u8 = nextop;
switch (u8) {
Expand Down Expand Up @@ -866,6 +902,44 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
LWU(x3, wback, fixedaddress);
SW(x3, gback, gdoffset + 4 * 1);
break;
case 0x63:
INST_NAME("PACKSSWB Gm,Em");
nextop = F8;
GETGM();
GETEM(x2, 0);
MOV64x(x5, 127);
MOV64x(x6, -128);
for (int i = 0; i < 4; ++i) {
LH(x3, gback, gdoffset + i * 2);
if (rv64_zbb) {
MIN(x3, x3, x5);
MAX(x3, x3, x6);
} else {
BLT(x3, x5, 4 + 4);
MV(x3, x5);
BGE(x3, x6, 4 + 4);
MV(x3, x6);
}
SB(x3, gback, gdoffset + i);
}
if (MODREG && gd == ed) {
LW(x3, gback, gdoffset + 0);
SW(x3, gback, gdoffset + 4);
} else
for (int i = 0; i < 4; ++i) {
LH(x3, wback, fixedaddress + i * 2);
if (rv64_zbb) {
MIN(x3, x3, x5);
MAX(x3, x3, x6);
} else {
BLT(x3, x5, 4 + 4);
MV(x3, x5);
BGE(x3, x6, 4 + 4);
MV(x3, x6);
}
SB(x3, gback, gdoffset + 4 + i);
}
break;
case 0x67:
INST_NAME("PACKUSWB Gm, Em");
nextop = F8;
Expand Down Expand Up @@ -1909,6 +1983,16 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
gd = xRAX + (opcode & 7) + (rex.b << 3);
REV8xw(gd, gd, x1, x2, x3, x4);
break;
case 0xD4:
// PADDQ: one 64-bit addition, Gm = Gm + Em.
INST_NAME("PADDQ Gm,Em");
nextop = F8;
GETGM();
GETEM(x2, 0);
// Em is loaded first. NOTE(review): GETEM(x2, 0) presumably may leave the
// operand address in x2 (wback == x2) for memory operands, in which case this
// load must stay ahead of the LD that overwrites x2 -- confirm before reordering.
LD(x1, wback, fixedaddress);
LD(x2, gback, gdoffset);
ADD(x1, x1, x2);
SD(x1, gback, gdoffset);
break;
case 0xD5:
INST_NAME("PMULLW Gm, Em");
nextop = F8;
Expand All @@ -1933,6 +2017,25 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
AND(x1, x1, x2);
SD(x1, gback, gdoffset);
break;
case 0xDC:
INST_NAME("PADDUSB Gm,Em");
nextop = F8;
GETGM();
GETEM(x2, 0);
ADDI(x5, xZR, 0xFF);
for (int i = 0; i < 8; ++i) {
LBU(x3, gback, gdoffset + i);
LBU(x4, wback, fixedaddress + i);
ADD(x3, x3, x4);
if (rv64_zbb) {
MINU(x3, x3, x5);
} else {
BLT(x3, x5, 8);
ADDI(x3, xZR, 0xFF);
}
SB(x3, gback, gdoffset + i);
}
break;
case 0xE2:
INST_NAME("PSRAD Gm, Em");
nextop = F8;
Expand Down Expand Up @@ -2008,6 +2111,34 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
OR(x3, x3, x4);
SD(x3, gback, gdoffset);
break;
case 0xEC:
// PADDSB: add the 8 signed bytes of Em to those of Gm with signed saturation.
INST_NAME("PADDSB Gm,Em");
nextop = F8;
GETGM();
GETEM(x2, 0);
// x5 = upper bound 127.
ADDI(x5, xZR, 0x7f);
// x6 = lower bound. NOTE(review): 0xf80 is presumably encoded as the 12-bit
// immediate that sign-extends to -128 (matches the "< -128" test below) -- confirm
// against the ADDI emitter.
ADDI(x6, xZR, 0xf80);
for (int i = 0; i < 8; ++i) {
// tmp16s = (int16_t)GX->sb[i] + EX->sb[i];
// GX->sb[i] = (tmp16s>127)?127:((tmp16s<-128)?-128:tmp16s);
LB(x3, gback, gdoffset + i);
LB(x4, wback, fixedaddress + i);
ADDW(x3, x3, x4);
if (rv64_zbb) {
// Branch-free clamp to [x6, x5] followed by a single store.
MIN(x3, x3, x5);
MAX(x3, x3, x6);
SB(x3, gback, gdoffset + i);
} else {
// Seven emitted instructions; byte offsets relative to the first BLT:
//   +0 BLT  +4 SB(x5)  +8 J  +12 BLT  +16 SB(x6)  +20 J  +24 SB(x3)  +28 next lane
// The branch/jump distances below are hand-counted against this layout, so
// adding or removing an instruction here requires recomputing them.
BLT(x3, x5, 12); // tmp16s>127?
SB(x5, gback, gdoffset + i);
J(20); // continue
BLT(x6, x3, 12); // tmp16s<-128?
SB(x6, gback, gdoffset + i);
J(8); // continue
SB(x3, gback, gdoffset + i);
}
}
break;
case 0xED:
INST_NAME("PADDSW Gm,Em");
nextop = F8;
Expand Down Expand Up @@ -2060,13 +2191,38 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
SW(x1, gback, gdoffset + i * 4);
}
break;
case 0xF7:
    // MASKMOVQ: for each of the 8 byte lanes, store the Gm byte to [RDI + lane]
    // only when the most significant bit of the matching Em (mask) byte is set.
    INST_NAME("MASKMOVQ Gm, Em");
    nextop = F8;
    GETGM();
    GETEM(x5, 0);
    for (int i = 0; i < 8; i++) {
        // LB sign-extends, so the mask bit (bit 7) is set exactly when x1 < 0.
        LB(x1, wback, fixedaddress + i);
        // Skip the load+store (3 instructions ahead) for every non-negative mask
        // byte. The previous BLT(xZR, x1, ...) skipped only when x1 > 0, so a mask
        // byte of 0 wrongly wrote to memory.
        BGE(x1, xZR, 4 * 3);
        LB(x2, gback, gdoffset + i);
        SB(x2, xRDI, i);
    }
    break;
case 0xF9:
// PSUBW: word-wise subtraction of Em from Gm.
INST_NAME("PSUBW Gm, Em");
nextop = F8;
GETGM();
GETEM(x2, 0);
// NOTE(review): MMX_LOOP_W is defined elsewhere; presumably it loads each 16-bit
// lane of Gm into x3 and of Em into x4, runs the given operation, and stores x3
// back to Gm -- confirm against the macro definition.
MMX_LOOP_W(x3, x4, SUBW(x3, x3, x4));
break;
case 0xFC:
INST_NAME("PADDB Gm, Em");
nextop = F8;
GETGM();
GETEM(x2, 0);
for (int i = 0; i < 8; ++i) {
// GM->sb[i] += EM->sb[i];
LB(x3, gback, gdoffset + i);
LB(x4, wback, fixedaddress + i);
ADDW(x3, x3, x4);
SB(x3, gback, gdoffset + i);
}
break;
case 0xFD:
INST_NAME("PADDW Gm, Em");
nextop = F8;
Expand Down
35 changes: 21 additions & 14 deletions src/dynarec/rv64/dynarec_rv64_660f.c
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,6 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
nextop = F8;
GETGX();
GETEX(x2, 0);
MOV64x(x5, ~(1 << 31));
for (int i = 0; i < 4; ++i) {
LW(x4, wback, fixedaddress + i * 4);
BGE(x4, xZR, 4 + 4);
Expand Down Expand Up @@ -2763,8 +2762,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
LBU(x3, gback, gdoffset + i);
LBU(x4, wback, fixedaddress + i);
ADD(x3, x3, x4);
BLT(x3, x5, 8);
ADDI(x3, xZR, 0xFF);
if (rv64_zbb) {
MINU(x3, x3, x5);
} else {
BLT(x3, x5, 8);
ADDI(x3, xZR, 0xFF);
}
SB(x3, gback, gdoffset + i);
}
break;
Expand Down Expand Up @@ -2993,23 +2996,27 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
nextop = F8;
GETGX();
GETEX(x2, 0);
ADDI(x5, xZR, 0x7f);
ADDI(x6, xZR, 0xf80);
for (int i = 0; i < 16; ++i) {
// tmp16s = (int16_t)GX->sb[i] + EX->sb[i];
// GX->sb[i] = (tmp16s>127)?127:((tmp16s<-128)?-128:tmp16s);
LB(x3, gback, gdoffset + i);
LB(x4, wback, fixedaddress + i);
ADDW(x3, x3, x4);
SLLIW(x3, x3, 16);
SRAIW(x3, x3, 16);
ADDI(x4, xZR, 0x7f);
BLT(x3, x4, 12); // tmp16s>127?
SB(x4, gback, gdoffset + i);
J(24); // continue
ADDI(x4, xZR, 0xf80);
BLT(x4, x3, 12); // tmp16s<-128?
SB(x4, gback, gdoffset + i);
J(8); // continue
SB(x3, gback, gdoffset + i);
if (rv64_zbb) {
MIN(x3, x3, x5);
MAX(x3, x3, x6);
SB(x3, gback, gdoffset + i);
} else {
BLT(x3, x5, 12); // tmp16s>127?
SB(x5, gback, gdoffset + i);
J(20); // continue
BLT(x6, x3, 12); // tmp16s<-128?
SB(x6, gback, gdoffset + i);
J(8); // continue
SB(x3, gback, gdoffset + i);
}
}
break;
case 0xED:
Expand Down
7 changes: 1 addition & 6 deletions src/dynarec/rv64/dynarec_rv64_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,7 @@
#define GETSEW(i, D) \
if (MODREG) { \
wback = xRAX + (nextop & 7) + (rex.b << 3); \
if (rv64_zbb) \
SEXTH(i, wback); \
else { \
SLLI(i, wback, 48); \
SRAI(i, i, 48); \
} \
SEXTH(i, wback); \
ed = i; \
wb1 = 0; \
} else { \
Expand Down
10 changes: 9 additions & 1 deletion src/dynarec/rv64/rv64_emitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -821,7 +821,15 @@ f28–31 ft8–11 FP temporaries Caller
// Sign-extend byte
#define SEXTB(rd, rs) EMIT(R_type(0b0110000, 0b00100, rs, 0b001, rd, 0b0010011))
// Sign-extend half-word
#define SEXTH(rd, rs) EMIT(R_type(0b0110000, 0b00101, rs, 0b001, rd, 0b0010011))
#define SEXTH_(rd, rs) EMIT(R_type(0b0110000, 0b00101, rs, 0b001, rd, 0b0010011))
// Sign-extend half-word: emits the single SEXTH_ instruction when rv64_zbb is
// set, otherwise a shift-left / arithmetic-shift-right pair by 48 bits.
// Wrapped in do { } while (0) so the macro behaves as one statement and can be
// used safely as the body of an unbraced if/else at the call site.
#define SEXTH(rd, rs)         \
    do {                      \
        if (rv64_zbb) {       \
            SEXTH_(rd, rs);   \
        } else {              \
            SLLI(rd, rs, 48); \
            SRAI(rd, rd, 48); \
        }                     \
    } while (0)
// Zero-extend half-word
#define ZEXTH_(rd, rs) EMIT(R_type(0b0000100, 0b00000, rs, 0b100, rd, 0b0111011))
// Zero-extend half-word
Expand Down

0 comments on commit 7c5bf62

Please sign in to comment.