diff --git a/src/crankshaft/s390/lithium-codegen-s390.cc b/src/crankshaft/s390/lithium-codegen-s390.cc index 7bc99718bef1..6c3673591971 100644 --- a/src/crankshaft/s390/lithium-codegen-s390.cc +++ b/src/crankshaft/s390/lithium-codegen-s390.cc @@ -1969,20 +1969,38 @@ void LCodeGen::DoArithmeticD(LArithmeticD* instr) { DoubleRegister left = ToDoubleRegister(instr->left()); DoubleRegister right = ToDoubleRegister(instr->right()); DoubleRegister result = ToDoubleRegister(instr->result()); - // All operations except MOD are computed in-place. - DCHECK(instr->op() == Token::MOD || left.is(result)); switch (instr->op()) { case Token::ADD: - __ adbr(result, right); + if (CpuFeatures::IsSupported(VECTOR_FACILITY)) { + __ vfa(result, left, right); + } else { + DCHECK(result.is(left)); + __ adbr(result, right); + } break; case Token::SUB: - __ sdbr(result, right); + if (CpuFeatures::IsSupported(VECTOR_FACILITY)) { + __ vfs(result, left, right); + } else { + DCHECK(result.is(left)); + __ sdbr(result, right); + } break; case Token::MUL: - __ mdbr(result, right); + if (CpuFeatures::IsSupported(VECTOR_FACILITY)) { + __ vfm(result, left, right); + } else { + DCHECK(result.is(left)); + __ mdbr(result, right); + } break; case Token::DIV: - __ ddbr(result, right); + if (CpuFeatures::IsSupported(VECTOR_FACILITY)) { + __ vfd(result, left, right); + } else { + DCHECK(result.is(left)); + __ ddbr(result, right); + } break; case Token::MOD: { __ PrepareCallCFunction(0, 2, scratch0()); diff --git a/src/crankshaft/s390/lithium-s390.cc b/src/crankshaft/s390/lithium-s390.cc index e147cebeffef..cd5ff79c781d 100644 --- a/src/crankshaft/s390/lithium-s390.cc +++ b/src/crankshaft/s390/lithium-s390.cc @@ -619,7 +619,9 @@ LInstruction* LChunkBuilder::DoArithmeticD(Token::Value op, LOperand* left = UseRegisterAtStart(instr->BetterLeftOperand()); LOperand* right = UseRegisterAtStart(instr->BetterRightOperand()); LArithmeticD* result = new (zone()) LArithmeticD(op, left, right); - return 
DefineSameAsFirst(result); + return CpuFeatures::IsSupported(VECTOR_FACILITY) + ? DefineAsRegister(result) + : DefineSameAsFirst(result); } } diff --git a/src/globals.h b/src/globals.h index cece1f3685f5..b76f7f5deb42 100644 --- a/src/globals.h +++ b/src/globals.h @@ -795,6 +795,7 @@ enum CpuFeature { DISTINCT_OPS, GENERAL_INSTR_EXT, FLOATING_POINT_EXT, + VECTOR_FACILITY, NUMBER_OF_CPU_FEATURES, diff --git a/src/s390/assembler-s390.cc b/src/s390/assembler-s390.cc index 36ab3a4b1773..697b71f050e0 100644 --- a/src/s390/assembler-s390.cc +++ b/src/s390/assembler-s390.cc @@ -138,30 +138,34 @@ void CpuFeatures::ProbeImpl(bool cross_compile) { // The facilities we are checking for are: // Bit 45 - Distinct Operands for instructions like ARK, SRK, etc. // As such, we require only 1 double word - int64_t facilities[1]; - facilities[0] = 0; + int64_t facilities[3] = {0L}; // LHI sets up GPR0 // STFLE is specified as .insn, as opcode is not recognized. // We register the instructions kill r0 (LHI) and the CC (STFLE). 
asm volatile( - "lhi 0,0\n" + "lhi 0,2\n" ".insn s,0xb2b00000,%0\n" : "=Q"(facilities) : : "cc", "r0"); + uint64_t one = static_cast(1); // Test for Distinct Operands Facility - Bit 45 - if (facilities[0] & (1lu << (63 - 45))) { + if (facilities[0] & (one << (63 - 45))) { supported_ |= (1u << DISTINCT_OPS); } // Test for General Instruction Extension Facility - Bit 34 - if (facilities[0] & (1lu << (63 - 34))) { + if (facilities[0] & (one << (63 - 34))) { supported_ |= (1u << GENERAL_INSTR_EXT); } // Test for Floating Point Extension Facility - Bit 37 - if (facilities[0] & (1lu << (63 - 37))) { + if (facilities[0] & (one << (63 - 37))) { supported_ |= (1u << FLOATING_POINT_EXT); } + // Test for Vector Facility - Bit 129 + if (facilities[2] & (one << (63 - (129 - 128)))) { + supported_ |= (1u << VECTOR_FACILITY); + } } #else // All distinct ops instructions can be simulated @@ -171,6 +175,7 @@ void CpuFeatures::ProbeImpl(bool cross_compile) { supported_ |= (1u << FLOATING_POINT_EXT); USE(performSTFLE); // To avoid assert + supported_ |= (1u << VECTOR_FACILITY); #endif supported_ |= (1u << FPU); } @@ -192,6 +197,7 @@ void CpuFeatures::PrintFeatures() { printf("FPU_EXT=%d\n", CpuFeatures::IsSupported(FLOATING_POINT_EXT)); printf("GENERAL_INSTR=%d\n", CpuFeatures::IsSupported(GENERAL_INSTR_EXT)); printf("DISTINCT_OPS=%d\n", CpuFeatures::IsSupported(DISTINCT_OPS)); + printf("VECTOR_FACILITY=%d\n", CpuFeatures::IsSupported(VECTOR_FACILITY)); } Register ToRegister(int num) { diff --git a/src/s390/assembler-s390.h b/src/s390/assembler-s390.h index 10cbaa8f0d6d..f8528e66de65 100644 --- a/src/s390/assembler-s390.h +++ b/src/s390/assembler-s390.h @@ -758,6 +758,55 @@ class Assembler : public AssemblerBase { void name(Register r3, Register b1, Disp d1, Register b2, Disp d2); \ void name(Register r3, const MemOperand& opnd1, const MemOperand& opnd2) +#define DECLARE_VRR_A_INSTRUCTIONS(name, opcode_name, opcode_value) \ + void name(DoubleRegister v1, DoubleRegister v2, Condition 
m5, Condition m4, \ + Condition m3) { \ + uint64_t code = (static_cast(opcode_value & 0xFF00)) * B32 | \ + (static_cast(v1.code())) * B36 | \ + (static_cast(v2.code())) * B32 | \ + (static_cast(m5 & 0xF)) * B20 | \ + (static_cast(m4 & 0xF)) * B16 | \ + (static_cast(m3 & 0xF)) * B12 | \ + (static_cast(opcode_value & 0x00FF)); \ + emit6bytes(code); \ + } + VRR_A_OPCODE_LIST(DECLARE_VRR_A_INSTRUCTIONS) +#undef DECLARE_VRR_A_INSTRUCTIONS + +#define DECLARE_VRR_C_INSTRUCTIONS(name, opcode_name, opcode_value) \ + void name(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3, \ + Condition m6, Condition m5, Condition m4) { \ + uint64_t code = (static_cast(opcode_value & 0xFF00)) * B32 | \ + (static_cast(v1.code())) * B36 | \ + (static_cast(v2.code())) * B32 | \ + (static_cast(v3.code())) * B28 | \ + (static_cast(m6 & 0xF)) * B20 | \ + (static_cast(m5 & 0xF)) * B16 | \ + (static_cast(m4 & 0xF)) * B12 | \ + (static_cast(opcode_value & 0x00FF)); \ + emit6bytes(code); \ + } + VRR_C_OPCODE_LIST(DECLARE_VRR_C_INSTRUCTIONS) +#undef DECLARE_VRR_C_INSTRUCTIONS + + // Single Element format + void vfa(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3) { + vfa(v1, v2, v3, static_cast(0), static_cast(8), + static_cast(3)); + } + void vfs(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3) { + vfs(v1, v2, v3, static_cast(0), static_cast(8), + static_cast(3)); + } + void vfm(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3) { + vfm(v1, v2, v3, static_cast(0), static_cast(8), + static_cast(3)); + } + void vfd(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3) { + vfd(v1, v2, v3, static_cast(0), static_cast(8), + static_cast(3)); + } + // S390 instruction sets RX_FORM(bc); RR_FORM(bctr); diff --git a/src/s390/constants-s390.h b/src/s390/constants-s390.h index 1cfc0b7da754..9696decd0de4 100644 --- a/src/s390/constants-s390.h +++ b/src/s390/constants-s390.h @@ -157,9 +157,33 @@ typedef uint16_t TwoByteInstr; typedef uint32_t FourByteInstr; typedef uint64_t 
SixByteInstr; +#define VRR_A_OPCODE_LIST(V) \ + V(wfc, WFC, 0xE7CB) /* VECTOR FP COMPARE SCALAR */ \ + V(vcdg, VCDG, 0xE7D3) /* VECTOR FP CONVERT FROM FIXED 64-BIT */ \ + V(vcdlg, VCDLG, 0xE7C1) /* VECTOR FP CONVERT FROM LOGICAL 64-BIT */ \ + V(vcgd, VCGD, 0xE7C2) /* VECTOR FP CONVERT TO FIXED 64-BIT */ \ + V(vclgd, VCLGD, 0xE7C0) /* VECTOR FP CONVERT TO LOGICAL 64-BIT */ \ + V(vfi, VFI, 0xE7C7) /* VECTOR LOAD FP INTEGER */ \ + V(vlde, VLDE, 0xE7C4) /* VECTOR FP LOAD LENGTHENED */ \ + V(vled, VLED, 0xE7C5) /* VECTOR FP LOAD ROUNDED */ \ + V(vfpso, VFPSO, 0xE7CC) /* VECTOR FP PERFORM SIGN OPERATION */ \ + V(vfsq, VFSQ, 0xE7CE) /* VECTOR FP SQUARE ROOT */ \ + V(wfk, WFK, 0xE7CA) /* VECTOR FP COMPARE AND SIGNAL SCALAR */ + +#define VRR_C_OPCODE_LIST(V) \ + V(vfa, VFA, 0xE7E3) /* VECTOR FP ADD */ \ + V(vfs, VFS, 0xE7E2) /* VECTOR FP SUBTRACT */ \ + V(vfm, VFM, 0xE7E7) /* VECTOR FP MULTIPLY */ \ + V(vfd, VFD, 0xE7E5) /* VECTOR FP DIVIDE */ + // Opcodes as defined in Appendix B-2 table enum Opcode { - A = 0x5A, // Add (32) +#define DECLARE_OPCODES(name, opcode_name, opcode_value) \ + opcode_name = opcode_value, + VRR_A_OPCODE_LIST(DECLARE_OPCODES) VRR_C_OPCODE_LIST(DECLARE_OPCODES) +#undef DECLARE_OPCODES + + A = 0x5A, // Add (32) ADB = 0xED1A, // Add (long BFP) ADBR = 0xB31A, // Add (long BFP) ADTR = 0xB3D2, // Add (long DFP) @@ -1304,6 +1328,23 @@ class Instruction { DISALLOW_IMPLICIT_CONSTRUCTORS(Instruction); }; +#define DECLARE_FIELD_FOR_SIX_BYTE_INSTR(name, T, lo, hi) \ + inline int name() const { \ + return Bits(47 - (lo), 47 - (hi) + 1); \ + } + +// VRR Instruction +class VRR_C_Instruction : Instruction { + public: + DECLARE_FIELD_FOR_SIX_BYTE_INSTR(R1Value, int, 8, 12); + DECLARE_FIELD_FOR_SIX_BYTE_INSTR(R2Value, int, 12, 16); + DECLARE_FIELD_FOR_SIX_BYTE_INSTR(R3Value, int, 16, 20); + DECLARE_FIELD_FOR_SIX_BYTE_INSTR(M6Value, uint32_t, 24, 28); + DECLARE_FIELD_FOR_SIX_BYTE_INSTR(M5Value, uint32_t, 28, 32); + DECLARE_FIELD_FOR_SIX_BYTE_INSTR(M4Value, uint32_t, 
32, 36); + inline int size() const { return 6; } +}; + // I Instruction -- suspect this will not be used, // but implement for completeness class IInstruction : Instruction { diff --git a/src/s390/disasm-s390.cc b/src/s390/disasm-s390.cc index 5f2f4011a716..0eaf8254ffba 100644 --- a/src/s390/disasm-s390.cc +++ b/src/s390/disasm-s390.cc @@ -1055,6 +1055,12 @@ bool Decoder::DecodeSixByte(Instruction* instr) { case DUMY: Format(instr, "dumy\t'r1, 'd2 ( 'r2d, 'r3 )"); break; +#define DECODE_VRR_C_INSTRUCTIONS(name, opcode_name, opcode_value) \ + case opcode_name: \ + Format(instr, #name "\t'f1,'f2,'f3"); \ + break; + VRR_C_OPCODE_LIST(DECODE_VRR_C_INSTRUCTIONS) +#undef DECODE_VRR_C_INSTRUCTIONS case LLILF: Format(instr, "llilf\t'r1,'i7"); break; diff --git a/src/s390/simulator-s390.cc b/src/s390/simulator-s390.cc index df4c02dfeb2d..4e2fcf7d8b9b 100644 --- a/src/s390/simulator-s390.cc +++ b/src/s390/simulator-s390.cc @@ -743,6 +743,11 @@ void Simulator::EvalTableInit() { EvalTable[i] = &Simulator::Evaluate_Unknown; } +#define CREATE_EVALUATE_TABLE(name, op_name, op_value) \ + EvalTable[op_name] = &Simulator::Evaluate_##op_name; + VRR_C_OPCODE_LIST(CREATE_EVALUATE_TABLE); +#undef CREATE_EVALUATE_TABLE + EvalTable[DUMY] = &Simulator::Evaluate_DUMY; EvalTable[BKPT] = &Simulator::Evaluate_BKPT; EvalTable[SPM] = &Simulator::Evaluate_SPM; @@ -6050,6 +6055,15 @@ uintptr_t Simulator::PopAddress() { int d2 = AS(RXEInstruction)->D2Value(); \ int length = 6; +#define DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4) \ + int r1 = AS(VRR_C_Instruction)->R1Value(); \ + int r2 = AS(VRR_C_Instruction)->R2Value(); \ + int r3 = AS(VRR_C_Instruction)->R3Value(); \ + int m6 = AS(VRR_C_Instruction)->M6Value(); \ + int m5 = AS(VRR_C_Instruction)->M5Value(); \ + int m4 = AS(VRR_C_Instruction)->M4Value(); \ + int length = 6; + #define GET_ADDRESS(index_reg, base_reg, offset) \ (((index_reg) == 0) ? 0 : get_register(index_reg)) + \ (((base_reg) == 0) ? 
0 : get_register(base_reg)) + offset @@ -6059,10 +6073,75 @@ int Simulator::Evaluate_Unknown(Instruction* instr) { return 0; } +EVALUATE(VFA) { + DCHECK_OPCODE(VFA); + DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4); + USE(m6); + USE(m5); + USE(m4); + DCHECK(m5 == 8); + DCHECK(m4 == 3); + double r2_val = get_double_from_d_register(r2); + double r3_val = get_double_from_d_register(r3); + double r1_val = r2_val + r3_val; + set_d_register_from_double(r1, r1_val); + return length; +} + +EVALUATE(VFS) { + DCHECK_OPCODE(VFS); + DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4); + USE(m6); + USE(m5); + USE(m4); + DCHECK(m5 == 8); + DCHECK(m4 == 3); + double r2_val = get_double_from_d_register(r2); + double r3_val = get_double_from_d_register(r3); + double r1_val = r2_val - r3_val; + set_d_register_from_double(r1, r1_val); + return length; +} + +EVALUATE(VFM) { + DCHECK_OPCODE(VFM); + DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4); + USE(m6); + USE(m5); + USE(m4); + DCHECK(m5 == 8); + DCHECK(m4 == 3); + double r2_val = get_double_from_d_register(r2); + double r3_val = get_double_from_d_register(r3); + double r1_val = r2_val * r3_val; + set_d_register_from_double(r1, r1_val); + return length; +} + +EVALUATE(VFD) { + DCHECK_OPCODE(VFD); + DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4); + USE(m6); + USE(m5); + USE(m4); + DCHECK(m5 == 8); + DCHECK(m4 == 3); + double r2_val = get_double_from_d_register(r2); + double r3_val = get_double_from_d_register(r3); + double r1_val = r2_val / r3_val; + set_d_register_from_double(r1, r1_val); + return length; +} + EVALUATE(DUMY) { DCHECK_OPCODE(DUMY); + DECODE_RXY_A_INSTRUCTION(r1, x2, b2, d2); + USE(r1); + USE(x2); + USE(b2); + USE(d2); // dummy instruction does nothing. 
- return 6; + return length; } EVALUATE(CLR) { diff --git a/src/s390/simulator-s390.h b/src/s390/simulator-s390.h index 66730b5d7475..91d2fbf83419 100644 --- a/src/s390/simulator-s390.h +++ b/src/s390/simulator-s390.h @@ -522,6 +522,11 @@ class Simulator { static void EvalTableInit(); #define EVALUATE(name) int Evaluate_##name(Instruction* instr) +#define EVALUATE_VRR_INSTRUCTIONS(name, op_name, op_value) EVALUATE(op_name); + VRR_C_OPCODE_LIST(EVALUATE_VRR_INSTRUCTIONS) + VRR_A_OPCODE_LIST(EVALUATE_VRR_INSTRUCTIONS) +#undef EVALUATE_VRR_INSTRUCTIONS + EVALUATE(DUMY); EVALUATE(BKPT); EVALUATE(SPM);