diff --git a/src/crankshaft/s390/lithium-codegen-s390.cc b/src/crankshaft/s390/lithium-codegen-s390.cc
index 7bc99718bef1..6c3673591971 100644
--- a/src/crankshaft/s390/lithium-codegen-s390.cc
+++ b/src/crankshaft/s390/lithium-codegen-s390.cc
@@ -1969,20 +1969,38 @@ void LCodeGen::DoArithmeticD(LArithmeticD* instr) {
   DoubleRegister left = ToDoubleRegister(instr->left());
   DoubleRegister right = ToDoubleRegister(instr->right());
   DoubleRegister result = ToDoubleRegister(instr->result());
-  // All operations except MOD are computed in-place.
-  DCHECK(instr->op() == Token::MOD || left.is(result));
   switch (instr->op()) {
     case Token::ADD:
-      __ adbr(result, right);
+      if (CpuFeatures::IsSupported(VECTOR_FACILITY)) {
+        __ vfa(result, left, right);
+      } else {
+        DCHECK(result.is(left));
+        __ adbr(result, right);
+      }
       break;
     case Token::SUB:
-      __ sdbr(result, right);
+      if (CpuFeatures::IsSupported(VECTOR_FACILITY)) {
+        __ vfs(result, left, right);
+      } else {
+        DCHECK(result.is(left));
+        __ sdbr(result, right);
+      }
       break;
     case Token::MUL:
-      __ mdbr(result, right);
+      if (CpuFeatures::IsSupported(VECTOR_FACILITY)) {
+        __ vfm(result, left, right);
+      } else {
+        DCHECK(result.is(left));
+        __ mdbr(result, right);
+      }
       break;
     case Token::DIV:
-      __ ddbr(result, right);
+      if (CpuFeatures::IsSupported(VECTOR_FACILITY)) {
+        __ vfd(result, left, right);
+      } else {
+        DCHECK(result.is(left));
+        __ ddbr(result, right);
+      }
       break;
     case Token::MOD: {
       __ PrepareCallCFunction(0, 2, scratch0());
diff --git a/src/crankshaft/s390/lithium-s390.cc b/src/crankshaft/s390/lithium-s390.cc
index e147cebeffef..cd5ff79c781d 100644
--- a/src/crankshaft/s390/lithium-s390.cc
+++ b/src/crankshaft/s390/lithium-s390.cc
@@ -619,7 +619,9 @@ LInstruction* LChunkBuilder::DoArithmeticD(Token::Value op,
     LOperand* left = UseRegisterAtStart(instr->BetterLeftOperand());
     LOperand* right = UseRegisterAtStart(instr->BetterRightOperand());
     LArithmeticD* result = new (zone()) LArithmeticD(op, left, right);
-    return DefineSameAsFirst(result);
+    return CpuFeatures::IsSupported(VECTOR_FACILITY)
+               ? DefineAsRegister(result)
+               : DefineSameAsFirst(result);
   }
 }
 
diff --git a/src/globals.h b/src/globals.h
index cece1f3685f5..b76f7f5deb42 100644
--- a/src/globals.h
+++ b/src/globals.h
@@ -795,6 +795,7 @@ enum CpuFeature {
   DISTINCT_OPS,
   GENERAL_INSTR_EXT,
   FLOATING_POINT_EXT,
+  VECTOR_FACILITY,
 
   NUMBER_OF_CPU_FEATURES,
 
diff --git a/src/s390/assembler-s390.cc b/src/s390/assembler-s390.cc
index 36ab3a4b1773..697b71f050e0 100644
--- a/src/s390/assembler-s390.cc
+++ b/src/s390/assembler-s390.cc
@@ -138,30 +138,34 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
     // The facilities we are checking for are:
     //   Bit 45 - Distinct Operands for instructions like ARK, SRK, etc.
     // As such, we require only 1 double word
-    int64_t facilities[1];
-    facilities[0] = 0;
+    int64_t facilities[3] = {0L};
     // LHI sets up GPR0
     // STFLE is specified as .insn, as opcode is not recognized.
     // We register the instructions kill r0 (LHI) and the CC (STFLE).
     asm volatile(
-        "lhi   0,0\n"
+        "lhi   0,2\n"
         ".insn s,0xb2b00000,%0\n"
         : "=Q"(facilities)
         :
         : "cc", "r0");
 
+    uint64_t one = static_cast<uint64_t>(1);
     // Test for Distinct Operands Facility - Bit 45
-    if (facilities[0] & (1lu << (63 - 45))) {
+    if (facilities[0] & (one << (63 - 45))) {
       supported_ |= (1u << DISTINCT_OPS);
     }
     // Test for General Instruction Extension Facility - Bit 34
-    if (facilities[0] & (1lu << (63 - 34))) {
+    if (facilities[0] & (one << (63 - 34))) {
       supported_ |= (1u << GENERAL_INSTR_EXT);
     }
     // Test for Floating Point Extension Facility - Bit 37
-    if (facilities[0] & (1lu << (63 - 37))) {
+    if (facilities[0] & (one << (63 - 37))) {
       supported_ |= (1u << FLOATING_POINT_EXT);
     }
+    // Test for Vector Facility - Bit 129
+    if (facilities[2] & (one << (63 - (129 - 128)))) {
+      supported_ |= (1u << VECTOR_FACILITY);
+    }
   }
 #else
   // All distinct ops instructions can be simulated
@@ -171,6 +175,7 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
 
   supported_ |= (1u << FLOATING_POINT_EXT);
   USE(performSTFLE);  // To avoid assert
+  supported_ |= (1u << VECTOR_FACILITY);
 #endif
   supported_ |= (1u << FPU);
 }
@@ -192,6 +197,7 @@ void CpuFeatures::PrintFeatures() {
   printf("FPU_EXT=%d\n", CpuFeatures::IsSupported(FLOATING_POINT_EXT));
   printf("GENERAL_INSTR=%d\n", CpuFeatures::IsSupported(GENERAL_INSTR_EXT));
   printf("DISTINCT_OPS=%d\n", CpuFeatures::IsSupported(DISTINCT_OPS));
+  printf("VECTOR_FACILITY=%d\n", CpuFeatures::IsSupported(VECTOR_FACILITY));
 }
 
 Register ToRegister(int num) {
diff --git a/src/s390/assembler-s390.h b/src/s390/assembler-s390.h
index 10cbaa8f0d6d..f8528e66de65 100644
--- a/src/s390/assembler-s390.h
+++ b/src/s390/assembler-s390.h
@@ -758,6 +758,55 @@ class Assembler : public AssemblerBase {
   void name(Register r3, Register b1, Disp d1, Register b2, Disp d2); \
   void name(Register r3, const MemOperand& opnd1, const MemOperand& opnd2)
 
+#define DECLARE_VRR_A_INSTRUCTIONS(name, opcode_name, opcode_value)           \
+  void name(DoubleRegister v1, DoubleRegister v2, Condition m5, Condition m4, \
+            Condition m3) {                                                   \
+    uint64_t code = (static_cast<uint64_t>(opcode_value & 0xFF00)) * B32 |    \
+                    (static_cast<uint64_t>(v1.code())) * B36 |                \
+                    (static_cast<uint64_t>(v2.code())) * B32 |                \
+                    (static_cast<uint64_t>(m5 & 0xF)) * B20 |                 \
+                    (static_cast<uint64_t>(m4 & 0xF)) * B16 |                 \
+                    (static_cast<uint64_t>(m3 & 0xF)) * B12 |                 \
+                    (static_cast<uint64_t>(opcode_value & 0x00FF));           \
+    emit6bytes(code);                                                         \
+  }
+  VRR_A_OPCODE_LIST(DECLARE_VRR_A_INSTRUCTIONS)
+#undef DECLARE_VRR_A_INSTRUCTIONS
+
+#define DECLARE_VRR_C_INSTRUCTIONS(name, opcode_name, opcode_value)        \
+  void name(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3,       \
+            Condition m6, Condition m5, Condition m4) {                    \
+    uint64_t code = (static_cast<uint64_t>(opcode_value & 0xFF00)) * B32 | \
+                    (static_cast<uint64_t>(v1.code())) * B36 |             \
+                    (static_cast<uint64_t>(v2.code())) * B32 |             \
+                    (static_cast<uint64_t>(v3.code())) * B28 |             \
+                    (static_cast<uint64_t>(m6 & 0xF)) * B20 |              \
+                    (static_cast<uint64_t>(m5 & 0xF)) * B16 |              \
+                    (static_cast<uint64_t>(m4 & 0xF)) * B12 |              \
+                    (static_cast<uint64_t>(opcode_value & 0x00FF));        \
+    emit6bytes(code);                                                      \
+  }
+  VRR_C_OPCODE_LIST(DECLARE_VRR_C_INSTRUCTIONS)
+#undef DECLARE_VRR_C_INSTRUCTIONS
+
+  // Single Element format
+  void vfa(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3) {
+    vfa(v1, v2, v3, static_cast<Condition>(0), static_cast<Condition>(8),
+        static_cast<Condition>(3));
+  }
+  void vfs(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3) {
+    vfs(v1, v2, v3, static_cast<Condition>(0), static_cast<Condition>(8),
+        static_cast<Condition>(3));
+  }
+  void vfm(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3) {
+    vfm(v1, v2, v3, static_cast<Condition>(0), static_cast<Condition>(8),
+        static_cast<Condition>(3));
+  }
+  void vfd(DoubleRegister v1, DoubleRegister v2, DoubleRegister v3) {
+    vfd(v1, v2, v3, static_cast<Condition>(0), static_cast<Condition>(8),
+        static_cast<Condition>(3));
+  }
+
   // S390 instruction sets
   RX_FORM(bc);
   RR_FORM(bctr);
diff --git a/src/s390/constants-s390.h b/src/s390/constants-s390.h
index 1cfc0b7da754..9696decd0de4 100644
--- a/src/s390/constants-s390.h
+++ b/src/s390/constants-s390.h
@@ -157,9 +157,33 @@ typedef uint16_t TwoByteInstr;
 typedef uint32_t FourByteInstr;
 typedef uint64_t SixByteInstr;
 
+#define VRR_A_OPCODE_LIST(V)                                          \
+  V(wfc, WFC, 0xE7CB)     /* VECTOR FP COMPARE SCALAR */              \
+  V(vcdg, VCDG, 0xE7D3)   /* VECTOR FP CONVERT FROM FIXED 64-BIT */   \
+  V(vcdlg, VCDLG, 0xE7C1) /* VECTOR FP CONVERT FROM LOGICAL 64-BIT */ \
+  V(vcgd, VCGD, 0xE7C2)   /* VECTOR FP CONVERT TO FIXED 64-BIT */     \
+  V(vclgd, VCLGD, 0xE7C0) /* VECTOR FP CONVERT TO LOGICAL 64-BIT */   \
+  V(vfi, VFI, 0xE7C7)     /* VECTOR LOAD FP INTEGER */                \
+  V(vlde, VLDE, 0xE7C4)   /* VECTOR FP LOAD LENGTHENED */             \
+  V(vled, VLED, 0xE7C5)   /* VECTOR FP LOAD ROUNDED */                \
+  V(vfpso, VFPSO, 0xE7CC) /* VECTOR FP PERFORM SIGN OPERATION */      \
+  V(vfsq, VFSQ, 0xE7CE)   /* VECTOR FP SQUARE ROOT */                 \
+  V(wfk, WFK, 0xE7CA)     /* VECTOR FP COMPARE AND SIGNAL SCALAR */
+
+#define VRR_C_OPCODE_LIST(V)                   \
+  V(vfa, VFA, 0xE7E3) /* VECTOR FP ADD */      \
+  V(vfs, VFS, 0xE7E2) /* VECTOR FP SUBTRACT */ \
+  V(vfm, VFM, 0xE7E7) /* VECTOR FP MULTIPLY */ \
+  V(vfd, VFD, 0xE7E5) /* VECTOR FP DIVIDE */
+
 // Opcodes as defined in Appendix B-2 table
 enum Opcode {
-  A = 0x5A,           // Add (32)
+#define DECLARE_OPCODES(name, opcode_name, opcode_value) \
+  opcode_name = opcode_value,
+  VRR_A_OPCODE_LIST(DECLARE_OPCODES) VRR_C_OPCODE_LIST(DECLARE_OPCODES)
+#undef DECLARE_OPCODES
+
+      A = 0x5A,       // Add (32)
   ADB = 0xED1A,       // Add (long BFP)
   ADBR = 0xB31A,      // Add (long BFP)
   ADTR = 0xB3D2,      // Add (long DFP)
@@ -1304,6 +1328,23 @@ class Instruction {
   DISALLOW_IMPLICIT_CONSTRUCTORS(Instruction);
 };
 
+#define DECLARE_FIELD_FOR_SIX_BYTE_INSTR(name, T, lo, hi)   \
+  inline int name() const {                                 \
+    return Bits<SixByteInstr, T>(47 - (lo), 47 - (hi) + 1); \
+  }
+
+// VRR Instruction
+class VRR_C_Instruction : Instruction {
+ public:
+  DECLARE_FIELD_FOR_SIX_BYTE_INSTR(R1Value, int, 8, 12);
+  DECLARE_FIELD_FOR_SIX_BYTE_INSTR(R2Value, int, 12, 16);
+  DECLARE_FIELD_FOR_SIX_BYTE_INSTR(R3Value, int, 16, 20);
+  DECLARE_FIELD_FOR_SIX_BYTE_INSTR(M6Value, uint32_t, 24, 28);
+  DECLARE_FIELD_FOR_SIX_BYTE_INSTR(M5Value, uint32_t, 28, 32);
+  DECLARE_FIELD_FOR_SIX_BYTE_INSTR(M4Value, uint32_t, 32, 36);
+  inline int size() const { return 6; }
+};
+
 // I Instruction -- suspect this will not be used,
 // but implement for completeness
 class IInstruction : Instruction {
diff --git a/src/s390/disasm-s390.cc b/src/s390/disasm-s390.cc
index 5f2f4011a716..0eaf8254ffba 100644
--- a/src/s390/disasm-s390.cc
+++ b/src/s390/disasm-s390.cc
@@ -1055,6 +1055,12 @@ bool Decoder::DecodeSixByte(Instruction* instr) {
     case DUMY:
       Format(instr, "dumy\t'r1, 'd2 ( 'r2d, 'r3 )");
       break;
+#define DECODE_VRR_C_INSTRUCTIONS(name, opcode_name, opcode_value) \
+  case opcode_name:                                                \
+    Format(instr, #name "\t'f1,'f2,'f3");                          \
+    break;
+      VRR_A_OPCODE_LIST(DECODE_VRR_C_INSTRUCTIONS)
+#undef DECODE_VRR_C_INSTRUCTIONS
     case LLILF:
       Format(instr, "llilf\t'r1,'i7");
       break;
diff --git a/src/s390/simulator-s390.cc b/src/s390/simulator-s390.cc
index df4c02dfeb2d..4e2fcf7d8b9b 100644
--- a/src/s390/simulator-s390.cc
+++ b/src/s390/simulator-s390.cc
@@ -743,6 +743,11 @@ void Simulator::EvalTableInit() {
     EvalTable[i] = &Simulator::Evaluate_Unknown;
   }
 
+#define CREATE_EVALUATE_TABLE(name, op_name, op_value) \
+  EvalTable[op_name] = &Simulator::Evaluate_##op_name;
+  VRR_C_OPCODE_LIST(CREATE_EVALUATE_TABLE);
+#undef CREATE_EVALUATE_TABLE
+
   EvalTable[DUMY] = &Simulator::Evaluate_DUMY;
   EvalTable[BKPT] = &Simulator::Evaluate_BKPT;
   EvalTable[SPM] = &Simulator::Evaluate_SPM;
@@ -6050,6 +6055,15 @@ uintptr_t Simulator::PopAddress() {
   int d2 = AS(RXEInstruction)->D2Value();      \
   int length = 6;
 
+#define DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4) \
+  int r1 = AS(VRR_C_Instruction)->R1Value();             \
+  int r2 = AS(VRR_C_Instruction)->R2Value();             \
+  int r3 = AS(VRR_C_Instruction)->R3Value();             \
+  int m6 = AS(VRR_C_Instruction)->M6Value();             \
+  int m5 = AS(VRR_C_Instruction)->M5Value();             \
+  int m4 = AS(VRR_C_Instruction)->M4Value();             \
+  int length = 6;
+
 #define GET_ADDRESS(index_reg, base_reg, offset)       \
   (((index_reg) == 0) ? 0 : get_register(index_reg)) + \
       (((base_reg) == 0) ? 0 : get_register(base_reg)) + offset
@@ -6059,10 +6073,75 @@ int Simulator::Evaluate_Unknown(Instruction* instr) {
   return 0;
 }
 
+EVALUATE(VFA) {
+  DCHECK_OPCODE(VFA);
+  DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
+  USE(m6);
+  USE(m5);
+  USE(m4);
+  DCHECK(m5 == 8);
+  DCHECK(m4 == 3);
+  double r2_val = get_double_from_d_register(r2);
+  double r3_val = get_double_from_d_register(r3);
+  double r1_val = r2_val + r3_val;
+  set_d_register_from_double(r1, r1_val);
+  return length;
+}
+
+EVALUATE(VFS) {
+  DCHECK_OPCODE(VFS);
+  DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
+  USE(m6);
+  USE(m5);
+  USE(m4);
+  DCHECK(m5 == 8);
+  DCHECK(m4 == 3);
+  double r2_val = get_double_from_d_register(r2);
+  double r3_val = get_double_from_d_register(r3);
+  double r1_val = r2_val - r3_val;
+  set_d_register_from_double(r1, r1_val);
+  return length;
+}
+
+EVALUATE(VFM) {
+  DCHECK_OPCODE(VFM);
+  DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
+  USE(m6);
+  USE(m5);
+  USE(m4);
+  DCHECK(m5 == 8);
+  DCHECK(m4 == 3);
+  double r2_val = get_double_from_d_register(r2);
+  double r3_val = get_double_from_d_register(r3);
+  double r1_val = r2_val * r3_val;
+  set_d_register_from_double(r1, r1_val);
+  return length;
+}
+
+EVALUATE(VFD) {
+  DCHECK_OPCODE(VFD);
+  DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
+  USE(m6);
+  USE(m5);
+  USE(m4);
+  DCHECK(m5 == 8);
+  DCHECK(m4 == 3);
+  double r2_val = get_double_from_d_register(r2);
+  double r3_val = get_double_from_d_register(r3);
+  double r1_val = r2_val / r3_val;
+  set_d_register_from_double(r1, r1_val);
+  return length;
+}
+
 EVALUATE(DUMY) {
   DCHECK_OPCODE(DUMY);
+  DECODE_RXY_A_INSTRUCTION(r1, x2, b2, d2);
+  USE(r1);
+  USE(x2);
+  USE(b2);
+  USE(d2);
   // dummy instruction does nothing.
-  return 6;
+  return length;
 }
 
 EVALUATE(CLR) {
diff --git a/src/s390/simulator-s390.h b/src/s390/simulator-s390.h
index 66730b5d7475..91d2fbf83419 100644
--- a/src/s390/simulator-s390.h
+++ b/src/s390/simulator-s390.h
@@ -522,6 +522,11 @@ class Simulator {
   static void EvalTableInit();
 
 #define EVALUATE(name) int Evaluate_##name(Instruction* instr)
+#define EVALUATE_VRR_INSTRUCTIONS(name, op_name, op_value) EVALUATE(op_name);
+  VRR_C_OPCODE_LIST(EVALUATE_VRR_INSTRUCTIONS)
+  VRR_A_OPCODE_LIST(EVALUATE_VRR_INSTRUCTIONS)
+#undef EVALUATE_VRR_INSTRUCTIONS
+
   EVALUATE(DUMY);
   EVALUATE(BKPT);
   EVALUATE(SPM);