Diffstat (limited to 'llvm/lib/Target/SystemZ')
44 files changed, 2415 insertions, 1105 deletions
| diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 607266d552a6..d5a3a19446c7 100644 --- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -53,8 +53,6 @@ enum RegisterKind {    GRH32Reg,    GR64Reg,    GR128Reg, -  ADDR32Reg, -  ADDR64Reg,    FP32Reg,    FP64Reg,    FP128Reg, @@ -109,7 +107,7 @@ private:    // Base + Disp + Index, where Base and Index are LLVM registers or 0.    // MemKind says what type of memory this is and RegKind says what type -  // the base register has (ADDR32Reg or ADDR64Reg).  Length is the operand +  // the base register has (GR32Reg or GR64Reg).  Length is the operand    // length for D(L,B)-style operands, otherwise it is null.    struct MemOp {      unsigned Base : 12; @@ -348,8 +346,8 @@ public:    bool isGRX32() const { return false; }    bool isGR64() const { return isReg(GR64Reg); }    bool isGR128() const { return isReg(GR128Reg); } -  bool isADDR32() const { return isReg(ADDR32Reg); } -  bool isADDR64() const { return isReg(ADDR64Reg); } +  bool isADDR32() const { return isReg(GR32Reg); } +  bool isADDR64() const { return isReg(GR64Reg); }    bool isADDR128() const { return false; }    bool isFP32() const { return isReg(FP32Reg); }    bool isFP64() const { return isReg(FP64Reg); } @@ -361,16 +359,16 @@ public:    bool isAR32() const { return isReg(AR32Reg); }    bool isCR64() const { return isReg(CR64Reg); }    bool isAnyReg() const { return (isReg() || isImm(0, 15)); } -  bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); } -  bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); } -  bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, ADDR64Reg); } -  bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, ADDR64Reg); } -  bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); } -  bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); } -  bool isBDLAddr64Disp12Len4() const { return isMemDisp12Len4(ADDR64Reg); } -  bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); } -  bool isBDRAddr64Disp12() const { return isMemDisp12(BDRMem, ADDR64Reg); } -  bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); } +  bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, GR32Reg); } +  bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, GR32Reg); } +  bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, GR64Reg); } +  bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, GR64Reg); } +  bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, GR64Reg); } +  bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, GR64Reg); } +  bool isBDLAddr64Disp12Len4() const { return isMemDisp12Len4(GR64Reg); } +  bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(GR64Reg); } +  bool isBDRAddr64Disp12() const { return isMemDisp12(BDRMem, GR64Reg); } +  bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, GR64Reg); }    bool isU1Imm() const { return isImm(0, 1); }    bool isU2Imm() const { return isImm(0, 3); }    bool isU3Imm() const { return isImm(0, 7); } @@ -405,26 +403,24 @@ private:      SMLoc StartLoc, EndLoc;    }; -  bool parseRegister(Register &Reg); +  bool parseRegister(Register &Reg, bool RestoreOnFailure = false); -  bool parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs, -                     bool IsAddress = false); +  bool 
parseIntegerRegister(Register &Reg, RegisterGroup Group);    OperandMatchResultTy parseRegister(OperandVector &Operands, -                                     RegisterGroup Group, const unsigned *Regs,                                       RegisterKind Kind);    OperandMatchResultTy parseAnyRegister(OperandVector &Operands); -  bool parseAddress(bool &HaveReg1, Register &Reg1, -                    bool &HaveReg2, Register &Reg2, -                    const MCExpr *&Disp, const MCExpr *&Length); +  bool parseAddress(bool &HaveReg1, Register &Reg1, bool &HaveReg2, +                    Register &Reg2, const MCExpr *&Disp, const MCExpr *&Length, +                    bool HasLength = false, bool HasVectorIndex = false);    bool parseAddressRegister(Register &Reg);    bool ParseDirectiveInsn(SMLoc L);    OperandMatchResultTy parseAddress(OperandVector &Operands, -                                    MemoryKind MemKind, const unsigned *Regs, +                                    MemoryKind MemKind,                                      RegisterKind RegKind);    OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal, @@ -449,6 +445,10 @@ public:    // Override MCTargetAsmParser.    bool ParseDirective(AsmToken DirectiveID) override;    bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; +  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, +                     bool RestoreOnFailure); +  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, +                                        SMLoc &EndLoc) override;    bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,                          SMLoc NameLoc, OperandVector &Operands) override;    bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, @@ -458,76 +458,78 @@ public:    // Used by the TableGen code to parse particular operand types.    OperandMatchResultTy parseGR32(OperandVector &Operands) { -    return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg); +    return parseRegister(Operands, GR32Reg);    }    OperandMatchResultTy parseGRH32(OperandVector &Operands) { -    return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg); +    return parseRegister(Operands, GRH32Reg);    }    OperandMatchResultTy parseGRX32(OperandVector &Operands) {      llvm_unreachable("GRX32 should only be used for pseudo instructions");    }    OperandMatchResultTy parseGR64(OperandVector &Operands) { -    return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg); +    return parseRegister(Operands, GR64Reg);    }    OperandMatchResultTy parseGR128(OperandVector &Operands) { -    return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg); +    return parseRegister(Operands, GR128Reg);    }    OperandMatchResultTy parseADDR32(OperandVector &Operands) { -    return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, ADDR32Reg); +    // For the AsmParser, we will accept %r0 for ADDR32 as well. +    return parseRegister(Operands, GR32Reg);    }    OperandMatchResultTy parseADDR64(OperandVector &Operands) { -    return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, ADDR64Reg); +    // For the AsmParser, we will accept %r0 for ADDR64 as well. 
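Accepting %r0 in ADDR32/ADDR64 contexts (the old "%r0 used in an address" diagnostic is also dropped from parseAddressRegister further down) is syntactically harmless because register field 0 in a D(X,B) address means "no register" rather than the contents of %r0. A toy model of that hardware rule, with an illustrative helper name (not LLVM code):

    #include <cstdint>

    // Effective address of D(X,B): index and base contribute 0 when their
    // register field is 0; %r0's actual contents are never read.
    uint64_t effectiveAddress(const uint64_t GPR[16], unsigned X, unsigned B,
                              int32_t D) {
      uint64_t Idx = X ? GPR[X] : 0;  // X == 0 -> no index
      uint64_t Base = B ? GPR[B] : 0; // B == 0 -> no base
      return Base + Idx + static_cast<int64_t>(D);
    }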
+    return parseRegister(Operands, GR64Reg);    }    OperandMatchResultTy parseADDR128(OperandVector &Operands) {      llvm_unreachable("Shouldn't be used as an operand");    }    OperandMatchResultTy parseFP32(OperandVector &Operands) { -    return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg); +    return parseRegister(Operands, FP32Reg);    }    OperandMatchResultTy parseFP64(OperandVector &Operands) { -    return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg); +    return parseRegister(Operands, FP64Reg);    }    OperandMatchResultTy parseFP128(OperandVector &Operands) { -    return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg); +    return parseRegister(Operands, FP128Reg);    }    OperandMatchResultTy parseVR32(OperandVector &Operands) { -    return parseRegister(Operands, RegV, SystemZMC::VR32Regs, VR32Reg); +    return parseRegister(Operands, VR32Reg);    }    OperandMatchResultTy parseVR64(OperandVector &Operands) { -    return parseRegister(Operands, RegV, SystemZMC::VR64Regs, VR64Reg); +    return parseRegister(Operands, VR64Reg);    }    OperandMatchResultTy parseVF128(OperandVector &Operands) {      llvm_unreachable("Shouldn't be used as an operand");    }    OperandMatchResultTy parseVR128(OperandVector &Operands) { -    return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg); +    return parseRegister(Operands, VR128Reg);    }    OperandMatchResultTy parseAR32(OperandVector &Operands) { -    return parseRegister(Operands, RegAR, SystemZMC::AR32Regs, AR32Reg); +    return parseRegister(Operands, AR32Reg);    }    OperandMatchResultTy parseCR64(OperandVector &Operands) { -    return parseRegister(Operands, RegCR, SystemZMC::CR64Regs, CR64Reg); +    return parseRegister(Operands, CR64Reg);    }    OperandMatchResultTy parseAnyReg(OperandVector &Operands) {      return parseAnyRegister(Operands);    }    OperandMatchResultTy parseBDAddr32(OperandVector &Operands) { -    return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, ADDR32Reg); +    return parseAddress(Operands, BDMem, GR32Reg);    }    OperandMatchResultTy parseBDAddr64(OperandVector &Operands) { -    return parseAddress(Operands, BDMem, SystemZMC::GR64Regs, ADDR64Reg); +    return parseAddress(Operands, BDMem, GR64Reg);    }    OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) { -    return parseAddress(Operands, BDXMem, SystemZMC::GR64Regs, ADDR64Reg); +    return parseAddress(Operands, BDXMem, GR64Reg);    }    OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) { -    return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, ADDR64Reg); +    return parseAddress(Operands, BDLMem, GR64Reg);    }    OperandMatchResultTy parseBDRAddr64(OperandVector &Operands) { -    return parseAddress(Operands, BDRMem, SystemZMC::GR64Regs, ADDR64Reg); +    return parseAddress(Operands, BDRMem, GR64Reg);    }    OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) { -    return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, ADDR64Reg); +    return parseAddress(Operands, BDVMem, GR64Reg);    }    OperandMatchResultTy parsePCRel12(OperandVector &Operands) {      return parsePCRel(Operands, -(1LL << 12), (1LL << 12) - 1, false); @@ -691,27 +693,37 @@ void SystemZOperand::print(raw_ostream &OS) const {  }  // Parse one register of the form %<prefix><number>. 
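The function in the next hunk implements exactly this %<prefix><number> split. As a self-contained illustration, a plain-C++ sketch of the same parse (hypothetical helper, simplified limits):

    #include <cctype>
    #include <optional>
    #include <string>

    struct ParsedReg {
      char Prefix;  // 'r', 'f', 'v', 'a' or 'c'
      unsigned Num; // number within the register group
    };

    // Parse a name such as "r15" (the '%' is assumed already consumed, as
    // in the patch). Returns std::nullopt on malformed input.
    std::optional<ParsedReg> parseRegName(const std::string &Name) {
      if (Name.size() < 2)
        return std::nullopt;
      unsigned Num = 0;
      for (size_t I = 1; I < Name.size(); ++I) {
        if (!std::isdigit(static_cast<unsigned char>(Name[I])))
          return std::nullopt;
        Num = Num * 10 + unsigned(Name[I] - '0');
      }
      char Prefix = Name[0];
      unsigned Max = (Prefix == 'v') ? 31 : 15; // only vector regs reach 31
      if ((Prefix == 'r' || Prefix == 'f' || Prefix == 'v' || Prefix == 'a' ||
           Prefix == 'c') &&
          Num <= Max)
        return ParsedReg{Prefix, Num};
      return std::nullopt;
    }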
-bool SystemZAsmParser::parseRegister(Register &Reg) { +bool SystemZAsmParser::parseRegister(Register &Reg, bool RestoreOnFailure) {    Reg.StartLoc = Parser.getTok().getLoc();    // Eat the % prefix.    if (Parser.getTok().isNot(AsmToken::Percent))      return Error(Parser.getTok().getLoc(), "register expected"); +  const AsmToken &PercentTok = Parser.getTok();    Parser.Lex();    // Expect a register name. -  if (Parser.getTok().isNot(AsmToken::Identifier)) +  if (Parser.getTok().isNot(AsmToken::Identifier)) { +    if (RestoreOnFailure) +      getLexer().UnLex(PercentTok);      return Error(Reg.StartLoc, "invalid register"); +  }    // Check that there's a prefix.    StringRef Name = Parser.getTok().getString(); -  if (Name.size() < 2) +  if (Name.size() < 2) { +    if (RestoreOnFailure) +      getLexer().UnLex(PercentTok);      return Error(Reg.StartLoc, "invalid register"); +  }    char Prefix = Name[0];    // Treat the rest of the register name as a register number. -  if (Name.substr(1).getAsInteger(10, Reg.Num)) +  if (Name.substr(1).getAsInteger(10, Reg.Num)) { +    if (RestoreOnFailure) +      getLexer().UnLex(PercentTok);      return Error(Reg.StartLoc, "invalid register"); +  }    // Look for valid combinations of prefix and number.    if (Prefix == 'r' && Reg.Num < 16) @@ -724,49 +736,102 @@ bool SystemZAsmParser::parseRegister(Register &Reg) {      Reg.Group = RegAR;    else if (Prefix == 'c' && Reg.Num < 16)      Reg.Group = RegCR; -  else +  else { +    if (RestoreOnFailure) +      getLexer().UnLex(PercentTok);      return Error(Reg.StartLoc, "invalid register"); +  }    Reg.EndLoc = Parser.getTok().getLoc();    Parser.Lex();    return false;  } -// Parse a register of group Group.  If Regs is nonnull, use it to map -// the raw register number to LLVM numbering, with zero entries -// indicating an invalid register.  IsAddress says whether the -// register appears in an address context. Allow FP Group if expecting -// RegV Group, since the f-prefix yields the FP group even while used -// with vector instructions. -bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group, -                                     const unsigned *Regs, bool IsAddress) { -  if (parseRegister(Reg)) -    return true; -  if (Reg.Group != Group && !(Reg.Group == RegFP && Group == RegV)) -    return Error(Reg.StartLoc, "invalid operand for instruction"); -  if (Regs && Regs[Reg.Num] == 0) -    return Error(Reg.StartLoc, "invalid register pair"); -  if (Reg.Num == 0 && IsAddress) -    return Error(Reg.StartLoc, "%r0 used in an address"); -  if (Regs) -    Reg.Num = Regs[Reg.Num]; -  return false; -} - -// Parse a register and add it to Operands.  The other arguments are as above. +// Parse a register of kind Kind and add it to Operands.  
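The RestoreOnFailure paths added above make a failed parse side-effect free by pushing the consumed '%' token back with getLexer().UnLex(PercentTok), which is what the new tryParseRegister (further down) relies on. The same backtracking idea over a toy token stream (Token and TokenStream are illustrative, not LLVM's types):

    #include <cassert>
    #include <cctype>
    #include <string>
    #include <vector>

    struct Token { std::string Text; }; // stream ends with a sentinel ""

    class TokenStream {
      std::vector<Token> Toks;
      size_t Pos = 0;
    public:
      explicit TokenStream(std::vector<Token> T) : Toks(std::move(T)) {}
      const Token &peek() const { return Toks[Pos]; }
      void lex() { ++Pos; }                    // consume one token
      void unlex() { assert(Pos > 0); --Pos; } // restore the last token
    };

    // Try to parse "%<identifier>"; on failure leave the stream untouched
    // when RestoreOnFailure is set, so the caller can try other parses.
    bool tryParsePercentIdent(TokenStream &TS, bool RestoreOnFailure) {
      if (TS.peek().Text != "%")
        return false; // nothing consumed yet
      TS.lex(); // eat '%'
      const std::string &Id = TS.peek().Text;
      if (Id.empty() || !std::isalpha(static_cast<unsigned char>(Id[0]))) {
        if (RestoreOnFailure)
          TS.unlex(); // put the '%' back
        return false;
      }
      TS.lex();
      return true;
    }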
OperandMatchResultTy -SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group, -                                const unsigned *Regs, RegisterKind Kind) { -  if (Parser.getTok().isNot(AsmToken::Percent)) +SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterKind Kind) { +  Register Reg; +  RegisterGroup Group; +  switch (Kind) { +  case GR32Reg: +  case GRH32Reg: +  case GR64Reg: +  case GR128Reg: +    Group = RegGR; +    break; +  case FP32Reg: +  case FP64Reg: +  case FP128Reg: +    Group = RegFP; +    break; +  case VR32Reg: +  case VR64Reg: +  case VR128Reg: +    Group = RegV; +    break; +  case AR32Reg: +    Group = RegAR; +    break; +  case CR64Reg: +    Group = RegCR; +    break; +  } + +  // Handle register names of the form %<prefix><number> +  if (Parser.getTok().is(AsmToken::Percent)) { +    if (parseRegister(Reg)) +      return MatchOperand_ParseFail; + +    // Check the parsed register group "Reg.Group" with the expected "Group" +    // Have to error out if user specified wrong prefix. +    switch (Group) { +    case RegGR: +    case RegFP: +    case RegAR: +    case RegCR: +      if (Group != Reg.Group) { +        Error(Reg.StartLoc, "invalid operand for instruction"); +        return MatchOperand_ParseFail; +      } +      break; +    case RegV: +      if (Reg.Group != RegV && Reg.Group != RegFP) { +        Error(Reg.StartLoc, "invalid operand for instruction"); +        return MatchOperand_ParseFail; +      } +      break; +    } +  } else if (Parser.getTok().is(AsmToken::Integer)) { +    if (parseIntegerRegister(Reg, Group)) +      return MatchOperand_ParseFail; +  } +  // Otherwise we didn't match a register operand. +  else      return MatchOperand_NoMatch; -  Register Reg; -  bool IsAddress = (Kind == ADDR32Reg || Kind == ADDR64Reg); -  if (parseRegister(Reg, Group, Regs, IsAddress)) +  // Determine the LLVM register number according to Kind. 
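The switch that follows selects a per-kind table mapping the textual register number to an LLVM register enum, where a zero entry marks numbers that are invalid for that kind (notably odd numbers for 128-bit pairs). A toy version of that zero-means-invalid convention, with made-up values:

    // Only even numbers name valid 128-bit register pairs; zero entries are
    // rejected as "invalid register pair". Values are illustrative.
    static const unsigned ToyGR128Regs[16] = {
        101, 0, 102, 0, 103, 0, 104, 0,
        105, 0, 106, 0, 107, 0, 108, 0};

    bool mapToyRegister(unsigned Num, unsigned &LLVMReg) {
      if (Num > 15 || ToyGR128Regs[Num] == 0)
        return false;
      LLVMReg = ToyGR128Regs[Num];
      return true;
    }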
+  const unsigned *Regs; +  switch (Kind) { +  case GR32Reg:  Regs = SystemZMC::GR32Regs;  break; +  case GRH32Reg: Regs = SystemZMC::GRH32Regs; break; +  case GR64Reg:  Regs = SystemZMC::GR64Regs;  break; +  case GR128Reg: Regs = SystemZMC::GR128Regs; break; +  case FP32Reg:  Regs = SystemZMC::FP32Regs;  break; +  case FP64Reg:  Regs = SystemZMC::FP64Regs;  break; +  case FP128Reg: Regs = SystemZMC::FP128Regs; break; +  case VR32Reg:  Regs = SystemZMC::VR32Regs;  break; +  case VR64Reg:  Regs = SystemZMC::VR64Regs;  break; +  case VR128Reg: Regs = SystemZMC::VR128Regs; break; +  case AR32Reg:  Regs = SystemZMC::AR32Regs;  break; +  case CR64Reg:  Regs = SystemZMC::CR64Regs;  break; +  } +  if (Regs[Reg.Num] == 0) { +    Error(Reg.StartLoc, "invalid register pair");      return MatchOperand_ParseFail; +  } -  Operands.push_back(SystemZOperand::createReg(Kind, Reg.Num, -                                               Reg.StartLoc, Reg.EndLoc)); +  Operands.push_back( +      SystemZOperand::createReg(Kind, Regs[Reg.Num], Reg.StartLoc, Reg.EndLoc));    return MatchOperand_Success;  } @@ -831,11 +896,39 @@ SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {    return MatchOperand_Success;  } +bool SystemZAsmParser::parseIntegerRegister(Register &Reg, +                                            RegisterGroup Group) { +  Reg.StartLoc = Parser.getTok().getLoc(); +  // We have an integer token +  const MCExpr *Register; +  if (Parser.parseExpression(Register)) +    return true; + +  const auto *CE = dyn_cast<MCConstantExpr>(Register); +  if (!CE) +    return true; + +  int64_t MaxRegNum = (Group == RegV) ? 31 : 15; +  int64_t Value = CE->getValue(); +  if (Value < 0 || Value > MaxRegNum) { +    Error(Parser.getTok().getLoc(), "invalid register"); +    return true; +  } + +  // Assign the Register Number +  Reg.Num = (unsigned)Value; +  Reg.Group = Group; +  Reg.EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + +  // At this point, successfully parsed an integer register. +  return false; +} +  // Parse a memory operand into Reg1, Reg2, Disp, and Length.  bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,                                      bool &HaveReg2, Register &Reg2, -                                    const MCExpr *&Disp, -                                    const MCExpr *&Length) { +                                    const MCExpr *&Disp, const MCExpr *&Length, +                                    bool HasLength, bool HasVectorIndex) {    // Parse the displacement, which must always be present.    if (getParser().parseExpression(Disp))      return true; @@ -844,6 +937,27 @@ bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,    HaveReg1 = false;    HaveReg2 = false;    Length = nullptr; + +  // If we have a scenario as below: +  //   vgef %v0, 0(0), 0 +  // This is an example of a "BDVMem" instruction type. +  // +  // So when we parse this as an integer register, the register group +  // needs to be tied to "RegV". Usually when the prefix is passed in +  // as %<prefix><reg-number> its easy to check which group it should belong to +  // However, if we're passing in just the integer there's no real way to +  // "check" what register group it should belong to. +  // +  // When the user passes in the register as an integer, the user assumes that +  // the compiler is responsible for substituting it as the right kind of +  // register. 
Whereas, when the user specifies a "prefix", the onus is on +  // the user to make sure they pass in the right kind of register. +  // +  // The restriction only applies to the first Register (i.e. Reg1). Reg2 is +  // always a general register. Reg1 should be of group RegV if "HasVectorIndex" +  // (i.e. insn is of type BDVMem) is true. +  RegisterGroup RegGroup = HasVectorIndex ? RegV : RegGR; +    if (getLexer().is(AsmToken::LParen)) {      Parser.Lex(); @@ -852,18 +966,47 @@ bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,        HaveReg1 = true;        if (parseRegister(Reg1))          return true; +    } +    // So if we have an integer as the first token in ([tok1], ..), it could: +    // 1. Refer to a "Register" (i.e X,R,V fields in BD[X|R|V]Mem type of +    // instructions) +    // 2. Refer to a "Length" field (i.e L field in BDLMem type of instructions) +    else if (getLexer().is(AsmToken::Integer)) { +      if (HasLength) { +        // Instruction has a "Length" field, safe to parse the first token as +        // the "Length" field +        if (getParser().parseExpression(Length)) +          return true; +      } else { +        // Otherwise, if the instruction has no "Length" field, parse the +        // token as a "Register". We don't have to worry about whether the +        // instruction is invalid here, because the caller will take care of +        // error reporting. +        HaveReg1 = true; +        if (parseIntegerRegister(Reg1, RegGroup)) +          return true; +      }      } else { -      // Parse the length. -      if (getParser().parseExpression(Length)) -        return true; +      // If its not an integer or a percent token, then if the instruction +      // is reported to have a "Length" then, parse it as "Length". +      if (HasLength) { +        if (getParser().parseExpression(Length)) +          return true; +      }      }      // Check whether there's a second register.      if (getLexer().is(AsmToken::Comma)) {        Parser.Lex();        HaveReg2 = true; -      if (parseRegister(Reg2)) -        return true; + +      if (getLexer().is(AsmToken::Integer)) { +        if (parseIntegerRegister(Reg2, RegGR)) +          return true; +      } else { +        if (parseRegister(Reg2)) +          return true; +      }      }      // Consume the closing bracket. @@ -883,9 +1026,6 @@ SystemZAsmParser::parseAddressRegister(Register &Reg) {    } else if (Reg.Group != RegGR) {      Error(Reg.StartLoc, "invalid address register");      return true; -  } else if (Reg.Num == 0) { -    Error(Reg.StartLoc, "%r0 used in an address"); -    return true;    }    return false;  } @@ -894,16 +1034,27 @@ SystemZAsmParser::parseAddressRegister(Register &Reg) {  // are as above.  OperandMatchResultTy  SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind, -                               const unsigned *Regs, RegisterKind RegKind) { +                               RegisterKind RegKind) {    SMLoc StartLoc = Parser.getTok().getLoc();    unsigned Base = 0, Index = 0, LengthReg = 0;    Register Reg1, Reg2;    bool HaveReg1, HaveReg2;    const MCExpr *Disp;    const MCExpr *Length; -  if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Disp, Length)) + +  bool HasLength = (MemKind == BDLMem) ? true : false; +  bool HasVectorIndex = (MemKind == BDVMem) ? 
true : false; +  if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Disp, Length, HasLength, +                   HasVectorIndex))      return MatchOperand_ParseFail; +  const unsigned *Regs; +  switch (RegKind) { +  case GR32Reg: Regs = SystemZMC::GR32Regs; break; +  case GR64Reg: Regs = SystemZMC::GR64Regs; break; +  default: llvm_unreachable("invalid RegKind"); +  } +    switch (MemKind) {    case BDMem:      // If we have Reg1, it must be an address register. @@ -912,11 +1063,7 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,          return MatchOperand_ParseFail;        Base = Regs[Reg1.Num];      } -    // There must be no Reg2 or length. -    if (Length) { -      Error(StartLoc, "invalid use of length addressing"); -      return MatchOperand_ParseFail; -    } +    // There must be no Reg2.      if (HaveReg2) {        Error(StartLoc, "invalid use of indexed addressing");        return MatchOperand_ParseFail; @@ -940,11 +1087,6 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,          return MatchOperand_ParseFail;        Base = Regs[Reg2.Num];      } -    // There must be no length. -    if (Length) { -      Error(StartLoc, "invalid use of length addressing"); -      return MatchOperand_ParseFail; -    }      break;    case BDLMem:      // If we have Reg2, it must be an address register. @@ -977,11 +1119,6 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,          return MatchOperand_ParseFail;        Base = Regs[Reg2.Num];      } -    // There must be no length. -    if (Length) { -      Error(StartLoc, "invalid use of length addressing"); -      return MatchOperand_ParseFail; -    }      break;    case BDVMem:      // We must have Reg1, and it must be a vector register. @@ -996,16 +1133,11 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,          return MatchOperand_ParseFail;        Base = Regs[Reg2.Num];      } -    // There must be no length. -    if (Length) { -      Error(StartLoc, "invalid use of length addressing"); -      return MatchOperand_ParseFail; -    }      break;    }    SMLoc EndLoc = -    SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); +      SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);    Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp,                                                 Index, Length, LengthReg,                                                 StartLoc, EndLoc)); @@ -1118,15 +1250,15 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {    }    // Emit as a regular instruction. 
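The HasLength/HasVectorIndex flags introduced above resolve what a bare integer as the first token inside the parentheses of a memory operand means. The rule, restated as a toy classifier (illustrative types, not LLVM's):

    enum class MemKind { BD, BDX, BDL, BDR, BDV };
    enum class FirstTok { Length, VectorReg, GPR };

    FirstTok classifyFirstParenInteger(MemKind K) {
      if (K == MemKind::BDL)
        return FirstTok::Length;    // D(L,B): the integer is a length
      if (K == MemKind::BDV)
        return FirstTok::VectorReg; // D(V,B): the integer is a vector reg
      return FirstTok::GPR;         // otherwise an index/base GPR
    }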
-  Parser.getStreamer().EmitInstruction(Inst, getSTI()); +  Parser.getStreamer().emitInstruction(Inst, getSTI());    return false;  }  bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, -                                     SMLoc &EndLoc) { +                                     SMLoc &EndLoc, bool RestoreOnFailure) {    Register Reg; -  if (parseRegister(Reg)) +  if (parseRegister(Reg, RestoreOnFailure))      return true;    if (Reg.Group == RegGR)      RegNo = SystemZMC::GR64Regs[Reg.Num]; @@ -1143,6 +1275,25 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,    return false;  } +bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, +                                     SMLoc &EndLoc) { +  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); +} + +OperandMatchResultTy SystemZAsmParser::tryParseRegister(unsigned &RegNo, +                                                        SMLoc &StartLoc, +                                                        SMLoc &EndLoc) { +  bool Result = +      ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); +  bool PendingErrors = getParser().hasPendingError(); +  getParser().clearPendingErrors(); +  if (PendingErrors) +    return MatchOperand_ParseFail; +  if (Result) +    return MatchOperand_NoMatch; +  return MatchOperand_Success; +} +  bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info,                                          StringRef Name, SMLoc NameLoc,                                          OperandVector &Operands) { @@ -1215,7 +1366,8 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,    bool HaveReg1, HaveReg2;    const MCExpr *Expr;    const MCExpr *Length; -  if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Expr, Length)) +  if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Expr, Length, +                   /*HasLength*/ true, /*HasVectorIndex*/ true))      return true;    // If the register combination is not valid for any instruction, reject it.    // Otherwise, fall back to reporting an unrecognized instruction. @@ -1252,7 +1404,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,    switch (MatchResult) {    case Match_Success:      Inst.setLoc(IDLoc); -    Out.EmitInstruction(Inst, getSTI()); +    Out.emitInstruction(Inst, getSTI());      return false;    case Match_MissingFeature: { @@ -1322,7 +1474,7 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,      }      int64_t Value = CE->getValue();      MCSymbol *Sym = Ctx.createTempSymbol(); -    Out.EmitLabel(Sym); +    Out.emitLabel(Sym);      const MCExpr *Base = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,                                                   Ctx);      Expr = Value == 0 ? 
Base : MCBinaryExpr::createAdd(Base, Expr, Ctx); diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp index 5893b227c08c..fac363cae713 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp @@ -155,7 +155,8 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,      MO.getExpr()->print(O, &MAI);  } -void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum, +void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, +                                              uint64_t Address, int OpNum,                                                raw_ostream &O) {    // Output the PC-relative operand.    printPCRelOperand(MI, OpNum, O); diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h index 5628e9252f03..cfe1bd89c3eb 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h @@ -46,6 +46,10 @@ public:  private:    // Print various types of operand.    void printOperand(const MCInst *MI, int OpNum, raw_ostream &O); +  void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum, +                    raw_ostream &O) { +    printOperand(MI, OpNum, O); +  }    void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);    void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);    void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); @@ -65,7 +69,12 @@ private:    void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);    void printU48ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);    void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O); -  void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O); +  void printPCRelOperand(const MCInst *MI, uint64_t /*Address*/, int OpNum, +                         raw_ostream &O) { +    printPCRelOperand(MI, OpNum, O); +  } +  void printPCRelTLSOperand(const MCInst *MI, uint64_t Address, int OpNum, +                            raw_ostream &O);    // Print the mnemonic for a condition-code mask ("ne", "lh", etc.)    // This forms part of the instruction name rather than the operand list. 
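The uint64_t Address parameter threaded into the printer methods above exists so a printer can, in principle, render a PC-relative operand as an absolute target; SystemZ's new overloads simply forward and ignore it. The underlying computation, sketched (not LLVM code):

    #include <cstdint>

    // Absolute target of a PC-relative operand: the instruction's address
    // plus the signed displacement encoded in the operand.
    uint64_t pcRelTarget(uint64_t Address, int64_t Disp) {
      return Address + static_cast<uint64_t>(Disp);
    }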
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 23d8585095cc..e62f5040898f 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -63,10 +63,6 @@ public:                              const MCAsmLayout &Layout) const override {      return false;    } -  void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, -                        MCInst &Res) const override { -    llvm_unreachable("SystemZ does do not have assembler relaxation"); -  }    bool writeNopData(raw_ostream &OS, uint64_t Count) const override;    std::unique_ptr<MCObjectTargetWriter>    createObjectTargetWriter() const override { diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp index d6cdacfcab92..e540ff4e4811 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -23,6 +23,4 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) {    UsesELFSectionDirectiveForBSS = true;    SupportsDebugInformation = true;    ExceptionsType = ExceptionHandling::DwarfCFI; - -  UseIntegratedAssembler = true;  } diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index eb2112674a12..f2ef1ad6c698 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -150,10 +150,9 @@ static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,                                           const Triple &TT,                                           const MCTargetOptions &Options) {    MCAsmInfo *MAI = new SystemZMCAsmInfo(TT); -  MCCFIInstruction Inst = -      MCCFIInstruction::createDefCfa(nullptr, -                                     MRI.getDwarfRegNum(SystemZ::R15D, true), -                                     SystemZMC::CFAOffsetFromInitialSP); +  MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa( +      nullptr, MRI.getDwarfRegNum(SystemZ::R15D, true), +      SystemZMC::CFAOffsetFromInitialSP);    MAI->addInitialFrameState(Inst);    return MAI;  } diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h index 0808160f627c..bedbd061ea5c 100644 --- a/llvm/lib/Target/SystemZ/SystemZ.h +++ b/llvm/lib/Target/SystemZ/SystemZ.h @@ -193,6 +193,7 @@ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);  FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);  FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);  FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM);  FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);  FunctionPass *createSystemZTDCPass();  } // end namespace llvm diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 67c4aa08f90d..4109bfc11337 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -92,9 +92,9 @@ static void lowerAlignmentHint(const MachineInstr *MI, MCInst &LoweredMI,      return;    const MachineMemOperand *MMO = *MI->memoperands_begin();    unsigned AlignmentHint = 0; -  if (MMO->getAlignment() >= 16) +  if 
(MMO->getAlign() >= Align(16))      AlignmentHint = 4; -  else if (MMO->getAlignment() >= 8) +  else if (MMO->getAlign() >= Align(8))      AlignmentHint = 3;    if (AlignmentHint == 0)      return; @@ -124,7 +124,7 @@ static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) {      .addImm(0);  } -void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { +void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {    SystemZMCInstLower Lower(MF->getContext(), *this);    MCInst LoweredMI;    switch (MI->getOpcode()) { @@ -479,7 +479,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {    // that instead.    case SystemZ::Trap: {      MCSymbol *DotSym = OutContext.createTempSymbol(); -    OutStreamer->EmitLabel(DotSym); +    OutStreamer->emitLabel(DotSym);      const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);      const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext); @@ -492,7 +492,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {    // to the relative immediate field of the jump instruction. (eg. "jo .+2")    case SystemZ::CondTrap: {      MCSymbol *DotSym = OutContext.createTempSymbol(); -    OutStreamer->EmitLabel(DotSym); +    OutStreamer->emitLabel(DotSym);      const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);      const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext); @@ -522,7 +522,6 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {    EmitToStreamer(*OutStreamer, LoweredMI);  } -  // Emit the largest nop instruction smaller than or equal to NumBytes  // bytes.  Return the size of nop emitted.  static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer, @@ -532,22 +531,22 @@ static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,      return 0;    }    else if (NumBytes < 4) { -    OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BCRAsm) -                                  .addImm(0).addReg(SystemZ::R0D), STI); +    OutStreamer.emitInstruction( +        MCInstBuilder(SystemZ::BCRAsm).addImm(0).addReg(SystemZ::R0D), STI);      return 2;    }    else if (NumBytes < 6) { -    OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BCAsm) -                                  .addImm(0).addReg(0).addImm(0).addReg(0), -                                STI); +    OutStreamer.emitInstruction( +        MCInstBuilder(SystemZ::BCAsm).addImm(0).addReg(0).addImm(0).addReg(0), +        STI);      return 4;    }    else {      MCSymbol *DotSym = OutContext.createTempSymbol();      const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext); -    OutStreamer.EmitLabel(DotSym); -    OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BRCLAsm) -                                  .addImm(0).addExpr(Dot), STI); +    OutStreamer.emitLabel(DotSym); +    OutStreamer.emitInstruction( +        MCInstBuilder(SystemZ::BRCLAsm).addImm(0).addExpr(Dot), STI);      return 6;    }  } @@ -560,9 +559,9 @@ void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,      OutStreamer->PushSection();      OutStreamer->SwitchSection(          Ctx.getELFSection("__mcount_loc", ELF::SHT_PROGBITS, ELF::SHF_ALLOC)); -    OutStreamer->EmitSymbolValue(DotSym, 8); +    OutStreamer->emitSymbolValue(DotSym, 8);      OutStreamer->PopSection(); -    OutStreamer->EmitLabel(DotSym); +    OutStreamer->emitLabel(DotSym);    }    if (MF->getFunction().hasFnAttribute("mnop-mcount")) { @@ -573,8 +572,9 @@ void 
SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,    MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");    const MCSymbolRefExpr *Op =        MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_PLT, Ctx); -  OutStreamer->EmitInstruction(MCInstBuilder(SystemZ::BRASL) -                       .addReg(SystemZ::R0D).addExpr(Op), getSubtargetInfo()); +  OutStreamer->emitInstruction( +      MCInstBuilder(SystemZ::BRASL).addReg(SystemZ::R0D).addExpr(Op), +      getSubtargetInfo());  }  void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { @@ -585,7 +585,7 @@ void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {    auto &Ctx = OutStreamer->getContext();    MCSymbol *MILabel = Ctx.createTempSymbol(); -  OutStreamer->EmitLabel(MILabel); +  OutStreamer->emitLabel(MILabel);    SM.recordStackMap(*MILabel, MI);    assert(NumNOPBytes % 2 == 0 && "Invalid number of NOP bytes requested!"); @@ -618,7 +618,7 @@ void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,                                          SystemZMCInstLower &Lower) {    auto &Ctx = OutStreamer->getContext();    MCSymbol *MILabel = Ctx.createTempSymbol(); -  OutStreamer->EmitLabel(MILabel); +  OutStreamer->emitLabel(MILabel);    SM.recordPatchPoint(*MILabel, MI);    PatchPointOpers Opers(&MI); @@ -685,8 +685,8 @@ getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) {    llvm_unreachable("Invalid SystemCPModifier!");  } -void SystemZAsmPrinter:: -EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { +void SystemZAsmPrinter::emitMachineConstantPoolValue( +    MachineConstantPoolValue *MCPV) {    auto *ZCPV = static_cast<SystemZConstantPoolValue*>(MCPV);    const MCExpr *Expr = @@ -695,7 +695,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {                              OutContext);    uint64_t Size = getDataLayout().getTypeAllocSize(ZCPV->getType()); -  OutStreamer->EmitValue(Expr, Size); +  OutStreamer->emitValue(Expr, Size);  }  bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -719,7 +719,7 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,    return false;  } -void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) { +void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) {    emitStackMaps(SM);  } diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h index d01a17c2ebe2..2d7562c7238d 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -32,9 +32,9 @@ public:    // Override AsmPrinter.    
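A few hunks up, the EmitNop helper picks the largest nop encoding that fits the requested byte budget. Its size selection, condensed into a sketch:

    // Largest SystemZ nop encoding that fits in NumBytes, as in EmitNop:
    // 2-byte "bcr 0,%r0", 4-byte "bc 0,0", 6-byte "brcl 0,.".
    unsigned nopSizeFor(unsigned NumBytes) {
      if (NumBytes == 0) return 0; // callers emit even byte counts
      if (NumBytes < 4) return 2;
      if (NumBytes < 6) return 4;
      return 6;
    }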
StringRef getPassName() const override { return "SystemZ Assembly Printer"; } -  void EmitInstruction(const MachineInstr *MI) override; -  void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override; -  void EmitEndOfAsmFile(Module &M) override; +  void emitInstruction(const MachineInstr *MI) override; +  void emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override; +  void emitEndOfAsmFile(Module &M) override;    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,                         const char *ExtraCode, raw_ostream &OS) override;    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/llvm/lib/Target/SystemZ/SystemZCallingConv.h index 4432adc6a269..d4c7ce07420b 100644 --- a/llvm/lib/Target/SystemZ/SystemZCallingConv.h +++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.h @@ -108,7 +108,7 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,    // the location (register or stack slot) for the indirect pointer.    // (This duplicates the usual i64 calling convention rules.)    unsigned Reg = State.AllocateReg(SystemZ::ArgGPRs); -  unsigned Offset = Reg ? 0 : State.AllocateStack(8, 8); +  unsigned Offset = Reg ? 0 : State.AllocateStack(8, Align(8));    // Use that same location for all the pending parts.    for (auto &It : PendingMembers) { diff --git a/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp index ffeee4da95cc..86c6b2985385 100644 --- a/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp +++ b/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp @@ -25,13 +25,12 @@ SystemZConstantPoolValue::Create(const GlobalValue *GV,    return new SystemZConstantPoolValue(GV, Modifier);  } -int SystemZConstantPoolValue:: -getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { -  unsigned AlignMask = Alignment - 1; +int SystemZConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, +                                                        Align Alignment) {    const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();    for (unsigned I = 0, E = Constants.size(); I != E; ++I) {      if (Constants[I].isMachineConstantPoolEntry() && -        (Constants[I].getAlignment() & AlignMask) == 0) { +        Constants[I].getAlign() >= Alignment) {        auto *ZCPV =          static_cast<SystemZConstantPoolValue *>(Constants[I].Val.MachineCPVal);        if (ZCPV->GV == GV && ZCPV->Modifier == Modifier) diff --git a/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h index 6cb7710abdfe..da610ab45070 100644 --- a/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h +++ b/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h @@ -43,7 +43,7 @@ public:    // Override MachineConstantPoolValue.    
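getExistingMachineCPValue's alignment test, changed just above from mask arithmetic to an Align comparison, is behavior-preserving because alignments are powers of two. A small check of that equivalence (plain integers standing in for llvm::Align):

    #include <cassert>

    bool reusableOld(unsigned Existing, unsigned Requested) {
      return (Existing & (Requested - 1)) == 0; // old mask form
    }
    bool reusableNew(unsigned Existing, unsigned Requested) {
      return Existing >= Requested; // new Align >= form
    }

    int main() {
      for (unsigned E : {1u, 2u, 4u, 8u, 16u})
        for (unsigned R : {1u, 2u, 4u, 8u, 16u})
          assert(reusableOld(E, R) == reusableNew(E, R));
    }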
int getExistingMachineCPValue(MachineConstantPool *CP, -                                unsigned Alignment) override; +                                Align Alignment) override;    void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;    void print(raw_ostream &O) const override; diff --git a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp new file mode 100644 index 000000000000..7d21d29d270e --- /dev/null +++ b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp @@ -0,0 +1,120 @@ +//===---------- SystemZPhysRegCopy.cpp - Handle phys reg copies -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass makes sure that a COPY of a physical register will be +// implementable after register allocation in copyPhysReg() (this could be +// done in EmitInstrWithCustomInserter() instead if COPY instructions would +// be passed to it). +// +//===----------------------------------------------------------------------===// + +#include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define SYSTEMZ_COPYPHYSREGS_NAME "SystemZ Copy Physregs" + +namespace llvm { +  void initializeSystemZCopyPhysRegsPass(PassRegistry&); +} + +namespace { + +class SystemZCopyPhysRegs : public MachineFunctionPass { +public: +  static char ID; +  SystemZCopyPhysRegs() +    : MachineFunctionPass(ID), TII(nullptr), MRI(nullptr) { +    initializeSystemZCopyPhysRegsPass(*PassRegistry::getPassRegistry()); +  } + +  StringRef getPassName() const override { return SYSTEMZ_COPYPHYSREGS_NAME; } + +  bool runOnMachineFunction(MachineFunction &MF) override; +  void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + +  bool visitMBB(MachineBasicBlock &MBB); + +  const SystemZInstrInfo *TII; +  MachineRegisterInfo *MRI; +}; + +char SystemZCopyPhysRegs::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(SystemZCopyPhysRegs, "systemz-copy-physregs", +                SYSTEMZ_COPYPHYSREGS_NAME, false, false) + +FunctionPass *llvm::createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM) { +  return new SystemZCopyPhysRegs(); +} + +void SystemZCopyPhysRegs::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.setPreservesCFG(); +  MachineFunctionPass::getAnalysisUsage(AU); +} + +bool SystemZCopyPhysRegs::visitMBB(MachineBasicBlock &MBB) { +  bool Modified = false; + +  // Certain special registers can only be copied from a subset of the +  // default register class of the type. It is therefore necessary to create +  // the target copy instructions before regalloc instead of in copyPhysReg(). 
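The loop in visitMBB() below rewrites COPYs that copyPhysReg() could not honor after register allocation: a copy out of CC becomes IPM into a fresh GR32 temporary, and copies out of or into an access register go through EAR/SAR. The shape of that rewrite on a toy instruction list (illustrative types, not machine IR):

    #include <list>
    #include <string>

    struct Inst { std::string Op, Dst, Src; };

    // Materialize "Dst = COPY CC" as "tmp = IPM; Dst = COPY tmp".
    void rewriteCCCopies(std::list<Inst> &Block) {
      for (auto It = Block.begin(); It != Block.end(); ++It) {
        if (It->Op != "COPY" || It->Src != "CC")
          continue;
        It = Block.insert(It, Inst{"IPM", "tmp", ""}); // insert before
        ++It;            // back on the original COPY
        It->Src = "tmp"; // now copies from the temporary
      }
    }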
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); +       MBBI != E; ) { +    MachineInstr *MI = &*MBBI++; +    if (!MI->isCopy()) +      continue; + +    DebugLoc DL = MI->getDebugLoc(); +    Register SrcReg = MI->getOperand(1).getReg(); +    Register DstReg = MI->getOperand(0).getReg(); +    if (DstReg.isVirtual() && +        (SrcReg == SystemZ::CC || SystemZ::AR32BitRegClass.contains(SrcReg))) { +      Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass); +      if (SrcReg == SystemZ::CC) +        BuildMI(MBB, MI, DL, TII->get(SystemZ::IPM), Tmp); +      else +        BuildMI(MBB, MI, DL, TII->get(SystemZ::EAR), Tmp).addReg(SrcReg); +      MI->getOperand(1).setReg(Tmp); +      Modified = true; +    } +    else if (SrcReg.isVirtual() && +             SystemZ::AR32BitRegClass.contains(DstReg)) { +      Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass); +      MI->getOperand(0).setReg(Tmp); +      BuildMI(MBB, MBBI, DL, TII->get(SystemZ::SAR), DstReg).addReg(Tmp); +      Modified = true; +    } +  } + +  return Modified; +} + +bool SystemZCopyPhysRegs::runOnMachineFunction(MachineFunction &F) { +  TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); +  MRI = &F.getRegInfo(); + +  bool Modified = false; +  for (auto &MBB : F) +    Modified |= visitMBB(MBB); + +  return Modified; +} + diff --git a/llvm/lib/Target/SystemZ/SystemZFeatures.td b/llvm/lib/Target/SystemZ/SystemZFeatures.td index dae795e845b0..28f58cb310af 100644 --- a/llvm/lib/Target/SystemZ/SystemZFeatures.td +++ b/llvm/lib/Target/SystemZ/SystemZFeatures.td @@ -10,13 +10,13 @@  //  //===----------------------------------------------------------------------===// -class SystemZFeature<string extname, string intname, string desc> -  : Predicate<"Subtarget->has"##intname##"()">, -    AssemblerPredicate<"Feature"##intname, extname>, -    SubtargetFeature<extname, "Has"##intname, "true", desc>; +class SystemZFeature<string extname, string intname, dag featdag, string desc> +  : Predicate<"Subtarget->has"#intname#"()">, +    AssemblerPredicate<featdag, extname>, +    SubtargetFeature<extname, "Has"#intname, "true", desc>;  class SystemZMissingFeature<string intname> -  : Predicate<"!Subtarget->has"##intname##"()">; +  : Predicate<"!Subtarget->has"#intname#"()">;  class SystemZFeatureList<list<SystemZFeature> x> {    list<SystemZFeature> List = x; @@ -25,6 +25,13 @@ class SystemZFeatureList<list<SystemZFeature> x> {  class SystemZFeatureAdd<list<SystemZFeature> x, list<SystemZFeature> y>    : SystemZFeatureList<!listconcat(x, y)>; +// This feature is added as a subtarget feature whenever the function is +// compiled to use soft-float. 
+def FeatureSoftFloat : SystemZFeature< +  "soft-float", "SoftFloat", (all_of FeatureSoftFloat), +  "Use software emulation for floating point" +>; +  //===----------------------------------------------------------------------===//  //  // New features added in the Ninth Edition of the z/Architecture @@ -32,54 +39,54 @@ class SystemZFeatureAdd<list<SystemZFeature> x, list<SystemZFeature> y>  //===----------------------------------------------------------------------===//  def FeatureDistinctOps : SystemZFeature< -  "distinct-ops", "DistinctOps", +  "distinct-ops", "DistinctOps", (all_of FeatureDistinctOps),    "Assume that the distinct-operands facility is installed"  >;  def FeatureFastSerialization : SystemZFeature< -  "fast-serialization", "FastSerialization", +  "fast-serialization", "FastSerialization", (all_of FeatureFastSerialization),    "Assume that the fast-serialization facility is installed"  >;  def FeatureFPExtension : SystemZFeature< -  "fp-extension", "FPExtension", +  "fp-extension", "FPExtension", (all_of FeatureFPExtension),    "Assume that the floating-point extension facility is installed"  >;  def FeatureHighWord : SystemZFeature< -  "high-word", "HighWord", +  "high-word", "HighWord", (all_of FeatureHighWord),    "Assume that the high-word facility is installed"  >;  def FeatureInterlockedAccess1 : SystemZFeature< -  "interlocked-access1", "InterlockedAccess1", +  "interlocked-access1", "InterlockedAccess1", (all_of FeatureInterlockedAccess1),    "Assume that interlocked-access facility 1 is installed"  >;  def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;  def FeatureLoadStoreOnCond : SystemZFeature< -  "load-store-on-cond", "LoadStoreOnCond", +  "load-store-on-cond", "LoadStoreOnCond", (all_of FeatureLoadStoreOnCond),    "Assume that the load/store-on-condition facility is installed"  >;  def FeatureNoLoadStoreOnCond : SystemZMissingFeature<"LoadStoreOnCond">;  def FeaturePopulationCount : SystemZFeature< -  "population-count", "PopulationCount", +  "population-count", "PopulationCount", (all_of FeaturePopulationCount),    "Assume that the population-count facility is installed"  >;  def FeatureMessageSecurityAssist3 : SystemZFeature< -  "message-security-assist-extension3", "MessageSecurityAssist3", +  "message-security-assist-extension3", "MessageSecurityAssist3", (all_of FeatureMessageSecurityAssist3),    "Assume that the message-security-assist extension facility 3 is installed"  >;  def FeatureMessageSecurityAssist4 : SystemZFeature< -  "message-security-assist-extension4", "MessageSecurityAssist4", +  "message-security-assist-extension4", "MessageSecurityAssist4", (all_of FeatureMessageSecurityAssist4),    "Assume that the message-security-assist extension facility 4 is installed"  >;  def FeatureResetReferenceBitsMultiple : SystemZFeature< -  "reset-reference-bits-multiple", "ResetReferenceBitsMultiple", +  "reset-reference-bits-multiple", "ResetReferenceBitsMultiple", (all_of FeatureResetReferenceBitsMultiple),    "Assume that the reset-reference-bits-multiple facility is installed"  >; @@ -103,37 +110,37 @@ def Arch9NewFeatures : SystemZFeatureList<[  //===----------------------------------------------------------------------===//  def FeatureExecutionHint : SystemZFeature< -  "execution-hint", "ExecutionHint", +  "execution-hint", "ExecutionHint", (all_of FeatureExecutionHint),    "Assume that the execution-hint facility is installed"  >;  def FeatureLoadAndTrap : SystemZFeature< -  "load-and-trap", "LoadAndTrap", +  
"load-and-trap", "LoadAndTrap", (all_of FeatureLoadAndTrap),    "Assume that the load-and-trap facility is installed"  >;  def FeatureMiscellaneousExtensions : SystemZFeature< -  "miscellaneous-extensions", "MiscellaneousExtensions", +  "miscellaneous-extensions", "MiscellaneousExtensions", (all_of FeatureMiscellaneousExtensions),    "Assume that the miscellaneous-extensions facility is installed"  >;  def FeatureProcessorAssist : SystemZFeature< -  "processor-assist", "ProcessorAssist", +  "processor-assist", "ProcessorAssist", (all_of FeatureProcessorAssist),    "Assume that the processor-assist facility is installed"  >;  def FeatureTransactionalExecution : SystemZFeature< -  "transactional-execution", "TransactionalExecution", +  "transactional-execution", "TransactionalExecution", (all_of FeatureTransactionalExecution),    "Assume that the transactional-execution facility is installed"  >;  def FeatureDFPZonedConversion : SystemZFeature< -  "dfp-zoned-conversion", "DFPZonedConversion", +  "dfp-zoned-conversion", "DFPZonedConversion", (all_of FeatureDFPZonedConversion),    "Assume that the DFP zoned-conversion facility is installed"  >;  def FeatureEnhancedDAT2 : SystemZFeature< -  "enhanced-dat-2", "EnhancedDAT2", +  "enhanced-dat-2", "EnhancedDAT2", (all_of FeatureEnhancedDAT2),    "Assume that the enhanced-DAT facility 2 is installed"  >; @@ -154,27 +161,27 @@ def Arch10NewFeatures : SystemZFeatureList<[  //===----------------------------------------------------------------------===//  def FeatureLoadAndZeroRightmostByte : SystemZFeature< -  "load-and-zero-rightmost-byte", "LoadAndZeroRightmostByte", +  "load-and-zero-rightmost-byte", "LoadAndZeroRightmostByte", (all_of FeatureLoadAndZeroRightmostByte),    "Assume that the load-and-zero-rightmost-byte facility is installed"  >;  def FeatureLoadStoreOnCond2 : SystemZFeature< -  "load-store-on-cond-2", "LoadStoreOnCond2", +  "load-store-on-cond-2", "LoadStoreOnCond2", (all_of FeatureLoadStoreOnCond2),    "Assume that the load/store-on-condition facility 2 is installed"  >;  def FeatureMessageSecurityAssist5 : SystemZFeature< -  "message-security-assist-extension5", "MessageSecurityAssist5", +  "message-security-assist-extension5", "MessageSecurityAssist5", (all_of FeatureMessageSecurityAssist5),    "Assume that the message-security-assist extension facility 5 is installed"  >;  def FeatureDFPPackedConversion : SystemZFeature< -  "dfp-packed-conversion", "DFPPackedConversion", +  "dfp-packed-conversion", "DFPPackedConversion", (all_of FeatureDFPPackedConversion),    "Assume that the DFP packed-conversion facility is installed"  >;  def FeatureVector : SystemZFeature< -  "vector", "Vector", +  "vector", "Vector", (all_of FeatureVector),    "Assume that the vectory facility is installed"  >;  def FeatureNoVector : SystemZMissingFeature<"Vector">; @@ -194,38 +201,38 @@ def Arch11NewFeatures : SystemZFeatureList<[  //===----------------------------------------------------------------------===//  def FeatureMiscellaneousExtensions2 : SystemZFeature< -  "miscellaneous-extensions-2", "MiscellaneousExtensions2", +  "miscellaneous-extensions-2", "MiscellaneousExtensions2", (all_of FeatureMiscellaneousExtensions2),    "Assume that the miscellaneous-extensions facility 2 is installed"  >;  def FeatureGuardedStorage : SystemZFeature< -  "guarded-storage", "GuardedStorage", +  "guarded-storage", "GuardedStorage", (all_of FeatureGuardedStorage),    "Assume that the guarded-storage facility is installed"  >;  def FeatureMessageSecurityAssist7 : 
SystemZFeature< -  "message-security-assist-extension7", "MessageSecurityAssist7", +  "message-security-assist-extension7", "MessageSecurityAssist7", (all_of FeatureMessageSecurityAssist7),    "Assume that the message-security-assist extension facility 7 is installed"  >;  def FeatureMessageSecurityAssist8 : SystemZFeature< -  "message-security-assist-extension8", "MessageSecurityAssist8", +  "message-security-assist-extension8", "MessageSecurityAssist8", (all_of FeatureMessageSecurityAssist8),    "Assume that the message-security-assist extension facility 8 is installed"  >;  def FeatureVectorEnhancements1 : SystemZFeature< -  "vector-enhancements-1", "VectorEnhancements1", +  "vector-enhancements-1", "VectorEnhancements1", (all_of FeatureVectorEnhancements1),    "Assume that the vector enhancements facility 1 is installed"  >;  def FeatureNoVectorEnhancements1 : SystemZMissingFeature<"VectorEnhancements1">;  def FeatureVectorPackedDecimal : SystemZFeature< -  "vector-packed-decimal", "VectorPackedDecimal", +  "vector-packed-decimal", "VectorPackedDecimal", (all_of FeatureVectorPackedDecimal),    "Assume that the vector packed decimal facility is installed"  >;  def FeatureInsertReferenceBitsMultiple : SystemZFeature< -  "insert-reference-bits-multiple", "InsertReferenceBitsMultiple", +  "insert-reference-bits-multiple", "InsertReferenceBitsMultiple", (all_of FeatureInsertReferenceBitsMultiple),    "Assume that the insert-reference-bits-multiple facility is installed"  >; @@ -246,32 +253,32 @@ def Arch12NewFeatures : SystemZFeatureList<[  //===----------------------------------------------------------------------===//  def FeatureMiscellaneousExtensions3 : SystemZFeature< -  "miscellaneous-extensions-3", "MiscellaneousExtensions3", +  "miscellaneous-extensions-3", "MiscellaneousExtensions3", (all_of FeatureMiscellaneousExtensions3),    "Assume that the miscellaneous-extensions facility 3 is installed"  >;  def FeatureMessageSecurityAssist9 : SystemZFeature< -  "message-security-assist-extension9", "MessageSecurityAssist9", +  "message-security-assist-extension9", "MessageSecurityAssist9", (all_of FeatureMessageSecurityAssist9),    "Assume that the message-security-assist extension facility 9 is installed"  >;  def FeatureVectorEnhancements2 : SystemZFeature< -  "vector-enhancements-2", "VectorEnhancements2", +  "vector-enhancements-2", "VectorEnhancements2", (all_of FeatureVectorEnhancements2),    "Assume that the vector enhancements facility 2 is installed"  >;  def FeatureVectorPackedDecimalEnhancement : SystemZFeature< -  "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement", +  "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement", (all_of FeatureVectorPackedDecimalEnhancement),    "Assume that the vector packed decimal enhancement facility is installed"  >;  def FeatureEnhancedSort : SystemZFeature< -  "enhanced-sort", "EnhancedSort", +  "enhanced-sort", "EnhancedSort", (all_of FeatureEnhancedSort),    "Assume that the enhanced-sort facility is installed"  >;  def FeatureDeflateConversion : SystemZFeature< -  "deflate-conversion", "DeflateConversion", +  "deflate-conversion", "DeflateConversion", (all_of FeatureDeflateConversion),    "Assume that the deflate-conversion facility is installed"  >; diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 3cdf6bf98ee0..985722fdcab4 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ 
b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -17,6 +17,7 @@  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/RegisterScavenging.h"  #include "llvm/IR/Function.h" +#include "llvm/Target/TargetMachine.h"  using namespace llvm; @@ -62,18 +63,6 @@ SystemZFrameLowering::SystemZFrameLowering()      RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;  } -static bool usePackedStack(MachineFunction &MF) { -  bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack"); -  bool IsVarArg = MF.getFunction().isVarArg(); -  bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC; -  bool BackChain = MF.getFunction().hasFnAttribute("backchain"); -  bool FramAddressTaken = MF.getFrameInfo().isFrameAddressTaken(); -  if (HasPackedStackAttr && BackChain) -    report_fatal_error("packed-stack with backchain is currently unsupported."); -  return HasPackedStackAttr && !IsVarArg && CallConv && !BackChain && -         !FramAddressTaken; -} -  bool SystemZFrameLowering::  assignCalleeSavedSpillSlots(MachineFunction &MF,                              const TargetRegisterInfo *TRI, @@ -87,71 +76,44 @@ assignCalleeSavedSpillSlots(MachineFunction &MF,    unsigned LowGPR = 0;    unsigned HighGPR = SystemZ::R15D;    int StartSPOffset = SystemZMC::CallFrameSize; -  int CurrOffset; -  if (!usePackedStack(MF)) { -    for (auto &CS : CSI) { -      unsigned Reg = CS.getReg(); -      int Offset = RegSpillOffsets[Reg]; -      if (Offset) { -        if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) { -          LowGPR = Reg; -          StartSPOffset = Offset; -        } -        Offset -= SystemZMC::CallFrameSize; -        int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset); -        CS.setFrameIdx(FrameIdx); -      } else -        CS.setFrameIdx(INT32_MAX); -    } - -    // Save the range of call-saved registers, for use by the -    // prologue/epilogue inserters. -    ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset); -    if (IsVarArg) { -      // Also save the GPR varargs, if any.  R6D is call-saved, so would -      // already be included, but we also need to handle the call-clobbered -      // argument registers. -      unsigned FirstGPR = ZFI->getVarArgsFirstGPR(); -      if (FirstGPR < SystemZ::NumArgGPRs) { -        unsigned Reg = SystemZ::ArgGPRs[FirstGPR]; -        int Offset = RegSpillOffsets[Reg]; -        if (StartSPOffset > Offset) { -          LowGPR = Reg; StartSPOffset = Offset; -        } +  for (auto &CS : CSI) { +    unsigned Reg = CS.getReg(); +    int Offset = getRegSpillOffset(MF, Reg); +    if (Offset) { +      if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) { +        LowGPR = Reg; +        StartSPOffset = Offset;        } -    } -    ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset); +      Offset -= SystemZMC::CallFrameSize; +      int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset); +      CS.setFrameIdx(FrameIdx); +    } else +      CS.setFrameIdx(INT32_MAX); +  } -    CurrOffset = -SystemZMC::CallFrameSize; -  } else { -    // Packed stack: put all the GPRs at the top of the Register save area. 
-    uint32_t LowGR64Num = UINT32_MAX; -    for (auto &CS : CSI) { -      unsigned Reg = CS.getReg(); -      if (SystemZ::GR64BitRegClass.contains(Reg)) { -        unsigned GR64Num = SystemZMC::getFirstReg(Reg); -        int Offset = -8 * (15 - GR64Num + 1); -        if (LowGR64Num > GR64Num) { -          LowGR64Num = GR64Num; -          StartSPOffset = SystemZMC::CallFrameSize + Offset; -        } -        int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset); -        CS.setFrameIdx(FrameIdx); -      } else -        CS.setFrameIdx(INT32_MAX); +  // Save the range of call-saved registers, for use by the +  // prologue/epilogue inserters. +  ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset); +  if (IsVarArg) { +    // Also save the GPR varargs, if any.  R6D is call-saved, so would +    // already be included, but we also need to handle the call-clobbered +    // argument registers. +    unsigned FirstGPR = ZFI->getVarArgsFirstGPR(); +    if (FirstGPR < SystemZ::NumArgGPRs) { +      unsigned Reg = SystemZ::ArgGPRs[FirstGPR]; +      int Offset = getRegSpillOffset(MF, Reg); +      if (StartSPOffset > Offset) { +        LowGPR = Reg; StartSPOffset = Offset; +      }      } -    if (LowGR64Num < UINT32_MAX) -      LowGPR = SystemZMC::GR64Regs[LowGR64Num]; - -    // Save the range of call-saved registers, for use by the -    // prologue/epilogue inserters. -    ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset); -    ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset); - -    CurrOffset = LowGPR ? -(SystemZMC::CallFrameSize - StartSPOffset) : 0;    } +  ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);    // Create fixed stack objects for the remaining registers. +  int CurrOffset = -SystemZMC::CallFrameSize; +  if (usePackedStack(MF)) +    CurrOffset += StartSPOffset; +    for (auto &CS : CSI) {      if (CS.getFrameIdx() != INT32_MAX)        continue; @@ -234,11 +196,9 @@ static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,    }  } -bool SystemZFrameLowering:: -spillCalleeSavedRegisters(MachineBasicBlock &MBB, -                          MachineBasicBlock::iterator MBBI, -                          const std::vector<CalleeSavedInfo> &CSI, -                          const TargetRegisterInfo *TRI) const { +bool SystemZFrameLowering::spillCalleeSavedRegisters( +    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, +    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {    if (CSI.empty())      return false; @@ -296,11 +256,9 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,    return true;  } -bool SystemZFrameLowering:: -restoreCalleeSavedRegisters(MachineBasicBlock &MBB, -                            MachineBasicBlock::iterator MBBI, -                            std::vector<CalleeSavedInfo> &CSI, -                            const TargetRegisterInfo *TRI) const { +bool SystemZFrameLowering::restoreCalleeSavedRegisters( +    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, +    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {    if (CSI.empty())      return false; @@ -358,9 +316,10 @@ void SystemZFrameLowering::  processFunctionBeforeFrameFinalized(MachineFunction &MF,                                      RegScavenger *RS) const {    MachineFrameInfo &MFFrame = MF.getFrameInfo(); +  bool BackChain = MF.getFunction().hasFnAttribute("backchain"); -  if (!usePackedStack(MF)) -    // Always create the full incoming register save area. 
+  if (!usePackedStack(MF) || BackChain) +    // Create the incoming register save area.      getOrCreateFramePointerSaveIndex(MF);    // Get the size of our stack frame to be allocated ... @@ -382,16 +341,15 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF,      // are outside the reach of an unsigned 12-bit displacement.      // Create 2 for the case where both addresses in an MVC are      // out of range. -    RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, 8, false)); -    RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, 8, false)); +    RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false)); +    RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));    }  }  // Emit instructions before MBBI (in MBB) to add NumBytes to Reg.  static void emitIncrement(MachineBasicBlock &MBB, -                          MachineBasicBlock::iterator &MBBI, -                          const DebugLoc &DL, -                          unsigned Reg, int64_t NumBytes, +                          MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, +                          Register Reg, int64_t NumBytes,                            const TargetInstrInfo *TII) {    while (NumBytes) {      unsigned Opcode; @@ -416,12 +374,39 @@ static void emitIncrement(MachineBasicBlock &MBB,    }  } +// Add CFI for the new CFA offset. +static void buildCFAOffs(MachineBasicBlock &MBB, +                         MachineBasicBlock::iterator MBBI, +                         const DebugLoc &DL, int Offset, +                         const SystemZInstrInfo *ZII) { +  unsigned CFIIndex = MBB.getParent()->addFrameInst( +    MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset)); +  BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) +    .addCFIIndex(CFIIndex); +} + +// Add CFI for the new frame location. +static void buildDefCFAReg(MachineBasicBlock &MBB, +                           MachineBasicBlock::iterator MBBI, +                           const DebugLoc &DL, unsigned Reg, +                           const SystemZInstrInfo *ZII) { +  MachineFunction &MF = *MBB.getParent(); +  MachineModuleInfo &MMI = MF.getMMI(); +  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); +  unsigned RegNum = MRI->getDwarfRegNum(Reg, true); +  unsigned CFIIndex = MF.addFrameInst( +                        MCCFIInstruction::createDefCfaRegister(nullptr, RegNum)); +  BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) +    .addCFIIndex(CFIIndex); +} +  void SystemZFrameLowering::emitPrologue(MachineFunction &MF,                                          MachineBasicBlock &MBB) const {    assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); +  const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>(); +  const SystemZTargetLowering &TLI = *STI.getTargetLowering();    MachineFrameInfo &MFFrame = MF.getFrameInfo(); -  auto *ZII = -      static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); +  auto *ZII = static_cast<const SystemZInstrInfo *>(STI.getInstrInfo());    SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();    MachineBasicBlock::iterator MBBI = MBB.begin();    MachineModuleInfo &MMI = MF.getMMI(); @@ -504,19 +489,31 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,      // Allocate StackSize bytes.      int64_t Delta = -int64_t(StackSize); -    emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII); - -    // Add CFI for the allocation. 
-    unsigned CFIIndex = MF.addFrameInst( -        MCCFIInstruction::createDefCfaOffset(nullptr, SPOffsetFromCFA + Delta)); -    BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) -        .addCFIIndex(CFIIndex); +    const unsigned ProbeSize = TLI.getStackProbeSize(MF); +    bool FreeProbe = (ZFI->getSpillGPRRegs().GPROffset && +           (ZFI->getSpillGPRRegs().GPROffset + StackSize) < ProbeSize); +    if (!FreeProbe && +        MF.getSubtarget().getTargetLowering()->hasInlineStackProbe(MF)) { +      // Stack probing may involve looping, but splitting the prologue block +      // is not possible at this point since it would invalidate the +      // SaveBlocks / RestoreBlocks sets of PEI in the single block function +      // case. Build a pseudo to be handled later by inlineStackProbe(). +      BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::PROBED_STACKALLOC)) +        .addImm(StackSize); +    } +    else { +      emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII); +      buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII); +    }      SPOffsetFromCFA += Delta; -    if (StoreBackchain) +    if (StoreBackchain) { +      // The back chain is stored topmost with packed-stack. +      int Offset = usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0;        BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG)) -        .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0) -        .addReg(0); +        .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D) +        .addImm(Offset).addReg(0); +    }    }    if (HasFP) { @@ -525,11 +522,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,        .addReg(SystemZ::R15D);      // Add CFI for the new frame location. -    unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true); -    unsigned CFIIndex = MF.addFrameInst( -        MCCFIInstruction::createDefCfaRegister(nullptr, HardFP)); -    BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) -        .addCFIIndex(CFIIndex); +    buildDefCFAReg(MBB, MBBI, DL, SystemZ::R11D, ZII);      // Mark the FramePtr as live at the beginning of every block except      // the entry block.  (We'll have marked R11 as live on entry when @@ -560,7 +553,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,      // Add CFI for this save.
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); -    unsigned IgnoredFrameReg; +    Register IgnoredFrameReg;      int64_t Offset =          getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg); @@ -622,6 +615,91 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,    }  } +void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF, +                                            MachineBasicBlock &PrologMBB) const { +  auto *ZII = +    static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); +  const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>(); +  const SystemZTargetLowering &TLI = *STI.getTargetLowering(); + +  MachineInstr *StackAllocMI = nullptr; +  for (MachineInstr &MI : PrologMBB) +    if (MI.getOpcode() == SystemZ::PROBED_STACKALLOC) { +      StackAllocMI = &MI; +      break; +    } +  if (StackAllocMI == nullptr) +    return; +  uint64_t StackSize = StackAllocMI->getOperand(0).getImm(); +  const unsigned ProbeSize = TLI.getStackProbeSize(MF); +  uint64_t NumFullBlocks = StackSize / ProbeSize; +  uint64_t Residual = StackSize % ProbeSize; +  int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP; +  MachineBasicBlock *MBB = &PrologMBB; +  MachineBasicBlock::iterator MBBI = StackAllocMI; +  const DebugLoc DL = StackAllocMI->getDebugLoc(); + +  // Allocate a block of Size bytes on the stack and probe it. +  auto allocateAndProbe = [&](MachineBasicBlock &InsMBB, +                              MachineBasicBlock::iterator InsPt, unsigned Size, +                              bool EmitCFI) -> void { +    emitIncrement(InsMBB, InsPt, DL, SystemZ::R15D, -int64_t(Size), ZII); +    if (EmitCFI) { +      SPOffsetFromCFA -= Size; +      buildCFAOffs(InsMBB, InsPt, DL, SPOffsetFromCFA, ZII); +    } +    // Probe by means of a volatile compare. +    MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(), +      MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1)); +    BuildMI(InsMBB, InsPt, DL, ZII->get(SystemZ::CG)) +      .addReg(SystemZ::R0D, RegState::Undef) +      .addReg(SystemZ::R15D).addImm(Size - 8).addReg(0) +      .addMemOperand(MMO); +  }; + +  if (NumFullBlocks < 3) { +    // Emit unrolled probe statements. +    for (unsigned int i = 0; i < NumFullBlocks; i++) +      allocateAndProbe(*MBB, MBBI, ProbeSize, true/*EmitCFI*/); +  } else { +    // Emit a loop probing the pages. 
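The plan emitted here splits the allocation into ProbeSize blocks, each probed through a volatile 8-byte compare at the far end of the newly allocated block, switching from unrolled probes to this loop once three or more full blocks are needed. A minimal standalone sketch of the arithmetic, with hypothetical sizes rather than the LLVM API:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Hypothetical 16584-byte frame with the default 4096-byte probe size.
    const uint64_t StackSize = 16584, ProbeSize = 4096;
    uint64_t NumFullBlocks = StackSize / ProbeSize; // 4 -> loop form (>= 3)
    uint64_t Residual = StackSize % ProbeSize;      // 200 -> one final probe
    std::printf("%llu full blocks (%s) + %llu-byte residual\n",
                (unsigned long long)NumFullBlocks,
                NumFullBlocks < 3 ? "unrolled" : "loop",
                (unsigned long long)Residual);
  }

The emitPrologue() hunk above also skips the pseudo entirely when the spill of the call-saved GPRs already stores within the first ProbeSize bytes of the new frame (the FreeProbe case), since that store itself serves as the probe.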
+    uint64_t LoopAlloc = ProbeSize * NumFullBlocks; +    SPOffsetFromCFA -= LoopAlloc; + +    BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D) +      .addReg(SystemZ::R15D); +    buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII); +    emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII); +    buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::CallFrameSize + LoopAlloc), +                 ZII); + +    MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB); +    MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB); +    MBB->addSuccessor(LoopMBB); +    LoopMBB->addSuccessor(LoopMBB); +    LoopMBB->addSuccessor(DoneMBB); + +    MBB = LoopMBB; +    allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/); +    BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR)) +      .addReg(SystemZ::R15D).addReg(SystemZ::R1D); +    BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC)) +      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB); + +    MBB = DoneMBB; +    MBBI = DoneMBB->begin(); +    buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII); + +    recomputeLiveIns(*DoneMBB); +    recomputeLiveIns(*LoopMBB); +  } + +  if (Residual) +    allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/); + +  StackAllocMI->eraseFromParent(); +} +  bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {    return (MF.getTarget().Options.DisableFramePointerElim(MF) ||            MF.getFrameInfo().hasVarSizedObjects() || @@ -639,7 +717,7 @@ SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {  int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,                                                   int FI, -                                                 unsigned &FrameReg) const { +                                                 Register &FrameReg) const {    // Our incoming SP is actually SystemZMC::CallFrameSize below the CFA, so    // add that difference here.    int64_t Offset = @@ -664,14 +742,43 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,    }  } +unsigned SystemZFrameLowering::getRegSpillOffset(MachineFunction &MF, +                                                 Register Reg) const { +  bool IsVarArg = MF.getFunction().isVarArg(); +  bool BackChain = MF.getFunction().hasFnAttribute("backchain"); +  bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat(); +  unsigned Offset = RegSpillOffsets[Reg]; +  if (usePackedStack(MF) && !(IsVarArg && !SoftFloat)) { +    if (SystemZ::GR64BitRegClass.contains(Reg)) +      // Put all GPRs at the top of the Register save area with packed +      // stack. Make room for the backchain if needed. +      Offset += BackChain ? 24 : 32; +    else +      Offset = 0; +  } +  return Offset; +} +  int SystemZFrameLowering::  getOrCreateFramePointerSaveIndex(MachineFunction &MF) const {    SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();    int FI = ZFI->getFramePointerSaveIndex();    if (!FI) {      MachineFrameInfo &MFFrame = MF.getFrameInfo(); -    FI = MFFrame.CreateFixedObject(8, -SystemZMC::CallFrameSize, false); +    // The back chain is stored topmost with packed-stack. +    int Offset = usePackedStack(MF) ? 
-8 : -SystemZMC::CallFrameSize; +    FI = MFFrame.CreateFixedObject(8, Offset, false);      ZFI->setFramePointerSaveIndex(FI);    }    return FI;  } + +bool SystemZFrameLowering::usePackedStack(MachineFunction &MF) const { +  bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack"); +  bool BackChain = MF.getFunction().hasFnAttribute("backchain"); +  bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat(); +  if (HasPackedStackAttr && BackChain && !SoftFloat) +    report_fatal_error("packed-stack + backchain + hard-float is unsupported."); +  bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC; +  return HasPackedStackAttr && CallConv; +} diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index 4189a92b8294..8752acc7e5ae 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -32,33 +32,36 @@ public:                              RegScavenger *RS) const override;    bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,                                   MachineBasicBlock::iterator MBBI, -                                 const std::vector<CalleeSavedInfo> &CSI, +                                 ArrayRef<CalleeSavedInfo> CSI,                                   const TargetRegisterInfo *TRI) const override; -  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, -                                   MachineBasicBlock::iterator MBBII, -                                   std::vector<CalleeSavedInfo> &CSI, -                                   const TargetRegisterInfo *TRI) const -    override; +  bool +  restoreCalleeSavedRegisters(MachineBasicBlock &MBB, +                              MachineBasicBlock::iterator MBBII, +                              MutableArrayRef<CalleeSavedInfo> CSI, +                              const TargetRegisterInfo *TRI) const override;    void processFunctionBeforeFrameFinalized(MachineFunction &MF,                                             RegScavenger *RS) const override;    void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;    void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; +  void inlineStackProbe(MachineFunction &MF, +                        MachineBasicBlock &PrologMBB) const override;    bool hasFP(const MachineFunction &MF) const override;    bool hasReservedCallFrame(const MachineFunction &MF) const override;    int getFrameIndexReference(const MachineFunction &MF, int FI, -                             unsigned &FrameReg) const override; +                             Register &FrameReg) const override;    MachineBasicBlock::iterator    eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,                                  MachineBasicBlock::iterator MI) const override;    // Return the byte offset from the incoming stack pointer of Reg's -  // ABI-defined save slot.  Return 0 if no slot is defined for Reg. -  unsigned getRegSpillOffset(unsigned Reg) const { -    return RegSpillOffsets[Reg]; -  } +  // ABI-defined save slot.  Return 0 if no slot is defined for Reg.  Adjust +  // the offset in case MF has packed-stack. +  unsigned getRegSpillOffset(MachineFunction &MF, Register Reg) const;    // Get or create the frame index of where the old frame pointer is stored.    
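To make the packed-stack adjustment in getRegSpillOffset() concrete: assuming the ELF ABI slot table used above (R2D at offset 16 up through R15D at 120 inside the 160-byte register save area), the GPR slots are shifted so they end at the top of the save area, with the topmost 8 bytes left to the backchain when one is requested. An illustrative recomputation, not the LLVM API:

  #include <cstdio>

  // Shift an ABI spill offset for the packed-stack layout: +32 moves the
  // slots flush against the top of the 160-byte save area, +24 leaves the
  // topmost 8 bytes for the backchain.  Table values are assumed here, not
  // queried from LLVM.
  unsigned packedSpillOffset(unsigned AbiOffset, bool BackChain) {
    return AbiOffset + (BackChain ? 24 : 32);
  }

  int main() {
    // R15D (ABI slot 120): [152,160) without backchain, or [144,152) with
    // the backchain occupying [152,160).
    std::printf("R15D at %u / %u\n",
                packedSpillOffset(120, false), packedSpillOffset(120, true));
  }

As the code above shows, the adjustment only applies when the packed layout is in effect, i.e. not for vararg functions using hardware floating point.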
int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const; + +  bool usePackedStack(MachineFunction &MF) const;  };  } // end namespace llvm diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 3927a977e6fc..37328684399b 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1456,7 +1456,8 @@ bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,    auto *StoreA = cast<StoreSDNode>(N);    auto *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I));    auto *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I)); -  return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB); +  return !LoadA->isVolatile() && LoadA->getMemoryVT() == LoadB->getMemoryVT() && +         canUseBlockOperation(StoreA, LoadB);  }  void SystemZDAGToDAGISel::Select(SDNode *Node) { diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index c73905d3357a..eb1e51341ec4 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -88,25 +88,27 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,    else      addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);    addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); -  if (Subtarget.hasVector()) { -    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); -    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); -  } else { -    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); -    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); -  } -  if (Subtarget.hasVectorEnhancements1()) -    addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); -  else -    addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); +  if (!useSoftFloat()) { +    if (Subtarget.hasVector()) { +      addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); +      addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); +    } else { +      addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); +      addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); +    } +    if (Subtarget.hasVectorEnhancements1()) +      addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); +    else +      addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); -  if (Subtarget.hasVector()) { -    addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); -    addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); -    addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); -    addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); -    addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); -    addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); +    if (Subtarget.hasVector()) { +      addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); +      addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); +      addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); +      addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); +      addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); +      addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); +    }    }    // Compute derived properties from the register classes @@ -639,12 +641,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,    setTargetDAGCombine(ISD::FP_ROUND);    setTargetDAGCombine(ISD::STRICT_FP_ROUND);    setTargetDAGCombine(ISD::FP_EXTEND); +  setTargetDAGCombine(ISD::SINT_TO_FP); +  
setTargetDAGCombine(ISD::UINT_TO_FP);    setTargetDAGCombine(ISD::STRICT_FP_EXTEND);    setTargetDAGCombine(ISD::BSWAP);    setTargetDAGCombine(ISD::SDIV);    setTargetDAGCombine(ISD::UDIV);    setTargetDAGCombine(ISD::SREM);    setTargetDAGCombine(ISD::UREM); +  setTargetDAGCombine(ISD::INTRINSIC_VOID); +  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);    // Handle intrinsics.    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); @@ -666,6 +672,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,    IsStrictFPEnabled = true;  } +bool SystemZTargetLowering::useSoftFloat() const { +  return Subtarget.hasSoftFloat(); +} +  EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,                                                LLVMContext &, EVT VT) const {    if (!VT.isVector()) @@ -816,6 +826,15 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,    return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);  } +/// Returns true if stack probing through inline assembly is requested. +bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const { +  // If the function specifically requests inline stack probes, emit them. +  if (MF.getFunction().hasFnAttribute("probe-stack")) +    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == +           "inline-asm"; +  return false; +} +  bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {    // We can use CGFI or CLGFI.    return isInt<32>(Imm) || isUInt<32>(Imm); @@ -1123,12 +1142,14 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(        return std::make_pair(0U, &SystemZ::GRH32BitRegClass);      case 'f': // Floating-point register -      if (VT == MVT::f64) -        return std::make_pair(0U, &SystemZ::FP64BitRegClass); -      else if (VT == MVT::f128) -        return std::make_pair(0U, &SystemZ::FP128BitRegClass); -      return std::make_pair(0U, &SystemZ::FP32BitRegClass); - +      if (!useSoftFloat()) { +        if (VT == MVT::f64) +          return std::make_pair(0U, &SystemZ::FP64BitRegClass); +        else if (VT == MVT::f128) +          return std::make_pair(0U, &SystemZ::FP128BitRegClass); +        return std::make_pair(0U, &SystemZ::FP32BitRegClass); +      } +      break;      case 'v': // Vector register        if (Subtarget.hasVector()) {          if (VT == MVT::f32) @@ -1156,6 +1177,9 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(                                   SystemZMC::GR64Regs, 16);      }      if (Constraint[1] == 'f') { +      if (useSoftFloat()) +        return std::make_pair( +            0u, static_cast<const TargetRegisterClass *>(nullptr));        if (VT == MVT::f32)          return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,                                     SystemZMC::FP32Regs, 16); @@ -1166,6 +1190,9 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(                                   SystemZMC::FP64Regs, 16);      }      if (Constraint[1] == 'v') { +      if (!Subtarget.hasVector()) +        return std::make_pair( +            0u, static_cast<const TargetRegisterClass *>(nullptr));        if (VT == MVT::f32)          return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,                                     SystemZMC::VR32Regs, 32); @@ -1179,6 +1206,19 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(    return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);  } +// FIXME? 
Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, +                                                  const MachineFunction &MF) const { + +  Register Reg = StringSwitch<Register>(RegName) +                   .Case("r15", SystemZ::R15D) +                   .Default(0); +  if (Reg) +    return Reg; +  report_fatal_error("Invalid register name global variable"); +} +  void SystemZTargetLowering::  LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,                               std::vector<SDValue> &Ops, @@ -1437,17 +1477,19 @@ SDValue SystemZTargetLowering::LowerFormalArguments(      // ...and a similar frame index for the caller-allocated save area      // that will be used to store the incoming registers. -    int64_t RegSaveOffset = -SystemZMC::CallFrameSize; +    int64_t RegSaveOffset = +      -SystemZMC::CallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;      unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);      FuncInfo->setRegSaveFrameIndex(RegSaveIndex);      // Store the FPR varargs in the reserved frame slots.  (We store the      // GPRs as part of the prologue.) -    if (NumFixedFPRs < SystemZ::NumArgFPRs) { +    if (NumFixedFPRs < SystemZ::NumArgFPRs && !useSoftFloat()) {        SDValue MemOps[SystemZ::NumArgFPRs];        for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) { -        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]); -        int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true); +        unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ArgFPRs[I]); +        int FI = +          MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize + Offset, true);          SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));          unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],                                       &SystemZ::FP64BitRegClass); @@ -1633,6 +1675,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,    if (IsTailCall)      return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);    Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); +  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);    Glue = Chain.getValue(1);    // Mark the end of the call, which is glued to the call itself. @@ -2020,8 +2063,9 @@ static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,    // We must have an 8- or 16-bit load.    auto *Load = cast<LoadSDNode>(C.Op0); -  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits(); -  if (NumBits != 8 && NumBits != 16) +  unsigned NumBits = Load->getMemoryVT().getSizeInBits(); +  if ((NumBits != 8 && NumBits != 16) || +      NumBits != Load->getMemoryVT().getStoreSizeInBits())      return;    // The load must be an extending one and the constant must be within the @@ -2161,15 +2205,6 @@ static bool shouldSwapCmpOperands(const Comparison &C) {    return false;  } -// Return a version of comparison CC mask CCMask in which the LT and GT -// actions are swapped. -static unsigned reverseCCMask(unsigned CCMask) { -  return ((CCMask & SystemZ::CCMASK_CMP_EQ) | -          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) | -          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) | -          (CCMask & SystemZ::CCMASK_CMP_UO)); -} -  // Check whether C tests for equality between X and Y and whether X - Y  // or Y - X is also computed.  
In that case it's better to compare the  // result of the subtraction against zero. @@ -2205,7 +2240,7 @@ static void adjustForFNeg(Comparison &C) {        SDNode *N = *I;        if (N->getOpcode() == ISD::FNEG) {          C.Op0 = SDValue(N, 0); -        C.CCMask = reverseCCMask(C.CCMask); +        C.CCMask = SystemZ::reverseCCMask(C.CCMask);          return;        }      } @@ -2572,7 +2607,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,    if (shouldSwapCmpOperands(C)) {      std::swap(C.Op0, C.Op1); -    C.CCMask = reverseCCMask(C.CCMask); +    C.CCMask = SystemZ::reverseCCMask(C.CCMask);    }    adjustForTestUnderMask(DAG, DL, C); @@ -3103,7 +3138,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,        SystemZConstantPoolValue *CPV =          SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); -      Offset = DAG.getConstantPool(CPV, PtrVT, 8); +      Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));        Offset = DAG.getLoad(            PtrVT, DL, DAG.getEntryNode(), Offset,            MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); @@ -3118,7 +3153,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,        SystemZConstantPoolValue *CPV =          SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); -      Offset = DAG.getConstantPool(CPV, PtrVT, 8); +      Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));        Offset = DAG.getLoad(            PtrVT, DL, DAG.getEntryNode(), Offset,            MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); @@ -3136,7 +3171,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,        // Add the per-symbol offset.        CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); -      SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); +      SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));        DTPOffset = DAG.getLoad(            PtrVT, DL, DAG.getEntryNode(), DTPOffset,            MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); @@ -3161,7 +3196,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,        SystemZConstantPoolValue *CPV =          SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); -      Offset = DAG.getConstantPool(CPV, PtrVT, 8); +      Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));        Offset = DAG.getLoad(            PtrVT, DL, DAG.getEntryNode(), Offset,            MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); @@ -3202,11 +3237,11 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,    SDValue Result;    if (CP->isMachineConstantPoolEntry()) -    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, -                                       CP->getAlignment()); +    Result = +        DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());    else -    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, -                                       CP->getAlignment(), CP->getOffset()); +    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(), +                                       CP->getOffset());    // Use LARL to load the address of the constant pool entry.    
return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); @@ -3214,6 +3249,8 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,  SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,                                                SelectionDAG &DAG) const { +  auto *TFL = +      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());    MachineFunction &MF = DAG.getMachineFunction();    MachineFrameInfo &MFI = MF.getFrameInfo();    MFI.setFrameAddressIsTaken(true); @@ -3222,9 +3259,12 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,    unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();    EVT PtrVT = getPointerTy(DAG.getDataLayout()); +  // Return null if the back chain is not present. +  bool HasBackChain = MF.getFunction().hasFnAttribute("backchain"); +  if (TFL->usePackedStack(MF) && !HasBackChain) +    return DAG.getConstant(0, DL, PtrVT); +    // By definition, the frame address is the address of the back chain. -  auto *TFL = -      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());    int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);    SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); @@ -3355,9 +3395,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,    SDLoc DL(Op);    return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL), -                       /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, -                       /*isTailCall*/false, -                       MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); +                       Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false, +                       /*isTailCall*/ false, MachinePointerInfo(DstSV), +                       MachinePointerInfo(SrcSV));  }  SDValue SystemZTargetLowering:: @@ -3398,10 +3438,17 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {                                DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));    // Get the new stack pointer value. -  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); - -  // Copy the new stack pointer back. -  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); +  SDValue NewSP; +  if (hasInlineStackProbe(MF)) { +    NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL, +                DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace); +    Chain = NewSP.getValue(1); +  } +  else { +    NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); +    // Copy the new stack pointer back. +    Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); +  }    // The allocated data lives above the 160 bytes allocated for the standard    // frame, plus any outgoing stack arguments.  We don't know how much that @@ -3995,7 +4042,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,  }  MachineMemOperand::Flags -SystemZTargetLowering::getMMOFlags(const Instruction &I) const { +SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {    // Because of how we convert atomic_load and atomic_store to normal loads and    // stores in the DAG, we need to ensure that the MMOs are marked volatile    // since DAGCombine hasn't been updated to account for atomic, but non @@ -4362,7 +4409,7 @@ static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,  }  // Bytes is a VPERM-like permute vector, except that -1 is used for -// undefined bytes.  Return true if it can be performed using VSLDI. +// undefined bytes.  
Return true if it can be performed using VSLDB.  // When returning true, set StartIndex to the shift amount and OpNo0  // and OpNo1 to the VPERM operands that should be used as the first  // and second shift operand respectively. @@ -4420,23 +4467,86 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,    return Op;  } +static bool isZeroVector(SDValue N) { +  if (N->getOpcode() == ISD::BITCAST) +    N = N->getOperand(0); +  if (N->getOpcode() == ISD::SPLAT_VECTOR) +    if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0))) +      return Op->getZExtValue() == 0; +  return ISD::isBuildVectorAllZeros(N.getNode()); +} + +// Return the index of the zero/undef vector, or UINT32_MAX if not found. +static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) { +  for (unsigned I = 0; I < Num ; I++) +    if (isZeroVector(Ops[I])) +      return I; +  return UINT32_MAX; +} +  // Bytes is a VPERM-like permute vector, except that -1 is used for  // undefined bytes.  Implement it on operands Ops[0] and Ops[1] using -// VSLDI or VPERM. +// VSLDB or VPERM.  static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,                                       SDValue *Ops,                                       const SmallVectorImpl<int> &Bytes) {    for (unsigned I = 0; I < 2; ++I)      Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); -  // First see whether VSLDI can be used. +  // First see whether VSLDB can be used.    unsigned StartIndex, OpNo0, OpNo1;    if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))      return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],                         Ops[OpNo1],                         DAG.getTargetConstant(StartIndex, DL, MVT::i32)); -  // Fall back on VPERM.  Construct an SDNode for the permute vector. +  // Fall back on VPERM.  Construct an SDNode for the permute vector.  Try to +  // eliminate a zero vector by reusing any zero index in the permute vector. +  unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2); +  if (ZeroVecIdx != UINT32_MAX) { +    bool MaskFirst = true; +    int ZeroIdx = -1; +    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { +      unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; +      unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; +      if (OpNo == ZeroVecIdx && I == 0) { +        // If the first byte is zero, use mask as first operand. +        ZeroIdx = 0; +        break; +      } +      if (OpNo != ZeroVecIdx && Byte == 0) { +        // If mask contains a zero, use it by placing that vector first. +        ZeroIdx = I + SystemZ::VectorBytes; +        MaskFirst = false; +        break; +      } +    } +    if (ZeroIdx != -1) { +      SDValue IndexNodes[SystemZ::VectorBytes]; +      for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { +        if (Bytes[I] >= 0) { +          unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; +          unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; +          if (OpNo == ZeroVecIdx) +            IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32); +          else { +            unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte; +            IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32); +          } +        } else +          IndexNodes[I] = DAG.getUNDEF(MVT::i32); +      } +      SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); +      SDValue Src = ZeroVecIdx == 0 ? 
Ops[1] : Ops[0]; +      if (MaskFirst) +        return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src, +                           Mask); +      else +        return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask, +                           Mask); +    } +  } +    SDValue IndexNodes[SystemZ::VectorBytes];    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)      if (Bytes[I] >= 0) @@ -4444,16 +4554,20 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,      else        IndexNodes[I] = DAG.getUNDEF(MVT::i32);    SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); -  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2); +  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], +                     (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);  }  namespace {  // Describes a general N-operand vector shuffle.  struct GeneralShuffle { -  GeneralShuffle(EVT vt) : VT(vt) {} +  GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}    void addUndef();    bool add(SDValue, unsigned);    SDValue getNode(SelectionDAG &, const SDLoc &); +  void tryPrepareForUnpack(); +  bool unpackWasPrepared() { return UnpackFromEltSize <= 4; } +  SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);    // The operands of the shuffle.    SmallVector<SDValue, SystemZ::VectorBytes> Ops; @@ -4465,6 +4579,9 @@ struct GeneralShuffle {    // The type of the shuffle result.    EVT VT; + +  // Holds a value of 1, 2 or 4 if a final unpack has been prepared for. +  unsigned UnpackFromEltSize;  };  } @@ -4547,6 +4664,9 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {    if (Ops.size() == 0)      return DAG.getUNDEF(VT); +  // Use a single unpack if possible as the last operation. +  tryPrepareForUnpack(); +    // Make sure that there are at least two shuffle operands.    if (Ops.size() == 1)      Ops.push_back(DAG.getUNDEF(MVT::v16i8)); @@ -4612,13 +4732,117 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {    // to VPERM.    unsigned OpNo0, OpNo1;    SDValue Op; -  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) +  if (unpackWasPrepared() && Ops[1].isUndef()) +    Op = Ops[0]; +  else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))      Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);    else      Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes); + +  Op = insertUnpackIfPrepared(DAG, DL, Op); +    return DAG.getNode(ISD::BITCAST, DL, VT, Op);  } +#ifndef NDEBUG +static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) { +  dbgs() << Msg.c_str() << " { "; +  for (unsigned i = 0; i < Bytes.size(); i++) +    dbgs() << Bytes[i] << " "; +  dbgs() << "}\n"; +} +#endif + +// If the Bytes vector matches an unpack operation, prepare to do the unpack +// after all else by removing the zero vector and the effect of the unpack on +// Bytes. +void GeneralShuffle::tryPrepareForUnpack() { +  uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size()); +  if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1) +    return; + +  // Only do this if removing the zero vector reduces the depth, otherwise +  // the critical path will increase with the final unpack. +  if (Ops.size() > 2 && +      Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1)) +    return; + +  // Find an unpack that would allow removing the zero vector from Ops. 
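For instance, zero-extending the low eight bytes of one operand into eight halfwords produces a Bytes vector that interleaves zero-operand bytes with source bytes: the big-endian unpack-high pattern this search recognizes for UnpackFromEltSize == 1. A standalone sketch of that case and of the "apply the unpack in reverse" rewrite performed below (illustrative only, not the LLVM API):

  #include <cstdio>
  #include <vector>

  int main() {
    // Operand 0 is the source; operand 1 (bytes 16..31) is all zeros.
    std::vector<int> Bytes;
    for (int i = 0; i < 8; ++i) {
      Bytes.push_back(16 + i); // zero byte of each 2-byte element
      Bytes.push_back(i);      // source byte being zero-extended
    }
    // Apply the unpack in reverse: drop the zero bytes and compact.
    unsigned B = 0;
    for (unsigned Elt = 0; Elt < 16;) {
      ++Elt;                     // skip the zext byte
      Bytes[B++] = Bytes[Elt++]; // keep the source byte
    }
    while (B < 16)
      Bytes[B++] = -1;
    for (int V : Bytes) // prints 0..7 followed by eight -1s
      std::printf("%d ", V);
    std::printf("\n");
  }

The resulting vector needs no further rearrangement of operand 0, so the zero vector is dropped from Ops and a single final unpack finishes the shuffle.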
+  UnpackFromEltSize = 1; +  for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) { +    bool MatchUnpack = true; +    SmallVector<int, SystemZ::VectorBytes> SrcBytes; +    for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) { +      unsigned ToEltSize = UnpackFromEltSize * 2; +      bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize; +      if (!IsZextByte) +        SrcBytes.push_back(Bytes[Elt]); +      if (Bytes[Elt] != -1) { +        unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes; +        if (IsZextByte != (OpNo == ZeroVecOpNo)) { +          MatchUnpack = false; +          break; +        } +      } +    } +    if (MatchUnpack) { +      if (Ops.size() == 2) { +        // Don't use unpack if a single source operand needs rearrangement. +        for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) +          if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) { +            UnpackFromEltSize = UINT_MAX; +            return; +          } +      } +      break; +    } +  } +  if (UnpackFromEltSize > 4) +    return; + +  LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size " +             << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo +             << ".\n"; +             dumpBytes(Bytes, "Original Bytes vector:");); + +  // Apply the unpack in reverse to the Bytes array. +  unsigned B = 0; +  for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) { +    Elt += UnpackFromEltSize; +    for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++) +      Bytes[B] = Bytes[Elt]; +  } +  while (B < SystemZ::VectorBytes) +    Bytes[B++] = -1; + +  // Remove the zero vector from Ops +  Ops.erase(&Ops[ZeroVecOpNo]); +  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) +    if (Bytes[I] >= 0) { +      unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; +      if (OpNo > ZeroVecOpNo) +        Bytes[I] -= SystemZ::VectorBytes; +    } + +  LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:"); +             dbgs() << "\n";); +} + +SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG, +                                               const SDLoc &DL, +                                               SDValue Op) { +  if (!unpackWasPrepared()) +    return Op; +  unsigned InBits = UnpackFromEltSize * 8; +  EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits), +                                SystemZ::VectorBits / InBits); +  SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op); +  unsigned OutBits = InBits * 2; +  EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits), +                               SystemZ::VectorBits / OutBits); +  return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp); +} +  // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.  
static bool isScalarToVector(SDValue Op) {    for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) @@ -5013,9 +5237,8 @@ SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,    return DAG.getNode(ISD::BITCAST, DL, VT, Res);  } -SDValue -SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, -                                              unsigned UnpackHigh) const { +SDValue SystemZTargetLowering:: +lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {    SDValue PackedOp = Op.getOperand(0);    EVT OutVT = Op.getValueType();    EVT InVT = PackedOp.getValueType(); @@ -5025,11 +5248,39 @@ SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,      FromBits *= 2;      EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),                                   SystemZ::VectorBits / FromBits); -    PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp); +    PackedOp = +      DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);    } while (FromBits != ToBits);    return PackedOp;  } +// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector. +SDValue SystemZTargetLowering:: +lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { +  SDValue PackedOp = Op.getOperand(0); +  SDLoc DL(Op); +  EVT OutVT = Op.getValueType(); +  EVT InVT = PackedOp.getValueType(); +  unsigned InNumElts = InVT.getVectorNumElements(); +  unsigned OutNumElts = OutVT.getVectorNumElements(); +  unsigned NumInPerOut = InNumElts / OutNumElts; + +  SDValue ZeroVec = +    DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType())); + +  SmallVector<int, 16> Mask(InNumElts); +  unsigned ZeroVecElt = InNumElts; +  for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) { +    unsigned MaskElt = PackedElt * NumInPerOut; +    unsigned End = MaskElt + NumInPerOut - 1; +    for (; MaskElt < End; MaskElt++) +      Mask[MaskElt] = ZeroVecElt++; +    Mask[MaskElt] = PackedElt; +  } +  SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask); +  return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf); +} +  SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,                                            unsigned ByScalar) const {    // Look for cases where a vector shift can use the *_BY_SCALAR form. 
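Returning to lowerZERO_EXTEND_VECTOR_INREG() above: on this big-endian target each widened element is built as { zero lanes, source lane }, so the shuffle mask interleaves zero-vector lanes ahead of each source lane. A standalone recomputation for the v2i64 <- zext v4i32 case (values illustrative):

  #include <cstdio>
  #include <vector>

  int main() {
    const unsigned InNumElts = 4, OutNumElts = 2;
    const unsigned NumInPerOut = InNumElts / OutNumElts;
    std::vector<int> Mask(InNumElts);
    unsigned ZeroVecElt = InNumElts; // zero vector is the second input
    for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
      unsigned MaskElt = PackedElt * NumInPerOut;
      unsigned End = MaskElt + NumInPerOut - 1;
      for (; MaskElt < End; MaskElt++)
        Mask[MaskElt] = ZeroVecElt++;
      Mask[MaskElt] = PackedElt;
    }
    for (int V : Mask) // prints: 4 0 5 1
      std::printf("%d ", V);
    std::printf("\n");
  }

Shuffling (PackedOp, ZeroVec) with { 4, 0, 5, 1 } yields { 0, p0, 0, p1 } in 32-bit lanes, which bitcasts to the two zero-extended 64-bit results.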
@@ -5195,9 +5446,9 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,    case ISD::EXTRACT_VECTOR_ELT:      return lowerEXTRACT_VECTOR_ELT(Op, DAG);    case ISD::SIGN_EXTEND_VECTOR_INREG: -    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH); +    return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);    case ISD::ZERO_EXTEND_VECTOR_INREG: -    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH); +    return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);    case ISD::SHL:      return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);    case ISD::SRL: @@ -5315,6 +5566,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {      OPCODE(BR_CCMASK);      OPCODE(SELECT_CCMASK);      OPCODE(ADJDYNALLOC); +    OPCODE(PROBED_ALLOCA);      OPCODE(POPCNT);      OPCODE(SMUL_LOHI);      OPCODE(UMUL_LOHI); @@ -6056,6 +6308,32 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(    return SDValue();  } +SDValue SystemZTargetLowering::combineINT_TO_FP( +    SDNode *N, DAGCombinerInfo &DCI) const { +  if (DCI.Level != BeforeLegalizeTypes) +    return SDValue(); +  unsigned Opcode = N->getOpcode(); +  EVT OutVT = N->getValueType(0); +  SelectionDAG &DAG = DCI.DAG; +  SDValue Op = N->getOperand(0); +  unsigned OutScalarBits = OutVT.getScalarSizeInBits(); +  unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits(); + +  // Insert an extension before type-legalization to avoid scalarization, e.g.: +  // v2f64 = uint_to_fp v2i16 +  // => +  // v2f64 = uint_to_fp (v2i64 zero_extend v2i16) +  if (OutVT.isVector() && OutScalarBits > InScalarBits) { +    MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(OutVT.getScalarSizeInBits()), +                                 OutVT.getVectorNumElements()); +    unsigned ExtOpcode = +      (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); +    SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op); +    return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp); +  } +  return SDValue(); +} +  SDValue SystemZTargetLowering::combineBSWAP(      SDNode *N, DAGCombinerInfo &DCI) const {    SelectionDAG &DAG = DCI.DAG; @@ -6243,15 +6521,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {        return false;      // Compute the effective CC mask for the new branch or select. -    switch (CCMask) { -    case SystemZ::CCMASK_CMP_EQ: break; -    case SystemZ::CCMASK_CMP_NE: break; -    case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break; -    case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break; -    case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break; -    case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break; -    default: return false; -    } +    CCMask = SystemZ::reverseCCMask(CCMask);      // Return the updated CCReg link.      CCReg = IPM->getOperand(0); @@ -6367,6 +6637,34 @@ SDValue SystemZTargetLowering::combineIntDIVREM(    return SDValue();  } +SDValue SystemZTargetLowering::combineINTRINSIC( +    SDNode *N, DAGCombinerInfo &DCI) const { +  SelectionDAG &DAG = DCI.DAG; + +  unsigned Id = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); +  switch (Id) { +  // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15 +  // or larger is simply a vector load. 
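At the source level the fold looks roughly as follows, assuming the z/Architecture vector built-ins from <vecintrin.h> compiled with -mzvector on z13 or later; a sketch, not taken from this patch or its tests:

  #include <vecintrin.h>

  // A length operand of 15 or more covers all 16 bytes, so this VLL (and
  // the analogous VSTL case) can be selected as a plain vector load/store.
  vector unsigned char loadAll16(const unsigned char *Ptr) {
    return vec_load_len(Ptr, 15);
  }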
+  case Intrinsic::s390_vll: +  case Intrinsic::s390_vlrl: +    if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) +      if (C->getZExtValue() >= 15) +        return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0), +                           N->getOperand(3), MachinePointerInfo()); +    break; +  // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH. +  case Intrinsic::s390_vstl: +  case Intrinsic::s390_vstrl: +    if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3))) +      if (C->getZExtValue() >= 15) +        return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2), +                            N->getOperand(4), MachinePointerInfo()); +    break; +  } + +  return SDValue(); +} +  SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {    if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)      return N->getOperand(0); @@ -6391,6 +6689,8 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,    case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);    case ISD::STRICT_FP_EXTEND:    case ISD::FP_EXTEND:          return combineFP_EXTEND(N, DCI); +  case ISD::SINT_TO_FP: +  case ISD::UINT_TO_FP:         return combineINT_TO_FP(N, DCI);    case ISD::BSWAP:              return combineBSWAP(N, DCI);    case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);    case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); @@ -6399,6 +6699,8 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,    case ISD::UDIV:    case ISD::SREM:    case ISD::UREM:               return combineIntDIVREM(N, DCI); +  case ISD::INTRINSIC_W_CHAIN: +  case ISD::INTRINSIC_VOID:     return combineINTRINSIC(N, DCI);    }    return SDValue(); @@ -6580,7 +6882,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,        APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);        Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);        if (IsLogical) { -        Known = Known.zext(BitWidth, true); +        Known = Known.zext(BitWidth);        } else          Known = Known.sext(BitWidth);        break; @@ -6609,7 +6911,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,    // Known has the width of the source operand(s). Adjust if needed to match    // the passed bitwidth.    if (Known.getBitWidth() != BitWidth) -    Known = Known.zextOrTrunc(BitWidth, false); +    Known = Known.anyextOrTrunc(BitWidth);  }  static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, @@ -6690,38 +6992,29 @@ SystemZTargetLowering::ComputeNumSignBitsForTargetNode(    return 1;  } +unsigned +SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const { +  const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); +  unsigned StackAlign = TFI->getStackAlignment(); +  assert(StackAlign >=1 && isPowerOf2_32(StackAlign) && +         "Unexpected stack alignment"); +  // The default stack probe size is 4096 if the function has no +  // stack-probe-size attribute. +  unsigned StackProbeSize = 4096; +  const Function &Fn = MF.getFunction(); +  if (Fn.hasFnAttribute("stack-probe-size")) +    Fn.getFnAttribute("stack-probe-size") +        .getValueAsString() +        .getAsInteger(0, StackProbeSize); +  // Round down to the stack alignment. +  StackProbeSize &= ~(StackAlign - 1); +  return StackProbeSize ? 
StackProbeSize : StackAlign; +} +  //===----------------------------------------------------------------------===//  // Custom insertion  //===----------------------------------------------------------------------===// -// Create a new basic block after MBB. -static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { -  MachineFunction &MF = *MBB->getParent(); -  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); -  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB); -  return NewMBB; -} - -// Split MBB after MI and return the new block (the one that contains -// instructions after MI). -static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI, -                                          MachineBasicBlock *MBB) { -  MachineBasicBlock *NewMBB = emitBlockAfter(MBB); -  NewMBB->splice(NewMBB->begin(), MBB, -                 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); -  NewMBB->transferSuccessorsAndUpdatePHIs(MBB); -  return NewMBB; -} - -// Split MBB before MI and return the new block (the one that contains MI). -static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI, -                                           MachineBasicBlock *MBB) { -  MachineBasicBlock *NewMBB = emitBlockAfter(MBB); -  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); -  NewMBB->transferSuccessorsAndUpdatePHIs(MBB); -  return NewMBB; -} -  // Force base value Base into a register before MI.  Return the register.  static Register forceReg(MachineInstr &MI, MachineOperand &Base,                           const SystemZInstrInfo *TII) { @@ -6859,8 +7152,6 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,    for (MachineBasicBlock::iterator NextMIIt =           std::next(MachineBasicBlock::iterator(MI));         NextMIIt != MBB->end(); ++NextMIIt) { -    if (NextMIIt->definesRegister(SystemZ::CC)) -      break;      if (isSelectPseudo(*NextMIIt)) {        assert(NextMIIt->getOperand(3).getImm() == CCValid &&               "Bad CCValid operands since CC was not redefined."); @@ -6871,6 +7162,9 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,        }        break;      } +    if (NextMIIt->definesRegister(SystemZ::CC) || +        NextMIIt->usesCustomInsertionHook()) +      break;      bool User = false;      for (auto SelMI : Selects)        if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) { @@ -6891,8 +7185,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,    bool CCKilled =        (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));    MachineBasicBlock *StartMBB = MBB; -  MachineBasicBlock *JoinMBB  = splitBlockAfter(LastMI, MBB); -  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); +  MachineBasicBlock *JoinMBB  = SystemZ::splitBlockAfter(LastMI, MBB); +  MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);    // Unless CC was killed in the last Select instruction, mark it as    // live-in to both FalseMBB and JoinMBB. @@ -6985,8 +7279,8 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,      CCMask ^= CCValid;    MachineBasicBlock *StartMBB = MBB; -  MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB); -  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); +  MachineBasicBlock *JoinMBB  = SystemZ::splitBlockBefore(MI, MBB); +  MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);    // Unless CC was killed in the CondStore instruction, mark it as    // live-in to both FalseMBB and JoinMBB. 
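The round-down at the end of getStackProbeSize only works because the stack alignment is a power of two: clearing the low bits of the attribute value rounds it down to a multiple of the alignment, and the final select keeps the probe interval from collapsing to zero. A minimal standalone sketch of that arithmetic (plain C++, not LLVM code; the function name is illustrative):

    #include <cassert>
    #include <cstdint>

    // Round ProbeSize down to a multiple of a power-of-two StackAlign,
    // falling back to one alignment unit so the result is never zero.
    uint64_t roundProbeSize(uint64_t ProbeSize, uint64_t StackAlign) {
      assert(StackAlign >= 1 && (StackAlign & (StackAlign - 1)) == 0 &&
             "alignment must be a power of two");
      ProbeSize &= ~(StackAlign - 1); // e.g. 100 & ~7 == 96 for 8-byte alignment
      return ProbeSize ? ProbeSize : StackAlign;
    }

With the default 4096-byte probe size and an 8-byte stack alignment the mask is a no-op; a "stack-probe-size"="100" attribute would round down to 96.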
@@ -7069,8 +7363,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(    // Insert a basic block for the main loop.    MachineBasicBlock *StartMBB = MBB; -  MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB); -  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB); +  MachineBasicBlock *DoneMBB  = SystemZ::splitBlockBefore(MI, MBB); +  MachineBasicBlock *LoopMBB  = SystemZ::emitBlockAfter(StartMBB);    //  StartMBB:    //   ... @@ -7187,10 +7481,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(    // Insert 3 basic blocks for the loop.    MachineBasicBlock *StartMBB  = MBB; -  MachineBasicBlock *DoneMBB   = splitBlockBefore(MI, MBB); -  MachineBasicBlock *LoopMBB   = emitBlockAfter(StartMBB); -  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB); -  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB); +  MachineBasicBlock *DoneMBB   = SystemZ::splitBlockBefore(MI, MBB); +  MachineBasicBlock *LoopMBB   = SystemZ::emitBlockAfter(StartMBB); +  MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB); +  MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);    //  StartMBB:    //   ... @@ -7298,9 +7592,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,    // Insert 2 basic blocks for the loop.    MachineBasicBlock *StartMBB = MBB; -  MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB); -  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB); -  MachineBasicBlock *SetMBB   = emitBlockAfter(LoopMBB); +  MachineBasicBlock *DoneMBB  = SystemZ::splitBlockBefore(MI, MBB); +  MachineBasicBlock *LoopMBB  = SystemZ::emitBlockAfter(StartMBB); +  MachineBasicBlock *SetMBB   = SystemZ::emitBlockAfter(LoopMBB);    //  StartMBB:    //   ... @@ -7460,7 +7754,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(    // When generating more than one CLC, all but the last will need to    // branch to the end when a difference is found.    MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ? -                               splitBlockAfter(MI, MBB) : nullptr); +                               SystemZ::splitBlockAfter(MI, MBB) : nullptr);    // Check for the loop form, in which operand 5 is the trip count.    if (MI.getNumExplicitOperands() > 5) { @@ -7484,9 +7778,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(      Register NextCountReg = MRI.createVirtualRegister(RC);      MachineBasicBlock *StartMBB = MBB; -    MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); -    MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); -    MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB); +    MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); +    MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); +    MachineBasicBlock *NextMBB = +        (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);      //  StartMBB:      //   # fall through to LoopMBB @@ -7602,7 +7897,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(      // If there's another CLC to go, branch to the end if a difference      // was found.      
if (EndMBB && Length > 0) { -      MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB); +      MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);        BuildMI(MBB, DL, TII->get(SystemZ::BRC))          .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)          .addMBB(EndMBB); @@ -7642,8 +7937,8 @@ MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(    uint64_t End2Reg  = MRI.createVirtualRegister(RC);    MachineBasicBlock *StartMBB = MBB; -  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); -  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); +  MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); +  MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);    //  StartMBB:    //   # fall through to LoopMBB @@ -7754,6 +8049,97 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(    return MBB;  } +MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca( +    MachineInstr &MI, MachineBasicBlock *MBB) const { +  MachineFunction &MF = *MBB->getParent(); +  MachineRegisterInfo *MRI = &MF.getRegInfo(); +  const SystemZInstrInfo *TII = +      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); +  DebugLoc DL = MI.getDebugLoc(); +  const unsigned ProbeSize = getStackProbeSize(MF); +  Register DstReg = MI.getOperand(0).getReg(); +  Register SizeReg = MI.getOperand(2).getReg(); + +  MachineBasicBlock *StartMBB = MBB; +  MachineBasicBlock *DoneMBB  = SystemZ::splitBlockAfter(MI, MBB); +  MachineBasicBlock *LoopTestMBB  = SystemZ::emitBlockAfter(StartMBB); +  MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB); +  MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB); +  MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB); + +  MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(), +    MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1)); + +  Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); +  Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); + +  //  LoopTestMBB +  //  BRC TailTestMBB +  //  # fallthrough to LoopBodyMBB +  StartMBB->addSuccessor(LoopTestMBB); +  MBB = LoopTestMBB; +  BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg) +    .addReg(SizeReg) +    .addMBB(StartMBB) +    .addReg(IncReg) +    .addMBB(LoopBodyMBB); +  BuildMI(MBB, DL, TII->get(SystemZ::CLGFI)) +    .addReg(PHIReg) +    .addImm(ProbeSize); +  BuildMI(MBB, DL, TII->get(SystemZ::BRC)) +    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT) +    .addMBB(TailTestMBB); +  MBB->addSuccessor(LoopBodyMBB); +  MBB->addSuccessor(TailTestMBB); + +  //  LoopBodyMBB: Allocate and probe by means of a volatile compare.
+  //  J LoopTestMBB +  MBB = LoopBodyMBB; +  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg) +    .addReg(PHIReg) +    .addImm(ProbeSize); +  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D) +    .addReg(SystemZ::R15D) +    .addImm(ProbeSize); +  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) +    .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0) +    .setMemRefs(VolLdMMO); +  BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB); +  MBB->addSuccessor(LoopTestMBB); + +  //  TailTestMBB +  //  BRC DoneMBB +  //  # fallthrough to TailMBB +  MBB = TailTestMBB; +  BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) +    .addReg(PHIReg) +    .addImm(0); +  BuildMI(MBB, DL, TII->get(SystemZ::BRC)) +    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) +    .addMBB(DoneMBB); +  MBB->addSuccessor(TailMBB); +  MBB->addSuccessor(DoneMBB); + +  //  TailMBB +  //  # fallthrough to DoneMBB +  MBB = TailMBB; +  BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D) +    .addReg(SystemZ::R15D) +    .addReg(PHIReg); +  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) +    .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg) +    .setMemRefs(VolLdMMO); +  MBB->addSuccessor(DoneMBB); + +  //  DoneMBB +  MBB = DoneMBB; +  BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg) +    .addReg(SystemZ::R15D); + +  MI.eraseFromParent(); +  return DoneMBB; +} +  MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(      MachineInstr &MI, MachineBasicBlock *MBB) const {    switch (MI.getOpcode()) { @@ -8014,6 +8400,9 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(    case SystemZ::LTXBRCompare_VecPseudo:      return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR); +  case SystemZ::PROBED_ALLOCA: +    return emitProbedAlloca(MI, MBB); +    case TargetOpcode::STACKMAP:    case TargetOpcode::PATCHPOINT:      return emitPatchPoint(MI, MBB); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index defcaa6eb6eb..27637762296a 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -83,6 +83,10 @@ enum NodeType : unsigned {    // base of the dynamically-allocatable area.    ADJDYNALLOC, +  // For allocating stack space when using the stack clash protector. +  // Allocation is performed in blocks, and each block is probed. +  PROBED_ALLOCA, +    // Count number of bits set in operand 0 per byte.    POPCNT, @@ -393,6 +397,8 @@ public:    explicit SystemZTargetLowering(const TargetMachine &TM,                                   const SystemZSubtarget &STI); +  bool useSoftFloat() const override; +    // Override TargetLowering.    
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {      return MVT::i32; @@ -426,6 +432,7 @@ public:                                    EVT VT) const override;    bool isFPImmLegal(const APFloat &Imm, EVT VT,                      bool ForCodeSize) const override; +  bool hasInlineStackProbe(MachineFunction &MF) const override;    bool isLegalICmpImmediate(int64_t Imm) const override;    bool isLegalAddImmediate(int64_t Imm) const override;    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, @@ -437,6 +444,14 @@ public:                                        bool *Fast) const override;    bool isTruncateFree(Type *, Type *) const override;    bool isTruncateFree(EVT, EVT) const override; + +  bool shouldFormOverflowOp(unsigned Opcode, EVT VT, +                            bool MathUsed) const override { +    // Form add and sub with overflow intrinsics regardless of any extra +    // users of the math result. +    return VT == MVT::i32 || VT == MVT::i64; +  } +    const char *getTargetNodeName(unsigned Opcode) const override;    std::pair<unsigned, const TargetRegisterClass *>    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, @@ -471,16 +486,19 @@ public:      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);    } +  Register getRegisterByName(const char *RegName, LLT VT, +                             const MachineFunction &MF) const override; +    /// If a physical register, this returns the register that receives the    /// exception address on entry to an EH pad. -  unsigned +  Register    getExceptionPointerRegister(const Constant *PersonalityFn) const override {      return SystemZ::R6D;    }    /// If a physical register, this returns the register that receives the    /// exception typeid on entry to a landing pad. 
-  unsigned +  Register    getExceptionSelectorRegister(const Constant *PersonalityFn) const override {      return SystemZ::R7D;    } @@ -543,6 +561,8 @@ public:      return true;    } +  unsigned getStackProbeSize(MachineFunction &MF) const; +  private:    const SystemZSubtarget &Subtarget; @@ -607,8 +627,8 @@ private:    SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;    SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;    SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; -  SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, -                                 unsigned UnpackHigh) const; +  SDValue lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; +  SDValue lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;    SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;    bool canTreatAsByteVector(EVT VT) const; @@ -629,11 +649,13 @@ private:    SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;    SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;    SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const; +  SDValue combineINT_TO_FP(SDNode *N, DAGCombinerInfo &DCI) const;    SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;    SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;    SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;    SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;    SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const; +  SDValue combineINTRINSIC(SDNode *N, DAGCombinerInfo &DCI) const;    SDValue unwrapAddress(SDValue N) const override; @@ -676,8 +698,11 @@ private:    MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,                                           MachineBasicBlock *MBB,                                           unsigned Opcode) const; +  MachineBasicBlock *emitProbedAlloca(MachineInstr &MI, +                                      MachineBasicBlock *MBB) const; -  MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override; +  MachineMemOperand::Flags +  getTargetMMOFlags(const Instruction &I) const override;    const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;  }; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h index ec7639e71f81..9fc786f92635 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -17,7 +17,6 @@  #include "llvm/CodeGen/MachineFrameInfo.h"  #include "llvm/CodeGen/MachineInstrBuilder.h"  #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/PseudoSourceValue.h"  namespace llvm { @@ -36,7 +35,7 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) {    int64_t Offset = 0;    MachineMemOperand *MMO = MF.getMachineMemOperand(        MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags, -      MFFrame.getObjectSize(FI), MFFrame.getObjectAlignment(FI)); +      MFFrame.getObjectSize(FI), MFFrame.getObjectAlign(FI));    return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO);  } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 6d03274fe8a6..337164d55e5f 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -438,8 +438,8 @@ let Uses = [FPC], mayRaiseFPException = 1,      def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, 
 FP64>;      def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>;    } -  def AEB : BinaryRXE<"aeb", 0xED0A, any_fadd, FP32, load, 4>; -  def ADB : BinaryRXE<"adb", 0xED1A, any_fadd, FP64, load, 8>; +  defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, any_fadd, FP32, load, 4>; +  defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, any_fadd, FP64, load, 8>;  }  // Subtraction. @@ -449,8 +449,8 @@ let Uses = [FPC], mayRaiseFPException = 1,    def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64,  FP64>;    def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>; -  def SEB : BinaryRXE<"seb",  0xED0B, any_fsub, FP32, load, 4>; -  def SDB : BinaryRXE<"sdb",  0xED1B, any_fsub, FP64, load, 8>; +  defm SEB : BinaryRXEAndPseudo<"seb",  0xED0B, any_fsub, FP32, load, 4>; +  defm SDB : BinaryRXEAndPseudo<"sdb",  0xED1B, any_fsub, FP64, load, 8>;  }  // Multiplication. @@ -460,8 +460,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {      def MDBR  : BinaryRRE<"mdbr",  0xB31C, any_fmul, FP64,  FP64>;      def MXBR  : BinaryRRE<"mxbr",  0xB34C, any_fmul, FP128, FP128>;    } -  def MEEB : BinaryRXE<"meeb", 0xED17, any_fmul, FP32, load, 4>; -  def MDB  : BinaryRXE<"mdb",  0xED1C, any_fmul, FP64, load, 8>; +  defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, any_fmul, FP32, load, 4>; +  defm MDB  : BinaryRXEAndPseudo<"mdb",  0xED1C, any_fmul, FP64, load, 8>;  }  // f64 multiplication of two FP32 registers. @@ -503,8 +503,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {    def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;    def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>; -  def MAEB : TernaryRXF<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>; -  def MADB : TernaryRXF<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>; +  defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>; +  defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;  }  // Fused multiply-subtract. @@ -512,8 +512,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {    def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;    def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>; -  def MSEB : TernaryRXF<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>; -  def MSDB : TernaryRXF<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>; +  defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>; +  defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;  }  // Division. @@ -522,8 +522,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {    def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64,  FP64>;    def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>; -  def DEB : BinaryRXE<"deb", 0xED0D, any_fdiv, FP32, load, 4>; -  def DDB : BinaryRXE<"ddb", 0xED1D, any_fdiv, FP64, load, 8>; +  defm DEB : BinaryRXEAndPseudo<"deb", 0xED0D, any_fdiv, FP32, load, 4>; +  defm DDB : BinaryRXEAndPseudo<"ddb", 0xED1D, any_fdiv, FP64, load, 8>;  }  // Divide to integer. 
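For orientation, the five blocks created by emitProbedAlloca in SystemZISelLowering.cpp above implement a "probe every ProbeSize bytes, then handle the remainder" loop, so the stack pointer never drops by more than ProbeSize between two accesses. A rough C++ model of that control flow (illustrative only; the real probes are the volatile CG compares shown above, and the function name is made up for this sketch):

    #include <cstdint>

    // Control-flow model of the probed dynamic allocation: every page of the
    // newly allocated area is touched before the allocation grows past it.
    void probedAllocaModel(uint64_t Size, uint64_t ProbeSize, uint64_t &SP) {
      uint64_t Left = Size;
      while (Left >= ProbeSize) { // LoopTestMBB: CLGFI PHIReg, ProbeSize; BRC
        Left -= ProbeSize;        // LoopBodyMBB: SLGFI IncReg and R15D
        SP -= ProbeSize;
        // probe: volatile 8-byte compare (CG) near the top of the new block
      }
      if (Left != 0) {            // TailTestMBB: CGHI PHIReg, 0; BRC DoneMBB
        SP -= Left;               // TailMBB: SLGR R15D, PHIReg
        // probe the remainder with the same kind of volatile compare
      }
      // DoneMBB: the adjusted stack pointer is copied to the result register
    }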
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index f064d33ac2f3..50f1e09c6ee5 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -2334,49 +2334,49 @@ class FixedCmpBranchRSYb<CondVariant V, string mnemonic, bits<16> opcode,  class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls>    : InstRIb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$RI2), -            mnemonic##"\t$R1, $RI2", []> { +            mnemonic#"\t$R1, $RI2", []> {    let Constraints = "$R1 = $R1src";    let DisableEncoding = "$R1src";  }  class BranchUnaryRIL<string mnemonic, bits<12> opcode, RegisterOperand cls>    : InstRILb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget32:$RI2), -             mnemonic##"\t$R1, $RI2", []> { +             mnemonic#"\t$R1, $RI2", []> {    let Constraints = "$R1 = $R1src";    let DisableEncoding = "$R1src";  }  class BranchUnaryRR<string mnemonic, bits<8> opcode, RegisterOperand cls>    : InstRR<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2), -           mnemonic##"\t$R1, $R2", []> { +           mnemonic#"\t$R1, $R2", []> {    let Constraints = "$R1 = $R1src";    let DisableEncoding = "$R1src";  }  class BranchUnaryRRE<string mnemonic, bits<16> opcode, RegisterOperand cls>    : InstRRE<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2), -            mnemonic##"\t$R1, $R2", []> { +            mnemonic#"\t$R1, $R2", []> {    let Constraints = "$R1 = $R1src";    let DisableEncoding = "$R1src";  }  class BranchUnaryRX<string mnemonic, bits<8> opcode, RegisterOperand cls>    : InstRXa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2), -            mnemonic##"\t$R1, $XBD2", []> { +            mnemonic#"\t$R1, $XBD2", []> {    let Constraints = "$R1 = $R1src";    let DisableEncoding = "$R1src";  }  class BranchUnaryRXY<string mnemonic, bits<16> opcode, RegisterOperand cls>    : InstRXYa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr20only:$XBD2), -             mnemonic##"\t$R1, $XBD2", []> { +             mnemonic#"\t$R1, $XBD2", []> {    let Constraints = "$R1 = $R1src";    let DisableEncoding = "$R1src";  }  class BranchBinaryRSI<string mnemonic, bits<8> opcode, RegisterOperand cls>    : InstRSI<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, brtarget16:$RI2), -            mnemonic##"\t$R1, $R3, $RI2", []> { +            mnemonic#"\t$R1, $R3, $RI2", []> {    let Constraints = "$R1 = $R1src";    let DisableEncoding = "$R1src";  } @@ -2384,7 +2384,7 @@ class BranchBinaryRSI<string mnemonic, bits<8> opcode, RegisterOperand cls>  class BranchBinaryRIEe<string mnemonic, bits<16> opcode, RegisterOperand cls>    : InstRIEe<opcode, (outs cls:$R1),               (ins cls:$R1src, cls:$R3, brtarget16:$RI2), -             mnemonic##"\t$R1, $R3, $RI2", []> { +             mnemonic#"\t$R1, $R3, $RI2", []> {    let Constraints = "$R1 = $R1src";    let DisableEncoding = "$R1src";  } @@ -2392,7 +2392,7 @@ class BranchBinaryRIEe<string mnemonic, bits<16> opcode, RegisterOperand cls>  class BranchBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls>    : InstRSa<opcode, (outs cls:$R1),              (ins cls:$R1src, cls:$R3, bdaddr12only:$BD2), -            mnemonic##"\t$R1, $R3, $BD2", []> { +            mnemonic#"\t$R1, $R3, $BD2", []> {    let Constraints = "$R1 = $R1src";    let DisableEncoding = "$R1src";  } @@ -2400,7 +2400,7 @@ class BranchBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls>  class 
BranchBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>    : InstRSYa<opcode,               (outs cls:$R1), (ins cls:$R1src, cls:$R3, bdaddr20only:$BD2), -             mnemonic##"\t$R1, $R3, $BD2", []> { +             mnemonic#"\t$R1, $R3, $BD2", []> {    let Constraints = "$R1 = $R1src";    let DisableEncoding = "$R1src";  } @@ -2421,7 +2421,7 @@ class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,  multiclass LoadMultipleRSPair<string mnemonic, bits<8> rsOpcode,                                bits<16> rsyOpcode, RegisterOperand cls> { -  let DispKey = mnemonic ## #cls in { +  let DispKey = mnemonic # cls in {      let DispSize = "12" in        def "" : LoadMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>;      let DispSize = "20" in @@ -2487,7 +2487,7 @@ class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,  multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,                         SDPatternOperator operator, RegisterOperand cls,                         bits<5> bytes> { -  let DispKey = mnemonic ## #cls in { +  let DispKey = mnemonic # cls in {      let DispSize = "12" in        def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>;      let DispSize = "20" in @@ -2567,7 +2567,7 @@ class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,  multiclass StoreMultipleRSPair<string mnemonic, bits<8> rsOpcode,                                 bits<16> rsyOpcode, RegisterOperand cls> { -  let DispKey = mnemonic ## #cls in { +  let DispKey = mnemonic # cls in {      let DispSize = "12" in        def "" : StoreMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>;      let DispSize = "20" in @@ -2807,6 +2807,10 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,    let mayLoad = 1;    let AccessBytes = bytes;    let CCMaskLast = 1; +  let OpKey = mnemonic#"r"#cls; +  let OpType = "mem"; +  let MemKey = mnemonic#cls; +  let MemType = "target";  }  // Like CondUnaryRSY, but used for the raw assembly form.  
The condition-code @@ -2884,7 +2888,7 @@ class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,  multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,                         SDPatternOperator operator, RegisterOperand cls,                         bits<5> bytes> { -  let DispKey = mnemonic ## #cls in { +  let DispKey = mnemonic # cls in {      let DispSize = "12" in        def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>;      let DispSize = "20" in @@ -2907,13 +2911,15 @@ class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, ImmOpWithPattern imm>  class UnaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,                  TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0, -                bits<4> m5 = 0> +                bits<4> m5 = 0, string fp_mnemonic = "">    : InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2),               mnemonic#"\t$V1, $V2",               [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2)))]> {    let M3 = type;    let M4 = m4;    let M5 = m5; +  let OpKey = fp_mnemonic#!subst("VR", "FP", !cast<string>(tr1.op)); +  let OpType = "reg";  }  class UnaryVRRaGeneric<string mnemonic, bits<16> opcode, bits<4> m4 = 0, @@ -2948,7 +2954,7 @@ multiclass UnaryExtraVRRaSPair<string mnemonic, bits<16> opcode,    def : InstAlias<mnemonic#"\t$V1, $V2",                    (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, 0)>;    let Defs = [CC] in -    def S : UnaryVRRa<mnemonic##"s", opcode, operator_cc, tr1, tr2, +    def S : UnaryVRRa<mnemonic#"s", opcode, operator_cc, tr1, tr2,                        type, 0, 1>;  } @@ -2992,17 +2998,17 @@ multiclass UnaryVRXAlign<string mnemonic, bits<16> opcode> {  class SideEffectBinaryRX<string mnemonic, bits<8> opcode,                           RegisterOperand cls>    : InstRXa<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2), -            mnemonic##"\t$R1, $XBD2", []>; +            mnemonic#"\t$R1, $XBD2", []>;  class SideEffectBinaryRXY<string mnemonic, bits<16> opcode,                            RegisterOperand cls>    : InstRXYa<opcode, (outs), (ins cls:$R1, bdxaddr20only:$XBD2), -             mnemonic##"\t$R1, $XBD2", []>; +             mnemonic#"\t$R1, $XBD2", []>;  class SideEffectBinaryRILPC<string mnemonic, bits<12> opcode,                              RegisterOperand cls>    : InstRILb<opcode, (outs), (ins cls:$R1, pcrel32:$RI2), -             mnemonic##"\t$R1, $RI2", []> { +             mnemonic#"\t$R1, $RI2", []> {    // We want PC-relative addresses to be tried ahead of BD and BDX addresses.    // However, BDXs have two extra operands and are therefore 6 units more    // complex. 
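Returning to combineCCMask in SystemZISelLowering.cpp above: the switch deleted there is exactly what the new SystemZ::reverseCCMask helper must compute when the operands of a comparison are exchanged: LT/GT and LE/GE swap roles while EQ and NE are unchanged. A sketch of that mapping, written against the deleted switch rather than the helper's actual source (an assumption about its implementation; the CCMASK_* constants are the existing ones from SystemZ.h):

    #include "SystemZ.h" // for the SystemZ::CCMASK_CMP_* constants

    // Exchange the "<" and ">" bits of a CC mask. Because LE == EQ|LT and
    // GE == EQ|GT, swapping the LT and GT bits also maps LE <-> GE, which
    // covers every case of the deleted switch in one expression.
    static unsigned reverseCCMaskSketch(unsigned CCMask) {
      return (CCMask & SystemZ::CCMASK_CMP_EQ) |
             ((CCMask & SystemZ::CCMASK_CMP_GT) ? SystemZ::CCMASK_CMP_LT : 0) |
             ((CCMask & SystemZ::CCMASK_CMP_LT) ? SystemZ::CCMASK_CMP_GT : 0) |
             (CCMask & SystemZ::CCMASK_CMP_UO);
    }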
@@ -3045,16 +3051,16 @@ class SideEffectBinarySIL<string mnemonic, bits<16> opcode,  class SideEffectBinarySSa<string mnemonic, bits<8> opcode>    : InstSSa<opcode, (outs), (ins bdladdr12onlylen8:$BDL1, bdaddr12only:$BD2), -            mnemonic##"\t$BDL1, $BD2", []>; +            mnemonic#"\t$BDL1, $BD2", []>;  class SideEffectBinarySSb<string mnemonic, bits<8> opcode>    : InstSSb<opcode,              (outs), (ins bdladdr12onlylen4:$BDL1, bdladdr12onlylen4:$BDL2), -            mnemonic##"\t$BDL1, $BDL2", []>; +            mnemonic#"\t$BDL1, $BDL2", []>;  class SideEffectBinarySSf<string mnemonic, bits<8> opcode>    : InstSSf<opcode, (outs), (ins bdaddr12only:$BD1, bdladdr12onlylen8:$BDL2), -            mnemonic##"\t$BD1, $BDL2", []>; +            mnemonic#"\t$BD1, $BDL2", []>;  class SideEffectBinarySSE<string mnemonic, bits<16> opcode>    : InstSSE<opcode, (outs), (ins bdaddr12only:$BD1, bdaddr12only:$BD2), @@ -3211,6 +3217,8 @@ class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,    let CCMaskLast = 1;    let NumOpsKey = !subst("loc", "sel", mnemonic);    let NumOpsValue = "2"; +  let OpKey = mnemonic#cls1; +  let OpType = "reg";  }  // Like CondBinaryRRF, but used for the raw assembly form.  The condition-code @@ -3252,6 +3260,8 @@ class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,    let CCMaskLast = 1;    let NumOpsKey = mnemonic;    let NumOpsValue = "3"; +  let OpKey = mnemonic#cls1; +  let OpType = "reg";  }  // Like CondBinaryRRFa, but used for the raw assembly form.  The condition-code @@ -3299,7 +3309,7 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,                          ImmOpWithPattern imm> {    let NumOpsKey = mnemonic in {      let NumOpsValue = "3" in -      def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>, +      def K : BinaryRIE<mnemonic#"k", opcode2, operator, cls, imm>,                Requires<[FeatureDistinctOps]>;      let NumOpsValue = "2" in        def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>; @@ -3376,7 +3386,7 @@ multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,                          SDPatternOperator operator, RegisterOperand cls> {    let NumOpsKey = mnemonic in {      let NumOpsValue = "3" in -      def K  : BinaryRSY<mnemonic##"k", opcode2, operator, cls>, +      def K  : BinaryRSY<mnemonic#"k", opcode2, operator, cls>,                 Requires<[FeatureDistinctOps]>;      let NumOpsValue = "2" in        def "" : BinaryRS<mnemonic, opcode1, operator, cls>; @@ -3448,7 +3458,7 @@ class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,  multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,                          SDPatternOperator operator, RegisterOperand cls,                          SDPatternOperator load, bits<5> bytes> { -  let DispKey = mnemonic ## #cls in { +  let DispKey = mnemonic # cls in {      let DispSize = "12" in        def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes,                          bdxaddr12pair>; @@ -3479,7 +3489,7 @@ class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,  multiclass BinarySIPair<string mnemonic, bits<8> siOpcode,                          bits<16> siyOpcode, SDPatternOperator operator,                          Operand imm> { -  let DispKey = mnemonic ## #cls in { +  let DispKey = mnemonic # cls in {      let DispSize = "12" in        def "" : BinarySI<mnemonic, siOpcode, operator, imm, 
bdaddr12pair>;      let DispSize = "20" in @@ -3575,7 +3585,7 @@ multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode,    def "" : BinaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,                        !and (modifier, 14)>;    let Defs = [CC] in -    def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, +    def S : BinaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type,                         !add (!and (modifier, 14), 1)>;  } @@ -3604,7 +3614,7 @@ multiclass BinaryExtraVRRbSPair<string mnemonic, bits<16> opcode,                    (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,                                              tr2.op:$V3, 0)>;    let Defs = [CC] in -    def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, 1>; +    def S : BinaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type, 1>;  }  multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> { @@ -3619,7 +3629,7 @@ multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> {  class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,                   TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m5 = 0, -                 bits<4> m6 = 0> +                 bits<4> m6 = 0, string fp_mnemonic = "">    : InstVRRc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3),               mnemonic#"\t$V1, $V2, $V3",               [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), @@ -3627,6 +3637,8 @@ class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,    let M4 = type;    let M5 = m5;    let M6 = m6; +  let OpKey = fp_mnemonic#"MemFold"#!subst("VR", "FP", !cast<string>(tr1.op)); +  let OpType = "reg";  }  class BinaryVRRcGeneric<string mnemonic, bits<16> opcode, bits<4> m5 = 0, @@ -3655,7 +3667,7 @@ multiclass BinaryVRRcSPair<string mnemonic, bits<16> opcode,    def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type,                        m5, !and (modifier, 14)>;    let Defs = [CC] in -    def S : BinaryVRRc<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, +    def S : BinaryVRRc<mnemonic#"s", opcode, operator_cc, tr1, tr2, type,                         m5, !add (!and (modifier, 14), 1)>;  } @@ -3752,7 +3764,7 @@ class StoreBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,  multiclass StoreBinaryRSPair<string mnemonic, bits<8> rsOpcode,                               bits<16> rsyOpcode, RegisterOperand cls,                               bits<5> bytes> { -  let DispKey = mnemonic ## #cls in { +  let DispKey = mnemonic # cls in {      let DispSize = "12" in        def "" : StoreBinaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;      let DispSize = "20" in @@ -3892,7 +3904,7 @@ class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,  multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,                           SDPatternOperator operator, RegisterOperand cls,                           SDPatternOperator load, bits<5> bytes> { -  let DispKey = mnemonic ## #cls in { +  let DispKey = mnemonic # cls in {      let DispSize = "12" in        def "" : CompareRX<mnemonic, rxOpcode, operator, cls,                           load, bytes, bdxaddr12pair>; @@ -3920,7 +3932,7 @@ class CompareRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,  multiclass CompareRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,                           RegisterOperand cls, bits<5> bytes> { -  let DispKey = mnemonic ## 
#cls in { +  let DispKey = mnemonic # cls in {      let DispSize = "12" in        def "" : CompareRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;      let DispSize = "20" in @@ -3931,7 +3943,7 @@ multiclass CompareRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,  class CompareSSb<string mnemonic, bits<8> opcode>    : InstSSb<opcode,              (outs), (ins bdladdr12onlylen4:$BDL1, bdladdr12onlylen4:$BDL2), -            mnemonic##"\t$BDL1, $BDL2", []> { +            mnemonic#"\t$BDL1, $BDL2", []> {    let isCompare = 1;    let mayLoad = 1;  } @@ -3978,7 +3990,7 @@ multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,  }  class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator, -                  TypedReg tr, bits<4> type> +                  TypedReg tr, bits<4> type, string fp_mnemonic = "">    : InstVRRa<opcode, (outs), (ins tr.op:$V1, tr.op:$V2),               mnemonic#"\t$V1, $V2",               [(set CC, (operator (tr.vt tr.op:$V1), (tr.vt tr.op:$V2)))]> { @@ -3986,6 +3998,8 @@ class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,    let M3 = type;    let M4 = 0;    let M5 = 0; +  let OpKey = fp_mnemonic#!subst("VR", "FP", !cast<string>(tr.op)); +  let OpType = "reg";  }  class CompareVRRaGeneric<string mnemonic, bits<16> opcode> @@ -4043,7 +4057,7 @@ class TestVRRg<string mnemonic, bits<16> opcode>  class SideEffectTernarySSc<string mnemonic, bits<8> opcode>    : InstSSc<opcode, (outs), (ins bdladdr12onlylen4:$BDL1,                                   shift12only:$BD2, imm32zx4:$I3), -            mnemonic##"\t$BDL1, $BD2, $I3", []>; +            mnemonic#"\t$BDL1, $BD2, $I3", []>;  class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode,                              RegisterOperand cls1, RegisterOperand cls2, @@ -4179,7 +4193,7 @@ class TernaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,  multiclass TernaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,                           RegisterOperand cls, bits<5> bytes> { -  let DispKey = mnemonic ## #cls in { +  let DispKey = mnemonic # cls in {      let DispSize = "12" in        def "" : TernaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;      let DispSize = "20" in @@ -4303,7 +4317,7 @@ multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode,                    (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,                                              tr2.op:$V3, 0)>;    let Defs = [CC] in -    def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, +    def S : TernaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type,                          imm32zx4even_timm, !add(!and (modifier, 14), 1)>;    def : InstAlias<mnemonic#"s\t$V1, $V2, $V3",                    (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2, @@ -4371,7 +4385,7 @@ class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>  }  // Ternary operation where the assembler mnemonic has an extra operand to -// optionally allow specifiying arbitrary M6 values. +// optionally allow specifying arbitrary M6 values.  
multiclass TernaryExtraVRRd<string mnemonic, bits<16> opcode,                               SDPatternOperator operator,                               TypedReg tr1, TypedReg tr2, bits<4> type> { @@ -4399,7 +4413,8 @@ multiclass TernaryExtraVRRdGeneric<string mnemonic, bits<16> opcode> {  }  class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator, -                  TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0> +                  TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0, +                  string fp_mnemonic = "">    : InstVRRe<opcode, (outs tr1.op:$V1),               (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),               mnemonic#"\t$V1, $V2, $V3, $V4", @@ -4408,6 +4423,8 @@ class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,                                                    (tr1.vt tr1.op:$V4)))]> {    let M5 = m5;    let M6 = type; +  let OpKey = fp_mnemonic#"MemFold"#!subst("VR", "FP", !cast<string>(tr1.op)); +  let OpType = "reg";  }  class TernaryVRReFloatGeneric<string mnemonic, bits<16> opcode> @@ -4536,7 +4553,7 @@ multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,                    (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,                                              tr2.op:$V3, tr2.op:$V4, 0)>;    let Defs = [CC] in -    def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc, +    def S : QuaternaryVRRd<mnemonic#"s", opcode, operator_cc,                             tr1, tr2, tr2, tr2, type,                             imm32zx4even_timm, !add (!and (modifier, 14), 1)>;    def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4", @@ -4630,7 +4647,7 @@ class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,  multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,                           SDPatternOperator operator, RegisterOperand cls> { -  let DispKey = mnemonic ## #cls in { +  let DispKey = mnemonic # cls in {      let DispSize = "12" in        def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>;      let DispSize = "20" in @@ -4650,13 +4667,13 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,  class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>    : InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2), -             mnemonic##"\t$M1, $XBD2", +             mnemonic#"\t$M1, $XBD2",               [(operator imm32zx4_timm:$M1, bdxaddr20only:$XBD2)]>;  class PrefetchRILPC<string mnemonic, bits<12> opcode,                      SDPatternOperator operator>    : InstRILc<opcode, (outs), (ins imm32zx4_timm:$M1, pcrel32:$RI2), -             mnemonic##"\t$M1, $RI2", +             mnemonic#"\t$M1, $RI2",               [(operator imm32zx4_timm:$M1, pcrel32:$RI2)]> {    // We want PC-relative addresses to be tried ahead of BD and BDX addresses.    
// However, BDXs have two extra operands and are therefore 6 units more @@ -4765,7 +4782,9 @@ multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,  class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,                      AddressingMode mode>    : Pseudo<(outs cls:$R1), (ins cls:$R2, mode:$XBD2), []> { -    let OpKey = mnemonic#"rk"#cls; +    let OpKey = !subst("mscrk", "msrkc", +                !subst("msgcrk", "msgrkc", +                mnemonic#"rk"#cls));      let OpType = "mem";      let MemKey = mnemonic#cls;      let MemType = "pseudo"; @@ -4775,6 +4794,40 @@ class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,      let hasNoSchedulingInfo = 1;  } +// Same as MemFoldPseudo but for mapping a W... vector instruction +class MemFoldPseudo_FP<string mnemonic, RegisterOperand cls, bits<5> bytes, +                    AddressingMode mode> +  : MemFoldPseudo<mnemonic, cls, bytes, mode> { +    let OpKey = mnemonic#"r"#"MemFold"#cls; +} + +class MemFoldPseudo_FPTern<string mnemonic, RegisterOperand cls, bits<5> bytes, +                           AddressingMode mode> +  : Pseudo<(outs cls:$R1), (ins cls:$R2, cls:$R3, mode:$XBD2), []> { +    let OpKey = mnemonic#"r"#"MemFold"#cls; +    let OpType = "mem"; +    let MemKey = mnemonic#cls; +    let MemType = "pseudo"; +    let mayLoad = 1; +    let AccessBytes = bytes; +    let HasIndex = 1; +    let hasNoSchedulingInfo = 1; +} + +// Same as MemFoldPseudo but for Load On Condition with CC operands. +class MemFoldPseudo_CondMove<string mnemonic, RegisterOperand cls, bits<5> bytes, +                             AddressingMode mode> +  : Pseudo<(outs cls:$R1), +           (ins cls:$R2, mode:$XBD2, cond4:$valid, cond4:$M3), []> { +    let OpKey = !subst("loc", "sel", mnemonic)#"r"#cls; +    let OpType = "mem"; +    let MemKey = mnemonic#cls; +    let MemType = "pseudo"; +    let mayLoad = 1; +    let AccessBytes = bytes; +    let hasNoSchedulingInfo = 1; +} +  // Like CompareRI, but expanded after RA depending on the choice of register.  class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,                        ImmOpWithPattern imm> @@ -4813,6 +4866,8 @@ class CondBinaryRRFPseudo<string mnemonic, RegisterOperand cls1,    let CCMaskLast = 1;    let NumOpsKey = !subst("loc", "sel", mnemonic);    let NumOpsValue = "2"; +  let OpKey = mnemonic#cls1; +  let OpType = "reg";  }  // Like CondBinaryRRFa, but expanded after RA depending on the choice of @@ -4826,6 +4881,8 @@ class CondBinaryRRFaPseudo<string mnemonic, RegisterOperand cls1,    let CCMaskLast = 1;    let NumOpsKey = mnemonic;    let NumOpsValue = "3"; +  let OpKey = mnemonic#cls1; +  let OpType = "reg";  }  // Like CondBinaryRIE, but expanded after RA depending on the choice of @@ -4842,8 +4899,9 @@ class CondBinaryRIEPseudo<RegisterOperand cls, ImmOpWithPattern imm>  // Like CondUnaryRSY, but expanded after RA depending on the choice of  // register. 
-class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls, -                         bits<5> bytes, AddressingMode mode = bdaddr20only> +class CondUnaryRSYPseudo<string mnemonic, SDPatternOperator operator, +                         RegisterOperand cls, bits<5> bytes, +                         AddressingMode mode = bdaddr20only>    : Pseudo<(outs cls:$R1),             (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),             [(set cls:$R1, @@ -4854,6 +4912,10 @@ class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,    let mayLoad = 1;    let AccessBytes = bytes;    let CCMaskLast = 1; +  let OpKey = mnemonic#"r"#cls; +  let OpType = "mem"; +  let MemKey = mnemonic#cls; +  let MemType = "target";  }  // Like CondStoreRSY, but expanded after RA depending on the choice of @@ -5039,7 +5101,6 @@ multiclass BinaryRXYAndPseudo<string mnemonic, bits<16> opcode,                                SDPatternOperator operator, RegisterOperand cls,                                SDPatternOperator load, bits<5> bytes,                                AddressingMode mode = bdxaddr20only> { -    def "" : BinaryRXY<mnemonic, opcode, operator, cls, load, bytes, mode> {      let MemKey = mnemonic#cls;      let MemType = "target"; @@ -5052,7 +5113,7 @@ multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode,                                   bits<16> rxyOpcode, SDPatternOperator operator,                                   RegisterOperand cls,                                   SDPatternOperator load, bits<5> bytes> { -  let DispKey = mnemonic ## #cls in { +  let DispKey = mnemonic # cls in {      def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes,                        bdxaddr12pair> {        let DispSize = "12"; @@ -5066,6 +5127,43 @@ multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode,    def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, bdxaddr12pair>;  } +multiclass BinaryRXEAndPseudo<string mnemonic, bits<16> opcode, +                              SDPatternOperator operator, RegisterOperand cls, +                              SDPatternOperator load, bits<5> bytes> { +  def "" : BinaryRXE<mnemonic, opcode, operator, cls, load, bytes> { +    let MemKey = mnemonic#cls; +    let MemType = "target"; +  } +  def _MemFoldPseudo : MemFoldPseudo_FP<mnemonic, cls, bytes, bdxaddr12pair>; +} + +multiclass TernaryRXFAndPseudo<string mnemonic, bits<16> opcode, +                               SDPatternOperator operator, RegisterOperand cls1, +                               RegisterOperand cls2, SDPatternOperator load, +                               bits<5> bytes> { +  def "" : TernaryRXF<mnemonic, opcode, operator, cls1, cls2, load, bytes> { +    let MemKey = mnemonic#cls1; +    let MemType = "target"; +  } +  def _MemFoldPseudo : MemFoldPseudo_FPTern<mnemonic, cls1, bytes, bdxaddr12pair>; +} + +multiclass CondUnaryRSYPairAndMemFold<string mnemonic, bits<16> opcode, +                                      SDPatternOperator operator, +                                      RegisterOperand cls, bits<5> bytes, +                                      AddressingMode mode = bdaddr20only> { +  defm "" : CondUnaryRSYPair<mnemonic, opcode, operator, cls, bytes, mode>; +  def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>; +} + +multiclass CondUnaryRSYPseudoAndMemFold<string mnemonic, +                                        SDPatternOperator operator, +                                        RegisterOperand cls, bits<5> 
bytes, +                                        AddressingMode mode = bdaddr20only> { +  def "" : CondUnaryRSYPseudo<mnemonic, operator, cls, bytes, mode>; +  def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>; +} +  // Define an instruction that operates on two fixed-length blocks of memory,  // and associated pseudo instructions for operating on blocks of any size.  // The Sequence form uses a straight-line sequence of instructions and @@ -5086,7 +5184,7 @@ multiclass MemorySS<string mnemonic, bits<8> opcode,    }  } -// The same, but setting a CC result as comparion operator. +// The same, but setting a CC result as comparison operator.  multiclass CompareMemorySS<string mnemonic, bits<8> opcode,                            SDPatternOperator sequence, SDPatternOperator loop> {    def "" : SideEffectBinarySSa<mnemonic, opcode>; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 97c8fa7aa32e..223cfcba2fac 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -513,8 +513,8 @@ unsigned SystemZInstrInfo::insertBranch(MachineBasicBlock &MBB,    return Count;  } -bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, -                                      unsigned &SrcReg2, int &Mask, +bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, +                                      Register &SrcReg2, int &Mask,                                        int &Value) const {    assert(MI.isCompare() && "Caller should have checked for a comparison"); @@ -532,8 +532,9 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,  bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,                                         ArrayRef<MachineOperand> Pred, -                                       unsigned TrueReg, unsigned FalseReg, -                                       int &CondCycles, int &TrueCycles, +                                       Register DstReg, Register TrueReg, +                                       Register FalseReg, int &CondCycles, +                                       int &TrueCycles,                                         int &FalseCycles) const {    // Not all subtargets have LOCR instructions.    
if (!STI.hasLoadStoreOnCond()) @@ -565,10 +566,10 @@ bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,  void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,                                      MachineBasicBlock::iterator I, -                                    const DebugLoc &DL, unsigned DstReg, +                                    const DebugLoc &DL, Register DstReg,                                      ArrayRef<MachineOperand> Pred, -                                    unsigned TrueReg, -                                    unsigned FalseReg) const { +                                    Register TrueReg, +                                    Register FalseReg) const {    MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();    const TargetRegisterClass *RC = MRI.getRegClass(DstReg); @@ -606,7 +607,7 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,  }  bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, -                                     unsigned Reg, +                                     Register Reg,                                       MachineRegisterInfo *MRI) const {    unsigned DefOpc = DefMI.getOpcode();    if (DefOpc != SystemZ::LHIMux && DefOpc != SystemZ::LHI && @@ -819,18 +820,11 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,      return;    } -  // Move CC value from/to a GR32. -  if (SrcReg == SystemZ::CC) { -    auto MIB = BuildMI(MBB, MBBI, DL, get(SystemZ::IPM), DestReg); -    if (KillSrc) { -      const MachineFunction *MF = MBB.getParent(); -      const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); -      MIB->addRegisterKilled(SrcReg, TRI); -    } -    return; -  } +  // Move CC value from a GR32.    if (DestReg == SystemZ::CC) { -    BuildMI(MBB, MBBI, DL, get(SystemZ::TMLH)) +    unsigned Opcode = +      SystemZ::GR32BitRegClass.contains(SrcReg) ? SystemZ::TMLH : SystemZ::TMHH; +    BuildMI(MBB, MBBI, DL, get(Opcode))        .addReg(SrcReg, getKillRegState(KillSrc))        .addImm(3 << (SystemZ::IPM_CC - 16));      return; @@ -855,12 +849,6 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,      Opcode = SystemZ::VLR;    else if (SystemZ::AR32BitRegClass.contains(DestReg, SrcReg))      Opcode = SystemZ::CPYA; -  else if (SystemZ::AR32BitRegClass.contains(DestReg) && -           SystemZ::GR32BitRegClass.contains(SrcReg)) -    Opcode = SystemZ::SAR; -  else if (SystemZ::GR32BitRegClass.contains(DestReg) && -           SystemZ::AR32BitRegClass.contains(SrcReg)) -    Opcode = SystemZ::EAR;    else      llvm_unreachable("Impossible reg-to-reg copy"); @@ -869,7 +857,7 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,  }  void SystemZInstrInfo::storeRegToStackSlot( -    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, +    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,      bool isKill, int FrameIdx, const TargetRegisterClass *RC,      const TargetRegisterInfo *TRI) const {    DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -884,7 +872,7 @@ void SystemZInstrInfo::storeRegToStackSlot(  }  void SystemZInstrInfo::loadRegFromStackSlot( -    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, +    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,      int FrameIdx, const TargetRegisterClass *RC,      const TargetRegisterInfo *TRI) const {    DebugLoc DL = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc(); @@ -1005,33 +993,36 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(      MachineBasicBlock::iterator InsertPt, int FrameIndex,      LiveIntervals *LIS, VirtRegMap *VRM) const {    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); +  MachineRegisterInfo &MRI = MF.getRegInfo();    const MachineFrameInfo &MFI = MF.getFrameInfo();    unsigned Size = MFI.getObjectSize(FrameIndex);    unsigned Opcode = MI.getOpcode(); +  // Check CC liveness if new instruction introduces a dead def of CC. +  MCRegUnitIterator CCUnit(SystemZ::CC, TRI); +  SlotIndex MISlot = SlotIndex(); +  LiveRange *CCLiveRange = nullptr; +  bool CCLiveAtMI = true; +  if (LIS) { +    MISlot = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot(); +    CCLiveRange = &LIS->getRegUnit(*CCUnit); +    CCLiveAtMI = CCLiveRange->liveAt(MISlot); +  } +  ++CCUnit; +  assert(!CCUnit.isValid() && "CC only has one reg unit."); +    if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { -    if (LIS != nullptr && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) && +    if (!CCLiveAtMI && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&          isInt<8>(MI.getOperand(2).getImm()) && !MI.getOperand(3).getReg()) { - -      // Check CC liveness, since new instruction introduces a dead -      // def of CC. -      MCRegUnitIterator CCUnit(SystemZ::CC, TRI); -      LiveRange &CCLiveRange = LIS->getRegUnit(*CCUnit); -      ++CCUnit; -      assert(!CCUnit.isValid() && "CC only has one reg unit."); -      SlotIndex MISlot = -          LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot(); -      if (!CCLiveRange.liveAt(MISlot)) { -        // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST -        MachineInstr *BuiltMI = BuildMI(*InsertPt->getParent(), InsertPt, -                                        MI.getDebugLoc(), get(SystemZ::AGSI)) -                                    .addFrameIndex(FrameIndex) -                                    .addImm(0) -                                    .addImm(MI.getOperand(2).getImm()); -        BuiltMI->findRegisterDefOperand(SystemZ::CC)->setIsDead(true); -        CCLiveRange.createDeadDef(MISlot, LIS->getVNInfoAllocator()); -        return BuiltMI; -      } +      // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST +      MachineInstr *BuiltMI = BuildMI(*InsertPt->getParent(), InsertPt, +                                      MI.getDebugLoc(), get(SystemZ::AGSI)) +        .addFrameIndex(FrameIndex) +        .addImm(0) +        .addImm(MI.getOperand(2).getImm()); +      BuiltMI->findRegisterDefOperand(SystemZ::CC)->setIsDead(true); +      CCLiveRange->createDeadDef(MISlot, LIS->getVNInfoAllocator()); +      return BuiltMI;      }      return nullptr;    } @@ -1090,6 +1081,32 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(      return BuiltMI;    } +  unsigned MemImmOpc = 0; +  switch (Opcode) { +  case SystemZ::LHIMux: +  case SystemZ::LHI:    MemImmOpc = SystemZ::MVHI;  break; +  case SystemZ::LGHI:   MemImmOpc = SystemZ::MVGHI; break; +  case SystemZ::CHIMux: +  case SystemZ::CHI:    MemImmOpc = SystemZ::CHSI;  break; +  case SystemZ::CGHI:   MemImmOpc = SystemZ::CGHSI; break; +  case SystemZ::CLFIMux: +  case SystemZ::CLFI: +    if (isUInt<16>(MI.getOperand(1).getImm())) +      MemImmOpc = SystemZ::CLFHSI; +    break; +  case SystemZ::CLGFI: +    if (isUInt<16>(MI.getOperand(1).getImm())) +      MemImmOpc = SystemZ::CLGHSI; +    break; +  default: break; +  } +  if (MemImmOpc) +    return BuildMI(*InsertPt->getParent(), InsertPt, 
MI.getDebugLoc(), +                   get(MemImmOpc)) +               .addFrameIndex(FrameIndex) +               .addImm(0) +               .addImm(MI.getOperand(1).getImm()); +    if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) {      bool Op0IsGPR = (Opcode == SystemZ::LGDR);      bool Op1IsGPR = (Opcode == SystemZ::LDGR); @@ -1159,57 +1176,144 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(    }    // If the spilled operand is the final one or the instruction is -  // commutable, try to change <INSN>R into <INSN>. +  // commutable, try to change <INSN>R into <INSN>.  Don't introduce a def of +  // CC if it is live and MI does not define it.    unsigned NumOps = MI.getNumExplicitOperands();    int MemOpcode = SystemZ::getMemOpcode(Opcode); +  if (MemOpcode == -1 || +      (CCLiveAtMI && !MI.definesRegister(SystemZ::CC) && +       get(MemOpcode).hasImplicitDefOfPhysReg(SystemZ::CC))) +    return nullptr; + +  // Check if all other vregs have a usable allocation in the case of vector +  // to FP conversion. +  const MCInstrDesc &MCID = MI.getDesc(); +  for (unsigned I = 0, E = MCID.getNumOperands(); I != E; ++I) { +    const MCOperandInfo &MCOI = MCID.OpInfo[I]; +    if (MCOI.OperandType != MCOI::OPERAND_REGISTER || I == OpNum) +      continue; +    const TargetRegisterClass *RC = TRI->getRegClass(MCOI.RegClass); +    if (RC == &SystemZ::VR32BitRegClass || RC == &SystemZ::VR64BitRegClass) { +      Register Reg = MI.getOperand(I).getReg(); +      Register PhysReg = Register::isVirtualRegister(Reg) +                             ? (VRM ? VRM->getPhys(Reg) : Register()) +                             : Reg; +      if (!PhysReg || +          !(SystemZ::FP32BitRegClass.contains(PhysReg) || +            SystemZ::FP64BitRegClass.contains(PhysReg) || +            SystemZ::VF128BitRegClass.contains(PhysReg))) +        return nullptr; +    } +  } +  // Fused multiply and add/sub need to have the same dst and accumulator reg. +  bool FusedFPOp = (Opcode == SystemZ::WFMADB || Opcode == SystemZ::WFMASB || +                    Opcode == SystemZ::WFMSDB || Opcode == SystemZ::WFMSSB); +  if (FusedFPOp) { +    Register DstReg = VRM->getPhys(MI.getOperand(0).getReg()); +    Register AccReg = VRM->getPhys(MI.getOperand(3).getReg()); +    if (OpNum == 0 || OpNum == 3 || DstReg != AccReg) +      return nullptr; +  } + +  // Try to swap compare operands if possible. +  bool NeedsCommute = false; +  if ((MI.getOpcode() == SystemZ::CR || MI.getOpcode() == SystemZ::CGR || +       MI.getOpcode() == SystemZ::CLR || MI.getOpcode() == SystemZ::CLGR || +       MI.getOpcode() == SystemZ::WFCDB || MI.getOpcode() == SystemZ::WFCSB || +       MI.getOpcode() == SystemZ::WFKDB || MI.getOpcode() == SystemZ::WFKSB) && +      OpNum == 0 && prepareCompareSwapOperands(MI)) +    NeedsCommute = true; + +  bool CCOperands = false; +  if (MI.getOpcode() == SystemZ::LOCRMux || MI.getOpcode() == SystemZ::LOCGR || +      MI.getOpcode() == SystemZ::SELRMux || MI.getOpcode() == SystemZ::SELGR) { +    assert(MI.getNumOperands() == 6 && NumOps == 5 && +           "LOCR/SELR instruction operands corrupt?"); +    NumOps -= 2; +    CCOperands = true; +  }    // See if this is a 3-address instruction that is convertible to 2-address    // and suitable for folding below.  Only try this with virtual registers    // and a provided VRM (during regalloc). 
-  bool NeedsCommute = false; -  if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) { +  if (NumOps == 3 && SystemZ::getTargetMemOpcode(MemOpcode) != -1) {      if (VRM == nullptr) -      MemOpcode = -1; +      return nullptr;      else { -      assert(NumOps == 3 && "Expected two source registers.");        Register DstReg = MI.getOperand(0).getReg();        Register DstPhys =            (Register::isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg);        Register SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg()                                      : ((OpNum == 1 && MI.isCommutable())                                             ? MI.getOperand(2).getReg() -                                         : Register())); +                                           : Register()));        if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg &&            Register::isVirtualRegister(SrcReg) &&            DstPhys == VRM->getPhys(SrcReg))          NeedsCommute = (OpNum == 1);        else -        MemOpcode = -1; +        return nullptr;      }    } -  if (MemOpcode >= 0) { -    if ((OpNum == NumOps - 1) || NeedsCommute) { -      const MCInstrDesc &MemDesc = get(MemOpcode); -      uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); -      assert(AccessBytes != 0 && "Size of access should be known"); -      assert(AccessBytes <= Size && "Access outside the frame index"); -      uint64_t Offset = Size - AccessBytes; -      MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, -                                        MI.getDebugLoc(), get(MemOpcode)); +  if ((OpNum == NumOps - 1) || NeedsCommute || FusedFPOp) { +    const MCInstrDesc &MemDesc = get(MemOpcode); +    uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); +    assert(AccessBytes != 0 && "Size of access should be known"); +    assert(AccessBytes <= Size && "Access outside the frame index"); +    uint64_t Offset = Size - AccessBytes; +    MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, +                                      MI.getDebugLoc(), get(MemOpcode)); +    if (MI.isCompare()) { +      assert(NumOps == 2 && "Expected 2 register operands for a compare."); +      MIB.add(MI.getOperand(NeedsCommute ? 1 : 0)); +    } +    else if (FusedFPOp) { +      MIB.add(MI.getOperand(0)); +      MIB.add(MI.getOperand(3)); +      MIB.add(MI.getOperand(OpNum == 1 ? 2 : 1)); +    } +    else {        MIB.add(MI.getOperand(0));        if (NeedsCommute)          MIB.add(MI.getOperand(2));        else          for (unsigned I = 1; I < OpNum; ++I)            MIB.add(MI.getOperand(I)); -      MIB.addFrameIndex(FrameIndex).addImm(Offset); -      if (MemDesc.TSFlags & SystemZII::HasIndex) -        MIB.addReg(0); -      transferDeadCC(&MI, MIB); -      transferMIFlag(&MI, MIB, MachineInstr::NoSWrap); -      return MIB;      } +    MIB.addFrameIndex(FrameIndex).addImm(Offset); +    if (MemDesc.TSFlags & SystemZII::HasIndex) +      MIB.addReg(0); +    if (CCOperands) { +      unsigned CCValid = MI.getOperand(NumOps).getImm(); +      unsigned CCMask = MI.getOperand(NumOps + 1).getImm(); +      MIB.addImm(CCValid); +      MIB.addImm(NeedsCommute ? 
CCMask ^ CCValid : CCMask); +    } +    if (MIB->definesRegister(SystemZ::CC) && +        (!MI.definesRegister(SystemZ::CC) || +         MI.registerDefIsDead(SystemZ::CC))) { +      MIB->addRegisterDead(SystemZ::CC, TRI); +      if (CCLiveRange) +        CCLiveRange->createDeadDef(MISlot, LIS->getVNInfoAllocator()); +    } +    // Constrain the register classes if converted from a vector opcode. The +    // allocated regs are in an FP reg-class per previous check above. +    for (const MachineOperand &MO : MIB->operands()) +      if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { +        unsigned Reg = MO.getReg(); +        if (MRI.getRegClass(Reg) == &SystemZ::VR32BitRegClass) +          MRI.setRegClass(Reg, &SystemZ::FP32BitRegClass); +        else if (MRI.getRegClass(Reg) == &SystemZ::VR64BitRegClass) +          MRI.setRegClass(Reg, &SystemZ::FP64BitRegClass); +        else if (MRI.getRegClass(Reg) == &SystemZ::VR128BitRegClass) +          MRI.setRegClass(Reg, &SystemZ::VF128BitRegClass); +      } + +    transferDeadCC(&MI, MIB); +    transferMIFlag(&MI, MIB, MachineInstr::NoSWrap); +    transferMIFlag(&MI, MIB, MachineInstr::NoFPExcept); +    return MIB;    }    return nullptr; @@ -1718,6 +1822,80 @@ unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,    return 0;  } +bool SystemZInstrInfo:: +prepareCompareSwapOperands(MachineBasicBlock::iterator const MBBI) const { +  assert(MBBI->isCompare() && MBBI->getOperand(0).isReg() && +         MBBI->getOperand(1).isReg() && !MBBI->mayLoad() && +         "Not a compare reg/reg."); + +  MachineBasicBlock *MBB = MBBI->getParent(); +  bool CCLive = true; +  SmallVector<MachineInstr *, 4> CCUsers; +  for (MachineBasicBlock::iterator Itr = std::next(MBBI); +       Itr != MBB->end(); ++Itr) { +    if (Itr->readsRegister(SystemZ::CC)) { +      unsigned Flags = Itr->getDesc().TSFlags; +      if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast)) +        CCUsers.push_back(&*Itr); +      else +        return false; +    } +    if (Itr->definesRegister(SystemZ::CC)) { +      CCLive = false; +      break; +    } +  } +  if (CCLive) { +    LivePhysRegs LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo()); +    LiveRegs.addLiveOuts(*MBB); +    if (LiveRegs.contains(SystemZ::CC)) +      return false; +  } + +  // Update all CC users. +  for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) { +    unsigned Flags = CCUsers[Idx]->getDesc().TSFlags; +    unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ? +                           0 : CCUsers[Idx]->getNumExplicitOperands() - 2); +    MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1); +    unsigned NewCCMask = SystemZ::reverseCCMask(CCMaskMO.getImm()); +    CCMaskMO.setImm(NewCCMask); +  } + +  return true; +} + +unsigned SystemZ::reverseCCMask(unsigned CCMask) { +  return ((CCMask & SystemZ::CCMASK_CMP_EQ) | +          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) | +          (CCMask & SystemZ::CCMASK_CMP_LT ? 
SystemZ::CCMASK_CMP_GT : 0) | +          (CCMask & SystemZ::CCMASK_CMP_UO)); +} + +MachineBasicBlock *SystemZ::emitBlockAfter(MachineBasicBlock *MBB) { +  MachineFunction &MF = *MBB->getParent(); +  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); +  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB); +  return NewMBB; +} + +MachineBasicBlock *SystemZ::splitBlockAfter(MachineBasicBlock::iterator MI, +                                            MachineBasicBlock *MBB) { +  MachineBasicBlock *NewMBB = emitBlockAfter(MBB); +  NewMBB->splice(NewMBB->begin(), MBB, +                 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); +  NewMBB->transferSuccessorsAndUpdatePHIs(MBB); +  return NewMBB; +} + +MachineBasicBlock *SystemZ::splitBlockBefore(MachineBasicBlock::iterator MI, +                                             MachineBasicBlock *MBB) { +  MachineBasicBlock *NewMBB = emitBlockAfter(MBB); +  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); +  NewMBB->transferSuccessorsAndUpdatePHIs(MBB); +  return NewMBB; +} +  unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {    if (!STI.hasLoadAndTrap())      return 0; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 8391970c7d9d..72dafc3c93c2 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -155,6 +155,20 @@ enum FusedCompareType {  namespace SystemZ {  int getTwoOperandOpcode(uint16_t Opcode);  int getTargetMemOpcode(uint16_t Opcode); + +// Return a version of comparison CC mask CCMask in which the LT and GT +// actions are swapped. +unsigned reverseCCMask(unsigned CCMask); + +// Create a new basic block after MBB. +MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB); +// Split MBB after MI and return the new block (the one that contains +// instructions after MI). +MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI, +                                   MachineBasicBlock *MBB); +// Split MBB before MI and return the new block (the one that contains MI). 
+MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
+                                    MachineBasicBlock *MBB);
 }
 
 class SystemZInstrInfo : public SystemZGenInstrInfo {
@@ -219,15 +233,16 @@ public:
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                         const DebugLoc &DL,
                         int *BytesAdded = nullptr) const override;
-  bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
-                      unsigned &SrcReg2, int &Mask, int &Value) const override;
-  bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
-                       unsigned, unsigned, int&, int&, int&) const override;
+  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+                      Register &SrcReg2, int &Mask, int &Value) const override;
+  bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
+                       Register, Register, Register, int &, int &,
+                       int &) const override;
   void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-                    const DebugLoc &DL, unsigned DstReg,
-                    ArrayRef<MachineOperand> Cond, unsigned TrueReg,
-                    unsigned FalseReg) const override;
-  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
+                    const DebugLoc &DL, Register DstReg,
+                    ArrayRef<MachineOperand> Cond, Register TrueReg,
+                    Register FalseReg) const override;
+  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                      MachineRegisterInfo *MRI) const override;
   bool isPredicable(const MachineInstr &MI) const override;
   bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
@@ -247,12 +262,12 @@ public:
                    bool KillSrc) const override;
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
-                           unsigned SrcReg, bool isKill, int FrameIndex,
+                           Register SrcReg, bool isKill, int FrameIndex,
                            const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI) const override;
   void loadRegFromStackSlot(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
-                            unsigned DestReg, int FrameIdx,
+                            Register DestReg, int FrameIdx,
                             const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const override;
   MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
@@ -313,6 +328,12 @@ public:
                           SystemZII::FusedCompareType Type,
                           const MachineInstr *MI = nullptr) const;
 
+  // Try to find all CC users of the compare instruction (MBBI) and update
+  // all of them to maintain equivalent behavior after swapping the compare
+  // operands. Return false if not all users can be conclusively found and
+  // handled. The compare instruction is *not* changed.
+  bool prepareCompareSwapOperands(MachineBasicBlock::iterator MBBI) const;
+
   // If Opcode is a LOAD opcode for which an associated LOAD AND TRAP
   // operation exists, return the opcode for the latter, otherwise return 0.
unsigned getLoadAndTrap(unsigned Opcode) const; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 9579dcc0d1b6..d5d56ecf6e47 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -29,6 +29,15 @@ let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {  def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src),                           [(set GR64:$dst, dynalloc12only:$src)]>; +let Defs = [R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1, +    usesCustomInserter = 1 in +  def PROBED_ALLOCA : Pseudo<(outs GR64:$dst), +                             (ins GR64:$oldSP, GR64:$space), +           [(set GR64:$dst, (z_probed_alloca GR64:$oldSP, GR64:$space))]>; + +let Defs = [R1D, R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1, +    hasSideEffects = 1 in +  def PROBED_STACKALLOC : Pseudo<(outs), (ins i64imm:$stacksize), []>;  //===----------------------------------------------------------------------===//  // Branch instructions @@ -492,7 +501,7 @@ let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {    let isCommutable = 1 in {      // Expands to SELR or SELFHR or a branch-and-move sequence,      // depending on the choice of registers. -    def  SELRMux : CondBinaryRRFaPseudo<"selrmux", GRX32, GRX32, GRX32>; +    def  SELRMux : CondBinaryRRFaPseudo<"MUXselr", GRX32, GRX32, GRX32>;      defm SELFHR  : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;      defm SELR    : CondBinaryRRFaPair<"selr",   0xB9F0, GR32, GR32, GR32>;      defm SELGR   : CondBinaryRRFaPair<"selgr",  0xB9E3, GR64, GR64, GR64>; @@ -525,13 +534,13 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {    let isCommutable = 1 in {      // Expands to LOCR or LOCFHR or a branch-and-move sequence,      // depending on the choice of registers. -    def LOCRMux : CondBinaryRRFPseudo<"locrmux", GRX32, GRX32>; +    def LOCRMux : CondBinaryRRFPseudo<"MUXlocr", GRX32, GRX32>;      defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>;    }    // Load on condition.  Matched via DAG pattern.    // Expands to LOC or LOCFH, depending on the choice of register. -  def LOCMux : CondUnaryRSYPseudo<simple_load, GRX32, 4>; +  defm LOCMux : CondUnaryRSYPseudoAndMemFold<"MUXloc", simple_load, GRX32, 4>;    defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, simple_load, GRH32, 4>;    // Store on condition.  Expanded from CondStore* pseudos. @@ -564,7 +573,7 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {    // Load on condition.  Matched via DAG pattern.    defm LOC  : CondUnaryRSYPair<"loc",  0xEBF2, simple_load, GR32, 4>; -  defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, simple_load, GR64, 8>; +  defm LOCG : CondUnaryRSYPairAndMemFold<"locg", 0xEBE2, simple_load, GR64, 8>;    // Store on condition.  Expanded from CondStore* pseudos.    defm STOC  : CondStoreRSYPair<"stoc",  0xEBF3, GR32, 4>; @@ -1348,8 +1357,8 @@ def  MSG  : BinaryRXY<"msg",  0xE30C, mul, GR64, load, 8>;  // Multiplication of memory, setting the condition code.  let Predicates = [FeatureMiscellaneousExtensions2], Defs = [CC] in { -  def MSC  : BinaryRXY<"msc",  0xE353, null_frag, GR32, load, 4>; -  def MSGC : BinaryRXY<"msgc", 0xE383, null_frag, GR64, load, 8>; +  defm MSC  : BinaryRXYAndPseudo<"msc",  0xE353, null_frag, GR32, load, 4>; +  defm MSGC : BinaryRXYAndPseudo<"msgc", 0xE383, null_frag, GR64, load, 8>;  }  // Multiplication of a register, producing two results. 
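Aside on the CC-mask bookkeeping above: SystemZ::reverseCCMask() rewrites the masks of CC users when prepareCompareSwapOperands() swaps the operands of a compare, while the commuted LOCR/SELR folding path instead negates the mask via CCMask ^ CCValid. Below is a minimal standalone C++ sketch of both transforms, not the in-tree code; the mask bit values mirror those in SystemZ.h (for compares, CC 0..3 map to bits 8/4/2/1) and should be treated as an assumption of the sketch.

#include <cassert>

namespace {

// Assumed values, mirroring SystemZ.h: for compares, CC 0..3 (equal, low,
// high, unordered) map onto mask bits 8, 4, 2 and 1.
const unsigned CCMASK_CMP_EQ = 1 << 3;
const unsigned CCMASK_CMP_LT = 1 << 2;
const unsigned CCMASK_CMP_GT = 1 << 1;
const unsigned CCMASK_CMP_UO = 1 << 0;

// Body as in the patch: keep the EQ and UO bits, exchange LT and GT.
unsigned reverseCCMask(unsigned CCMask) {
  return ((CCMask & CCMASK_CMP_EQ) |
          (CCMask & CCMASK_CMP_GT ? CCMASK_CMP_LT : 0) |
          (CCMask & CCMASK_CMP_LT ? CCMASK_CMP_GT : 0) |
          (CCMask & CCMASK_CMP_UO));
}

} // end anonymous namespace

int main() {
  // Swapping the operands of a compare turns "low" into "high" and back.
  assert(reverseCCMask(CCMASK_CMP_LT) == CCMASK_CMP_GT);
  assert(reverseCCMask(CCMASK_CMP_LT | CCMASK_CMP_EQ) ==
         (CCMASK_CMP_GT | CCMASK_CMP_EQ));
  // EQ and UO are symmetric under an operand swap and stay put.
  assert(reverseCCMask(CCMASK_CMP_EQ | CCMASK_CMP_UO) ==
         (CCMASK_CMP_EQ | CCMASK_CMP_UO));

  // Commuting a LOCR/SELR instead selects the other operand, i.e. negates
  // the condition within the valid bits: CCMask ^ CCValid.
  const unsigned CCValid = CCMASK_CMP_EQ | CCMASK_CMP_LT | CCMASK_CMP_GT;
  assert((CCMASK_CMP_LT ^ CCValid) == (CCMASK_CMP_EQ | CCMASK_CMP_GT));
  return 0;
}

The asserts encode the two invariants relied on above: an operand swap exchanges only the LT and GT bits, whereas commuting a conditional move negates the whole condition within the valid bits.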
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index c945122ee577..e73f1e429c3c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -177,9 +177,13 @@ let Predicates = [FeatureVector] in {
 let Predicates = [FeatureVectorPackedDecimal] in {
   // Load rightmost with length.  The number of loaded bytes is only known
-  // at run time.
-  def VLRL : BinaryVSI<"vlrl", 0xE635, int_s390_vlrl, 0>;
+  // at run time.  Note that while the instruction will accept immediate
+  // lengths larger than 15 at run time, those will always result in a trap,
+  // so we never emit them here.
+  def VLRL : BinaryVSI<"vlrl", 0xE635, null_frag, 0>;
   def VLRLR : BinaryVRSd<"vlrlr", 0xE637, int_s390_vlrl, 0>;
+  def : Pat<(int_s390_vlrl imm32zx4:$len, bdaddr12only:$addr),
+            (VLRL bdaddr12only:$addr, imm32zx4:$len)>;
 }
 
 // Use replicating loads if we're inserting a single element into an
@@ -243,9 +247,13 @@ let Predicates = [FeatureVector] in {
 let Predicates = [FeatureVectorPackedDecimal] in {
   // Store rightmost with length.  The number of stored bytes is only known
-  // at run time.
-  def VSTRL : StoreLengthVSI<"vstrl", 0xE63D, int_s390_vstrl, 0>;
+  // at run time.  Note that while the instruction will accept immediate
+  // lengths larger than 15 at run time, those will always result in a trap,
+  // so we never emit them here.
+  def VSTRL : StoreLengthVSI<"vstrl", 0xE63D, null_frag, 0>;
   def VSTRLR : StoreLengthVRSd<"vstrlr", 0xE63F, int_s390_vstrl, 0>;
+  def : Pat<(int_s390_vstrl VR128:$val, imm32zx4:$len, bdaddr12only:$addr),
+            (VSTRL VR128:$val, bdaddr12only:$addr, imm32zx4:$len)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -463,49 +471,56 @@ defm : GenericVectorOps<v2f64, v2i64>;
 //===----------------------------------------------------------------------===//
 
 let Predicates = [FeatureVector] in {
-  // Add.
-  def VA  : BinaryVRRcGeneric<"va", 0xE7F3>;
-  def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>;
-  def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
-  def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
-  def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
-  def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
-
-  // Add compute carry.
-  def VACC  : BinaryVRRcGeneric<"vacc", 0xE7F1>;
-  def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>;
-  def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>;
-  def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>;
-  def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>;
-  def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>;
-
-  // Add with carry.
-  def VAC  : TernaryVRRdGeneric<"vac", 0xE7BB>;
-  def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>;
-
-  // Add with carry compute carry.
-  def VACCC  : TernaryVRRdGeneric<"vaccc", 0xE7B9>;
-  def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>;
+  let isCommutable = 1 in {
+    // Add.
+    def VA  : BinaryVRRcGeneric<"va", 0xE7F3>; +    def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>; +    def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>; +    def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>; +    def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>; +    def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>; +  } + +  let isCommutable = 1 in { +    // Add compute carry. +    def VACC  : BinaryVRRcGeneric<"vacc", 0xE7F1>; +    def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>; +    def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>; +    def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>; +    def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>; +    def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>; + +    // Add with carry. +    def VAC  : TernaryVRRdGeneric<"vac", 0xE7BB>; +    def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>; + +    // Add with carry compute carry. +    def VACCC  : TernaryVRRdGeneric<"vaccc", 0xE7B9>; +    def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>; + }    // And. -  def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>; +  let isCommutable = 1 in +    def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>;    // And with complement.    def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>; -  // Average. -  def VAVG  : BinaryVRRcGeneric<"vavg", 0xE7F2>; -  def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>; -  def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>; -  def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>; -  def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>; - -  // Average logical. -  def VAVGL  : BinaryVRRcGeneric<"vavgl", 0xE7F0>; -  def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>; -  def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>; -  def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>; -  def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>; +  let isCommutable = 1 in { +    // Average. +    def VAVG  : BinaryVRRcGeneric<"vavg", 0xE7F2>; +    def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>; +    def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>; +    def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>; +    def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>; + +    // Average logical. +    def VAVGL  : BinaryVRRcGeneric<"vavgl", 0xE7F0>; +    def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>; +    def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>; +    def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>; +    def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>; +  }    // Checksum.    def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>; @@ -524,12 +539,14 @@ let Predicates = [FeatureVector] in {    def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>;    def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>; -  // Not exclusive or. 
-  let Predicates = [FeatureVectorEnhancements1] in -    def VNX : BinaryVRRc<"vnx", 0xE76C, null_frag, v128any, v128any>; +  let isCommutable = 1 in { +    // Not exclusive or. +    let Predicates = [FeatureVectorEnhancements1] in +      def VNX : BinaryVRRc<"vnx", 0xE76C, null_frag, v128any, v128any>; -  // Exclusive or. -  def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>; +    // Exclusive or. +    def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>; +  }    // Galois field multiply sum.    def VGFM  : BinaryVRRcGeneric<"vgfm", 0xE7B4>; @@ -559,135 +576,145 @@ let Predicates = [FeatureVector] in {    def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>;    def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>; -  // Maximum. -  def VMX  : BinaryVRRcGeneric<"vmx", 0xE7FF>; -  def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>; -  def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>; -  def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>; -  def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>; - -  // Maximum logical. -  def VMXL  : BinaryVRRcGeneric<"vmxl", 0xE7FD>; -  def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>; -  def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>; -  def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>; -  def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>; +  let isCommutable = 1 in { +    // Maximum. +    def VMX  : BinaryVRRcGeneric<"vmx", 0xE7FF>; +    def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>; +    def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>; +    def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>; +    def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>; + +    // Maximum logical. +    def VMXL  : BinaryVRRcGeneric<"vmxl", 0xE7FD>; +    def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>; +    def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>; +    def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>; +    def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>; +  } -  // Minimum. -  def VMN  : BinaryVRRcGeneric<"vmn", 0xE7FE>; -  def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>; -  def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>; -  def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>; -  def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>; - -  // Minimum logical. -  def VMNL  : BinaryVRRcGeneric<"vmnl", 0xE7FC>; -  def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>; -  def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>; -  def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>; -  def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>; - -  // Multiply and add low. -  def VMAL   : TernaryVRRdGeneric<"vmal", 0xE7AA>; -  def VMALB  : TernaryVRRd<"vmalb",  0xE7AA, z_muladd, v128b, v128b, 0>; -  def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>; -  def VMALF  : TernaryVRRd<"vmalf",  0xE7AA, z_muladd, v128f, v128f, 2>; - -  // Multiply and add high. 
-  def VMAH  : TernaryVRRdGeneric<"vmah", 0xE7AB>; -  def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>; -  def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>; -  def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>; - -  // Multiply and add logical high. -  def VMALH  : TernaryVRRdGeneric<"vmalh", 0xE7A9>; -  def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>; -  def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>; -  def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>; - -  // Multiply and add even. -  def VMAE  : TernaryVRRdGeneric<"vmae", 0xE7AE>; -  def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>; -  def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>; -  def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>; - -  // Multiply and add logical even. -  def VMALE  : TernaryVRRdGeneric<"vmale", 0xE7AC>; -  def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>; -  def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>; -  def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>; - -  // Multiply and add odd. -  def VMAO  : TernaryVRRdGeneric<"vmao", 0xE7AF>; -  def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>; -  def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>; -  def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>; - -  // Multiply and add logical odd. -  def VMALO  : TernaryVRRdGeneric<"vmalo", 0xE7AD>; -  def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>; -  def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>; -  def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>; - -  // Multiply high. -  def VMH  : BinaryVRRcGeneric<"vmh", 0xE7A3>; -  def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>; -  def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>; -  def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>; - -  // Multiply logical high. -  def VMLH  : BinaryVRRcGeneric<"vmlh", 0xE7A1>; -  def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>; -  def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>; -  def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>; - -  // Multiply low. -  def VML   : BinaryVRRcGeneric<"vml", 0xE7A2>; -  def VMLB  : BinaryVRRc<"vmlb",  0xE7A2, mul, v128b, v128b, 0>; -  def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>; -  def VMLF  : BinaryVRRc<"vmlf",  0xE7A2, mul, v128f, v128f, 2>; - -  // Multiply even. -  def VME  : BinaryVRRcGeneric<"vme", 0xE7A6>; -  def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>; -  def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>; -  def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>; - -  // Multiply logical even. -  def VMLE  : BinaryVRRcGeneric<"vmle", 0xE7A4>; -  def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>; -  def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>; -  def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>; - -  // Multiply odd. 
-  def VMO  : BinaryVRRcGeneric<"vmo", 0xE7A7>; -  def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>; -  def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>; -  def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>; - -  // Multiply logical odd. -  def VMLO  : BinaryVRRcGeneric<"vmlo", 0xE7A5>; -  def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>; -  def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>; -  def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>; +  let isCommutable = 1 in { +    // Minimum. +    def VMN  : BinaryVRRcGeneric<"vmn", 0xE7FE>; +    def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>; +    def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>; +    def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>; +    def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>; + +    // Minimum logical. +    def VMNL  : BinaryVRRcGeneric<"vmnl", 0xE7FC>; +    def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>; +    def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>; +    def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>; +    def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>; +  } + +  let isCommutable = 1 in { +    // Multiply and add low. +    def VMAL   : TernaryVRRdGeneric<"vmal", 0xE7AA>; +    def VMALB  : TernaryVRRd<"vmalb",  0xE7AA, z_muladd, v128b, v128b, 0>; +    def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>; +    def VMALF  : TernaryVRRd<"vmalf",  0xE7AA, z_muladd, v128f, v128f, 2>; + +    // Multiply and add high. +    def VMAH  : TernaryVRRdGeneric<"vmah", 0xE7AB>; +    def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>; +    def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>; +    def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>; + +    // Multiply and add logical high. +    def VMALH  : TernaryVRRdGeneric<"vmalh", 0xE7A9>; +    def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>; +    def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>; +    def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>; + +    // Multiply and add even. +    def VMAE  : TernaryVRRdGeneric<"vmae", 0xE7AE>; +    def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>; +    def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>; +    def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>; + +    // Multiply and add logical even. +    def VMALE  : TernaryVRRdGeneric<"vmale", 0xE7AC>; +    def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>; +    def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>; +    def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>; + +    // Multiply and add odd. +    def VMAO  : TernaryVRRdGeneric<"vmao", 0xE7AF>; +    def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>; +    def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>; +    def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>; + +    // Multiply and add logical odd. 
+    def VMALO  : TernaryVRRdGeneric<"vmalo", 0xE7AD>; +    def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>; +    def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>; +    def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>; +  } + +  let isCommutable = 1 in { +    // Multiply high. +    def VMH  : BinaryVRRcGeneric<"vmh", 0xE7A3>; +    def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>; +    def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>; +    def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>; + +    // Multiply logical high. +    def VMLH  : BinaryVRRcGeneric<"vmlh", 0xE7A1>; +    def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>; +    def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>; +    def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>; + +    // Multiply low. +    def VML   : BinaryVRRcGeneric<"vml", 0xE7A2>; +    def VMLB  : BinaryVRRc<"vmlb",  0xE7A2, mul, v128b, v128b, 0>; +    def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>; +    def VMLF  : BinaryVRRc<"vmlf",  0xE7A2, mul, v128f, v128f, 2>; + +    // Multiply even. +    def VME  : BinaryVRRcGeneric<"vme", 0xE7A6>; +    def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>; +    def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>; +    def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>; + +    // Multiply logical even. +    def VMLE  : BinaryVRRcGeneric<"vmle", 0xE7A4>; +    def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>; +    def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>; +    def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>; + +    // Multiply odd. +    def VMO  : BinaryVRRcGeneric<"vmo", 0xE7A7>; +    def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>; +    def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>; +    def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>; + +    // Multiply logical odd. +    def VMLO  : BinaryVRRcGeneric<"vmlo", 0xE7A5>; +    def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>; +    def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>; +    def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>; +  }    // Multiply sum logical. -  let Predicates = [FeatureVectorEnhancements1] in { +  let Predicates = [FeatureVectorEnhancements1], isCommutable = 1 in {      def VMSL  : QuaternaryVRRdGeneric<"vmsl", 0xE7B8>;      def VMSLG : QuaternaryVRRd<"vmslg", 0xE7B8, int_s390_vmslg,                                 v128q, v128g, v128g, v128q, 3>;    }    // Nand. -  let Predicates = [FeatureVectorEnhancements1] in +  let Predicates = [FeatureVectorEnhancements1], isCommutable = 1 in      def VNN : BinaryVRRc<"vnn", 0xE76E, null_frag, v128any, v128any>;    // Nor. -  def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>; +  let isCommutable = 1 in +    def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>;    def : InstAlias<"vnot\t$V1, $V2", (VNO VR128:$V1, VR128:$V2, VR128:$V2), 0>;    // Or. -  def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>; +  let isCommutable = 1 in +    def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>;    // Or with complement.    
let Predicates = [FeatureVectorEnhancements1] in @@ -1017,13 +1044,15 @@ multiclass VectorRounding<Instruction insn, TypedReg tr> {  let Predicates = [FeatureVector] in {    // Add. -  let Uses = [FPC], mayRaiseFPException = 1 in { +  let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {      def VFA   : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;      def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>; -    def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8>; +    def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8, 0, +                           "adbr">;      let Predicates = [FeatureVectorEnhancements1] in {        def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>; -      def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8>; +      def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8, 0, +                             "aebr">;        def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>;      }    } @@ -1104,10 +1133,12 @@ let Predicates = [FeatureVector] in {    let Uses = [FPC], mayRaiseFPException = 1 in {      def VFD   : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;      def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>; -    def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8>; +    def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8, 0, +                           "ddbr">;      let Predicates = [FeatureVectorEnhancements1] in {        def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>; -      def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8>; +      def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8, 0, +                             "debr">;        def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>;      }    } @@ -1135,7 +1166,8 @@ let Predicates = [FeatureVector] in {    let Uses = [FPC], mayRaiseFPException = 1 in {      def VLDE  : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;      def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_any_vextend, v128db, v128sb, 2, 0>; -    def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>; +    def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8, 0, +                          "ldebr">;    }    let Predicates = [FeatureVectorEnhancements1] in {      let Uses = [FPC], mayRaiseFPException = 1 in { @@ -1178,7 +1210,7 @@ let Predicates = [FeatureVector] in {      def : FPMinMax<insn, any_fmaximum, tr, 1>;    }    let Predicates = [FeatureVectorEnhancements1] in { -    let Uses = [FPC], mayRaiseFPException = 1 in { +    let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {        def VFMAX   : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;        def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,                                       v128db, v128db, 3, 0>; @@ -1204,7 +1236,7 @@ let Predicates = [FeatureVector] in {      def : FPMinMax<insn, any_fminimum, tr, 1>;    }    let Predicates = [FeatureVectorEnhancements1] in { -    let Uses = [FPC], mayRaiseFPException = 1 in { +    let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {        def VFMIN   : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;        def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,                                       v128db, v128db, 3, 0>; @@ -1225,43 +1257,49 @@ let Predicates = [FeatureVector] in {    }    // Multiply. 
-  let Uses = [FPC], mayRaiseFPException = 1 in { +  let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {      def VFM   : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;      def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>; -    def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8>; +    def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8, 0, +                           "mdbr">;      let Predicates = [FeatureVectorEnhancements1] in {        def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>; -      def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8>; +      def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8, 0, +                             "meebr">;        def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>;      }    }    // Multiply and add. -  let Uses = [FPC], mayRaiseFPException = 1 in { +  let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {      def VFMA   : TernaryVRReFloatGeneric<"vfma", 0xE78F>;      def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>; -    def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3>; +    def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3, +                             "madbr">;      let Predicates = [FeatureVectorEnhancements1] in {        def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>; -      def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2>; +      def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2, +                               "maebr">;        def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>;      }    }    // Multiply and subtract. -  let Uses = [FPC], mayRaiseFPException = 1 in { +  let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {      def VFMS   : TernaryVRReFloatGeneric<"vfms", 0xE78E>;      def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>; -    def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3>; +    def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3, +                             "msdbr">;      let Predicates = [FeatureVectorEnhancements1] in {        def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>; -      def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2>; +      def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2, +                               "msebr">;        def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>;      }    }    // Negative multiply and add. -  let Uses = [FPC], mayRaiseFPException = 1, +  let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1,        Predicates = [FeatureVectorEnhancements1] in {      def VFNMA   : TernaryVRReFloatGeneric<"vfnma", 0xE79F>;      def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>; @@ -1272,7 +1310,7 @@ let Predicates = [FeatureVector] in {    }    // Negative multiply and subtract. 
-  let Uses = [FPC], mayRaiseFPException = 1, +  let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1,        Predicates = [FeatureVectorEnhancements1] in {      def VFNMS   : TernaryVRReFloatGeneric<"vfnms", 0xE79E>;      def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>; @@ -1323,10 +1361,12 @@ let Predicates = [FeatureVector] in {    let Uses = [FPC], mayRaiseFPException = 1 in {      def VFSQ   : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;      def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>; -    def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8>; +    def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8, 0, +                           "sqdbr">;      let Predicates = [FeatureVectorEnhancements1] in {        def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>; -      def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8>; +      def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8, 0, +                             "sqebr">;        def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>;      }    } @@ -1335,10 +1375,12 @@ let Predicates = [FeatureVector] in {    let Uses = [FPC], mayRaiseFPException = 1 in {      def VFS   : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;      def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>; -    def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8>; +    def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8, 0, +                           "sdbr">;      let Predicates = [FeatureVectorEnhancements1] in {        def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>; -      def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8>; +      def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8, 0, +                             "sebr">;        def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>;      }    } @@ -1364,9 +1406,9 @@ let Predicates = [FeatureVector] in {    // Compare scalar.    let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {      def WFC   : CompareVRRaFloatGeneric<"wfc", 0xE7CB>; -    def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3>; +    def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3, "cdbr">;      let Predicates = [FeatureVectorEnhancements1] in { -      def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2>; +      def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2, "cebr">;        def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_any_fcmp, v128xb, 4>;      }    } @@ -1374,9 +1416,9 @@ let Predicates = [FeatureVector] in {    // Compare and signal scalar.    
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {      def WFK   : CompareVRRaFloatGeneric<"wfk", 0xE7CA>; -    def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3>; +    def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3, "kdbr">;      let Predicates = [FeatureVectorEnhancements1] in { -      def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2>; +      def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2, "kebr">;        def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, z_strict_fcmps, v128xb, 4>;      }    } @@ -1545,7 +1587,7 @@ def : VectorReplicateScalar<v16i8, VREPB, 7>;  def : VectorReplicateScalar<v8i16, VREPH, 3>;  def : VectorReplicateScalar<v4i32, VREPF, 1>; -// i64 replications are just a single isntruction. +// i64 replications are just a single instruction.  def : Pat<(v2i64 (z_replicate GR64:$scalar)),            (VLVGP GR64:$scalar, GR64:$scalar)>; diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index d1f6511ceea3..f755d5cd3d5b 100644 --- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -29,8 +29,8 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {    SystemZ::GPRRegs SpillGPRRegs;    SystemZ::GPRRegs RestoreGPRRegs; -  unsigned VarArgsFirstGPR; -  unsigned VarArgsFirstFPR; +  Register VarArgsFirstGPR; +  Register VarArgsFirstFPR;    unsigned VarArgsFrameIndex;    unsigned RegSaveFrameIndex;    int FramePointerSaveIndex; @@ -47,7 +47,7 @@ public:    // this function and the SP offset for the STMG.  These are 0 if no GPRs    // need to be saved or restored.    SystemZ::GPRRegs getSpillGPRRegs() const { return SpillGPRRegs; } -  void setSpillGPRRegs(unsigned Low, unsigned High, unsigned Offs) { +  void setSpillGPRRegs(Register Low, Register High, unsigned Offs) {      SpillGPRRegs.LowGPR = Low;      SpillGPRRegs.HighGPR = High;      SpillGPRRegs.GPROffset = Offs; @@ -57,7 +57,7 @@ public:    // this function and the SP offset for the LMG.  These are 0 if no GPRs    // need to be saved or restored.    SystemZ::GPRRegs getRestoreGPRRegs() const { return RestoreGPRRegs; } -  void setRestoreGPRRegs(unsigned Low, unsigned High, unsigned Offs) { +  void setRestoreGPRRegs(Register Low, Register High, unsigned Offs) {      RestoreGPRRegs.LowGPR = Low;      RestoreGPRRegs.HighGPR = High;      RestoreGPRRegs.GPROffset = Offs; @@ -65,12 +65,12 @@ public:    // Get and set the number of fixed (as opposed to variable) arguments    // that are passed in GPRs to this function. -  unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; } -  void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; } +  Register getVarArgsFirstGPR() const { return VarArgsFirstGPR; } +  void setVarArgsFirstGPR(Register GPR) { VarArgsFirstGPR = GPR; }    // Likewise FPRs. -  unsigned getVarArgsFirstFPR() const { return VarArgsFirstFPR; } -  void setVarArgsFirstFPR(unsigned FPR) { VarArgsFirstFPR = FPR; } +  Register getVarArgsFirstFPR() const { return VarArgsFirstFPR; } +  void setVarArgsFirstFPR(Register FPR) { VarArgsFirstFPR = FPR; }    // Get and set the frame index of the first stack vararg.    
unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; } diff --git a/llvm/lib/Target/SystemZ/SystemZOperands.td b/llvm/lib/Target/SystemZ/SystemZOperands.td index bd40f6d7bf40..a883daad73e7 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperands.td +++ b/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -22,8 +22,8 @@ class ImmediateTLSAsmOperand<string name>  }  class ImmediateOp<ValueType vt, string asmop> : Operand<vt> { -  let PrintMethod = "print"##asmop##"Operand"; -  let DecoderMethod = "decode"##asmop##"Operand"; +  let PrintMethod = "print"#asmop#"Operand"; +  let DecoderMethod = "decode"#asmop#"Operand";    let ParserMatchClass = !cast<AsmOperandClass>(asmop);    let OperandType = "OPERAND_IMMEDIATE";  } @@ -52,14 +52,14 @@ multiclass Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> {  // Constructs an asm operand for a PC-relative address.  SIZE says how  // many bits there are. -class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> { +class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"#size> {    let PredicateMethod = "isImm"; -  let ParserMethod = "parsePCRel"##size; +  let ParserMethod = "parsePCRel"#size;  }  class PCRelTLSAsmOperand<string size> -  : ImmediateTLSAsmOperand<"PCRelTLS"##size> { +  : ImmediateTLSAsmOperand<"PCRelTLS"#size> {    let PredicateMethod = "isImmTLS"; -  let ParserMethod = "parsePCRelTLS"##size; +  let ParserMethod = "parsePCRelTLS"#size;  }  // Constructs an operand for a PC-relative address with address type VT. @@ -92,9 +92,9 @@ class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop>  class AddressAsmOperand<string format, string bitsize, string dispsize,                          string length = "">    : AsmOperandClass { -  let Name = format##bitsize##"Disp"##dispsize##length; -  let ParserMethod = "parse"##format##bitsize; -  let RenderMethod = "add"##format##"Operands"; +  let Name = format#bitsize#"Disp"#dispsize#length; +  let ParserMethod = "parse"#format#bitsize; +  let RenderMethod = "add"#format#"Operands";  }  // Constructs an instruction operand for an addressing mode.  FORMAT, @@ -103,15 +103,15 @@ class AddressAsmOperand<string format, string bitsize, string dispsize,  // (base register, displacement, etc.).  class AddressOperand<string bitsize, string dispsize, string length,                       string format, dag operands> -  : Operand<!cast<ValueType>("i"##bitsize)> { -  let PrintMethod = "print"##format##"Operand"; -  let EncoderMethod = "get"##format##dispsize##length##"Encoding"; +  : Operand<!cast<ValueType>("i"#bitsize)> { +  let PrintMethod = "print"#format#"Operand"; +  let EncoderMethod = "get"#format#dispsize#length#"Encoding";    let DecoderMethod = -    "decode"##format##bitsize##"Disp"##dispsize##length##"Operand"; +    "decode"#format#bitsize#"Disp"#dispsize#length#"Operand";    let OperandType = "OPERAND_MEMORY";    let MIOperandInfo = operands;    let ParserMatchClass = -    !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length); +    !cast<AddressAsmOperand>(format#bitsize#"Disp"#dispsize#length);  }  // Constructs both a DAG pattern and instruction operand for an addressing mode. 
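Aside: much of the remaining churn in this patch (analyzeCompare, storeRegToStackSlot, the VarArgsFirst* fields above, getRegAllocationHints below) is the mechanical unsigned-to-Register migration. The toy model below sketches, under stated assumptions, why such a change is behavior-preserving: llvm::Register is a value wrapper around the old unsigned encoding with implicit conversions both ways, so the two types interconvert freely at call sites. ToyRegister and the top-bit encoding for virtual registers are illustrative assumptions of the sketch, not quotes of llvm/CodeGen/Register.h.

#include <cassert>

// Toy stand-in for llvm::Register, named ToyRegister to make clear that it
// is a sketch and not the real class.
class ToyRegister {
  unsigned Reg = 0;

public:
  ToyRegister() = default;
  ToyRegister(unsigned R) : Reg(R) {}       // implicit: old unsigned callers compile
  operator unsigned() const { return Reg; } // implicit: converts back losslessly

  // Assumption: virtual registers are tagged in the top bit of the
  // encoding, so the test is a sign check on the underlying integer.
  static bool isVirtualRegister(unsigned R) { return static_cast<int>(R) < 0; }
  bool isVirtual() const { return isVirtualRegister(Reg); }
  bool isValid() const { return Reg != 0; } // 0 means "no register"
};

int main() {
  ToyRegister None;                   // e.g. a missing index register
  ToyRegister Phys(42);               // some physical register number
  ToyRegister Virt((1u << 31) | 7);   // a virtual register
  assert(!None.isValid());
  assert(Phys.isValid() && !Phys.isVirtual());
  assert(Virt.isVirtual());
  unsigned Raw = Phys;                // flows back into unsigned interfaces
  assert(Raw == 42);
  return 0;
}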
@@ -126,45 +126,45 @@ class AddressOperand<string bitsize, string dispsize, string length,  class AddressingMode<string seltype, string bitsize, string dispsize,                       string suffix, string length, int numops, string format,                       dag operands> -  : ComplexPattern<!cast<ValueType>("i"##bitsize), numops, -                   "select"##seltype##dispsize##suffix##length, +  : ComplexPattern<!cast<ValueType>("i"#bitsize), numops, +                   "select"#seltype#dispsize#suffix#length,                     [add, sub, or, frameindex, z_adjdynalloc]>,      AddressOperand<bitsize, dispsize, length, format, operands>;  // An addressing mode with a base and displacement but no index.  class BDMode<string type, string bitsize, string dispsize, string suffix>    : AddressingMode<type, bitsize, dispsize, suffix, "", 2, "BDAddr", -                   (ops !cast<RegisterOperand>("ADDR"##bitsize), -                        !cast<Operand>("disp"##dispsize##"imm"##bitsize))>; +                   (ops !cast<RegisterOperand>("ADDR"#bitsize), +                        !cast<Operand>("disp"#dispsize#"imm"#bitsize))>;  // An addressing mode with a base, displacement and index.  class BDXMode<string type, string bitsize, string dispsize, string suffix>    : AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDXAddr", -                   (ops !cast<RegisterOperand>("ADDR"##bitsize), -                        !cast<Operand>("disp"##dispsize##"imm"##bitsize), -                        !cast<RegisterOperand>("ADDR"##bitsize))>; +                   (ops !cast<RegisterOperand>("ADDR"#bitsize), +                        !cast<Operand>("disp"#dispsize#"imm"#bitsize), +                        !cast<RegisterOperand>("ADDR"#bitsize))>;  // A BDMode paired with an immediate length operand of LENSIZE bits.  class BDLMode<string type, string bitsize, string dispsize, string suffix,                string lensize> -  : AddressingMode<type, bitsize, dispsize, suffix, "Len"##lensize, 3, +  : AddressingMode<type, bitsize, dispsize, suffix, "Len"#lensize, 3,                     "BDLAddr", -                   (ops !cast<RegisterOperand>("ADDR"##bitsize), -                        !cast<Operand>("disp"##dispsize##"imm"##bitsize), -                        !cast<Operand>("imm"##bitsize))>; +                   (ops !cast<RegisterOperand>("ADDR"#bitsize), +                        !cast<Operand>("disp"#dispsize#"imm"#bitsize), +                        !cast<Operand>("imm"#bitsize))>;  // A BDMode paired with a register length operand.  class BDRMode<string type, string bitsize, string dispsize, string suffix>    : AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDRAddr", -                   (ops !cast<RegisterOperand>("ADDR"##bitsize), -                        !cast<Operand>("disp"##dispsize##"imm"##bitsize), -                        !cast<RegisterOperand>("GR"##bitsize))>; +                   (ops !cast<RegisterOperand>("ADDR"#bitsize), +                        !cast<Operand>("disp"#dispsize#"imm"#bitsize), +                        !cast<RegisterOperand>("GR"#bitsize))>;  // An addressing mode with a base, displacement and a vector index.  
class BDVMode<string bitsize, string dispsize>    : AddressOperand<bitsize, dispsize, "", "BDVAddr", -                   (ops !cast<RegisterOperand>("ADDR"##bitsize), -                        !cast<Operand>("disp"##dispsize##"imm"##bitsize), +                   (ops !cast<RegisterOperand>("ADDR"#bitsize), +                        !cast<Operand>("disp"#dispsize#"imm"#bitsize),                          !cast<RegisterOperand>("VR128"))>;  //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index a6a72903e573..81af5fd854db 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -40,6 +40,10 @@ def SDT_ZWrapOffset         : SDTypeProfile<1, 2,                                               SDTCisSameAs<0, 2>,                                               SDTCisPtrTy<0>]>;  def SDT_ZAdjDynAlloc        : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; +def SDT_ZProbedAlloca       : SDTypeProfile<1, 2, +                                            [SDTCisSameAs<0, 1>, +                                             SDTCisSameAs<0, 2>, +                                             SDTCisPtrTy<0>]>;  def SDT_ZGR128Binary        : SDTypeProfile<1, 2,                                              [SDTCisVT<0, untyped>,                                               SDTCisInt<1>, @@ -269,6 +273,8 @@ def z_select_ccmask_1   : SDNode<"SystemZISD::SELECT_CCMASK",                                   SDT_ZSelectCCMask>;  def z_ipm_1             : SDNode<"SystemZISD::IPM", SDT_ZIPM>;  def z_adjdynalloc       : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; +def z_probed_alloca     : SDNode<"SystemZISD::PROBED_ALLOCA", SDT_ZProbedAlloca, +                                 [SDNPHasChain]>;  def z_popcnt            : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;  def z_smul_lohi         : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>;  def z_umul_lohi         : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>; @@ -374,7 +380,7 @@ def z_vstrsz_cc         : SDNode<"SystemZISD::VSTRSZ_CC",  def z_vftci             : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>;  class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW> -  : SDNode<"SystemZISD::"##name, profile, +  : SDNode<"SystemZISD::"#name, profile,             [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;  def z_atomic_swapw      : AtomicWOp<"ATOMIC_SWAPW">; diff --git a/llvm/lib/Target/SystemZ/SystemZPatterns.td b/llvm/lib/Target/SystemZ/SystemZPatterns.td index 501a69488397..e3190eddb9f1 100644 --- a/llvm/lib/Target/SystemZ/SystemZPatterns.td +++ b/llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -57,10 +57,10 @@ multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode,  // The inserted operand is loaded using LOAD from an address of mode MODE.  
multiclass InsertMem<string type, Instruction insn, RegisterOperand cls,                       SDPatternOperator load, AddressingMode mode> { -  def : Pat<(!cast<SDPatternOperator>("or_as_"##type) +  def : Pat<(!cast<SDPatternOperator>("or_as_"#type)                cls:$src1, (load mode:$src2)),              (insn cls:$src1, mode:$src2)>; -  def : Pat<(!cast<SDPatternOperator>("or_as_rev"##type) +  def : Pat<(!cast<SDPatternOperator>("or_as_rev"#type)                (load mode:$src2), cls:$src1),              (insn cls:$src1, mode:$src2)>;  } @@ -167,7 +167,7 @@ class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1,    : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))),          (insn tr2.op:$vec, suppress, mode)>; -// Use INSN to perform mininum/maximum operation OPERATOR on type TR. +// Use INSN to perform minimum/maximum operation OPERATOR on type TR.  // FUNCTION is the type of minimum/maximum function to perform.  class FPMinMax<Instruction insn, SDPatternOperator operator, TypedReg tr,                 bits<4> function> diff --git a/llvm/lib/Target/SystemZ/SystemZProcessors.td b/llvm/lib/Target/SystemZ/SystemZProcessors.td index af33a0300552..57c2411b8dcf 100644 --- a/llvm/lib/Target/SystemZ/SystemZProcessors.td +++ b/llvm/lib/Target/SystemZ/SystemZProcessors.td @@ -9,7 +9,7 @@  // Processor definitions.  //  // For compatibility with other compilers on the platform, each model can -// be identifed either by the system name (e.g. z10) or the level of the +// be identified either by the system name (e.g. z10) or the level of the  // architecture the model supports, as identified by the edition level  // of the z/Architecture Principles of Operation document (e.g. arch8).  // diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 0d5e7af92523..fe2aaca8429a 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -73,13 +73,10 @@ static void addHints(ArrayRef<MCPhysReg> Order,        Hints.push_back(Reg);  } -bool -SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, -                                           ArrayRef<MCPhysReg> Order, -                                           SmallVectorImpl<MCPhysReg> &Hints, -                                           const MachineFunction &MF, -                                           const VirtRegMap *VRM, -                                           const LiveRegMatrix *Matrix) const { +bool SystemZRegisterInfo::getRegAllocationHints( +    Register VirtReg, ArrayRef<MCPhysReg> Order, +    SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, +    const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {    const MachineRegisterInfo *MRI = &MF.getRegInfo();    const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); @@ -134,11 +131,11 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,    }    if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) { -    SmallVector<unsigned, 8> Worklist; -    SmallSet<unsigned, 4> DoneRegs; +    SmallVector<Register, 8> Worklist; +    SmallSet<Register, 4> DoneRegs;      Worklist.push_back(VirtReg);      while (Worklist.size()) { -      unsigned Reg = Worklist.pop_back_val(); +      Register Reg = Worklist.pop_back_val();        if (!DoneRegs.insert(Reg).second)          continue; @@ -267,14 +264,14 @@ 
SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,    // Decompose the frame index into a base and offset.    int FrameIndex = MI->getOperand(FIOperandNum).getIndex(); -  unsigned BasePtr; +  Register BasePtr;    int64_t Offset = (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr) +                      MI->getOperand(FIOperandNum + 1).getImm());    // Special handling of dbg_value instructions.    if (MI->isDebugValue()) {      MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, /*isDef*/ false); -    MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); +    MI->getDebugOffset().ChangeToImmediate(Offset);      return;    } diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h index 7044efef1ac6..9f2cca0c83f6 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -58,11 +58,9 @@ public:    const TargetRegisterClass *    getCrossCopyRegClass(const TargetRegisterClass *RC) const override; -  bool getRegAllocationHints(unsigned VirtReg, -                             ArrayRef<MCPhysReg> Order, +  bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,                               SmallVectorImpl<MCPhysReg> &Hints, -                             const MachineFunction &MF, -                             const VirtRegMap *VRM, +                             const MachineFunction &MF, const VirtRegMap *VRM,                               const LiveRegMatrix *Matrix) const override;    // Override TargetRegisterInfo.h. @@ -72,9 +70,6 @@ public:    bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {      return true;    } -  bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { -    return true; -  }    const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;    const uint32_t *getCallPreservedMask(const MachineFunction &MF,                                         CallingConv::ID CC) const override; diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td index 3567b0f3acf8..a85862e62749 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -35,15 +35,15 @@ multiclass SystemZRegClass<string name, list<ValueType> types, int size,                             dag regList, bit allocatable = 1> {    def AsmOperand : AsmOperandClass {      let Name = name; -    let ParserMethod = "parse"##name; +    let ParserMethod = "parse"#name;      let RenderMethod = "addRegOperands";    }    let isAllocatable = allocatable in      def Bit : RegisterClass<"SystemZ", types, size, regList> {        let Size = size;      } -  def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> { -    let ParserMatchClass = !cast<AsmOperandClass>(name##"AsmOperand"); +  def "" : RegisterOperand<!cast<RegisterClass>(name#"Bit")> { +    let ParserMatchClass = !cast<AsmOperandClass>(name#"AsmOperand");    }  } diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 47c925dcf730..6b4f35e5ba2b 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -47,7 +47,7 @@ static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence,  SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(      SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, 
SDValue Dst, SDValue Src, -    SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline, +    SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline,      MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {    if (IsVolatile)      return SDValue(); @@ -74,7 +74,7 @@ static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,  SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(      SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, -    SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile, +    SDValue Byte, SDValue Size, Align Alignment, bool IsVolatile,      MachinePointerInfo DstPtrInfo) const {    EVT PtrVT = Dst.getValueType(); @@ -97,20 +97,22 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(          unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes);          unsigned Size2 = Bytes - Size1;          SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1, -                                     Align, DstPtrInfo); +                                     Alignment.value(), DstPtrInfo);          if (Size2 == 0)            return Chain1;          Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,                            DAG.getConstant(Size1, DL, PtrVT));          DstPtrInfo = DstPtrInfo.getWithOffset(Size1); -        SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, -                                     std::min(Align, Size1), DstPtrInfo); +        SDValue Chain2 = memsetStore( +            DAG, DL, Chain, Dst, ByteVal, Size2, +            std::min((unsigned)Alignment.value(), Size1), DstPtrInfo);          return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);        }      } else {        // Handle one and two bytes using STC.        if (Bytes <= 2) { -        SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align); +        SDValue Chain1 = +            DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Alignment);          if (Bytes == 1)            return Chain1;          SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, @@ -131,7 +133,7 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(      // Copy the byte to the first location and then use MVC to copy      // it to the rest. 
-    Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align); +    Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Alignment);      SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,                                     DAG.getConstant(1, DL, PtrVT));      return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 7d63bae83cf3..a4a5b1fbdf90 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -25,14 +25,15 @@ public:    SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL,                                    SDValue Chain, SDValue Dst, SDValue Src, -                                  SDValue Size, unsigned Align, bool IsVolatile, -                                  bool AlwaysInline, +                                  SDValue Size, Align Alignment, +                                  bool IsVolatile, bool AlwaysInline,                                    MachinePointerInfo DstPtrInfo,                                    MachinePointerInfo SrcPtrInfo) const override;    SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,                                    SDValue Chain, SDValue Dst, SDValue Byte, -                                  SDValue Size, unsigned Align, bool IsVolatile, +                                  SDValue Size, Align Alignment, +                                  bool IsVolatile,                                    MachinePointerInfo DstPtrInfo) const override;    std::pair<SDValue, SDValue> diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp index f6184cec795a..3d27b70d6ef9 100644 --- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -46,6 +46,7 @@ private:    bool shortenOn001(MachineInstr &MI, unsigned Opcode);    bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode);    bool shortenFPConv(MachineInstr &MI, unsigned Opcode); +  bool shortenFusedFPOp(MachineInstr &MI, unsigned Opcode);    const SystemZInstrInfo *TII;    const TargetRegisterInfo *TRI; @@ -64,7 +65,7 @@ SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm)  // Tie operands if MI has become a two-address instruction.  
static void tieOpsIfNeeded(MachineInstr &MI) { -  if (MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) && +  if (MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&        !MI.getOperand(0).isTied())      MI.tieOperands(0, 1);  } @@ -175,6 +176,32 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {    return false;  } +bool SystemZShortenInst::shortenFusedFPOp(MachineInstr &MI, unsigned Opcode) { +  MachineOperand &DstMO = MI.getOperand(0); +  MachineOperand &LHSMO = MI.getOperand(1); +  MachineOperand &RHSMO = MI.getOperand(2); +  MachineOperand &AccMO = MI.getOperand(3); +  if (SystemZMC::getFirstReg(DstMO.getReg()) < 16 && +      SystemZMC::getFirstReg(LHSMO.getReg()) < 16 && +      SystemZMC::getFirstReg(RHSMO.getReg()) < 16 && +      SystemZMC::getFirstReg(AccMO.getReg()) < 16 && +      DstMO.getReg() == AccMO.getReg()) { +    MachineOperand Lhs(LHSMO); +    MachineOperand Rhs(RHSMO); +    MachineOperand Src(AccMO); +    MI.RemoveOperand(3); +    MI.RemoveOperand(2); +    MI.RemoveOperand(1); +    MI.setDesc(TII->get(Opcode)); +    MachineInstrBuilder(*MI.getParent()->getParent(), &MI) +        .add(Src) +        .add(Lhs) +        .add(Rhs); +    return true; +  } +  return false; +} +  // Process all instructions in MBB.  Return true if something changed.  bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {    bool Changed = false; @@ -235,6 +262,22 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {        Changed |= shortenOn001(MI, SystemZ::MEEBR);        break; +    case SystemZ::WFMADB: +      Changed |= shortenFusedFPOp(MI, SystemZ::MADBR); +      break; + +    case SystemZ::WFMASB: +      Changed |= shortenFusedFPOp(MI, SystemZ::MAEBR); +      break; + +    case SystemZ::WFMSDB: +      Changed |= shortenFusedFPOp(MI, SystemZ::MSDBR); +      break; + +    case SystemZ::WFMSSB: +      Changed |= shortenFusedFPOp(MI, SystemZ::MSEBR); +      break; +      case SystemZ::WFLCDB:        Changed |= shortenOn01(MI, SystemZ::LCDFR);        break; diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp index 5e8af81842c4..68e0b7ae66a4 100644 --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -9,6 +9,7 @@  #include "SystemZSubtarget.h"  #include "MCTargetDesc/SystemZMCTargetDesc.h"  #include "llvm/IR/GlobalValue.h" +#include "llvm/Target/TargetMachine.h"  using namespace llvm; @@ -28,11 +29,16 @@ void SystemZSubtarget::anchor() {}  SystemZSubtarget &  SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { -  std::string CPUName = CPU; +  StringRef CPUName = CPU;    if (CPUName.empty())      CPUName = "generic";    // Parse features string.    ParseSubtargetFeatures(CPUName, FS); + +  // -msoft-float implies -mno-vx. 
+  if (HasSoftFloat) +    HasVector = false; +    return *this;  } @@ -57,7 +63,7 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,        HasInsertReferenceBitsMultiple(false),        HasMiscellaneousExtensions3(false), HasMessageSecurityAssist9(false),        HasVectorEnhancements2(false), HasVectorPackedDecimalEnhancement(false), -      HasEnhancedSort(false), HasDeflateConversion(false), +      HasEnhancedSort(false), HasDeflateConversion(false), HasSoftFloat(false),        TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),        TLInfo(TM, *this), TSInfo(), FrameLowering() {} @@ -68,9 +74,12 @@ bool SystemZSubtarget::enableSubRegLiveness() const {  bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,                                         CodeModel::Model CM) const { -  // PC32DBL accesses require the low bit to be clear.  Note that a zero -  // value selects the default alignment and is therefore OK. -  if (GV->getAlignment() == 1) +  // PC32DBL accesses require the low bit to be clear. +  // +  // FIXME: Explicitly check for functions: the datalayout is currently +  // missing information about function pointers. +  const DataLayout &DL = GV->getParent()->getDataLayout(); +  if (GV->getPointerAlignment(DL) == 1 && !GV->getValueType()->isFunctionTy())      return false;    // For the small model, all locally-binding symbols are in range. diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h index fa3f65d93c91..4b49c37fe4e6 100644 --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -68,6 +68,7 @@ protected:    bool HasVectorPackedDecimalEnhancement;    bool HasEnhancedSort;    bool HasDeflateConversion; +  bool HasSoftFloat;  private:    Triple TargetTriple; @@ -239,6 +240,9 @@ public:    // Return true if the target has the deflate-conversion facility.    bool hasDeflateConversion() const { return HasDeflateConversion; } +  // Return true if soft float should be used. +  bool hasSoftFloat() const { return HasSoftFloat; } +    // Return true if GV can be accessed using LARL for reloc model RM    // and code model CM.    bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const; diff --git a/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/llvm/lib/Target/SystemZ/SystemZTDC.cpp index f103812eb096..7cb7dca2ea28 100644 --- a/llvm/lib/Target/SystemZ/SystemZTDC.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTDC.cpp @@ -44,7 +44,9 @@  //===----------------------------------------------------------------------===//  #include "SystemZ.h" +#include "SystemZSubtarget.h"  #include "llvm/ADT/MapVector.h" +#include "llvm/CodeGen/TargetPassConfig.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/IRBuilder.h"  #include "llvm/IR/InstIterator.h" @@ -53,6 +55,7 @@  #include "llvm/IR/IntrinsicsS390.h"  #include "llvm/IR/LegacyPassManager.h"  #include "llvm/IR/Module.h" +#include "llvm/Target/TargetMachine.h"  #include <deque>  #include <set> @@ -72,6 +75,11 @@ public:    }    bool runOnFunction(Function &F) override; + +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.addRequired<TargetPassConfig>(); + } +  private:    // Maps seen instructions that can be mapped to a TDC, values are    // (TDC operand, TDC mask, worthy flag) triples. 
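The new TargetPassConfig.h include and the getAnalysisUsage override above exist so the hunk below can query the per-function subtarget before doing any work. A minimal sketch of that pattern (the pass itself is hypothetical; only the TargetPassConfig plumbing mirrors the patch):

```c++
#include "SystemZSubtarget.h"  // target-private header, as in SystemZTDC.cpp
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

namespace {
// Hypothetical pass illustrating the gating pattern; not part of the patch.
struct SoftFloatGatedExample : public FunctionPass {
  static char ID;
  SoftFloatGatedExample() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Required so getAnalysis<TargetPassConfig>() succeeds below.
    AU.addRequired<TargetPassConfig>();
  }

  bool runOnFunction(Function &F) override {
    auto &TPC = getAnalysis<TargetPassConfig>();
    // Subtargets are per-function entities now, so query through F.
    if (TPC.getTM<TargetMachine>()
            .getSubtarget<SystemZSubtarget>(F)
            .hasSoftFloat())
      return false; // nothing to do for a soft-float function
    // ... perform the actual IR transformation here ...
    return false;
  }
};
} // end anonymous namespace

char SoftFloatGatedExample::ID = 0;
```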
@@ -310,6 +318,12 @@ void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {  }  bool SystemZTDCPass::runOnFunction(Function &F) { +  auto &TPC = getAnalysis<TargetPassConfig>(); +  if (TPC.getTM<TargetMachine>() +          .getSubtarget<SystemZSubtarget>(F) +          .hasSoftFloat()) +    return false; +    ConvertedInsts.clear();    LogicOpsWorklist.clear();    PossibleJunk.clear(); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index dfcdb5356485..3f467b200852 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -40,8 +40,10 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) {    // This is the case by default if CPU is z13 or later, and can be    // overridden via "[+-]vector" feature string elements.    bool VectorABI = true; +  bool SoftFloat = false;    if (CPU.empty() || CPU == "generic" || -      CPU == "z10" || CPU == "z196" || CPU == "zEC12") +      CPU == "z10" || CPU == "z196" || CPU == "zEC12" || +      CPU == "arch8" || CPU == "arch9" || CPU == "arch10")      VectorABI = false;    SmallVector<StringRef, 3> Features; @@ -51,9 +53,13 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) {        VectorABI = true;      if (Feature == "-vector")        VectorABI = false; +    if (Feature == "soft-float" || Feature == "+soft-float") +      SoftFloat = true; +    if (Feature == "-soft-float") +      SoftFloat = false;    } -  return VectorABI; +  return VectorABI && !SoftFloat;  }  static std::string computeDataLayout(const Triple &TT, StringRef CPU, @@ -154,13 +160,46 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,            getEffectiveRelocModel(RM),            getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT),            OL), -      TLOF(std::make_unique<TargetLoweringObjectFileELF>()), -      Subtarget(TT, CPU, FS, *this) { +      TLOF(std::make_unique<TargetLoweringObjectFileELF>()) {    initAsmInfo();  }  SystemZTargetMachine::~SystemZTargetMachine() = default; +const SystemZSubtarget * +SystemZTargetMachine::getSubtargetImpl(const Function &F) const { +  Attribute CPUAttr = F.getFnAttribute("target-cpu"); +  Attribute FSAttr = F.getFnAttribute("target-features"); + +  std::string CPU = !CPUAttr.hasAttribute(Attribute::None) +                        ? CPUAttr.getValueAsString().str() +                        : TargetCPU; +  std::string FS = !FSAttr.hasAttribute(Attribute::None) +                       ? FSAttr.getValueAsString().str() +                       : TargetFS; + +  // FIXME: This is related to the code below to reset the target options, +  // we need to know whether or not the soft float flag is set on the +  // function, so we can enable it as a subtarget feature. +  bool softFloat = +    F.hasFnAttribute("use-soft-float") && +    F.getFnAttribute("use-soft-float").getValueAsString() == "true"; + +  if (softFloat) +    FS += FS.empty() ? "+soft-float" : ",+soft-float"; + +  auto &I = SubtargetMap[CPU + FS]; +  if (!I) { +    // This needs to be done before we create a new subtarget since any +    // creation will depend on the TM and the code generation flags on the +    // function that reside in TargetOptions. +    resetTargetOptions(F); +    I = std::make_unique<SystemZSubtarget>(TargetTriple, CPU, FS, *this); +  } + +  return I.get(); +} +  namespace {  /// SystemZ Code Generator Pass Configuration Options. 
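Worth noting in UsesVectorABI above: the feature string is scanned front to back, so a later "[+-]vector" or "[+-]soft-float" element overrides an earlier one, and an active soft-float forces the result to false regardless of the vector setting. A standalone restatement of that scan, assuming plain std::string input rather than the StringRef utilities the patch uses:

```c++
#include <iostream>
#include <sstream>
#include <string>

static bool usesVectorABI(const std::string &CPU, const std::string &FS) {
  // Vector ABI is the default from z13 on; these older models (and the
  // generic default) predate the vector facility.
  bool VectorABI = !(CPU.empty() || CPU == "generic" || CPU == "z10" ||
                     CPU == "z196" || CPU == "zEC12" || CPU == "arch8" ||
                     CPU == "arch9" || CPU == "arch10");
  bool SoftFloat = false;

  std::stringstream SS(FS);
  std::string Feature;
  while (std::getline(SS, Feature, ',')) { // later elements override earlier
    if (Feature == "vector" || Feature == "+vector")
      VectorABI = true;
    if (Feature == "-vector")
      VectorABI = false;
    if (Feature == "soft-float" || Feature == "+soft-float")
      SoftFloat = true;
    if (Feature == "-soft-float")
      SoftFloat = false;
  }
  return VectorABI && !SoftFloat;
}

int main() {
  std::cout << usesVectorABI("z14", "-vector,+vector") << '\n';     // 1: last wins
  std::cout << usesVectorABI("z14", "+vector,+soft-float") << '\n'; // 0: soft-float wins
  std::cout << usesVectorABI("z10", "") << '\n';                    // 0: no vector facility
}
```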
@@ -183,6 +222,7 @@ public:    void addIRPasses() override;    bool addInstSelector() override;    bool addILPOpts() override; +  void addPreRegAlloc() override;    void addPostRewrite() override;    void addPostRegAlloc() override;    void addPreSched2() override; @@ -214,6 +254,10 @@ bool SystemZPassConfig::addILPOpts() {    return true;  } +void SystemZPassConfig::addPreRegAlloc() { +  addPass(createSystemZCopyPhysRegsPass(getSystemZTargetMachine())); +} +  void SystemZPassConfig::addPostRewrite() {    addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));  } diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h index ac04a080f580..9ea03e104fc9 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h @@ -26,7 +26,8 @@ namespace llvm {  class SystemZTargetMachine : public LLVMTargetMachine {    std::unique_ptr<TargetLoweringObjectFile> TLOF; -  SystemZSubtarget Subtarget; + +  mutable StringMap<std::unique_ptr<SystemZSubtarget>> SubtargetMap;  public:    SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU, @@ -35,11 +36,11 @@ public:                         CodeGenOpt::Level OL, bool JIT);    ~SystemZTargetMachine() override; -  const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; } - -  const SystemZSubtarget *getSubtargetImpl(const Function &) const override { -    return &Subtarget; -  } +  const SystemZSubtarget *getSubtargetImpl(const Function &) const override; +  // DO NOT IMPLEMENT: There is no such thing as a valid default subtarget, +  // subtargets are per-function entities based on the target-specific +  // attributes of each function. +  const SystemZSubtarget *getSubtargetImpl() const = delete;    // Override LLVMTargetMachine    TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index acec3c533585..864200e5f71c 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -30,7 +30,8 @@ using namespace llvm;  //  //===----------------------------------------------------------------------===// -int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, +                                  TTI::TargetCostKind CostKind) {    assert(Ty->isIntegerTy());    unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -63,7 +64,8 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {  }  int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, -                                  const APInt &Imm, Type *Ty) { +                                  const APInt &Imm, Type *Ty, +                                  TTI::TargetCostKind CostKind) {    assert(Ty->isIntegerTy());    unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -177,11 +179,12 @@ int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,      break;    } -  return SystemZTTIImpl::getIntImmCost(Imm, Ty); +  return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);  }  int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, -                                        const APInt &Imm, Type *Ty) { +                                        const APInt &Imm, Type *Ty, +                                        TTI::TargetCostKind CostKind) {    
assert(Ty->isIntegerTy());    unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -226,7 +229,7 @@ int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,        return TTI::TCC_Free;      break;    } -  return SystemZTTIImpl::getIntImmCost(Imm, Ty); +  return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);  }  TargetTransformInfo::PopcntSupportKind @@ -246,8 +249,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,    for (auto &BB : L->blocks())      for (auto &I : *BB) {        if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) { -        ImmutableCallSite CS(&I); -        if (const Function *F = CS.getCalledFunction()) { +        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {            if (isLoweredToCall(F))              HasCall = true;            if (F->getIntrinsicID() == Intrinsic::memcpy || @@ -259,7 +261,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,        }        if (isa<StoreInst>(&I)) {          Type *MemAccessTy = I.getOperand(0)->getType(); -        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0); +        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0, +                                     TTI::TCK_RecipThroughput);        }      } @@ -291,6 +294,10 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,    UP.Force = true;  } +void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, +                                           TTI::PeelingPreferences &PP) { +  BaseT::getPeelingPreferences(L, SE, PP); +}  bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,                                     TargetTransformInfo::LSRCost &C2) { @@ -323,6 +330,23 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const {    return 0;  } +unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses, +                                              unsigned NumStridedMemAccesses, +                                              unsigned NumPrefetches, +                                              bool HasCall) const { +  // Don't prefetch a loop with many far apart accesses. +  if (NumPrefetches > 16) +    return UINT_MAX; + +  // Emit prefetch instructions for smaller strides in cases where we think +  // the hardware prefetcher might not be able to keep up. +  if (NumStridedMemAccesses > 32 && +      NumStridedMemAccesses == NumMemAccesses && !HasCall) +    return 1; + +  return ST->hasMiscellaneousExtensions3() ? 8192 : 2048; +} +  bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {    EVT VT = TLI->getValueType(DL, DataType);    return (VT.isScalarInteger() && TLI->isTypeLegal(VT)); @@ -341,18 +365,25 @@ static unsigned getScalarSizeInBits(Type *Ty) {  // type until it is legal. This would e.g. return 4 for <6 x i64>, instead of  // 3.  static unsigned getNumVectorRegs(Type *Ty) { -  assert(Ty->isVectorTy() && "Expected vector type"); -  unsigned WideBits = getScalarSizeInBits(Ty) * Ty->getVectorNumElements(); +  auto *VTy = cast<FixedVectorType>(Ty); +  unsigned WideBits = getScalarSizeInBits(Ty) * VTy->getNumElements();    assert(WideBits > 0 && "Could not compute size of vector");    return ((WideBits % 128U) ? 
((WideBits / 128U) + 1) : (WideBits / 128U));  }  int SystemZTTIImpl::getArithmeticInstrCost( -    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, +    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, +    TTI::OperandValueKind Op1Info,      TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,      TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,      const Instruction *CxtI) { +  // TODO: Handle more cost kinds. +  if (CostKind != TTI::TCK_RecipThroughput) +    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, +                                         Op2Info, Opd1PropInfo, +                                         Opd2PropInfo, Args, CxtI); +    // TODO: return a good value for BB-VECTORIZER that includes the    // immediate loads, which we do not want to count for the loop    // vectorizer, since they are hopefully hoisted out of the loop. This @@ -391,10 +422,59 @@ int SystemZTTIImpl::getArithmeticInstrCost(      }    } -  if (Ty->isVectorTy()) { -    assert(ST->hasVector() && -           "getArithmeticInstrCost() called with vector type."); -    unsigned VF = Ty->getVectorNumElements(); +  if (!Ty->isVectorTy()) { +    // These FP operations are supported with a dedicated instruction for +    // float, double and fp128 (base implementation assumes float generally +    // costs 2). +    if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub || +        Opcode == Instruction::FMul || Opcode == Instruction::FDiv) +      return 1; + +    // There is no native support for FRem. +    if (Opcode == Instruction::FRem) +      return LIBCALL_COST; + +    // Give discount for some combined logical operations if supported. +    if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) { +      if (Opcode == Instruction::Xor) { +        for (const Value *A : Args) { +          if (const Instruction *I = dyn_cast<Instruction>(A)) +            if (I->hasOneUse() && +                (I->getOpcode() == Instruction::And || +                 I->getOpcode() == Instruction::Or || +                 I->getOpcode() == Instruction::Xor)) +              return 0; +        } +      } +      else if (Opcode == Instruction::Or || Opcode == Instruction::And) { +        for (const Value *A : Args) { +          if (const Instruction *I = dyn_cast<Instruction>(A)) +            if (I->hasOneUse() && I->getOpcode() == Instruction::Xor) +              return 0; +        } +      } +    } + +    // Or requires one instruction, although it has custom handling for i64. +    if (Opcode == Instruction::Or) +      return 1; + +    if (Opcode == Instruction::Xor && ScalarBits == 1) { +      if (ST->hasLoadStoreOnCond2()) +        return 5; // 2 * (li 0; loc 1); xor +      return 7; // 2 * ipm sequences ; xor ; shift ; compare +    } + +    if (DivRemConstPow2) +      return (SignedDivRem ? SDivPow2Cost : 1); +    if (DivRemConst) +      return DivMulSeqCost; +    if (SignedDivRem || UnsignedDivRem) +      return DivInstrCost; +  } +  else if (ST->hasVector()) { +    auto *VTy = cast<FixedVectorType>(Ty); +    unsigned VF = VTy->getNumElements();      unsigned NumVectors = getNumVectorRegs(Ty);      // These vector operations are custom handled, but are still supported @@ -407,7 +487,7 @@ int SystemZTTIImpl::getArithmeticInstrCost(      if (DivRemConstPow2)        return (NumVectors * (SignedDivRem ? 
SDivPow2Cost : 1));      if (DivRemConst) -      return VF * DivMulSeqCost + getScalarizationOverhead(Ty, Args); +      return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args);      if ((SignedDivRem || UnsignedDivRem) && VF > 4)        // Temporary hack: disable high vectorization factors with integer        // division/remainder, which will get scalarized and handled with @@ -429,8 +509,8 @@ int SystemZTTIImpl::getArithmeticInstrCost(          // Return the cost of multiple scalar invocation plus the cost of          // inserting and extracting the values.          unsigned ScalarCost = -            getArithmeticInstrCost(Opcode, Ty->getScalarType()); -        unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args); +            getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind); +        unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(VTy, Args);          // FIXME: VF 2 for these FP operations are currently just as          // expensive as for VF 4.          if (VF == 2) @@ -447,101 +527,51 @@ int SystemZTTIImpl::getArithmeticInstrCost(      // There is no native support for FRem.      if (Opcode == Instruction::FRem) { -      unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(Ty, Args); +      unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args);        // FIXME: VF 2 for float is currently just as expensive as for VF 4.        if (VF == 2 && ScalarBits == 32)          Cost *= 2;        return Cost;      }    } -  else {  // Scalar: -    // These FP operations are supported with a dedicated instruction for -    // float, double and fp128 (base implementation assumes float generally -    // costs 2). -    if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub || -        Opcode == Instruction::FMul || Opcode == Instruction::FDiv) -      return 1; - -    // There is no native support for FRem. -    if (Opcode == Instruction::FRem) -      return LIBCALL_COST; - -    // Give discount for some combined logical operations if supported. -    if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) { -      if (Opcode == Instruction::Xor) { -        for (const Value *A : Args) { -          if (const Instruction *I = dyn_cast<Instruction>(A)) -            if (I->hasOneUse() && -                (I->getOpcode() == Instruction::And || -                 I->getOpcode() == Instruction::Or || -                 I->getOpcode() == Instruction::Xor)) -              return 0; -        } -      } -      else if (Opcode == Instruction::Or || Opcode == Instruction::And) { -        for (const Value *A : Args) { -          if (const Instruction *I = dyn_cast<Instruction>(A)) -            if (I->hasOneUse() && I->getOpcode() == Instruction::Xor) -              return 0; -        } -      } -    } - -    // Or requires one instruction, although it has custom handling for i64. -    if (Opcode == Instruction::Or) -      return 1; - -    if (Opcode == Instruction::Xor && ScalarBits == 1) { -      if (ST->hasLoadStoreOnCond2()) -        return 5; // 2 * (li 0; loc 1); xor -      return 7; // 2 * ipm sequences ; xor ; shift ; compare -    } - -    if (DivRemConstPow2) -      return (SignedDivRem ? SDivPow2Cost : 1); -    if (DivRemConst) -      return DivMulSeqCost; -    if (SignedDivRem || UnsignedDivRem) -      return DivInstrCost; -  }    // Fallback to the default implementation. 
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, +  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,                                         Opd1PropInfo, Opd2PropInfo, Args, CxtI);  } -int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, -                                   Type *SubTp) { -  assert (Tp->isVectorTy()); -  assert (ST->hasVector() && "getShuffleCost() called."); -  unsigned NumVectors = getNumVectorRegs(Tp); +int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, +                                   int Index, VectorType *SubTp) { +  if (ST->hasVector()) { +    unsigned NumVectors = getNumVectorRegs(Tp); -  // TODO: Since fp32 is expanded, the shuffle cost should always be 0. +    // TODO: Since fp32 is expanded, the shuffle cost should always be 0. -  // FP128 values are always in scalar registers, so there is no work -  // involved with a shuffle, except for broadcast. In that case register -  // moves are done with a single instruction per element. -  if (Tp->getScalarType()->isFP128Ty()) -    return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0); +    // FP128 values are always in scalar registers, so there is no work +    // involved with a shuffle, except for broadcast. In that case register +    // moves are done with a single instruction per element. +    if (Tp->getScalarType()->isFP128Ty()) +      return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0); -  switch (Kind) { -  case  TargetTransformInfo::SK_ExtractSubvector: -    // ExtractSubvector Index indicates start offset. +    switch (Kind) { +    case  TargetTransformInfo::SK_ExtractSubvector: +      // ExtractSubvector Index indicates start offset. -    // Extracting a subvector from first index is a noop. -    return (Index == 0 ? 0 : NumVectors); +      // Extracting a subvector from first index is a noop. +      return (Index == 0 ? 0 : NumVectors); -  case TargetTransformInfo::SK_Broadcast: -    // Loop vectorizer calls here to figure out the extra cost of -    // broadcasting a loaded value to all elements of a vector. Since vlrep -    // loads and replicates with a single instruction, adjust the returned -    // value. -    return NumVectors - 1; +    case TargetTransformInfo::SK_Broadcast: +      // Loop vectorizer calls here to figure out the extra cost of +      // broadcasting a loaded value to all elements of a vector. Since vlrep +      // loads and replicates with a single instruction, adjust the returned +      // value. +      return NumVectors - 1; -  default: +    default: -    // SystemZ supports single instruction permutation / replication. -    return NumVectors; +      // SystemZ supports single instruction permutation / replication. 
+      return NumVectors; +    }    }    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); @@ -564,8 +594,9 @@ getVectorTruncCost(Type *SrcTy, Type *DstTy) {    assert (SrcTy->isVectorTy() && DstTy->isVectorTy());    assert (SrcTy->getPrimitiveSizeInBits() > DstTy->getPrimitiveSizeInBits() &&            "Packing must reduce size of vector type."); -  assert (SrcTy->getVectorNumElements() == DstTy->getVectorNumElements() && -          "Packing should not change number of elements."); +  assert(cast<FixedVectorType>(SrcTy)->getNumElements() == +             cast<FixedVectorType>(DstTy)->getNumElements() && +         "Packing should not change number of elements.");    // TODO: Since fp32 is expanded, the extract cost should always be 0. @@ -580,7 +611,7 @@ getVectorTruncCost(Type *SrcTy, Type *DstTy) {    unsigned Cost = 0;    unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy); -  unsigned VF = SrcTy->getVectorNumElements(); +  unsigned VF = cast<FixedVectorType>(SrcTy)->getNumElements();    for (unsigned P = 0; P < Log2Diff; ++P) {      if (NumParts > 1)        NumParts /= 2; @@ -642,7 +673,7 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {      // Return the potentially vectorized type based on 'I' and 'VF'.  'I' may      // be either scalar or already vectorized with a same or lesser VF.      Type *ElTy = OpTy->getScalarType(); -    return VectorType::get(ElTy, VF); +    return FixedVectorType::get(ElTy, VF);    }    return nullptr; @@ -653,8 +684,8 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {  unsigned SystemZTTIImpl::  getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,                                const Instruction *I) { -  assert (Dst->isVectorTy()); -  unsigned VF = Dst->getVectorNumElements(); +  auto *DstVTy = cast<FixedVectorType>(Dst); +  unsigned VF = DstVTy->getNumElements();    unsigned Cost = 0;    // If we know what the widths of the compared operands, get any cost of    // converting it to match Dst. Otherwise assume same widths. @@ -668,14 +699,50 @@ getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,  }  int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, +                                     TTI::TargetCostKind CostKind,                                       const Instruction *I) { +  // FIXME: Can the logic below also be used for these cost kinds? +  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) { +    int BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I); +    return BaseCost == 0 ? BaseCost : 1; +  } +    unsigned DstScalarBits = Dst->getScalarSizeInBits();    unsigned SrcScalarBits = Src->getScalarSizeInBits(); -  if (Src->isVectorTy()) { -    assert (ST->hasVector() && "getCastInstrCost() called with vector type."); -    assert (Dst->isVectorTy()); -    unsigned VF = Src->getVectorNumElements(); +  if (!Src->isVectorTy()) { +    assert (!Dst->isVectorTy()); + +    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) { +      if (SrcScalarBits >= 32 || +          (I != nullptr && isa<LoadInst>(I->getOperand(0)))) +        return 1; +      return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/; +    } + +    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && +        Src->isIntegerTy(1)) { +      if (ST->hasLoadStoreOnCond2()) +        return 2; // li 0; loc 1 + +      // This should be extension of a compare i1 result, which is done with +      // ipm and a varying sequence of instructions. 
+      unsigned Cost = 0; +      if (Opcode == Instruction::SExt) +        Cost = (DstScalarBits < 64 ? 3 : 4); +      if (Opcode == Instruction::ZExt) +        Cost = 3; +      Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr); +      if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy()) +        // If operands of an fp-type was compared, this costs +1. +        Cost++; +      return Cost; +    } +  } +  else if (ST->hasVector()) { +    auto *SrcVecTy = cast<FixedVectorType>(Src); +    auto *DstVecTy = cast<FixedVectorType>(Dst); +    unsigned VF = SrcVecTy->getNumElements();      unsigned NumDstVectors = getNumVectorRegs(Dst);      unsigned NumSrcVectors = getNumVectorRegs(Src); @@ -720,7 +787,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,        // inserting and extracting the values. Base implementation does not        // realize float->int gets scalarized.        unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(), -                                             Src->getScalarType()); +                                             Src->getScalarType(), CostKind);        unsigned TotCost = VF * ScalarCost;        bool NeedsInserts = true, NeedsExtracts = true;        // FP128 registers do not get inserted or extracted. @@ -731,8 +798,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,            (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))          NeedsExtracts = false; -      TotCost += getScalarizationOverhead(Src, false, NeedsExtracts); -      TotCost += getScalarizationOverhead(Dst, NeedsInserts, false); +      TotCost += getScalarizationOverhead(SrcVecTy, false, NeedsExtracts); +      TotCost += getScalarizationOverhead(DstVecTy, NeedsInserts, false);        // FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.        if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32) @@ -743,7 +810,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,      if (Opcode == Instruction::FPTrunc) {        if (SrcScalarBits == 128)  // fp128 -> double/float + inserts of elements. -        return VF /*ldxbr/lexbr*/ + getScalarizationOverhead(Dst, true, false); +        return VF /*ldxbr/lexbr*/ + +               getScalarizationOverhead(DstVecTy, true, false);        else // double -> float          return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);      } @@ -756,40 +824,11 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,          return VF * 2;        }        // -> fp128.  VF * lxdb/lxeb + extraction of elements. -      return VF + getScalarizationOverhead(Src, false, true); +      return VF + getScalarizationOverhead(SrcVecTy, false, true);      }    } -  else { // Scalar -    assert (!Dst->isVectorTy()); - -    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) { -      if (SrcScalarBits >= 32 || -          (I != nullptr && isa<LoadInst>(I->getOperand(0)))) -        return 1; -      return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/; -    } -    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && -        Src->isIntegerTy(1)) { -      if (ST->hasLoadStoreOnCond2()) -        return 2; // li 0; loc 1 - -      // This should be extension of a compare i1 result, which is done with -      // ipm and a varying sequence of instructions. -      unsigned Cost = 0; -      if (Opcode == Instruction::SExt) -        Cost = (DstScalarBits < 64 ? 
3 : 4); -      if (Opcode == Instruction::ZExt) -        Cost = 3; -      Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr); -      if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy()) -        // If operands of an fp-type was compared, this costs +1. -        Cost++; -      return Cost; -    } -  } - -  return BaseT::getCastInstrCost(Opcode, Dst, Src, I); +  return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);  }  // Scalar i8 / i16 operations will typically be made after first extending @@ -805,10 +844,38 @@ static unsigned getOperandsExtensionCost(const Instruction *I) {  }  int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, -                                       Type *CondTy, const Instruction *I) { -  if (ValTy->isVectorTy()) { -    assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type."); -    unsigned VF = ValTy->getVectorNumElements(); +                                       Type *CondTy, +                                       TTI::TargetCostKind CostKind, +                                       const Instruction *I) { +  if (CostKind != TTI::TCK_RecipThroughput) +    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind); + +  if (!ValTy->isVectorTy()) { +    switch (Opcode) { +    case Instruction::ICmp: { +      // A loaded value compared with 0 with multiple users becomes Load and +      // Test. The load is then not foldable, so return 0 cost for the ICmp. +      unsigned ScalarBits = ValTy->getScalarSizeInBits(); +      if (I != nullptr && ScalarBits >= 32) +        if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0))) +          if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1))) +            if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() && +                C->getZExtValue() == 0) +              return 0; + +      unsigned Cost = 1; +      if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16) +        Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2); +      return Cost; +    } +    case Instruction::Select: +      if (ValTy->isFloatingPointTy()) +        return 4; // No load on condition for FP - costs a conditional jump. +      return 1; // Load On Condition / Select Register. +    } +  } +  else if (ST->hasVector()) { +    unsigned VF = cast<FixedVectorType>(ValTy)->getNumElements();      // Called with a compare instruction.      if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) { @@ -856,32 +923,8 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,        return getNumVectorRegs(ValTy) /*vsel*/ + PackCost;      }    } -  else { // Scalar -    switch (Opcode) { -    case Instruction::ICmp: { -      // A loaded value compared with 0 with multiple users becomes Load and -      // Test. The load is then not foldable, so return 0 cost for the ICmp. -      unsigned ScalarBits = ValTy->getScalarSizeInBits(); -      if (I != nullptr && ScalarBits >= 32) -        if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0))) -          if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1))) -            if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() && -                C->getZExtValue() == 0) -              return 0; - -      unsigned Cost = 1; -      if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16) -        Cost += (I != nullptr ? 
getOperandsExtensionCost(I) : 2); -      return Cost; -    } -    case Instruction::Select: -      if (ValTy->isFloatingPointTy()) -        return 4; // No load on condition for FP - costs a conditional jump. -      return 1; // Load On Condition / Select Register. -    } -  } -  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr); +  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);  }  int SystemZTTIImpl:: @@ -995,9 +1038,14 @@ static bool isBswapIntrinsicCall(const Value *V) {  int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,                                      MaybeAlign Alignment, unsigned AddressSpace, +                                    TTI::TargetCostKind CostKind,                                      const Instruction *I) {    assert(!Src->isVoidTy() && "Invalid type"); +  // TODO: Handle other cost kinds. +  if (CostKind != TTI::TCK_RecipThroughput) +    return 1; +    if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) {      // Store the load or its truncated or extended value in FoldedValue.      const Instruction *FoldedValue = nullptr; @@ -1058,16 +1106,13 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,  // needed for using / defining the vector operands. The SystemZ version does  // roughly the same but bases the computations on vector permutations  // instead. -int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, -                                               unsigned Factor, -                                               ArrayRef<unsigned> Indices, -                                               unsigned Alignment, -                                               unsigned AddressSpace, -                                               bool UseMaskForCond, -                                               bool UseMaskForGaps) { +int SystemZTTIImpl::getInterleavedMemoryOpCost( +    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, +    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, +    bool UseMaskForCond, bool UseMaskForGaps) {    if (UseMaskForCond || UseMaskForGaps)      return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, -                                             Alignment, AddressSpace, +                                             Alignment, AddressSpace, CostKind,                                               UseMaskForCond, UseMaskForGaps);    assert(isa<VectorType>(VecTy) &&           "Expect a vector type for interleaved memory op"); @@ -1075,7 +1120,7 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,    // Return the ceiling of dividing A by B.    
auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; -  unsigned NumElts = VecTy->getVectorNumElements(); +  unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");    unsigned VF = NumElts / Factor;    unsigned NumEltsPerVecReg = (128U / getScalarSizeInBits(VecTy)); @@ -1125,22 +1170,10 @@ static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {    return -1;  } -int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, -                                          ArrayRef<Value *> Args, -                                          FastMathFlags FMF, unsigned VF) { -  int Cost = getVectorIntrinsicInstrCost(ID, RetTy); -  if (Cost != -1) -    return Cost; -  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); -} - -int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, -                                          ArrayRef<Type *> Tys, -                                          FastMathFlags FMF, -                                          unsigned ScalarizationCostPassed) { -  int Cost = getVectorIntrinsicInstrCost(ID, RetTy); +int SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, +                                          TTI::TargetCostKind CostKind) { +  int Cost = getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType());    if (Cost != -1)      return Cost; -  return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, -                                      FMF, ScalarizationCostPassed); +  return BaseT::getIntrinsicInstrCost(ICA, CostKind);  } diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index bc4d066881c1..7f8f7f6f923f 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -38,17 +38,21 @@ public:    unsigned getInliningThresholdMultiplier() { return 3; } -  int getIntImmCost(const APInt &Imm, Type *Ty); +  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); -  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); +  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, +                        Type *Ty, TTI::TargetCostKind CostKind);    int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, -                          Type *Ty); +                          Type *Ty, TTI::TargetCostKind CostKind);    TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);    void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,                                 TTI::UnrollingPreferences &UP); +  void getPeelingPreferences(Loop *L, ScalarEvolution &SE, +                             TTI::PeelingPreferences &PP); +    bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,                       TargetTransformInfo::LSRCost &C2);    /// @} @@ -60,8 +64,12 @@ public:    unsigned getRegisterBitWidth(bool Vector) const;    unsigned getCacheLineSize() const override { return 256; } -  unsigned getPrefetchDistance() const override { return 2000; } -  unsigned getMinPrefetchStride() const override { return 2048; } +  unsigned getPrefetchDistance() const override { return 4500; } +  unsigned getMinPrefetchStride(unsigned NumMemAccesses, +                                unsigned NumStridedMemAccesses, +                                unsigned NumPrefetches, +                                bool 
HasCall) const override;
+  bool enableWritePrefetching() const override { return true; }

   bool hasDivRemOp(Type *DataType, bool IsSigned);
   bool prefersVectorizedAddressing() { return false; }
@@ -71,40 +79,39 @@ public:
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
       const Instruction *CxtI = nullptr);
-  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+                     VectorType *SubTp);
   unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
   unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
   unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
                                          const Instruction *I);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
   bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                      unsigned AddressSpace, const Instruction *I = nullptr);
-
-  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
-                                 unsigned Factor,
-                                 ArrayRef<unsigned> Indices,
-                                 unsigned Alignment,
-                                 unsigned AddressSpace,
-                                 bool UseMaskForCond = false,
-                                 bool UseMaskForGaps = false);
-
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Value *> Args, FastMathFlags FMF,
-                            unsigned VF = 1);
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Type *> Tys, FastMathFlags FMF,
-                            unsigned ScalarizationCostPassed = UINT_MAX);
+                      unsigned AddressSpace, TTI::TargetCostKind CostKind,
+                      const Instruction *I = nullptr);
+
+  int getInterleavedMemoryOpCost(
+      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+      Align Alignment, unsigned AddressSpace,
+      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+      bool UseMaskForCond = false, bool UseMaskForGaps = false);
+
+  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                            TTI::TargetCostKind CostKind);
   /// @}
 };
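The prefetch retuning that closes this patch (distance raised from 2000 to 4500, write prefetching enabled, and a minimum stride that now depends on loop shape) reads most clearly as a single decision function. A self-contained restatement for illustration; minPrefetchStride and HasMiscExt3 are stand-ins for the member function and the ST->hasMiscellaneousExtensions3() query:

```c++
#include <climits>
#include <iostream>

// HasMiscExt3 stands in for ST->hasMiscellaneousExtensions3().
unsigned minPrefetchStride(unsigned NumMemAccesses,
                           unsigned NumStridedMemAccesses,
                           unsigned NumPrefetches, bool HasCall,
                           bool HasMiscExt3) {
  // Many prefetch candidates: return UINT_MAX so no stride qualifies and
  // prefetching is effectively disabled for the loop.
  if (NumPrefetches > 16)
    return UINT_MAX;
  // Large, purely strided, call-free loops can outrun the hardware
  // prefetcher: accept any stride (minimum of 1 byte).
  if (NumStridedMemAccesses > 32 &&
      NumStridedMemAccesses == NumMemAccesses && !HasCall)
    return 1;
  // Otherwise only emit prefetches for strides the hardware is unlikely
  // to cover on its own.
  return HasMiscExt3 ? 8192 : 2048;
}

int main() {
  std::cout << minPrefetchStride(40, 40, 8, false, false) << '\n'; // 1
  std::cout << minPrefetchStride(10, 4, 8, false, true) << '\n';   // 8192
  std::cout << minPrefetchStride(64, 64, 20, false, true) << '\n'; // UINT_MAX
}
```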
