diff options
Diffstat (limited to 'llvm/lib/MC/MCParser/MasmParser.cpp')
-rw-r--r-- | llvm/lib/MC/MCParser/MasmParser.cpp | 871 |
1 files changed, 661 insertions, 210 deletions
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 4957ee7a0323..7b4d6e529cc2 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -56,6 +57,7 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" @@ -65,6 +67,7 @@ #include <climits> #include <cstddef> #include <cstdint> +#include <ctime> #include <deque> #include <memory> #include <sstream> @@ -128,9 +131,11 @@ struct FieldInfo; struct StructInfo { StringRef Name; bool IsUnion = false; + bool Initializable = true; unsigned Alignment = 0; - unsigned Size = 0; unsigned AlignmentSize = 0; + unsigned NextOffset = 0; + unsigned Size = 0; std::vector<FieldInfo> Fields; StringMap<size_t> FieldsByName; @@ -322,7 +327,7 @@ struct StructInitializer { struct FieldInfo { // Offset of the field within the containing STRUCT. - size_t Offset = 0; + unsigned Offset = 0; // Total size of the field (= LengthOf * Type). unsigned SizeOf = 0; @@ -344,11 +349,10 @@ FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT, FieldsByName[FieldName.lower()] = Fields.size(); Fields.emplace_back(FT); FieldInfo &Field = Fields.back(); - if (IsUnion) { - Field.Offset = 0; - } else { - Size = llvm::alignTo(Size, std::min(Alignment, FieldAlignmentSize)); - Field.Offset = Size; + Field.Offset = + llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize)); + if (!IsUnion) { + NextOffset = std::max(NextOffset, Field.Offset); } AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize); return Field; @@ -371,6 +375,10 @@ private: /// This is the current buffer index we're lexing from as managed by the /// SourceMgr object. unsigned CurBuffer; + + /// time of assembly + struct tm TM; + std::vector<bool> EndStatementAtEOFStack; AsmCond TheCondState; @@ -383,10 +391,11 @@ private: /// maps assembly-time variable names to variables. struct Variable { + enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE }; + StringRef Name; - bool Redefinable = true; + RedefinableKind Redefinable = REDEFINABLE; bool IsText = false; - int64_t NumericValue = 0; std::string TextValue; }; StringMap<Variable> Variables; @@ -446,7 +455,7 @@ private: public: MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, - const MCAsmInfo &MAI, unsigned CB); + const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0); MasmParser(const MasmParser &) = delete; MasmParser &operator=(const MasmParser &) = delete; ~MasmParser() override; @@ -489,7 +498,9 @@ public: bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) override; bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) override; - const AsmToken &Lex() override; + enum ExpandKind { ExpandMacros, DoNotExpandMacros }; + const AsmToken &Lex(ExpandKind ExpandNextToken); + const AsmToken &Lex() override { return Lex(ExpandMacros); } void setParsingMSInlineAsm(bool V) override { ParsingMSInlineAsm = V; @@ -509,9 +520,9 @@ public: bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override; - bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString, - unsigned &NumOutputs, unsigned &NumInputs, - SmallVectorImpl<std::pair<void *,bool>> &OpDecls, + bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs, + unsigned &NumInputs, + SmallVectorImpl<std::pair<void *, bool>> &OpDecls, SmallVectorImpl<std::string> &Constraints, SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII, const MCInstPrinter *IP, @@ -532,7 +543,11 @@ public: /// Parse an identifier or string (as a quoted identifier) /// and set \p Res to the identifier contents. - bool parseIdentifier(StringRef &Res) override; + enum IdentifierPositionKind { StandardPosition, StartOfStatement }; + bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position); + bool parseIdentifier(StringRef &Res) override { + return parseIdentifier(Res, StandardPosition); + } void eatToEndOfStatement() override; bool checkForValidSection() override; @@ -540,6 +555,9 @@ public: /// } private: + bool expandMacros(); + const AsmToken peekTok(bool ShouldSkipSpace = true); + bool parseStatement(ParseStatementInfo &Info, MCAsmParserSemaCallback *SI); bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites); @@ -665,6 +683,7 @@ private: DK_REAL8, DK_REAL10, DK_ALIGN, + DK_EVEN, DK_ORG, DK_ENDR, DK_EXTERN, @@ -783,6 +802,37 @@ private: /// def_range types parsed by this class. StringMap<CVDefRangeType> CVDefRangeTypeMap; + // Generic (target and platform independent) directive parsing. + enum BuiltinSymbol { + BI_NO_SYMBOL, // Placeholder + BI_DATE, + BI_TIME, + BI_VERSION, + BI_FILECUR, + BI_FILENAME, + BI_LINE, + BI_CURSEG, + BI_CPU, + BI_INTERFACE, + BI_CODE, + BI_DATA, + BI_FARDATA, + BI_WORDSIZE, + BI_CODESIZE, + BI_DATASIZE, + BI_MODEL, + BI_STACK, + }; + + /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this + /// class. + StringMap<BuiltinSymbol> BuiltinSymbolMap; + + const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc); + + llvm::Optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol, + SMLoc StartLoc); + // ".ascii", ".asciz", ".string" bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); @@ -865,10 +915,13 @@ private: // "=", "equ", "textequ" bool parseDirectiveEquate(StringRef IDVal, StringRef Name, - DirectiveKind DirKind); + DirectiveKind DirKind, SMLoc NameLoc); + + bool parseDirectiveOrg(); // "org" - bool parseDirectiveOrg(); // ".org" + bool emitAlignTo(int64_t Alignment); bool parseDirectiveAlign(); // "align" + bool parseDirectiveEven(); // "even" // ".file", ".line", ".loc", ".stabs" bool parseDirectiveFile(SMLoc DirectiveLoc); @@ -999,10 +1052,11 @@ private: bool parseDirectiveRadix(SMLoc DirectiveLoc); // "echo" - bool parseDirectiveEcho(); + bool parseDirectiveEcho(SMLoc DirectiveLoc); void initializeDirectiveKindMap(); void initializeCVDefRangeTypeMap(); + void initializeBuiltinSymbolMap(); }; } // end anonymous namespace @@ -1016,9 +1070,9 @@ extern MCAsmParserExtension *createCOFFMasmParser(); enum { DEFAULT_ADDRSPACE = 0 }; MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, - const MCAsmInfo &MAI, unsigned CB = 0) + const MCAsmInfo &MAI, struct tm TM, unsigned CB) : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM), - CurBuffer(CB ? CB : SM.getMainFileID()) { + CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) { HadError = false; // Save the old handler. SavedDiagHandler = SrcMgr.getDiagHandler(); @@ -1029,8 +1083,8 @@ MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, EndStatementAtEOFStack.push_back(true); // Initialize the platform / file format parser. - switch (Ctx.getObjectFileInfo()->getObjectFileType()) { - case MCObjectFileInfo::IsCOFF: + switch (Ctx.getObjectFileType()) { + case MCContext::IsCOFF: PlatformParser.reset(createCOFFMasmParser()); break; default: @@ -1041,6 +1095,7 @@ MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, initializeDirectiveKindMap(); PlatformParser->Initialize(*this); initializeCVDefRangeTypeMap(); + initializeBuiltinSymbolMap(); NumOfMacroInstantiations = 0; } @@ -1107,7 +1162,51 @@ void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer, Loc.getPointer(), EndStatementAtEOF); } -const AsmToken &MasmParser::Lex() { +bool MasmParser::expandMacros() { + const AsmToken &Tok = getTok(); + const std::string IDLower = Tok.getIdentifier().lower(); + + const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower); + if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) { + // This is a macro function invocation; expand it in place. + const SMLoc MacroLoc = Tok.getLoc(); + const StringRef MacroId = Tok.getIdentifier(); + Lexer.Lex(); + if (handleMacroInvocation(M, MacroLoc)) { + Lexer.UnLex(AsmToken(AsmToken::Error, MacroId)); + Lexer.Lex(); + } + return false; + } + + llvm::Optional<std::string> ExpandedValue; + auto BuiltinIt = BuiltinSymbolMap.find(IDLower); + if (BuiltinIt != BuiltinSymbolMap.end()) { + ExpandedValue = + evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc()); + } else { + auto VarIt = Variables.find(IDLower); + if (VarIt != Variables.end() && VarIt->getValue().IsText) { + ExpandedValue = VarIt->getValue().TextValue; + } + } + + if (!ExpandedValue.hasValue()) + return true; + std::unique_ptr<MemoryBuffer> Instantiation = + MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>"); + + // Jump to the macro instantiation and prime the lexer. + CurBuffer = + SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc()); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr, + /*EndStatementAtEOF=*/false); + EndStatementAtEOFStack.push_back(false); + Lexer.Lex(); + return false; +} + +const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) { if (Lexer.getTok().is(AsmToken::Error)) Error(Lexer.getErrLoc(), Lexer.getErr()); @@ -1120,36 +1219,23 @@ const AsmToken &MasmParser::Lex() { } const AsmToken *tok = &Lexer.Lex(); - - while (tok->is(AsmToken::Identifier)) { - auto it = Variables.find(tok->getIdentifier().lower()); - const llvm::MCAsmMacro *M = - getContext().lookupMacro(tok->getIdentifier().lower()); - if (it != Variables.end() && it->second.IsText) { - // This is a textmacro; expand it in place. - std::unique_ptr<MemoryBuffer> Instantiation = - MemoryBuffer::getMemBufferCopy(it->second.TextValue, - "<instantiation>"); - - // Jump to the macro instantiation and prime the lexer. - CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), - getTok().getEndLoc()); - Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr, - /*EndStatementAtEOF=*/false); - EndStatementAtEOFStack.push_back(false); - tok = &Lexer.Lex(); - } else if (M && M->IsFunction && Lexer.peekTok().is(AsmToken::LParen)) { - // This is a macro function invocation; expand it in place. - const AsmToken MacroTok = *tok; - tok = &Lexer.Lex(); - if (handleMacroInvocation(M, MacroTok.getLoc())) { - Lexer.UnLex(AsmToken(AsmToken::Error, MacroTok.getIdentifier())); - tok = &Lexer.Lex(); + bool StartOfStatement = Lexer.isAtStartOfStatement(); + + while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) { + if (StartOfStatement) { + AsmToken NextTok; + MutableArrayRef<AsmToken> Buf(NextTok); + size_t ReadCount = Lexer.peekTokens(Buf); + if (ReadCount && NextTok.is(AsmToken::Identifier) && + (NextTok.getString().equals_insensitive("equ") || + NextTok.getString().equals_insensitive("textequ"))) { + // This looks like an EQU or TEXTEQU directive; don't expand the + // identifier, allowing for redefinitions. + break; } - continue; - } else { - break; } + if (expandMacros()) + break; } // Parse comments here to be deferred until end of next statement. @@ -1161,7 +1247,7 @@ const AsmToken &MasmParser::Lex() { // Recognize and bypass line continuations. while (tok->is(AsmToken::BackSlash) && - Lexer.peekTok().is(AsmToken::EndOfStatement)) { + peekTok().is(AsmToken::EndOfStatement)) { // Eat both the backslash and the end of statement. Lexer.Lex(); tok = &Lexer.Lex(); @@ -1183,6 +1269,29 @@ const AsmToken &MasmParser::Lex() { return *tok; } +const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) { + AsmToken Tok; + + MutableArrayRef<AsmToken> Buf(Tok); + size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace); + + if (ReadCount == 0) { + // If this is the end of an included file, pop the parent file off the + // include stack. + SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); + if (ParentIncludeLoc != SMLoc()) { + EndStatementAtEOFStack.pop_back(); + jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back()); + return peekTok(ShouldSkipSpace); + } + EndStatementAtEOFStack.pop_back(); + assert(EndStatementAtEOFStack.empty()); + } + + assert(ReadCount == 1); + return Tok; +} + bool MasmParser::enabledGenDwarfForAssembly() { // Check whether the user specified -g. if (!getContext().getGenDwarfForAssembly()) @@ -1235,6 +1344,8 @@ bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) { (void)InsertResult; } + getTargetParser().onBeginOfFile(); + // While we have input, parse each statement. while (Lexer.isNot(AsmToken::Eof) || SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) { @@ -1472,7 +1583,7 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, } } // Parse named bitwise negation. - if (Identifier.equals_lower("not")) { + if (Identifier.equals_insensitive("not")) { if (parsePrimaryExpr(Res, EndLoc, nullptr)) return true; Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc); @@ -1545,20 +1656,39 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, } MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName); - if (!Sym) + if (!Sym) { + // If this is a built-in numeric value, treat it as a constant. + auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower()); + const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end()) + ? BI_NO_SYMBOL + : BuiltinIt->getValue(); + if (Symbol != BI_NO_SYMBOL) { + const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc); + if (Value) { + Res = Value; + return false; + } + } + + // Variables use case-insensitive symbol names; if this is a variable, we + // find the symbol using its canonical name. + auto VarIt = Variables.find(SymbolName.lower()); + if (VarIt != Variables.end()) + SymbolName = VarIt->second.Name; Sym = getContext().getOrCreateSymbol(SymbolName); + } // If this is an absolute variable reference, substitute it now to preserve // semantics in the face of reassignment. if (Sym->isVariable()) { - auto V = Sym->getVariableValue(/*SetUsed*/ false); + auto V = Sym->getVariableValue(/*SetUsed=*/false); bool DoInline = isa<MCConstantExpr>(V) && !Variant; if (auto TV = dyn_cast<MCTargetExpr>(V)) DoInline = TV->inlineAssignedExpr(); if (DoInline) { if (Variant) return Error(EndLoc, "unexpected modifier on variable reference"); - Res = Sym->getVariableValue(/*SetUsed*/ false); + Res = Sym->getVariableValue(/*SetUsed=*/false); return false; } } @@ -2032,12 +2162,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, Lex(); // always eat a token if (!IDVal.startswith(".")) return Error(IDLoc, "unexpected token at start of statement"); - } else if (Lexer.is(AsmToken::Identifier) && - getTok().getString().equals_lower("echo")) { - // Intercept echo early to avoid lexical substitution in its message, and - // delegate all handling to the appropriate function. - return parseDirectiveEcho(); - } else if (parseIdentifier(IDVal)) { + } else if (parseIdentifier(IDVal, StartOfStatement)) { if (!TheCondState.Ignore) { Lex(); // always eat a token return Error(IDLoc, "unexpected token at start of statement"); @@ -2211,7 +2336,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, // Special-case handling of structure-end directives at higher priority, // since ENDS is overloaded as a segment-end directive. - if (IDVal.equals_lower("ends") && StructInProgress.size() > 1 && + if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 && getTok().is(AsmToken::EndOfStatement)) { return parseDirectiveNestedEnds(); } @@ -2283,6 +2408,8 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveNestedEnds(); case DK_ALIGN: return parseDirectiveAlign(); + case DK_EVEN: + return parseDirectiveEven(); case DK_ORG: return parseDirectiveOrg(); case DK_EXTERN: @@ -2416,6 +2543,8 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveErrorIfe(IDLoc, false); case DK_RADIX: return parseDirectiveRadix(IDLoc); + case DK_ECHO: + return parseDirectiveEcho(IDLoc); } return Error(IDLoc, "unknown directive"); @@ -2432,6 +2561,8 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, const StringRef nextVal = nextTok.getString(); const SMLoc nextLoc = nextTok.getLoc(); + const AsmToken afterNextTok = peekTok(); + // There are several entities interested in parsing infix directives: // // 1. Asm parser extensions. For example, platform-specific parsers @@ -2445,7 +2576,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, // Special-case handling of structure-end directives at higher priority, since // ENDS is overloaded as a segment-end directive. - if (nextVal.equals_lower("ends") && StructInProgress.size() == 1) { + if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) { Lex(); return parseDirectiveEnds(IDVal, IDLoc); } @@ -2473,27 +2604,57 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, case DK_EQU: case DK_TEXTEQU: Lex(); - return parseDirectiveEquate(nextVal, IDVal, DirKind); + return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc); case DK_BYTE: + if (afterNextTok.is(AsmToken::Identifier) && + afterNextTok.getString().equals_insensitive("ptr")) { + // Size directive; part of an instruction. + break; + } + LLVM_FALLTHROUGH; case DK_SBYTE: case DK_DB: Lex(); return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc); case DK_WORD: + if (afterNextTok.is(AsmToken::Identifier) && + afterNextTok.getString().equals_insensitive("ptr")) { + // Size directive; part of an instruction. + break; + } + LLVM_FALLTHROUGH; case DK_SWORD: case DK_DW: Lex(); return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc); case DK_DWORD: + if (afterNextTok.is(AsmToken::Identifier) && + afterNextTok.getString().equals_insensitive("ptr")) { + // Size directive; part of an instruction. + break; + } + LLVM_FALLTHROUGH; case DK_SDWORD: case DK_DD: Lex(); return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc); case DK_FWORD: + if (afterNextTok.is(AsmToken::Identifier) && + afterNextTok.getString().equals_insensitive("ptr")) { + // Size directive; part of an instruction. + break; + } + LLVM_FALLTHROUGH; case DK_DF: Lex(); return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc); case DK_QWORD: + if (afterNextTok.is(AsmToken::Identifier) && + afterNextTok.getString().equals_insensitive("ptr")) { + // Size directive; part of an instruction. + break; + } + LLVM_FALLTHROUGH; case DK_SQWORD: case DK_DQ: Lex(); @@ -2795,16 +2956,17 @@ bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, const char *Begin = Body.data() + Pos; StringRef Argument(Begin, I - Pos); + const std::string ArgumentLower = Argument.lower(); unsigned Index = 0; for (; Index < NParameters; ++Index) - if (Parameters[Index].Name == Argument) + if (Parameters[Index].Name.equals_insensitive(ArgumentLower)) break; if (Index == NParameters) { if (InitialAmpersand) OS << '&'; - auto it = LocalSymbols.find(Argument.lower()); + auto it = LocalSymbols.find(ArgumentLower); if (it != LocalSymbols.end()) OS << it->second; else @@ -2999,7 +3161,7 @@ bool MasmParser::parseMacroArguments(const MCAsmMacro *M, SMLoc IDLoc = Lexer.getLoc(); MCAsmMacroParameter FA; - if (Lexer.is(AsmToken::Identifier) && Lexer.peekTok().is(AsmToken::Equal)) { + if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) { if (parseIdentifier(FA.Name)) return Error(IDLoc, "invalid argument identifier for formal argument"); @@ -3219,7 +3381,8 @@ bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) { /// parseIdentifier: /// ::= identifier /// ::= string -bool MasmParser::parseIdentifier(StringRef &Res) { +bool MasmParser::parseIdentifier(StringRef &Res, + IdentifierPositionKind Position) { // The assembler has relaxed rules for accepting identifiers, in particular we // allow things like '.globl $foo' and '.def @feat.00', which would normally // be separate tokens. At this level, we have already lexed so we cannot @@ -3230,14 +3393,13 @@ bool MasmParser::parseIdentifier(StringRef &Res) { // Consume the prefix character, and check for a following identifier. - AsmToken Buf[1]; - Lexer.peekTokens(Buf, false); + AsmToken nextTok = peekTok(false); - if (Buf[0].isNot(AsmToken::Identifier)) + if (nextTok.isNot(AsmToken::Identifier)) return true; // We have a '$' or '@' followed by an identifier, make sure they are adjacent. - if (PrefixLoc.getPointer() + 1 != Buf[0].getLoc().getPointer()) + if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer()) return true; // eat $ or @ @@ -3254,7 +3416,17 @@ bool MasmParser::parseIdentifier(StringRef &Res) { Res = getTok().getIdentifier(); - Lex(); // Consume the identifier token. + // Consume the identifier token - but if parsing certain directives, avoid + // lexical expansion of the next token. + ExpandKind ExpandNextToken = ExpandMacros; + if (Position == StartOfStatement && + StringSwitch<bool>(Res) + .CaseLower("echo", true) + .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true) + .Default(false)) { + ExpandNextToken = DoNotExpandMacros; + } + Lex(ExpandNextToken); return false; } @@ -3263,34 +3435,54 @@ bool MasmParser::parseIdentifier(StringRef &Res) { /// ::= name "=" expression /// | name "equ" expression (not redefinable) /// | name "equ" text-list -/// | name "textequ" text-list +/// | name "textequ" text-list (redefinability unspecified) bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name, - DirectiveKind DirKind) { - Variable &Var = Variables[Name]; + DirectiveKind DirKind, SMLoc NameLoc) { + auto BuiltinIt = BuiltinSymbolMap.find(Name.lower()); + if (BuiltinIt != BuiltinSymbolMap.end()) + return Error(NameLoc, "cannot redefine a built-in symbol"); + + Variable &Var = Variables[Name.lower()]; if (Var.Name.empty()) { Var.Name = Name; - } else if (!Var.Redefinable) { - return TokError("invalid variable redefinition"); } - Var.Redefinable = (DirKind != DK_EQU); + SMLoc StartLoc = Lexer.getLoc(); if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) { // "equ" and "textequ" both allow text expressions. std::string Value; - if (!parseTextItem(Value)) { - Var.IsText = true; - Var.TextValue = Value; + std::string TextItem; + if (!parseTextItem(TextItem)) { + Value += TextItem; // Accept a text-list, not just one text-item. auto parseItem = [&]() -> bool { - if (parseTextItem(Value)) + if (parseTextItem(TextItem)) return TokError("expected text item"); - Var.TextValue += Value; + Value += TextItem; return false; }; if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + if (!Var.IsText || Var.TextValue != Value) { + switch (Var.Redefinable) { + case Variable::NOT_REDEFINABLE: + return Error(getTok().getLoc(), "invalid variable redefinition"); + case Variable::WARN_ON_REDEFINITION: + if (Warning(NameLoc, "redefining '" + Name + + "', already defined on the command line")) { + return true; + } + break; + default: + break; + } + } + Var.IsText = true; + Var.TextValue = Value; + Var.Redefinable = Variable::REDEFINABLE; + return false; } } @@ -3299,22 +3491,73 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name, // Parse as expression assignment. const MCExpr *Expr; - SMLoc EndLoc, StartLoc = Lexer.getLoc(); + SMLoc EndLoc; if (parseExpression(Expr, EndLoc)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + StringRef ExprAsString = StringRef( + StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer()); + + int64_t Value; + if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) { + if (DirKind == DK_ASSIGN) + return Error( + StartLoc, + "expected absolute expression; not all symbols have known values", + {StartLoc, EndLoc}); + + // Not an absolute expression; define as a text replacement. + if (!Var.IsText || Var.TextValue != ExprAsString) { + switch (Var.Redefinable) { + case Variable::NOT_REDEFINABLE: + return Error(getTok().getLoc(), "invalid variable redefinition"); + case Variable::WARN_ON_REDEFINITION: + if (Warning(NameLoc, "redefining '" + Name + + "', already defined on the command line")) { + return true; + } + break; + default: + break; + } + } + + Var.IsText = true; + Var.TextValue = ExprAsString.str(); + Var.Redefinable = Variable::REDEFINABLE; + + return false; + } + MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name); - Sym->setRedefinable(Var.Redefinable); + + const MCConstantExpr *PrevValue = + Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>( + Sym->getVariableValue(/*SetUsed=*/false)) + : nullptr; + if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) { + switch (Var.Redefinable) { + case Variable::NOT_REDEFINABLE: + return Error(getTok().getLoc(), "invalid variable redefinition"); + case Variable::WARN_ON_REDEFINITION: + if (Warning(NameLoc, "redefining '" + Name + + "', already defined on the command line")) { + return true; + } + break; + default: + break; + } + } + + Var.IsText = false; + Var.TextValue.clear(); + Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE + : Variable::NOT_REDEFINABLE; + + Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE); Sym->setVariableValue(Expr); Sym->setExternal(false); - if (Expr->evaluateAsAbsolute(Var.NumericValue, - getStreamer().getAssemblerPtr())) - return false; - - // Not an absolute expression; define as a text replacement. - Var.IsText = true; - Var.TextValue = StringRef(StartLoc.getPointer(), - EndLoc.getPointer() - StartLoc.getPointer()).str(); return false; } @@ -3376,21 +3619,52 @@ bool MasmParser::parseTextItem(std::string &Data) { case AsmToken::LessGreater: return parseAngleBracketString(Data); case AsmToken::Identifier: { + // This must be a text macro; we need to expand it accordingly. StringRef ID; + SMLoc StartLoc = getTok().getLoc(); if (parseIdentifier(ID)) return true; Data = ID.str(); - auto it = Variables.find(ID); - if (it == Variables.end()) - return true; + bool Expanded = false; + while (true) { + // Try to resolve as a built-in text macro + auto BuiltinIt = BuiltinSymbolMap.find(ID.lower()); + if (BuiltinIt != BuiltinSymbolMap.end()) { + llvm::Optional<std::string> BuiltinText = + evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc); + if (!BuiltinText.hasValue()) { + // Not a text macro; break without substituting + break; + } + Data = std::move(*BuiltinText); + ID = StringRef(Data); + Expanded = true; + continue; + } - while (it != Variables.end()) { - const Variable &Var = it->second; - if (!Var.IsText) - return true; - Data = Var.TextValue; - it = Variables.find(Data); + // Try to resolve as a variable text macro + auto VarIt = Variables.find(ID.lower()); + if (VarIt != Variables.end()) { + const Variable &Var = VarIt->getValue(); + if (!Var.IsText) { + // Not a text macro; break without substituting + break; + } + Data = Var.TextValue; + ID = StringRef(Data); + Expanded = true; + continue; + } + + break; + } + + if (!Expanded) { + // Not a text macro; not usable in TextItem context. Since we haven't used + // the token, put it back for better error recovery. + getLexer().UnLex(AsmToken(AsmToken::Identifier, ID)); + return true; } return false; } @@ -3455,7 +3729,7 @@ bool MasmParser::parseScalarInitializer(unsigned Size, if (parseExpression(Value)) return true; if (getTok().is(AsmToken::Identifier) && - getTok().getString().equals_lower("dup")) { + getTok().getString().equals_insensitive("dup")) { Lex(); // Eat 'dup'. const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); if (!MCE) @@ -3524,10 +3798,11 @@ bool MasmParser::addIntegralField(StringRef Name, unsigned Size) { Field.SizeOf = Field.Type * IntInfo.Values.size(); Field.LengthOf = IntInfo.Values.size(); - if (Struct.IsUnion) - Struct.Size = std::max(Struct.Size, Field.SizeOf); - else - Struct.Size += Field.SizeOf; + const unsigned FieldEnd = Field.Offset + Field.SizeOf; + if (!Struct.IsUnion) { + Struct.NextOffset = FieldEnd; + } + Struct.Size = std::max(Struct.Size, FieldEnd); return false; } @@ -3613,11 +3888,11 @@ bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) { APFloat Value(Semantics); StringRef IDVal = getTok().getString(); if (getLexer().is(AsmToken::Identifier)) { - if (IDVal.equals_lower("infinity") || IDVal.equals_lower("inf")) + if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf")) Value = APFloat::getInf(Semantics); - else if (IDVal.equals_lower("nan")) + else if (IDVal.equals_insensitive("nan")) Value = APFloat::getNaN(Semantics, false, ~0); - else if (IDVal.equals_lower("?")) + else if (IDVal.equals_insensitive("?")) Value = APFloat::getZero(Semantics); else return TokError("invalid floating point literal"); @@ -3657,9 +3932,9 @@ bool MasmParser::parseRealInstList(const fltSemantics &Semantics, while (getTok().isNot(EndToken) || (EndToken == AsmToken::Greater && getTok().isNot(AsmToken::GreaterGreater))) { - const AsmToken NextTok = Lexer.peekTok(); + const AsmToken NextTok = peekTok(); if (NextTok.is(AsmToken::Identifier) && - NextTok.getString().equals_lower("dup")) { + NextTok.getString().equals_insensitive("dup")) { const MCExpr *Value; if (parseExpression(Value) || parseToken(AsmToken::Identifier)) return true; @@ -3730,10 +4005,12 @@ bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics, Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8; Field.LengthOf = RealInfo.AsIntValues.size(); Field.SizeOf = Field.Type * Field.LengthOf; - if (Struct.IsUnion) - Struct.Size = std::max(Struct.Size, Field.SizeOf); - else - Struct.Size += Field.SizeOf; + + const unsigned FieldEnd = Field.Offset + Field.SizeOf; + if (!Struct.IsUnion) { + Struct.NextOffset = FieldEnd; + } + Struct.Size = std::max(Struct.Size, FieldEnd); return false; } @@ -4021,9 +4298,9 @@ bool MasmParser::parseStructInstList( while (getTok().isNot(EndToken) || (EndToken == AsmToken::Greater && getTok().isNot(AsmToken::GreaterGreater))) { - const AsmToken NextTok = Lexer.peekTok(); + const AsmToken NextTok = peekTok(); if (NextTok.is(AsmToken::Identifier) && - NextTok.getString().equals_lower("dup")) { + NextTok.getString().equals_insensitive("dup")) { const MCExpr *Value; if (parseExpression(Value) || parseToken(AsmToken::Identifier)) return true; @@ -4142,14 +4419,16 @@ bool MasmParser::emitFieldInitializer(const FieldInfo &Field, const StructFieldInfo &Contents, const StructFieldInfo &Initializer) { for (const auto &Init : Initializer.Initializers) { - emitStructInitializer(Contents.Structure, Init); + if (emitStructInitializer(Contents.Structure, Init)) + return true; } // Default-initialize all remaining values. for (auto It = Contents.Initializers.begin() + Initializer.Initializers.size(); It != Contents.Initializers.end(); ++It) { const auto &Init = *It; - emitStructInitializer(Contents.Structure, Init); + if (emitStructInitializer(Contents.Structure, Init)) + return true; } return false; } @@ -4172,6 +4451,10 @@ bool MasmParser::emitFieldInitializer(const FieldInfo &Field, bool MasmParser::emitStructInitializer(const StructInfo &Structure, const StructInitializer &Initializer) { + if (!Structure.Initializable) + return Error(getLexer().getLoc(), + "cannot initialize a value of type '" + Structure.Name + + "'; 'org' was used in the type's declaration"); size_t Index = 0, Offset = 0; for (const auto &Init : Initializer.FieldInitializers) { const auto &Field = Structure.Fields[Index++]; @@ -4228,10 +4511,12 @@ bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) { Field.LengthOf = StructInfo.Initializers.size(); Field.SizeOf = Field.Type * Field.LengthOf; - if (OwningStruct.IsUnion) - OwningStruct.Size = std::max(OwningStruct.Size, Field.SizeOf); - else - OwningStruct.Size += Field.SizeOf; + + const unsigned FieldEnd = Field.Offset + Field.SizeOf; + if (!OwningStruct.IsUnion) { + OwningStruct.NextOffset = FieldEnd; + } + OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd); return false; } @@ -4306,7 +4591,7 @@ bool MasmParser::parseDirectiveStruct(StringRef Directive, QualifierLoc = getTok().getLoc(); if (parseIdentifier(Qualifier)) return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); - if (!Qualifier.equals_lower("nonunique")) + if (!Qualifier.equals_insensitive("nonunique")) return Error(QualifierLoc, "Unrecognized qualifier for '" + Twine(Directive) + "' directive; expected none or NONUNIQUE"); @@ -4350,7 +4635,7 @@ bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) { return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION"); if (StructInProgress.size() > 1) return Error(NameLoc, "unexpected name in nested ENDS directive"); - if (StructInProgress.back().Name.compare_lower(Name)) + if (StructInProgress.back().Name.compare_insensitive(Name)) return Error(NameLoc, "mismatched name in ENDS directive; expected '" + StructInProgress.back().Name + "'"); StructInfo Structure = StructInProgress.pop_back_val(); @@ -4381,6 +4666,8 @@ bool MasmParser::parseDirectiveNestedEnds() { StructInfo &ParentStruct = StructInProgress.back(); if (Structure.Name.empty()) { + // Anonymous substructures' fields are addressed as if they belong to the + // parent structure - so we transfer them to the parent here. const size_t OldFields = ParentStruct.Fields.size(); ParentStruct.Fields.insert( ParentStruct.Fields.end(), @@ -4390,17 +4677,28 @@ bool MasmParser::parseDirectiveNestedEnds() { ParentStruct.FieldsByName[FieldByName.getKey()] = FieldByName.getValue() + OldFields; } - if (!ParentStruct.IsUnion) { + + unsigned FirstFieldOffset = 0; + if (!Structure.Fields.empty() && !ParentStruct.IsUnion) { + FirstFieldOffset = llvm::alignTo( + ParentStruct.NextOffset, + std::min(ParentStruct.Alignment, Structure.AlignmentSize)); + } + + if (ParentStruct.IsUnion) { + ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size); + } else { for (auto FieldIter = ParentStruct.Fields.begin() + OldFields; FieldIter != ParentStruct.Fields.end(); ++FieldIter) { - FieldIter->Offset += ParentStruct.Size; + FieldIter->Offset += FirstFieldOffset; } - } - if (ParentStruct.IsUnion) - ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size); - else - ParentStruct.Size += Structure.Size; + const unsigned StructureEnd = FirstFieldOffset + Structure.Size; + if (!ParentStruct.IsUnion) { + ParentStruct.NextOffset = StructureEnd; + } + ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd); + } } else { FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT, Structure.AlignmentSize); @@ -4409,10 +4707,11 @@ bool MasmParser::parseDirectiveNestedEnds() { Field.LengthOf = 1; Field.SizeOf = Structure.Size; - if (ParentStruct.IsUnion) - ParentStruct.Size = std::max(ParentStruct.Size, Field.SizeOf); - else - ParentStruct.Size += Field.SizeOf; + const unsigned StructureEnd = Field.Offset + Field.SizeOf; + if (!ParentStruct.IsUnion) { + ParentStruct.NextOffset = StructureEnd; + } + ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd); StructInfo.Structure = Structure; StructInfo.Initializers.emplace_back(); @@ -4426,22 +4725,66 @@ bool MasmParser::parseDirectiveNestedEnds() { } /// parseDirectiveOrg -/// ::= .org expression [ , expression ] +/// ::= org expression bool MasmParser::parseDirectiveOrg() { const MCExpr *Offset; SMLoc OffsetLoc = Lexer.getLoc(); if (checkForValidSection() || parseExpression(Offset)) return true; - - // Parse optional fill expression. - int64_t FillExpr = 0; - if (parseOptionalToken(AsmToken::Comma)) - if (parseAbsoluteExpression(FillExpr)) - return addErrorSuffix(" in '.org' directive"); if (parseToken(AsmToken::EndOfStatement)) - return addErrorSuffix(" in '.org' directive"); + return addErrorSuffix(" in 'org' directive"); + + if (StructInProgress.empty()) { + // Not in a struct; change the offset for the next instruction or data + if (checkForValidSection()) + return addErrorSuffix(" in 'org' directive"); + + getStreamer().emitValueToOffset(Offset, 0, OffsetLoc); + } else { + // Offset the next field of this struct + StructInfo &Structure = StructInProgress.back(); + int64_t OffsetRes; + if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr())) + return Error(OffsetLoc, + "expected absolute expression in 'org' directive"); + if (OffsetRes < 0) + return Error( + OffsetLoc, + "expected non-negative value in struct's 'org' directive; was " + + std::to_string(OffsetRes)); + Structure.NextOffset = static_cast<unsigned>(OffsetRes); + + // ORG-affected structures cannot be initialized + Structure.Initializable = false; + } + + return false; +} + +bool MasmParser::emitAlignTo(int64_t Alignment) { + if (StructInProgress.empty()) { + // Not in a struct; align the next instruction or data + if (checkForValidSection()) + return true; + + // Check whether we should use optimal code alignment for this align + // directive. + const MCSection *Section = getStreamer().getCurrentSectionOnly(); + assert(Section && "must have section to emit alignment"); + if (Section->UseCodeAlign()) { + getStreamer().emitCodeAlignment(Alignment, /*MaxBytesToEmit=*/0); + } else { + // FIXME: Target specific behavior about how the "extra" bytes are filled. + getStreamer().emitValueToAlignment(Alignment, /*Value=*/0, + /*ValueSize=*/1, + /*MaxBytesToEmit=*/0); + } + } else { + // Align the next field of this struct + StructInfo &Structure = StructInProgress.back(); + Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment); + } - getStreamer().emitValueToOffset(Offset, FillExpr, OffsetLoc); return false; } @@ -4451,42 +4794,42 @@ bool MasmParser::parseDirectiveAlign() { SMLoc AlignmentLoc = getLexer().getLoc(); int64_t Alignment; - if (checkForValidSection()) - return addErrorSuffix(" in align directive"); // Ignore empty 'align' directives. if (getTok().is(AsmToken::EndOfStatement)) { - Warning(AlignmentLoc, "align directive with no operand is ignored"); - return parseToken(AsmToken::EndOfStatement); + return Warning(AlignmentLoc, + "align directive with no operand is ignored") && + parseToken(AsmToken::EndOfStatement); } if (parseAbsoluteExpression(Alignment) || parseToken(AsmToken::EndOfStatement)) return addErrorSuffix(" in align directive"); - // Always emit an alignment here even if we thrown an error. + // Always emit an alignment here even if we throw an error. bool ReturnVal = false; - // Reject alignments that aren't either a power of two or zero, for gas + // Reject alignments that aren't either a power of two or zero, for ML.exe // compatibility. Alignment of zero is silently rounded up to one. if (Alignment == 0) Alignment = 1; if (!isPowerOf2_64(Alignment)) - ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2"); - - // Check whether we should use optimal code alignment for this align - // directive. - const MCSection *Section = getStreamer().getCurrentSectionOnly(); - assert(Section && "must have section to emit alignment"); - if (Section->UseCodeAlign()) { - getStreamer().emitCodeAlignment(Alignment, /*MaxBytesToEmit=*/0); - } else { - // FIXME: Target specific behavior about how the "extra" bytes are filled. - getStreamer().emitValueToAlignment(Alignment, /*Value=*/0, /*ValueSize=*/1, - /*MaxBytesToEmit=*/0); - } + ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " + + std::to_string(Alignment)); + + if (emitAlignTo(Alignment)) + ReturnVal |= addErrorSuffix(" in align directive"); return ReturnVal; } +/// parseDirectiveEven +/// ::= even +bool MasmParser::parseDirectiveEven() { + if (parseToken(AsmToken::EndOfStatement) || emitAlignTo(2)) + return addErrorSuffix(" in even directive"); + + return false; +} + /// parseDirectiveFile /// ::= .file filename /// ::= .file number [directory] filename [md5 checksum] [source source-text] @@ -5495,7 +5838,7 @@ bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) { // Emit an error if two (or more) named parameters share the same name. for (const MCAsmMacroParameter& CurrParam : Parameters) - if (CurrParam.Name.equals_lower(Parameter.Name)) + if (CurrParam.Name.equals_insensitive(Parameter.Name)) return TokError("macro '" + Name + "' has multiple parameters" " named '" + Parameter.Name + "'"); @@ -5520,9 +5863,9 @@ bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) { Parameter.Name + "' in macro '" + Name + "'"); - if (Qualifier.equals_lower("req")) + if (Qualifier.equals_insensitive("req")) Parameter.Required = true; - else if (Qualifier.equals_lower("vararg")) + else if (Qualifier.equals_insensitive("vararg")) Parameter.Vararg = true; else return Error(QualLoc, @@ -5542,7 +5885,7 @@ bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) { std::vector<std::string> Locals; if (getTok().is(AsmToken::Identifier) && - getTok().getIdentifier().equals_lower("local")) { + getTok().getIdentifier().equals_insensitive("local")) { Lex(); // Eat the LOCAL directive. StringRef ID; @@ -5576,7 +5919,7 @@ bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) { // Otherwise, check whether we have reached the 'endm'... and determine if // this is a macro function. if (getLexer().is(AsmToken::Identifier)) { - if (getTok().getIdentifier().equals_lower("endm")) { + if (getTok().getIdentifier().equals_insensitive("endm")) { if (MacroDepth == 0) { // Outermost macro. EndToken = getTok(); Lexer.Lex(); @@ -5588,9 +5931,8 @@ bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) { // Otherwise we just found the end of an inner macro. --MacroDepth; } - } else if (getTok().getIdentifier().equals_lower("exitm")) { - if (MacroDepth == 0 && - getLexer().peekTok().isNot(AsmToken::EndOfStatement)) { + } else if (getTok().getIdentifier().equals_insensitive("exitm")) { + if (MacroDepth == 0 && peekTok().isNot(AsmToken::EndOfStatement)) { IsMacroFunction = true; } } else if (isMacroLikeDirective()) { @@ -5615,7 +5957,7 @@ bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) { IsMacroFunction); DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n"; Macro.dump()); - getContext().defineMacro(Name, std::move(Macro)); + getContext().defineMacro(Name.lower(), std::move(Macro)); return false; } @@ -5815,9 +6157,9 @@ bool MasmParser::parseDirectiveInclude() { std::string Filename; SMLoc IncludeLoc = getTok().getLoc(); - if (!parseAngleBracketString(Filename)) + if (parseAngleBracketString(Filename)) Filename = parseStringTo(AsmToken::EndOfStatement); - if (check(!Filename.empty(), "missing filename in 'include' directive") || + if (check(Filename.empty(), "missing filename in 'include' directive") || check(getTok().isNot(AsmToken::EndOfStatement), "unexpected token in 'include' directive") || // Attempt to switch the lexer to the included file before consuming the @@ -5914,7 +6256,7 @@ bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual, TheCondState.TheCond = AsmCond::IfCond; if (CaseInsensitive) TheCondState.CondMet = - ExpectEqual == (StringRef(String1).equals_lower(String2)); + ExpectEqual == (StringRef(String1).equals_insensitive(String2)); else TheCondState.CondMet = ExpectEqual == (String1 == String2); TheCondState.Ignore = !TheCondState.CondMet; @@ -5943,10 +6285,12 @@ bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) { parseToken(AsmToken::EndOfStatement, "unexpected token in 'ifdef'")) return true; - if (Variables.find(Name) != Variables.end()) { + if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) { + is_defined = true; + } else if (Variables.find(Name.lower()) != Variables.end()) { is_defined = true; } else { - MCSymbol *Sym = getContext().lookupSymbol(Name); + MCSymbol *Sym = getContext().lookupSymbol(Name.lower()); is_defined = (Sym && !Sym->isUndefined(false)); } } @@ -6065,7 +6409,9 @@ bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc, "unexpected token in 'elseifdef'")) return true; - if (Variables.find(Name) != Variables.end()) { + if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) { + is_defined = true; + } else if (Variables.find(Name.lower()) != Variables.end()) { is_defined = true; } else { MCSymbol *Sym = getContext().lookupSymbol(Name); @@ -6124,7 +6470,7 @@ bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual, if (CaseInsensitive) TheCondState.CondMet = - ExpectEqual == (StringRef(String1).equals_lower(String2)); + ExpectEqual == (StringRef(String1).equals_insensitive(String2)); else TheCondState.CondMet = ExpectEqual == (String1 == String2); TheCondState.Ignore = !TheCondState.CondMet; @@ -6235,7 +6581,9 @@ bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc, if (check(parseIdentifier(Name), "expected identifier after '.errdef'")) return true; - if (Variables.find(Name) != Variables.end()) { + if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) { + IsDefined = true; + } else if (Variables.find(Name.lower()) != Variables.end()) { IsDefined = true; } else { MCSymbol *Sym = getContext().lookupSymbol(Name); @@ -6304,13 +6652,13 @@ bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual, if (CaseInsensitive) TheCondState.CondMet = - ExpectEqual == (StringRef(String1).equals_lower(String2)); + ExpectEqual == (StringRef(String1).equals_insensitive(String2)); else TheCondState.CondMet = ExpectEqual == (String1 == String2); TheCondState.Ignore = !TheCondState.CondMet; if ((CaseInsensitive && - ExpectEqual == StringRef(String1).equals_lower(String2)) || + ExpectEqual == StringRef(String1).equals_insensitive(String2)) || (ExpectEqual == (String1 == String2))) return Error(DirectiveLoc, Message); return false; @@ -6381,7 +6729,8 @@ void MasmParser::initializeDirectiveKindMap() { DirectiveKindMap["real8"] = DK_REAL8; DirectiveKindMap["real10"] = DK_REAL10; DirectiveKindMap["align"] = DK_ALIGN; - // DirectiveKindMap[".org"] = DK_ORG; + DirectiveKindMap["even"] = DK_EVEN; + DirectiveKindMap["org"] = DK_ORG; DirectiveKindMap["extern"] = DK_EXTERN; DirectiveKindMap["public"] = DK_PUBLIC; // DirectiveKindMap[".comm"] = DK_COMM; @@ -6494,8 +6843,8 @@ bool MasmParser::isMacroLikeDirective() { if (IsMacroLike) return true; } - if (getLexer().peekTok().is(AsmToken::Identifier) && - getLexer().peekTok().getIdentifier().equals_lower("macro")) + if (peekTok().is(AsmToken::Identifier) && + peekTok().getIdentifier().equals_insensitive("macro")) return true; return false; @@ -6517,7 +6866,7 @@ MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) { // Otherwise, check whether we have reached the endm. if (Lexer.is(AsmToken::Identifier) && - getTok().getIdentifier().equals_lower("endm")) { + getTok().getIdentifier().equals_insensitive("endm")) { if (NestLevel == 0) { EndToken = getTok(); Lex(); @@ -6549,16 +6898,36 @@ bool MasmParser::expandStatement(SMLoc Loc) { MCAsmMacroParameters Parameters; MCAsmMacroArguments Arguments; + + StringMap<std::string> BuiltinValues; + for (const auto &S : BuiltinSymbolMap) { + const BuiltinSymbol &Sym = S.getValue(); + if (llvm::Optional<std::string> Text = evaluateBuiltinTextMacro(Sym, Loc)) { + BuiltinValues[S.getKey().lower()] = std::move(*Text); + } + } + for (const auto &B : BuiltinValues) { + MCAsmMacroParameter P; + MCAsmMacroArgument A; + P.Name = B.getKey(); + P.Required = true; + A.push_back(AsmToken(AsmToken::String, B.getValue())); + + Parameters.push_back(std::move(P)); + Arguments.push_back(std::move(A)); + } + for (const auto &V : Variables) { const Variable &Var = V.getValue(); if (Var.IsText) { - Parameters.emplace_back(); - Arguments.emplace_back(); - MCAsmMacroParameter &P = Parameters.back(); - MCAsmMacroArgument &A = Arguments.back(); + MCAsmMacroParameter P; + MCAsmMacroArgument A; P.Name = Var.Name; P.Required = true; A.push_back(AsmToken(AsmToken::String, Var.TextValue)); + + Parameters.push_back(std::move(P)); + Arguments.push_back(std::move(A)); } } MacroLikeBodies.emplace_back(StringRef(), Body, Parameters); @@ -6707,7 +7076,7 @@ bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) { Parameter.Name + "' in '" + Dir + "' directive"); - if (Qualifier.equals_lower("req")) + if (Qualifier.equals_insensitive("req")) Parameter.Required = true; else return Error(QualLoc, @@ -6866,14 +7235,7 @@ bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) { /// parseDirectiveEcho /// ::= "echo" message -bool MasmParser::parseDirectiveEcho() { - // We're called before the directive is parsed, to avoid triggering lexical - // substitutions in the message. Assert that the next token is the directive, - // then eat it without using the Parser's Lex method. - assert(getTok().is(AsmToken::Identifier) && - getTok().getString().equals_lower("echo")); - Lexer.Lex(); - +bool MasmParser::parseDirectiveEcho(SMLoc DirectiveLoc) { std::string Message = parseStringTo(AsmToken::EndOfStatement); llvm::outs() << Message; if (!StringRef(Message).endswith("\n")) @@ -6908,10 +7270,14 @@ bool MasmParser::defineMacro(StringRef Name, StringRef Value) { Variable &Var = Variables[Name.lower()]; if (Var.Name.empty()) { Var.Name = Name; - } else if (!Var.Redefinable) { - return TokError("invalid variable redefinition"); + } else if (Var.Redefinable == Variable::NOT_REDEFINABLE) { + return Error(SMLoc(), "invalid variable redefinition"); + } else if (Var.Redefinable == Variable::WARN_ON_REDEFINITION && + Warning(SMLoc(), "redefining '" + Name + + "', already defined on the command line")) { + return true; } - Var.Redefinable = true; + Var.Redefinable = Variable::WARN_ON_REDEFINITION; Var.IsText = true; Var.TextValue = Value.str(); return false; @@ -7021,8 +7387,8 @@ bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const { } bool MasmParser::parseMSInlineAsm( - void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, - unsigned &NumInputs, SmallVectorImpl<std::pair<void *, bool>> &OpDecls, + std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, + SmallVectorImpl<std::pair<void *, bool>> &OpDecls, SmallVectorImpl<std::string> &Constraints, SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII, const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { @@ -7284,9 +7650,94 @@ bool MasmParser::parseMSInlineAsm( return false; } +void MasmParser::initializeBuiltinSymbolMap() { + // Numeric built-ins (supported in all versions) + BuiltinSymbolMap["@version"] = BI_VERSION; + BuiltinSymbolMap["@line"] = BI_LINE; + + // Text built-ins (supported in all versions) + BuiltinSymbolMap["@date"] = BI_DATE; + BuiltinSymbolMap["@time"] = BI_TIME; + BuiltinSymbolMap["@filecur"] = BI_FILECUR; + BuiltinSymbolMap["@filename"] = BI_FILENAME; + BuiltinSymbolMap["@curseg"] = BI_CURSEG; + + // Some built-ins exist only for MASM32 (32-bit x86) + if (getContext().getSubtargetInfo()->getTargetTriple().getArch() == + Triple::x86) { + // Numeric built-ins + // BuiltinSymbolMap["@cpu"] = BI_CPU; + // BuiltinSymbolMap["@interface"] = BI_INTERFACE; + // BuiltinSymbolMap["@wordsize"] = BI_WORDSIZE; + // BuiltinSymbolMap["@codesize"] = BI_CODESIZE; + // BuiltinSymbolMap["@datasize"] = BI_DATASIZE; + // BuiltinSymbolMap["@model"] = BI_MODEL; + + // Text built-ins + // BuiltinSymbolMap["@code"] = BI_CODE; + // BuiltinSymbolMap["@data"] = BI_DATA; + // BuiltinSymbolMap["@fardata?"] = BI_FARDATA; + // BuiltinSymbolMap["@stack"] = BI_STACK; + } +} + +const MCExpr *MasmParser::evaluateBuiltinValue(BuiltinSymbol Symbol, + SMLoc StartLoc) { + switch (Symbol) { + default: + return nullptr; + case BI_VERSION: + // Match a recent version of ML.EXE. + return MCConstantExpr::create(1427, getContext()); + case BI_LINE: { + int64_t Line; + if (ActiveMacros.empty()) + Line = SrcMgr.FindLineNumber(StartLoc, CurBuffer); + else + Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc, + ActiveMacros.front()->ExitBuffer); + return MCConstantExpr::create(Line, getContext()); + } + } + llvm_unreachable("unhandled built-in symbol"); +} + +llvm::Optional<std::string> +MasmParser::evaluateBuiltinTextMacro(BuiltinSymbol Symbol, SMLoc StartLoc) { + switch (Symbol) { + default: + return {}; + case BI_DATE: { + // Current local date, formatted MM/DD/YY + char TmpBuffer[sizeof("mm/dd/yy")]; + const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%D", &TM); + return std::string(TmpBuffer, Len); + } + case BI_TIME: { + // Current local time, formatted HH:MM:SS (24-hour clock) + char TmpBuffer[sizeof("hh:mm:ss")]; + const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%T", &TM); + return std::string(TmpBuffer, Len); + } + case BI_FILECUR: + return SrcMgr + .getMemoryBuffer( + ActiveMacros.empty() ? CurBuffer : ActiveMacros.front()->ExitBuffer) + ->getBufferIdentifier() + .str(); + case BI_FILENAME: + return sys::path::stem(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID()) + ->getBufferIdentifier()) + .upper(); + case BI_CURSEG: + return getStreamer().getCurrentSectionOnly()->getName().str(); + } + llvm_unreachable("unhandled built-in symbol"); +} + /// Create an MCAsmParser instance. MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C, MCStreamer &Out, const MCAsmInfo &MAI, - unsigned CB) { - return new MasmParser(SM, C, Out, MAI, CB); + struct tm TM, unsigned CB) { + return new MasmParser(SM, C, Out, MAI, TM, CB); } |