diff options
-rw-r--r-- | include/llvm/MC/MCAsmInfo.h | 7 | ||||
-rw-r--r-- | lib/MC/MCAsmInfo.cpp | 1 | ||||
-rw-r--r-- | lib/MC/MCParser/AsmLexer.cpp | 4 | ||||
-rw-r--r-- | lib/MC/MCParser/AsmParser.cpp | 29 | ||||
-rw-r--r-- | lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 2 | ||||
-rw-r--r-- | test/MC/COFF/tricky-names.ll | 38 | ||||
-rw-r--r-- | test/MC/ELF/bad-relocation.s | 7 | ||||
-rw-r--r-- | test/MC/ELF/symbol-names.s | 12 |
8 files changed, 87 insertions, 13 deletions
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index c9cecc1..bcf6fe8 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -156,6 +156,10 @@ namespace llvm { /// symbol names. This defaults to true. bool AllowPeriodsInName; + /// \brief This is true if the assembler allows @ characters in symbol + /// names. Defaults to false. + bool AllowAtInName; + /// AllowUTF8 - This is true if the assembler accepts UTF-8 input. // FIXME: Make this a more general encoding setting? bool AllowUTF8; @@ -485,6 +489,9 @@ namespace llvm { bool doesAllowPeriodsInName() const { return AllowPeriodsInName; } + bool doesAllowAtInName() const { + return AllowAtInName; + } bool doesAllowUTF8() const { return AllowUTF8; } diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 6112ad1..0eea75e 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -53,6 +53,7 @@ MCAsmInfo::MCAsmInfo() { AllowQuotesInName = false; AllowNameToStartWithDigit = false; AllowPeriodsInName = true; + AllowAtInName = false; AllowUTF8 = true; UseDataRegionDirectives = false; ZeroDirective = "\t.zero\t"; diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 1ce0cc2..b49dd01 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -138,9 +138,9 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); } -/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* +/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* static bool IsIdentifierChar(char c) { - return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@'; + return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@' || c == '?'; } AsmToken AsmLexer::LexIdentifier() { // Check for floating point literals. diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 9a36256..1fb8480 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -769,6 +769,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { Res = MCUnaryExpr::CreateLNot(Res, getContext()); return false; case AsmToken::Dollar: + case AsmToken::At: case AsmToken::String: case AsmToken::Identifier: { StringRef Identifier; @@ -792,19 +793,25 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { EndLoc = SMLoc::getFromPointer(Identifier.end()); // This is a symbol reference. + StringRef SymbolName = Identifier; + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; std::pair<StringRef, StringRef> Split = Identifier.split('@'); - MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first); // Lookup the symbol variant if used. - MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; if (Split.first.size() != Identifier.size()) { Variant = MCSymbolRefExpr::getVariantKindForName(Split.second); - if (Variant == MCSymbolRefExpr::VK_Invalid) { + if (Variant != MCSymbolRefExpr::VK_Invalid) { + SymbolName = Split.first; + } else if (MAI.doesAllowAtInName()) { + Variant = MCSymbolRefExpr::VK_None; + } else { Variant = MCSymbolRefExpr::VK_None; return TokError("invalid variant '" + Split.second + "'"); } } + MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName); + // If this is an absolute variable reference, substitute it now to preserve // semantics in the face of reassignment. if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) { @@ -2105,25 +2112,25 @@ bool AsmParser::parseAssignment(StringRef Name, bool allow_redef, /// ::= string bool AsmParser::parseIdentifier(StringRef &Res) { // The assembler has relaxed rules for accepting identifiers, in particular we - // allow things like '.globl $foo', which would normally be separate - // tokens. At this level, we have already lexed so we cannot (currently) + // allow things like '.globl $foo' and '.def @feat.00', which would normally be + // separate tokens. At this level, we have already lexed so we cannot (currently) // handle this as a context dependent token, instead we detect adjacent tokens // and return the combined identifier. - if (Lexer.is(AsmToken::Dollar)) { - SMLoc DollarLoc = getLexer().getLoc(); + if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) { + SMLoc PrefixLoc = getLexer().getLoc(); - // Consume the dollar sign, and check for a following identifier. + // Consume the prefix character, and check for a following identifier. Lex(); if (Lexer.isNot(AsmToken::Identifier)) return true; - // We have a '$' followed by an identifier, make sure they are adjacent. - if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer()) + // We have a '$' or '@' followed by an identifier, make sure they are adjacent. + if (PrefixLoc.getPointer() + 1 != getTok().getLoc().getPointer()) return true; // Construct the joined identifier and consume the token. Res = - StringRef(DollarLoc.getPointer(), getTok().getIdentifier().size() + 1); + StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1); Lex(); return false; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index d3f5258..3861e1c 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -135,6 +135,8 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { AssemblerDialect = AsmWriterFlavor; TextAlignFillValue = 0x90; + + AllowAtInName = true; } void X86MCAsmInfoGNUCOFF::anchor() { } diff --git a/test/MC/COFF/tricky-names.ll b/test/MC/COFF/tricky-names.ll new file mode 100644 index 0000000..6e041d3 --- /dev/null +++ b/test/MC/COFF/tricky-names.ll @@ -0,0 +1,38 @@ +; Check how tricky symbols are printed in the asm output. +; RUN: llc -mtriple=i686-pc-win32 %s -o - | FileCheck %s --check-prefix=ASM + +; Check that we can roundtrip these names through our assembler. +; RUN: llc -mtriple=i686-pc-win32 %s -o - | llvm-mc -triple i686-pc-win32 -filetype=obj | llvm-readobj -t | FileCheck %s --check-prefix=READOBJ + + +@"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ" = global i32 0 +@"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4" = global i32 0 +@"\01@foo.bar" = global i32 0 + +define weak i32 @"\01??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51"() section ".text" { + %a = load i32* @"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ" + %b = load i32* @"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4" + %c = load i32* @"\01@foo.bar" + %x = add i32 %a, %b + %y = add i32 %x, %c + ret i32 %y +} + +; Check that these symbols are not quoted. They occur in output that gets passed to GAS. +; ASM: .globl __ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4 +; ASM-NOT: .globl "__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4" +; ASM: .globl @foo.bar +; ASM-NOT: .globl "@foo.bar" + +; READOBJ: Symbol +; READOBJ: Name: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51 +; READOBJ: Section: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51 +; READOBJ: Symbol +; READOBJ: Name: ??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51 +; READOBJ: Section: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51 +; READOBJ: Symbol +; READOBJ: Name: ??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ +; READOBJ: Symbol +; READOBJ: Name: __ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4 +; READOBJ: Symbol +; READOBJ: Name: @foo.bar diff --git a/test/MC/ELF/bad-relocation.s b/test/MC/ELF/bad-relocation.s new file mode 100644 index 0000000..1a66744 --- /dev/null +++ b/test/MC/ELF/bad-relocation.s @@ -0,0 +1,7 @@ +// RUN: not llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o /dev/null 2>&1 | FileCheck %s + +// CHECK: error: invalid variant 'BADRELOC' + + .text +foo: + leal .Lfoo@BADRELOC(%ebx), %eax diff --git a/test/MC/ELF/symbol-names.s b/test/MC/ELF/symbol-names.s new file mode 100644 index 0000000..6459ac9 --- /dev/null +++ b/test/MC/ELF/symbol-names.s @@ -0,0 +1,12 @@ +// RUN: llvm-mc -triple i686-pc-linux -filetype=obj %s -o - | llvm-readobj -t | FileCheck %s + +// MC allows ?'s in symbol names as an extension. + +.text +.globl foo?bar +.type foo?bar, @function +foo?bar: +ret + +// CHECK: Symbol +// CHECK: Name: foo?bar |