aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/llvm/MC/MCAsmInfo.h7
-rw-r--r--lib/MC/MCAsmInfo.cpp1
-rw-r--r--lib/MC/MCParser/AsmLexer.cpp4
-rw-r--r--lib/MC/MCParser/AsmParser.cpp29
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp2
-rw-r--r--test/MC/COFF/tricky-names.ll38
-rw-r--r--test/MC/ELF/bad-relocation.s7
-rw-r--r--test/MC/ELF/symbol-names.s12
8 files changed, 87 insertions, 13 deletions
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index c9cecc1..bcf6fe8 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -156,6 +156,10 @@ namespace llvm {
/// symbol names. This defaults to true.
bool AllowPeriodsInName;
+ /// \brief This is true if the assembler allows @ characters in symbol
+ /// names. Defaults to false.
+ bool AllowAtInName;
+
/// AllowUTF8 - This is true if the assembler accepts UTF-8 input.
// FIXME: Make this a more general encoding setting?
bool AllowUTF8;
@@ -485,6 +489,9 @@ namespace llvm {
bool doesAllowPeriodsInName() const {
return AllowPeriodsInName;
}
+ bool doesAllowAtInName() const {
+ return AllowAtInName;
+ }
bool doesAllowUTF8() const {
return AllowUTF8;
}
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 6112ad1..0eea75e 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -53,6 +53,7 @@ MCAsmInfo::MCAsmInfo() {
AllowQuotesInName = false;
AllowNameToStartWithDigit = false;
AllowPeriodsInName = true;
+ AllowAtInName = false;
AllowUTF8 = true;
UseDataRegionDirectives = false;
ZeroDirective = "\t.zero\t";
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 1ce0cc2..b49dd01 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -138,9 +138,9 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
}
-/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
static bool IsIdentifierChar(char c) {
- return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@';
+ return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@' || c == '?';
}
AsmToken AsmLexer::LexIdentifier() {
// Check for floating point literals.
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 9a36256..1fb8480 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -769,6 +769,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
Res = MCUnaryExpr::CreateLNot(Res, getContext());
return false;
case AsmToken::Dollar:
+ case AsmToken::At:
case AsmToken::String:
case AsmToken::Identifier: {
StringRef Identifier;
@@ -792,19 +793,25 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
EndLoc = SMLoc::getFromPointer(Identifier.end());
// This is a symbol reference.
+ StringRef SymbolName = Identifier;
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
std::pair<StringRef, StringRef> Split = Identifier.split('@');
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
// Lookup the symbol variant if used.
- MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
if (Split.first.size() != Identifier.size()) {
Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
- if (Variant == MCSymbolRefExpr::VK_Invalid) {
+ if (Variant != MCSymbolRefExpr::VK_Invalid) {
+ SymbolName = Split.first;
+ } else if (MAI.doesAllowAtInName()) {
+ Variant = MCSymbolRefExpr::VK_None;
+ } else {
Variant = MCSymbolRefExpr::VK_None;
return TokError("invalid variant '" + Split.second + "'");
}
}
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
@@ -2105,25 +2112,25 @@ bool AsmParser::parseAssignment(StringRef Name, bool allow_redef,
/// ::= string
bool AsmParser::parseIdentifier(StringRef &Res) {
// The assembler has relaxed rules for accepting identifiers, in particular we
- // allow things like '.globl $foo', which would normally be separate
- // tokens. At this level, we have already lexed so we cannot (currently)
+ // allow things like '.globl $foo' and '.def @feat.00', which would normally be
+ // separate tokens. At this level, we have already lexed so we cannot (currently)
// handle this as a context dependent token, instead we detect adjacent tokens
// and return the combined identifier.
- if (Lexer.is(AsmToken::Dollar)) {
- SMLoc DollarLoc = getLexer().getLoc();
+ if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
+ SMLoc PrefixLoc = getLexer().getLoc();
- // Consume the dollar sign, and check for a following identifier.
+ // Consume the prefix character, and check for a following identifier.
Lex();
if (Lexer.isNot(AsmToken::Identifier))
return true;
- // We have a '$' followed by an identifier, make sure they are adjacent.
- if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer())
+ // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
+ if (PrefixLoc.getPointer() + 1 != getTok().getLoc().getPointer())
return true;
// Construct the joined identifier and consume the token.
Res =
- StringRef(DollarLoc.getPointer(), getTok().getIdentifier().size() + 1);
+ StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
Lex();
return false;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index d3f5258..3861e1c 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -135,6 +135,8 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
+
+ AllowAtInName = true;
}
void X86MCAsmInfoGNUCOFF::anchor() { }
diff --git a/test/MC/COFF/tricky-names.ll b/test/MC/COFF/tricky-names.ll
new file mode 100644
index 0000000..6e041d3
--- /dev/null
+++ b/test/MC/COFF/tricky-names.ll
@@ -0,0 +1,38 @@
+; Check how tricky symbols are printed in the asm output.
+; RUN: llc -mtriple=i686-pc-win32 %s -o - | FileCheck %s --check-prefix=ASM
+
+; Check that we can roundtrip these names through our assembler.
+; RUN: llc -mtriple=i686-pc-win32 %s -o - | llvm-mc -triple i686-pc-win32 -filetype=obj | llvm-readobj -t | FileCheck %s --check-prefix=READOBJ
+
+
+@"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ" = global i32 0
+@"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4" = global i32 0
+@"\01@foo.bar" = global i32 0
+
+define weak i32 @"\01??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51"() section ".text" {
+ %a = load i32* @"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ"
+ %b = load i32* @"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4"
+ %c = load i32* @"\01@foo.bar"
+ %x = add i32 %a, %b
+ %y = add i32 %x, %c
+ ret i32 %y
+}
+
+; Check that these symbols are not quoted. They occur in output that gets passed to GAS.
+; ASM: .globl __ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4
+; ASM-NOT: .globl "__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4"
+; ASM: .globl @foo.bar
+; ASM-NOT: .globl "@foo.bar"
+
+; READOBJ: Symbol
+; READOBJ: Name: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51
+; READOBJ: Section: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51
+; READOBJ: Symbol
+; READOBJ: Name: ??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51
+; READOBJ: Section: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51
+; READOBJ: Symbol
+; READOBJ: Name: ??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ
+; READOBJ: Symbol
+; READOBJ: Name: __ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4
+; READOBJ: Symbol
+; READOBJ: Name: @foo.bar
diff --git a/test/MC/ELF/bad-relocation.s b/test/MC/ELF/bad-relocation.s
new file mode 100644
index 0000000..1a66744
--- /dev/null
+++ b/test/MC/ELF/bad-relocation.s
@@ -0,0 +1,7 @@
+// RUN: not llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o /dev/null 2>&1 | FileCheck %s
+
+// CHECK: error: invalid variant 'BADRELOC'
+
+ .text
+foo:
+ leal .Lfoo@BADRELOC(%ebx), %eax
diff --git a/test/MC/ELF/symbol-names.s b/test/MC/ELF/symbol-names.s
new file mode 100644
index 0000000..6459ac9
--- /dev/null
+++ b/test/MC/ELF/symbol-names.s
@@ -0,0 +1,12 @@
+// RUN: llvm-mc -triple i686-pc-linux -filetype=obj %s -o - | llvm-readobj -t | FileCheck %s
+
+// MC allows ?'s in symbol names as an extension.
+
+.text
+.globl foo?bar
+.type foo?bar, @function
+foo?bar:
+ret
+
+// CHECK: Symbol
+// CHECK: Name: foo?bar