diff options
-rw-r--r-- | lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 24 |
-rw-r--r-- | test/MC/X86/AlignedBundling/long-nop-pad.s | 27 |
2 files changed, 39 insertions, 12 deletions
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index acc90ec..598ddee 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -315,18 +315,18 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } - // Write an optimal sequence for the first 15 bytes. - const uint64_t OptimalCount = (Count < 16) ? Count : 15; - const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10; - for (uint64_t i = 0, e = Prefixes; i != e; i++) - OW->Write8(0x66); - const uint64_t Rest = OptimalCount - Prefixes; - for (uint64_t i = 0, e = Rest; i != e; i++) - OW->Write8(Nops[Rest - 1][i]); - - // Finish with single byte nops. - for (uint64_t i = OptimalCount, e = Count; i != e; ++i) - OW->Write8(0x90); + // 15 is the longest single nop instruction. Emit as many 15-byte nops as + // needed, then emit a nop of the remaining length. + do { + const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t) 15); + const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10; + for (uint8_t i = 0; i < Prefixes; i++) + OW->Write8(0x66); + const uint8_t Rest = ThisNopLength - Prefixes; + for (uint8_t i = 0; i < Rest; i++) + OW->Write8(Nops[Rest - 1][i]); + Count -= ThisNopLength; + } while (Count != 0); return true; } diff --git a/test/MC/X86/AlignedBundling/long-nop-pad.s b/test/MC/X86/AlignedBundling/long-nop-pad.s new file mode 100644 index 0000000..ea33e28 --- /dev/null +++ b/test/MC/X86/AlignedBundling/long-nop-pad.s @@ -0,0 +1,27 @@ +# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \ +# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s + +# Test that long nops are generated for padding where possible. 
+ + .text +foo: + .bundle_align_mode 5 + +# This callq instruction is 5 bytes long + .bundle_lock align_to_end + callq bar + .bundle_unlock +# To align this group to a bundle end, we need a 15-byte NOP and a 12-byte NOP. +# CHECK: 0: nop +# CHECK-NEXT: f: nop +# CHECK-NEXT: 1b: callq + +# This push instruction is 1 byte long + .bundle_lock align_to_end + push %rax + .bundle_unlock +# To align this group to a bundle end, we need two 15-byte NOPs, and a 1-byte. +# CHECK: 20: nop +# CHECK-NEXT: 2f: nop +# CHECK-NEXT: 3e: nop +# CHECK-NEXT: 3f: pushq |