2 files changed, 58 insertions, 39 deletions
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 16207b3..12bcd09 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -363,18 +363,11 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   // not valid in cases where the two memories partially overlap; however,
   // that is not a problem here, because we know that one of the memories
   // is a full frame index.
-  //
-  // For now we punt if the load or store is also to a frame index.
-  // In that case we might end up eliminating both of them to out-of-range
-  // offsets, which might then force the register scavenger to spill two
-  // other registers.  The backend can only handle one such scavenger spill
-  // at a time.
   if (OpNum == 0 && MI->hasOneMemOperand()) {
     MachineMemOperand *MMO = *MI->memoperands_begin();
     if (MMO->getSize() == Size && !MMO->isVolatile()) {
       // Handle conversion of loads.
-      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad) &&
-          !MI->getOperand(1).isFI()) {
+      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) {
         uint64_t Offset = 0;
         MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset,
                                                   MachineMemOperand::MOStore);
@@ -384,8 +377,7 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
           .addMemOperand(FrameMMO).addMemOperand(MMO);
       }
       // Handle conversion of stores.
-      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore) &&
-          !MI->getOperand(1).isFI()) {
+      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) {
         uint64_t Offset = 0;
         MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset,
                                                   MachineMemOperand::MOLoad);
diff --git a/test/CodeGen/SystemZ/Large/spill-02.py b/test/CodeGen/SystemZ/Large/spill-02.py
index 0eba3ed..0aa43d1 100644
--- a/test/CodeGen/SystemZ/Large/spill-02.py
+++ b/test/CodeGen/SystemZ/Large/spill-02.py
@@ -1,46 +1,73 @@
 # Test cases where we spill from one frame index to another, both of which
-# would be out of range of MVC.  At present we don't use MVC in this case.
+# are out of range of MVC, and both of which need emergency spill slots.
 # RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
 
-# There are 8 usable call-saved GPRs.  The first 160 bytes of the frame
-# are needed for the ABI call frame, and a further 8 bytes are needed
-# for the emergency spill slot.  That means we will have at least one
-# out-of-range slot if:
-#
-#    count == (4096 - 168) / 8 + 8 + 1 == 500
-#
-# Add in some extra just to be sure.
-#
 # CHECK: f1:
-# CHECK-NOT: mvc
+# CHECK: %fallthru
+# CHECK-DAG: stg [[REG1:%r[0-9]+]], 8168(%r15)
+# CHECK-DAG: stg [[REG2:%r[0-9]+]], 8176(%r15)
+# CHECK-DAG: lay [[REG3:%r[0-9]+]], 8192(%r15)
+# CHECK-DAG: lay [[REG4:%r[0-9]+]], 4096(%r15)
+# CHECK: mvc 0(8,[[REG3]]), 4088([[REG4]])
+# CHECK-DAG: lg [[REG1]], 8168(%r15)
+# CHECK-DAG: lg [[REG2]], 8176(%r15)
+# CHECK: %skip
 # CHECK: br %r14
-count = 510
 
-print 'declare void @foo(i64 *%base0, i64 *%base1)'
-print ''
-print 'define void @f1() {'
+# Arrange for %foo's spill slot to be at 8184(%r15) and the alloca area to be at
+# 8192(%r15).  The two emergency spill slots live below that, so this requires
+# the first 8168 bytes to be used for the call.  160 of these bytes are
+# allocated for the ABI frame.  There are also 5 argument registers, one of
+# which holds the pointer argument (%area), leaving 4 for the i64 arguments.
+args = (8168 - 160) / 8 + (5 - 1)
 
-for i in range(2):
-    print '  %%alloc%d = alloca [%d x i64]' % (i, count / 2)
-    print ('  %%base%d = getelementptr [%d x i64] * %%alloc%d, i64 0, i64 0'
-           % (i, count / 2, i))
+print 'declare i64 *@foo(i64 *%s)' % (', i64' * args)
+print 'declare void @bar(i64 *)'
+print ''
+print 'define i64 @f1(i64 %foo) {'
+print 'entry:'
 
-print '  call void @foo(i64 *%base0, i64 *%base1)'
+# Make the allocation big, so that it goes at the top of the frame.
+print '  %array = alloca [1000 x i64]'
+print '  %area = getelementptr [1000 x i64] *%array, i64 0, i64 0'
+print '  %%base = call i64 *@foo(i64 *%%area%s)' % (', i64 0' * args)
 print ''
 
+# Make sure all GPRs are used.  One is needed for the stack pointer and
+# another for %base, so we need 14 live values.
+count = 14
 for i in range(count):
-    print '  %%ptr%d = getelementptr i64 *%%base%d, i64 %d' % (i, i % 2, i / 2)
-    print '  %%val%d = load i64 *%%ptr%d' % (i, i)
+    print '  %%ptr%d = getelementptr i64 *%%base, i64 %d' % (i, i / 2)
+    print '  %%val%d = load volatile i64 *%%ptr%d' % (i, i)
     print ''
 
-print '  call void @foo(i64 *%base0, i64 *%base1)'
-print ''
-
-for i in range (count):
-    print '  store i64 %%val%d, i64 *%%ptr%d' % (i, i)
+# Encourage the register allocator to give preference to these %vals
+# by using them several times.
+for j in range(4):
+    for i in range(count):
+        print '  store volatile i64 %%val%d, i64 *%%ptr%d' % (i, i)
+    print ''
 
+# Copy the incoming argument, which we expect to be spilled, to the frame
+# index for the alloca area.  Also throw in a volatile store, so that this
+# block cannot be reordered with the surrounding code.
+print '  %cond = icmp eq i64 %val0, %val1'
+print '  br i1 %cond, label %skip, label %fallthru'
 print ''
-print '  call void @foo(i64 *%base0, i64 *%base1)'
+print 'fallthru:'
+print '  store i64 %foo, i64 *%area'
+print '  store volatile i64 %val0, i64 *%ptr0'
+print '  br label %skip'
 print ''
-print '  ret void'
+print 'skip:'
+
+# Use each %val a few more times to emphasise the point, and to make sure
+# that they are live across the store of %foo.
+for j in range(4):
+    for i in range(count):
+        print '  store volatile i64 %%val%d, i64 *%%ptr%d' % (i, i)
+    print ''
+
+print '  call void @bar(i64 *%area)'
+print '  ret i64 0'
 print '}'