aboutsummaryrefslogtreecommitdiffstats
path: root/include/llvm/Transforms/IPO/LowerBitSets.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/llvm/Transforms/IPO/LowerBitSets.h')
-rw-r--r--include/llvm/Transforms/IPO/LowerBitSets.h71
1 files changed, 58 insertions, 13 deletions
diff --git a/include/llvm/Transforms/IPO/LowerBitSets.h b/include/llvm/Transforms/IPO/LowerBitSets.h
index 0f60617..55d7d84 100644
--- a/include/llvm/Transforms/IPO/LowerBitSets.h
+++ b/include/llvm/Transforms/IPO/LowerBitSets.h
@@ -30,8 +30,8 @@ class GlobalVariable;
class Value;
struct BitSetInfo {
- // The actual bitset.
- std::vector<uint8_t> Bits;
+ // The indices of the set bits in the bitset.
+ std::set<uint64_t> Bits;
// The byte offset into the combined global represented by the bitset.
uint64_t ByteOffset;
@@ -45,26 +45,18 @@ struct BitSetInfo {
unsigned AlignLog2;
bool isSingleOffset() const {
- return Bits.size() == 1 && Bits[0] == 1;
+ return Bits.size() == 1;
}
bool isAllOnes() const {
- for (unsigned I = 0; I != Bits.size() - 1; ++I)
- if (Bits[I] != 0xFF)
- return false;
-
- if (BitSize % 8 == 0)
- return Bits[Bits.size() - 1] == 0xFF;
-
- return Bits[Bits.size() - 1] == (1 << (BitSize % 8)) - 1;
+ return Bits.size() == BitSize;
}
bool containsGlobalOffset(uint64_t Offset) const;
- bool containsValue(const DataLayout *DL,
+ bool containsValue(const DataLayout &DL,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout,
Value *V, uint64_t COffset = 0) const;
-
};
struct BitSetBuilder {
@@ -148,6 +140,59 @@ struct GlobalLayoutBuilder {
void addFragment(const std::set<uint64_t> &F);
};
+/// This class is used to build a byte array containing overlapping bit sets. By
+/// loading from indexed offsets into the byte array and applying a mask, a
+/// program can test bits from the bit set with a relatively short instruction
+/// sequence. For example, suppose we have 15 bit sets to lay out:
+///
+/// A (16 bits), B (15 bits), C (14 bits), D (13 bits), E (12 bits),
+/// F (11 bits), G (10 bits), H (9 bits), I (7 bits), J (6 bits), K (5 bits),
+/// L (4 bits), M (3 bits), N (2 bits), O (1 bit)
+///
+/// These bits can be laid out in a 16-byte array like this:
+///
+/// Byte Offset
+/// 0123456789ABCDEF
+/// Bit
+/// 7 HHHHHHHHHIIIIIII
+/// 6 GGGGGGGGGGJJJJJJ
+/// 5 FFFFFFFFFFFKKKKK
+/// 4 EEEEEEEEEEEELLLL
+/// 3 DDDDDDDDDDDDDMMM
+/// 2 CCCCCCCCCCCCCCNN
+/// 1 BBBBBBBBBBBBBBBO
+/// 0 AAAAAAAAAAAAAAAA
+///
+/// For example, to test bit X of A, we evaluate ((bits[X] & 1) != 0), or to
+/// test bit X of I, we evaluate ((bits[9 + X] & 0x80) != 0). This can be done
+/// in 1-2 machine instructions on x86, or 4-6 instructions on ARM.
+///
+/// This is a byte array, rather than (say) a 2-byte array or a 4-byte array,
+/// because for one thing it gives us better packing (the more bins there are,
+/// the less evenly they will be filled), and for another, the instruction
+/// sequences can be slightly shorter, both on x86 and ARM.
+struct ByteArrayBuilder {
+ /// The byte array built so far.
+ std::vector<uint8_t> Bytes;
+
+ enum { BitsPerByte = 8 };
+
+ /// The number of bytes allocated so far for each of the bits.
+ uint64_t BitAllocs[BitsPerByte];
+
+ ByteArrayBuilder() {
+ memset(BitAllocs, 0, sizeof(BitAllocs));
+ }
+
+ /// Allocate BitSize bits in the byte array where Bits contains the bits to
+ /// set. AllocByteOffset is set to the offset within the byte array and
+ /// AllocMask is set to the bitmask for those bits. This uses the LPT (Longest
+ /// Processing Time) multiprocessor scheduling algorithm to lay out the bits
+ /// efficiently; the pass allocates bit sets in decreasing size order.
+ void allocate(const std::set<uint64_t> &Bits, uint64_t BitSize,
+ uint64_t &AllocByteOffset, uint8_t &AllocMask);
+};
+
} // namespace llvm
#endif