aboutsummaryrefslogtreecommitdiffstats
path: root/tools/edis/EDToken.h
blob: e4ae91f7ec3ad54d03a036d554338a2ac6b8e58c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
//===-EDToken.h - LLVM Enhanced Disassembler --------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
// 
//===----------------------------------------------------------------------===//
//
// This file defines the interface for the Enhanced Disassembly library's token
// class.  A token is responsible for vending information about itself, such as
// its type and logical value.
//
//===----------------------------------------------------------------------===//

#ifndef EDToken_
#define EDToken_

#include "llvm-c/EnhancedDisassembly.h"
#include "llvm/ADT/StringRef.h"

#include <string>
#include <vector>

/// EDToken - Encapsulates a single token, which can provide a string
///   representation of itself or interpret itself in various ways, depending
///   on the token type.  Only the interface is declared here; every member
///   function is defined outside this header.
struct EDToken {
  /// tokenType - The token categories exposed through the external API
  enum tokenType {
    kTokenWhitespace,
    kTokenOpcode,
    kTokenLiteral,
    kTokenRegister,
    kTokenPunctuation
  };
  
  /// The parent disassembler.  Held by reference, so the token does not own
  ///   it.
  EDDisassembler &Disassembler;

  /// The token's string representation.  llvm::StringRef is a non-owning
  ///   view of character data.
  ///   NOTE(review): which object owns the backing storage is not visible
  ///   from this header - confirm that it outlives the token.
  llvm::StringRef Str;
  /// The token's string representation, but in a form suitable for export
  ///   (an owning std::string, unlike Str)
  std::string PermStr;
  /// The type of the token, as exposed through the external API
  enum tokenType Type;
  /// The type of the token, as recorded by the syntax-specific tokenizer
  uint64_t LocalType;
  /// The operand corresponding to the token, or -1 if not part of an
  ///   operand.  (The field is a signed int.)
  int OperandID;
  
  /// The sign if the token is a literal (true if negative, false otherwise)
  bool LiteralSign;
  /// The absolute value if the token is a literal
  uint64_t LiteralAbsoluteValue;
  /// The LLVM register ID if the token is a register name
  unsigned RegisterID;
  
  /// Constructor - Initializes an EDToken with the information common to all
  ///   tokens
  ///
  /// @arg str          - The string corresponding to the token
  /// @arg type         - The token's type as exposed through the public API
  /// @arg localType    - The token's type as recorded by the tokenizer
  /// @arg disassembler - The disassembler responsible for the token
  EDToken(llvm::StringRef str,
          enum tokenType type,
          uint64_t localType,
          EDDisassembler &disassembler);
  
  /// makeLiteral - Adds the information specific to a literal
  ///
  /// @arg sign           - The sign of the literal (true if negative, false
  ///                       otherwise)
  /// @arg absoluteValue  - The absolute value of the literal
  void makeLiteral(bool sign, uint64_t absoluteValue);
  /// makeRegister - Adds the information specific to a register
  ///
  /// @arg registerID - The LLVM register ID
  void makeRegister(unsigned registerID);
  
  /// setOperandID - Links the token to a numbered operand
  ///
  /// @arg operandID  - The operand ID to link to
  void setOperandID(int operandID);
  
  /// Destructor - Declared here; the definition is not in this header
  ~EDToken();
  
  /// type - Returns the public type of the token
  enum tokenType type() const;
  /// localType - Returns the tokenizer-specific type of the token
  uint64_t localType() const;
  /// string - Returns the string representation of the token
  llvm::StringRef string() const;
  /// operandID - Returns the operand ID of the token
  int operandID() const;
  
  /// literalSign - Returns the sign of the token
  ///   (1 if negative, 0 if positive or unsigned, -1 if it is not a literal)
  int literalSign() const;
  /// literalAbsoluteValue - Retrieves the absolute value of the token, and
  ///   returns -1 if the token is not a literal
  ///
  /// @arg value  - A reference to a value that is filled in with the absolute
  ///               value, if it is valid
  int literalAbsoluteValue(uint64_t &value) const;
  /// registerID - Retrieves the register ID of the token, and returns -1 if the
  ///   token is not a register
  ///
  /// @arg registerID - A reference to a value that is filled in with the
  ///                   register ID, if it is valid
  int registerID(unsigned &registerID) const;
  
  /// tokenize - Tokenizes a string using the platform- and syntax-specific
  ///   tokenizer, and returns 0 on success (-1 on failure)
  ///
  /// @arg tokens       - A vector that will be filled in with pointers to
  ///                     allocated tokens
  ///                     NOTE(review): these are raw pointers; presumably the
  ///                     caller is responsible for deleting them - confirm
  ///                     against the implementation.
  /// @arg str          - The string, as outputted by the AsmPrinter
  /// @arg operandOrder - The order of the operands from the operandFlags array
  ///                     as they appear in str
  /// @arg disassembler - The disassembler for the desired target and
  ///                     assembly syntax
  static int tokenize(std::vector<EDToken*> &tokens,
                      std::string &str,
                      const char *operandOrder,
                      EDDisassembler &disassembler);
  
  /// getString - Directs a character pointer to the string, returning 0 on
  ///   success (-1 on failure)
  ///
  /// @arg buf  - A reference to a pointer that is set to point to the string.
  ///             The string is still owned by the token.
  ///
  /// NOTE(review): unlike the other accessors this member is not const;
  ///   presumably it populates PermStr on demand - confirm against the
  ///   implementation.
  int getString(const char*& buf);
};

#endif