aboutsummaryrefslogtreecommitdiffstats
path: root/include/llvm-c/EnhancedDisassembly.h
blob: 9cd1e1f5f3cfe72aaf744a650246447ed88572d7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
/*===-- llvm-c/EnhancedDisassembly.h - Disassembler C Interface ---*- C -*-===*\
|*                                                                            *|
|*                     The LLVM Compiler Infrastructure                       *|
|*                                                                            *|
|* This file is distributed under the University of Illinois Open Source      *|
|* License. See LICENSE.TXT for details.                                      *|
|*                                                                            *|
|*===----------------------------------------------------------------------===*|
|*                                                                            *|
|* This header declares the C interface to EnhancedDisassembly.so, which      *|
|* implements a disassembler with the ability to extract operand values and   *|
|* individual tokens from assembly instructions.                              *|
|*                                                                            *|
|* The header declares additional interfaces if the host compiler supports    *|
|* the blocks API.                                                            *|
|*                                                                            *|
\*===----------------------------------------------------------------------===*/

#ifndef LLVM_C_ENHANCEDDISASSEMBLY_H
#define LLVM_C_ENHANCEDDISASSEMBLY_H

#include "llvm/System/DataTypes.h"

#ifdef __cplusplus
extern "C" {
#endif

/*!
 @typedef EDByteReaderCallback
 Client-supplied function through which instruction bytes are read from
 memory.
 @param byte Out-parameter: a pointer whose target the callee fills in with the
   byte that was read.
 @param address The address of the byte to be read.
 @param arg An anonymous argument reserved for the client's own use.
 @result 0 on success; -1 otherwise.
 */
typedef int (*EDByteReaderCallback)(uint8_t *byte,
                                    uint64_t address,
                                    void *arg);

/*!
 @typedef EDRegisterReaderCallback
 Client-supplied function through which register values are read.
 @param value Out-parameter: a pointer whose target the callee fills in with
   the value of the register.
 @param regID The LLVM register identifier for the register to read.
 @param arg An anonymous argument reserved for the client's own use.
 @result 0 if the register could be read; -1 otherwise.
 */
typedef int (*EDRegisterReaderCallback)(uint64_t *value,
                                        unsigned regID,
                                        void *arg);

/*!
 @typedef EDAssemblySyntax_t
 Selects the assembly syntax used when tokenizing instructions.
 @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64.
 @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64.
 */
typedef enum {
  kEDAssemblySyntaxX86Intel = 0,
  kEDAssemblySyntaxX86ATT = 1
} EDAssemblySyntax_t;

/*!
 @typedef EDDisassemblerRef
 Opaque handle to a disassembler for a single CPU architecture.  The
 underlying struct EDDisassembler is left incomplete in this header.
 */
typedef struct EDDisassembler *EDDisassemblerRef;

/*!
 @typedef EDInstRef
 Opaque handle to a single disassembled instruction in one assembly syntax.
 The underlying struct EDInst is left incomplete in this header.
 */
typedef struct EDInst *EDInstRef;

/*!
 @typedef EDTokenRef
 Opaque handle to a token from the disassembly of an instruction.  The
 underlying struct EDToken is left incomplete in this header.
 */
typedef struct EDToken *EDTokenRef;

/*!
 @typedef EDOperandRef
 Opaque handle to an operand of an instruction.  The underlying
 struct EDOperand is left incomplete in this header.
 */
typedef struct EDOperand *EDOperandRef;
  
/*!
 @functiongroup Getting a disassembler
 */

/*!
 @function EDGetDisassembler
 Gets the disassembler for a given target and assembly syntax.
 @param disassembler A pointer whose target will be filled in with the
   disassembler.
 @param triple A C string that identifies the target.  Example:
   "x86_64-apple-darwin10"
 @param syntax The assembly syntax to use when decoding instructions, given as
   an EDAssemblySyntax_t value.
 @result 0 on success; -1 otherwise.
 */
int EDGetDisassembler(EDDisassemblerRef *disassembler,
                      const char *triple,
                      EDAssemblySyntax_t syntax);

/*!
 @functiongroup Generic architectural queries
 */
  
/*!
 @function EDGetRegisterName
 Gets the human-readable name for a given register.
 @param regName A pointer whose target will be pointed at the name of the
   register.  The name does not need to be deallocated by the caller.
   NOTE(review): the original sentence was cut off after "and will be", so
   how long the returned string remains valid is not documented here --
   confirm against the implementation.
 @param disassembler The disassembler to query for the name.
 @param regID The register identifier, as returned by EDRegisterTokenValue.
 @result 0 on success; -1 otherwise.
 */
int EDGetRegisterName(const char** regName,
                      EDDisassemblerRef disassembler,
                      unsigned regID);
  
/*!
 @function EDRegisterIsStackPointer
 Determines whether a register is one of the platform's stack-pointer
 registers.
 @param disassembler The disassembler to query.
 @param regID The register identifier, as returned by EDRegisterTokenValue.
 @result 1 if the register is a stack pointer; 0 otherwise.
 */
int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
                             unsigned regID);

/*!
 @function EDRegisterIsProgramCounter
 Determines whether a register is one of the platform's program-counter
 registers.
 @param disassembler The disassembler to query.
 @param regID The register identifier, as returned by EDRegisterTokenValue.
 @result 1 if the register is a program counter; 0 otherwise.
 */
int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
                               unsigned regID);
  
/*!
 @functiongroup Creating and querying instructions
 */
  
/*!
 @function EDCreateInsts
 Gets a set of contiguous instructions from a disassembler.
 @param insts A pointer to an array that will be filled in with the
   instructions.  Must have at least count entries.  Entries not filled in will
   be set to NULL.
 @param count The maximum number of instructions to fill in.
 @param disassembler The disassembler to use when decoding the instructions.
 @param byteReader The function to use when reading the instructions' machine
   code.
 @param address The address of the first byte of the first instruction.
 @param arg An anonymous argument to be passed to byteReader.
 @result The number of instructions read on success; 0 otherwise.
 */
unsigned int EDCreateInsts(EDInstRef *insts,
                           unsigned int count,
                           EDDisassemblerRef disassembler,
                           EDByteReaderCallback byteReader,
                           uint64_t address,
                           void *arg);

/*!
 @function EDReleaseInst
 Frees the memory for an instruction.  The instruction must not be accessed
 after this call.
 @param inst The instruction to be freed.
 */
void EDReleaseInst(EDInstRef inst);

/*!
 @function EDInstByteSize
 Gets the size of an instruction's machine-code representation.
 @param inst The instruction to be queried.
 @result The number of bytes in the instruction's machine-code representation.
   NOTE(review): the return type is signed but no error value is documented
   here -- confirm what is returned for an invalid instruction.
 */
int EDInstByteSize(EDInstRef inst);

/*!
 @function EDGetInstString
 Gets the disassembled text equivalent of the instruction.
 @param buf A pointer whose target will be filled in with a pointer to the
   string.  The caller does not own the string: it becomes invalid when the
   instruction is released.
 @param inst The instruction to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDGetInstString(const char **buf,
                    EDInstRef inst);

/*!
 @function EDInstID
 Gets the LLVM identifier for an instruction.
 @param instID A pointer whose target will be filled in with the LLVM identifier
   for the instruction.
 @param inst The instruction to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDInstID(unsigned *instID, EDInstRef inst);
  
/*!
 @function EDInstIsBranch
 Determines whether an instruction is a branch instruction.
 @param inst The instruction to be queried.
 @result 1 if the instruction is a branch instruction; 0 if it is some other
   type of instruction; -1 if there was an error.
 */
int EDInstIsBranch(EDInstRef inst);

/*!
 @function EDInstIsMove
 Determines whether an instruction is a move instruction.
 @param inst The instruction to be queried.
 @result 1 if the instruction is a move instruction; 0 if it is some other
   type of instruction; -1 if there was an error.
 */
int EDInstIsMove(EDInstRef inst);

/*!
 @function EDBranchTargetID
 Gets the ID of the operand that holds an instruction's branch target.
 @param inst The instruction to be queried.
 @result The ID of the branch target operand, suitable for use with
   EDGetOperand.  -1 if no such operand exists.
   NOTE(review): this text previously named EDCopyOperand, which this header
   does not declare; EDGetOperand is presumably the intended call -- confirm.
 */
int EDBranchTargetID(EDInstRef inst);

/*!
 @function EDMoveSourceID
 Gets the ID of the operand that is the source of a move instruction.
 @param inst The instruction to be queried.
 @result The ID of the move source operand, suitable for use with
   EDGetOperand.  -1 if no such operand exists.
   NOTE(review): this text previously named EDCopyOperand, which this header
   does not declare; EDGetOperand is presumably the intended call -- confirm.
 */
int EDMoveSourceID(EDInstRef inst);

/*!
 @function EDMoveTargetID
 Gets the ID of the operand that is the target of a move instruction.
 @param inst The instruction to be queried.
 @result The ID of the move target operand, suitable for use with
   EDGetOperand.  -1 if no such operand exists.
   NOTE(review): this text previously named EDCopyOperand, which this header
   does not declare; EDGetOperand is presumably the intended call -- confirm.
 */
int EDMoveTargetID(EDInstRef inst);

/*!
 @functiongroup Creating and querying tokens
 */
  
/*!
 @function EDNumTokens
 Counts the tokens in an instruction.
 @param inst The instruction to be queried.
 @result The number of tokens in the instruction, or -1 on error.
 */
int EDNumTokens(EDInstRef inst);

/*!
 @function EDGetToken
 Retrieves a token from an instruction.  The token is owned by the instruction
 and remains valid until the instruction is released.
 @param token A pointer whose target will be filled in with the token.
 @param inst The instruction to be queried.
 @param index The index of the token in the instruction.
 @result 0 on success; -1 otherwise.
 */
int EDGetToken(EDTokenRef *token,
               EDInstRef inst,
               int index);
  
/*!
 @function EDGetTokenString
 Gets the disassembled text for a token.
 @param buf A pointer whose target will be filled in with a pointer to the
   string.  (The string becomes invalid when the token is released.)
   NOTE(review): this header declares no call that releases a token, and
   EDGetToken documents tokens as valid until their instruction is released;
   presumably the string shares that lifetime -- confirm.
 @param token The token to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDGetTokenString(const char **buf,
                     EDTokenRef token);

/*!
 @function EDOperandIndexForToken
 Returns the index of the operand to which a token belongs.
 @param token The token to be queried.
 @result The operand index on success; -1 otherwise.
 */
int EDOperandIndexForToken(EDTokenRef token);

/*!
 @function EDTokenIsWhitespace
 Determines whether a token is whitespace.
 @param token The token to be queried.
 @result 1 if the token is whitespace; 0 if not; -1 on error.
 */
int EDTokenIsWhitespace(EDTokenRef token);
  
/*!
 @function EDTokenIsPunctuation
 Determines whether a token is punctuation.
 @param token The token to be queried.
 @result 1 if the token is punctuation; 0 if not; -1 on error.
 */
int EDTokenIsPunctuation(EDTokenRef token);

/*!
 @function EDTokenIsOpcode
 Determines whether a token is an opcode.
 @param token The token to be queried.
 @result 1 if the token is an opcode; 0 if not; -1 on error.
 */
int EDTokenIsOpcode(EDTokenRef token);

/*!
 @function EDTokenIsLiteral
 Determines whether a token is a numeric literal.
 @param token The token to be queried.
 @result 1 if the token is a numeric literal; 0 if not; -1 on error.
 */
int EDTokenIsLiteral(EDTokenRef token);

/*!
 @function EDTokenIsRegister
 Determines whether a token identifies a register.
 @param token The token to be queried.
 @result 1 if the token identifies a register; 0 if not; -1 on error.
 */
int EDTokenIsRegister(EDTokenRef token);

/*!
 @function EDTokenIsNegativeLiteral
 Determines whether a token is a negative signed literal.
 @param token The token to be queried.
 @result 1 if the token is a negative signed literal; 0 if not; -1 on error.
 */
int EDTokenIsNegativeLiteral(EDTokenRef token);

/*!
 @function EDLiteralTokenAbsoluteValue
 Gets the absolute value of a literal token.  The sign is not part of the
 reported value; see EDTokenIsNegativeLiteral.
 @param value A pointer whose target will be filled in with the absolute value
   of the literal.
 @param token The token to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDLiteralTokenAbsoluteValue(uint64_t *value,
                                EDTokenRef token);

/*!
 @function EDRegisterTokenValue
 Gets the LLVM register identifier for a register token.
 @param registerID A pointer whose target will be filled in with the LLVM
   register identifier for the token.
 @param token The token to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDRegisterTokenValue(unsigned *registerID,
                         EDTokenRef token);
  
/*!
 @functiongroup Creating and querying operands
 */
  
/*!
 @function EDNumOperands
 Counts the operands in an instruction.
 @param inst The instruction to be queried.
 @result The number of operands in the instruction, or -1 on error.
 */
int EDNumOperands(EDInstRef inst);

/*!
 @function EDGetOperand
 Retrieves an operand from an instruction.  The operand is owned by the
 instruction and remains valid until the instruction is released.
 @param operand A pointer whose target will be filled in with the operand.
 @param inst The instruction to be queried.
 @param index The index of the operand in the instruction.
 @result 0 on success; -1 otherwise.
 */
int EDGetOperand(EDOperandRef *operand,
                 EDInstRef inst,
                 int index);
  
/*!
 @function EDOperandIsRegister
 Determines whether an operand names a register.
 @param operand The operand to be queried.
 @result 1 if the operand names a register; 0 if not; -1 on error.
 */
int EDOperandIsRegister(EDOperandRef operand);

/*!
 @function EDOperandIsImmediate
 Determines whether an operand specifies an immediate value.
 @param operand The operand to be queried.
 @result 1 if the operand specifies an immediate value; 0 if not; -1 on error.
 */
int EDOperandIsImmediate(EDOperandRef operand);

/*!
 @function EDOperandIsMemory
 Determines whether an operand specifies a location in memory.
 @param operand The operand to be queried.
 @result 1 if the operand specifies a location in memory; 0 if not; -1 on error.
 */
int EDOperandIsMemory(EDOperandRef operand);

/*!
 @function EDRegisterOperandValue
 Gets the LLVM register ID of the register named by a register operand.
 @param value A pointer whose target will be filled in with the LLVM register ID
   of the register named by the operand.
 @param operand The operand to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDRegisterOperandValue(unsigned *value,
                           EDOperandRef operand);
  
/*!
 @function EDImmediateOperandValue
 Gets the value of an immediate operand.
 @param value A pointer whose target will be filled in with the value of the
   immediate.
 @param operand The operand to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDImmediateOperandValue(uint64_t *value,
                            EDOperandRef operand);

/*!
 @function EDEvaluateOperand
 Evaluates an operand using a client-supplied register state accessor.  How
 the operand is evaluated depends on its kind:
   - register operands are evaluated by reading the value of the register;
   - immediate operands are evaluated by reporting the immediate value;
   - memory operands are evaluated by computing the target address (with only
     those relocations applied that were already applied to the original
     bytes).
 @param result A pointer whose target is to be filled with the result of
   evaluating the operand.
 @param operand The operand to be evaluated.
 @param regReader The function to use when reading registers from the register
   state.
 @param arg An anonymous argument for client use (presumably forwarded to
   regReader as its arg parameter -- confirm against the implementation).
 @result 0 if the operand could be evaluated; -1 otherwise.
 */
int EDEvaluateOperand(uint64_t *result,
                      EDOperandRef operand,
                      EDRegisterReaderCallback regReader,
                      void *arg);
  
#ifdef __BLOCKS__

/*!
 @typedef EDByteBlock_t
 Block-based interface to memory from which instructions may be read.  Only
 declared when the compiler defines __BLOCKS__.
 @param byte A pointer whose target should be filled in with the data returned.
 @param address The address of the byte to be read.
 @result 0 on success; -1 otherwise.
 */
typedef int (^EDByteBlock_t)(uint8_t *byte, uint64_t address);

/*!
 @typedef EDRegisterBlock_t
 Block-based interface through which register values may be read.  Only
 declared when the compiler defines __BLOCKS__.
 @param value A pointer whose target should be filled in with the value of the
   register.
 @param regID The LLVM register identifier for the register to read.
 @result 0 if the register could be read; -1 otherwise.
 */
typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);

/*!
 @typedef EDTokenVisitor_t
 Block-based handler for individual tokens.  Only declared when the compiler
 defines __BLOCKS__.
 @param token The current token being read.
 @result 0 to continue; 1 to stop normally; -1 on error.
 */
typedef int (^EDTokenVisitor_t)(EDTokenRef token);

/*! @functiongroup Block-based interfaces */
  
/*!
 @function EDBlockCreateInsts
 Gets a set of contiguous instructions from a disassembler, using a block to
 read memory.
 @param insts A pointer to an array that will be filled in with the
   instructions.  Must have at least count entries.  Entries not filled in will
   be set to NULL.
 @param count The maximum number of instructions to fill in.
   NOTE(review): declared as int here but as unsigned int in EDCreateInsts;
   confirm which type is intended before passing negative values.
 @param disassembler The disassembler to use when decoding the instructions.
 @param byteBlock The block to use when reading the instructions' machine
   code.
 @param address The address of the first byte of the first instruction.
 @result The number of instructions read on success; 0 otherwise.
 */
unsigned int EDBlockCreateInsts(EDInstRef *insts,
                                int count,
                                EDDisassemblerRef disassembler,
                                EDByteBlock_t byteBlock,
                                uint64_t address);

/*!
 @function EDBlockEvaluateOperand
 Evaluates an operand using a block to read registers.  This is the
 block-based counterpart of EDEvaluateOperand.
 @param result A pointer whose target is to be filled with the result of
   evaluating the operand.
 @param operand The operand to be evaluated.
 @param regBlock The block to use when reading registers from the register
   state.
 @result 0 if the operand could be evaluated; -1 otherwise.
 */
int EDBlockEvaluateOperand(uint64_t *result,
                           EDOperandRef operand,
                           EDRegisterBlock_t regBlock);

/*!
 @function EDBlockVisitTokens
 Visits every token of an instruction with a visitor block.  The visitor's
 return value steers the visit as documented for EDTokenVisitor_t (0 to
 continue, 1 to stop normally, -1 on error).
 @param inst The instruction with the tokens to be visited.
 @param visitor The visitor.
 @result 0 if the visit ended normally; -1 if the visitor encountered an error
   or there was some other error.
 */
int EDBlockVisitTokens(EDInstRef inst,
                       EDTokenVisitor_t visitor);

#endif
  
#ifdef __cplusplus
}
#endif

#endif