From d53268279b7bd757fa674d24895d2935a2a4465d Mon Sep 17 00:00:00 2001 From: "ronp@winter" Date: Tue, 20 Nov 2012 09:50:16 +0200 Subject: [PATCH 1/2] updated for distorm 3.3 and 1.9 rubies. Some patches for generating OSX makefile --- .gitignore | 4 + README | 8 +- include/distorm.h | 473 ++++++++++++++++++++++++++++++++++++++++++++ include/mnemonics.h | 312 +++++++++++++++++++++++++++++ src/config.h | 108 +++++----- src/decoder.h | 46 ++--- src/extconf_osx.rb | 3 + src/frasm.c | 18 +- src/instructions.h | 273 +++++++++++++------------ src/insts.h | 61 +++++- src/operands.h | 33 +++- src/prefix.h | 68 +++++-- src/textdefs.h | 48 +++-- src/wstring.h | 53 +++-- src/x86defs.h | 155 +++++---------- 15 files changed, 1255 insertions(+), 408 deletions(-) create mode 100644 .gitignore create mode 100644 include/distorm.h create mode 100644 include/mnemonics.h create mode 100644 src/extconf_osx.rb diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..84b93de --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.o +*.bundle +src/Makefile + diff --git a/README b/README index dfbd9be..7f2d8a5 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -frasm is a Ruby binding for Distorm64 v1.7.30 written by Gil Dabah [http://ragestorm.net/distorm/] +frasm is a Ruby binding for Distorm3 v3.3 written by Gil Dabah [http://ragestorm.net/distorm/] The frasm binding was originally written by Tom Ptacek and has since been updated by Chris Rohlf @@ -6,15 +6,17 @@ frasm requires you have built and installed libdistorm Distorm comes with pystorm by default so when you create distorm64.{so,bundle,dll} be sure to compile like so: 'make clib' otherwise ruby will throw an error due to undefined python symbols +make sure you have the distorm3.so/distorm3.dylib under your search path (the OSX generated makefile looks for /usr/local/lib/distorm3.dylib) --------------- Compiling Frasm --------------- cd src/ -ruby extconf.rb +ruby extconf.rb (for OSX: ruby extconf_osx.rb) make +make 
install (may require sudo) The directories linux/ osx/ and win32 also have manually created Makefiles if extconf.rb fails you @@ -26,6 +28,6 @@ require 'frasm' d = Frasm::DistormDecoder.new -d.decode("ABCDEFGHIJKLMNOPQRSTUVWXYZ").each do |l| +d.decode("\x90\x90\x90\x33\xc0\x66\xb8\x34\x12\x50\x40\xc3").each do |l| puts "#{l.mnem} #{l.size} #{l.offset} #{l.raw}" end diff --git a/include/distorm.h b/include/distorm.h new file mode 100644 index 0000000..29fe138 --- /dev/null +++ b/include/distorm.h @@ -0,0 +1,473 @@ +/* diStorm3 3.3 */ + +/* +distorm.h + +diStorm3 - Powerful disassembler for X86/AMD64 +http://ragestorm.net/distorm/ +distorm at gmail dot com +Copyright (C) 2003-2012 Gil Dabah + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/> +*/ + + +#ifndef DISTORM_H +#define DISTORM_H + +/* + * 64 bit offsets support: + * If the diStorm library you use was compiled with 64 bits offsets, + * make sure you compile your own code with the following macro set: + * SUPPORT_64BIT_OFFSET + * Otherwise comment it out, or you will get a linker error of an unresolved symbol... + * Turned on by default! + */ + +#if !(defined(DISTORM_STATIC) || defined(DISTORM_DYNAMIC)) + /* Define this macro for outer projects by default. */ + #define SUPPORT_64BIT_OFFSET +#endif + +/* TINYC has a problem with some 64bits library functions, so ignore 64 bit offsets. 
*/ +#ifdef __TINYC__ + #undef SUPPORT_64BIT_OFFSET +#endif + +/* If your compiler doesn't support stdint.h, define your own 64 bits type. */ +#ifdef SUPPORT_64BIT_OFFSET + #ifdef _MSC_VER + #define OFFSET_INTEGER unsigned __int64 + #else + #include <stdint.h> + #define OFFSET_INTEGER uint64_t + #endif +#else + /* 32 bit offsets are used. */ + #define OFFSET_INTEGER unsigned long +#endif + +#ifdef _MSC_VER +/* Since MSVC isn't shipped with stdint.h, we will have our own: */ +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; +typedef signed __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef signed __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef signed __int8 int8_t; +typedef unsigned __int8 uint8_t; +#endif + +/* Support C++ compilers */ +#ifdef __cplusplus + extern "C" { +#endif + + +/* *** Helper Macros *** */ + +/* Get the ISC of the instruction, used with the definitions below. */ +#define META_GET_ISC(meta) (((meta) >> 3) & 0x1f) +#define META_SET_ISC(di, isc) (((di)->meta) |= ((isc) << 3)) +/* Get the flow control flags of the instruction, see 'features for decompose' below. */ +#define META_GET_FC(meta) ((meta) & 0x7) + +/* Get the target address of a branching instruction. O_PC operand type. */ +#define INSTRUCTION_GET_TARGET(di) ((_OffsetType)(((di)->addr + (di)->imm.addr + (di)->size))) +/* Get the target address of a RIP-relative memory indirection. */ +#define INSTRUCTION_GET_RIP_TARGET(di) ((_OffsetType)(((di)->addr + (di)->disp + (di)->size))) + +/* + * Operand Size or Address size are stored inside the flags: + * 0 - 16 bits + * 1 - 32 bits + * 2 - 64 bits + * 3 - reserved + * + * If you call these set-macros more than once, you will have to clean the bits before doing so. 
+ */ +#define FLAG_SET_OPSIZE(di, size) ((di->flags) |= (((size) & 3) << 8)) +#define FLAG_SET_ADDRSIZE(di, size) ((di->flags) |= (((size) & 3) << 10)) +#define FLAG_GET_OPSIZE(flags) (((flags) >> 8) & 3) +#define FLAG_GET_ADDRSIZE(flags) (((flags) >> 10) & 3) +/* To get the LOCK/REPNZ/REP prefixes. */ +#define FLAG_GET_PREFIX(flags) ((flags) & 7) + +/* + * Macros to extract segment registers from 'segment': + */ +#define SEGMENT_DEFAULT 0x80 +#define SEGMENT_SET(di, seg) ((di->segment) |= seg) +#define SEGMENT_GET(segment) (((segment) == R_NONE) ? R_NONE : ((segment) & 0x7f)) +#define SEGMENT_IS_DEFAULT(segment) (((segment) & SEGMENT_DEFAULT) == SEGMENT_DEFAULT) + + +/* Decode modes of the disassembler, 16 bits or 32 bits or 64 bits for AMD64, x86-64. */ +typedef enum { Decode16Bits = 0, Decode32Bits = 1, Decode64Bits = 2 } _DecodeType; + +typedef OFFSET_INTEGER _OffsetType; + +typedef struct { + _OffsetType codeOffset, nextOffset; /* nextOffset is OUT only. */ + const uint8_t* code; + int codeLen; /* Using signed integer makes it easier to detect an underflow. */ + _DecodeType dt; + unsigned int features; +} _CodeInfo; + +typedef enum { O_NONE, O_REG, O_IMM, O_IMM1, O_IMM2, O_DISP, O_SMEM, O_MEM, O_PC, O_PTR } _OperandType; + +typedef union { + /* Used by O_IMM: */ + int8_t sbyte; + uint8_t byte; + int16_t sword; + uint16_t word; + int32_t sdword; + uint32_t dword; + int64_t sqword; /* All immediates are SIGN-EXTENDED to 64 bits! */ + uint64_t qword; + + /* Used by O_PC: (Use INSTRUCTION_GET_TARGET).*/ + _OffsetType addr; /* It's a relative offset as for now. */ + + /* Used by O_PTR: */ + struct { + uint16_t seg; + /* Can be 16 or 32 bits, size is in ops[n].size. */ + uint32_t off; + } ptr; + + /* Used by O_IMM1 (i1) and O_IMM2 (i2). ENTER instruction only. */ + struct { + uint32_t i1; + uint32_t i2; + } ex; +} _Value; + +typedef struct { + /* Type of operand: + O_NONE: operand is to be ignored. + O_REG: index holds global register index. + O_IMM: instruction.imm. 
+ O_IMM1: instruction.imm.ex.i1. + O_IMM2: instruction.imm.ex.i2. + O_DISP: memory dereference with displacement only, instruction.disp. + O_SMEM: simple memory dereference with optional displacement (a single register memory dereference). + O_MEM: complex memory dereference (optional fields: s/i/b/disp). + O_PC: the relative address of a branch instruction (instruction.imm.addr). + O_PTR: the absolute target address of a far branch instruction (instruction.imm.ptr.seg/off). + */ + uint8_t type; /* _OperandType */ + + /* Index of: + O_REG: holds global register index + O_SMEM: holds the 'base' register. E.G: [ECX], [EBX+0x1234] are both in operand.index. + O_MEM: holds the 'index' register. E.G: [EAX*4] is in operand.index. + */ + uint8_t index; + + /* Size of: + O_REG: register + O_IMM: instruction.imm + O_IMM1: instruction.imm.ex.i1 + O_IMM2: instruction.imm.ex.i2 + O_DISP: instruction.disp + O_SMEM: size of indirection. + O_MEM: size of indirection. + O_PC: size of the relative offset + O_PTR: size of instruction.imm.ptr.off (16 or 32) + */ + uint16_t size; +} _Operand; + +#define OPCODE_ID_NONE 0 +/* Instruction could not be disassembled. */ +#define FLAG_NOT_DECODABLE ((uint16_t)-1) +/* The instruction locks memory access. */ +#define FLAG_LOCK (1 << 0) +/* The instruction is prefixed with a REPNZ. */ +#define FLAG_REPNZ (1 << 1) +/* The instruction is prefixed with a REP, this can be a REPZ, it depends on the specific instruction. */ +#define FLAG_REP (1 << 2) +/* Indicates there is a hint taken for Jcc instructions only. */ +#define FLAG_HINT_TAKEN (1 << 3) +/* Indicates there is a hint non-taken for Jcc instructions only. */ +#define FLAG_HINT_NOT_TAKEN (1 << 4) +/* The Imm value is signed extended. */ +#define FLAG_IMM_SIGNED (1 << 5) +/* The destination operand is writable. */ +#define FLAG_DST_WR (1 << 6) +/* The instruction uses RIP-relative indirection. */ +#define FLAG_RIP_RELATIVE (1 << 7) + +/* No register was defined. 
*/ +#define R_NONE ((uint8_t)-1) + +#define REGS64_BASE 0 +#define REGS32_BASE 16 +#define REGS16_BASE 32 +#define REGS8_BASE 48 +#define REGS8_REX_BASE 64 +#define SREGS_BASE 68 +#define FPUREGS_BASE 75 +#define MMXREGS_BASE 83 +#define SSEREGS_BASE 91 +#define AVXREGS_BASE 107 +#define CREGS_BASE 123 +#define DREGS_BASE 132 + +#define OPERANDS_NO (4) + +typedef struct { + /* Used by ops[n].type == O_IMM/O_IMM1&O_IMM2/O_PTR/O_PC. Its size is ops[n].size. */ + _Value imm; + /* Used by ops[n].type == O_SMEM/O_MEM/O_DISP. Its size is dispSize. */ + uint64_t disp; + /* Virtual address of first byte of instruction. */ + _OffsetType addr; + /* General flags of instruction, holds prefixes and more, if FLAG_NOT_DECODABLE, instruction is invalid. */ + uint16_t flags; + /* Unused prefixes mask, for each bit that is set that prefix is not used (LSB is byte [addr + 0]). */ + uint16_t unusedPrefixesMask; + /* Mask of registers that were used in the operands, only used for quick look up, in order to know *some* operand uses that register class. */ + uint16_t usedRegistersMask; + /* ID of opcode in the global opcode table. Use for mnemonic look up. */ + uint16_t opcode; + /* Up to four operands per instruction, ignored if ops[n].type == O_NONE. */ + _Operand ops[OPERANDS_NO]; + /* Size of the whole instruction. */ + uint8_t size; + /* Segment information of memory indirection, default segment, or overriden one, can be -1. Use SEGMENT macros. */ + uint8_t segment; + /* Used by ops[n].type == O_MEM. Base global register index (might be R_NONE), scale size (2/4/8), ignored for 0 or 1. */ + uint8_t base, scale; + uint8_t dispSize; + /* Meta defines the instruction set class, and the flow control flags. Use META macros. */ + uint8_t meta; + /* The CPU flags that the instruction operates upon. */ + uint8_t modifiedFlagsMask, testedFlagsMask, undefinedFlagsMask; +} _DInst; + +#ifndef DISTORM_LIGHT + +/* Static size of strings. Do not change this value. Keep Python wrapper in sync. 
*/ +#define MAX_TEXT_SIZE (48) +typedef struct { + unsigned int length; + unsigned char p[MAX_TEXT_SIZE]; /* p is a null terminated string. */ +} _WString; + +/* + * Old decoded instruction structure in text format. + * Used only for backward compatibility with diStorm64. + * This structure holds all information the disassembler generates per instruction. + */ +typedef struct { + _WString mnemonic; /* Mnemonic of decoded instruction, prefixed if required by REP, LOCK etc. */ + _WString operands; /* Operands of the decoded instruction, up to 3 operands, comma-separated. */ + _WString instructionHex; /* Hex dump - little endian, including prefixes. */ + unsigned int size; /* Size of decoded instruction. */ + _OffsetType offset; /* Start offset of the decoded instruction. */ +} _DecodedInst; + +#endif /* DISTORM_LIGHT */ + +/* Register masks for quick look up, each mask indicates one of a register-class that is being used in some operand. */ +#define RM_AX 1 /* AL, AH, AX, EAX, RAX */ +#define RM_CX 2 /* CL, CH, CX, ECX, RCX */ +#define RM_DX 4 /* DL, DH, DX, EDX, RDX */ +#define RM_BX 8 /* BL, BH, BX, EBX, RBX */ +#define RM_SP 0x10 /* SPL, SP, ESP, RSP */ +#define RM_BP 0x20 /* BPL, BP, EBP, RBP */ +#define RM_SI 0x40 /* SIL, SI, ESI, RSI */ +#define RM_DI 0x80 /* DIL, DI, EDI, RDI */ +#define RM_FPU 0x100 /* ST(0) - ST(7) */ +#define RM_MMX 0x200 /* MM0 - MM7 */ +#define RM_SSE 0x400 /* XMM0 - XMM15 */ +#define RM_AVX 0x800 /* YMM0 - YMM15 */ +#define RM_CR 0x1000 /* CR0, CR2, CR3, CR4, CR8 */ +#define RM_DR 0x2000 /* DR0, DR1, DR2, DR3, DR6, DR7 */ +/* RIP should be checked using the 'flags' field and FLAG_RIP_RELATIVE. + * Segments should be checked using the segment macros. + * For now R8 - R15 are not supported and non general purpose registers map into same RM. + */ + +/* CPU Flags that instructions modify, test or undefine. 
*/ +#define D_ZF 1 /* Zero */ +#define D_SF 2 /* Sign */ +#define D_CF 4 /* Carry */ +#define D_OF 8 /* Overflow */ +#define D_PF 0x10 /* Parity */ +#define D_AF 0x20 /* Auxiliary */ +#define D_DF 0x40 /* Direction */ +#define D_IF 0x80 /* Interrupt */ + +/* + * Instructions Set classes: + * if you want a better understanding of the available classes, look at disOps project, file: x86sets.py. + */ +/* Indicates the instruction belongs to the General Integer set. */ +#define ISC_INTEGER 1 +/* Indicates the instruction belongs to the 387 FPU set. */ +#define ISC_FPU 2 +/* Indicates the instruction belongs to the P6 set. */ +#define ISC_P6 3 +/* Indicates the instruction belongs to the MMX set. */ +#define ISC_MMX 4 +/* Indicates the instruction belongs to the SSE set. */ +#define ISC_SSE 5 +/* Indicates the instruction belongs to the SSE2 set. */ +#define ISC_SSE2 6 +/* Indicates the instruction belongs to the SSE3 set. */ +#define ISC_SSE3 7 +/* Indicates the instruction belongs to the SSSE3 set. */ +#define ISC_SSSE3 8 +/* Indicates the instruction belongs to the SSE4.1 set. */ +#define ISC_SSE4_1 9 +/* Indicates the instruction belongs to the SSE4.2 set. */ +#define ISC_SSE4_2 10 +/* Indicates the instruction belongs to the AMD's SSE4.A set. */ +#define ISC_SSE4_A 11 +/* Indicates the instruction belongs to the 3DNow! set. */ +#define ISC_3DNOW 12 +/* Indicates the instruction belongs to the 3DNow! Extensions set. */ +#define ISC_3DNOWEXT 13 +/* Indicates the instruction belongs to the VMX (Intel) set. */ +#define ISC_VMX 14 +/* Indicates the instruction belongs to the SVM (AMD) set. */ +#define ISC_SVM 15 +/* Indicates the instruction belongs to the AVX (Intel) set. */ +#define ISC_AVX 16 +/* Indicates the instruction belongs to the FMA (Intel) set. */ +#define ISC_FMA 17 +/* Indicates the instruction belongs to the AES/AVX (Intel) set. */ +#define ISC_AES 18 +/* Indicates the instruction belongs to the CLMUL (Intel) set. 
*/ +#define ISC_CLMUL 19 + +/* Features for decompose: */ +#define DF_NONE 0 +/* The decoder will limit addresses to a maximum of 16 bits. */ +#define DF_MAXIMUM_ADDR16 1 +/* The decoder will limit addresses to a maximum of 32 bits. */ +#define DF_MAXIMUM_ADDR32 2 +/* The decoder will return only flow control instructions (and filter the others internally). */ +#define DF_RETURN_FC_ONLY 4 +/* The decoder will stop and return to the caller when the instruction 'CALL' (near and far) was decoded. */ +#define DF_STOP_ON_CALL 8 +/* The decoder will stop and return to the caller when the instruction 'RET' (near and far) was decoded. */ +#define DF_STOP_ON_RET 0x10 +/* The decoder will stop and return to the caller when the instruction system-call/ret was decoded. */ +#define DF_STOP_ON_SYS 0x20 +/* The decoder will stop and return to the caller when any of the branch 'JMP', (near and far) instructions were decoded. */ +#define DF_STOP_ON_UNC_BRANCH 0x40 +/* The decoder will stop and return to the caller when any of the conditional branch instruction were decoded. */ +#define DF_STOP_ON_CND_BRANCH 0x80 +/* The decoder will stop and return to the caller when the instruction 'INT' (INT, INT1, INTO, INT 3) was decoded. */ +#define DF_STOP_ON_INT 0x100 +/* The decoder will stop and return to the caller when any of the 'CMOVxx' instruction was decoded. */ +#define DF_STOP_ON_CMOV 0x200 +/* The decoder will stop and return to the caller when any flow control instruction was decoded. */ +#define DF_STOP_ON_FLOW_CONTROL (DF_STOP_ON_CALL | DF_STOP_ON_RET | DF_STOP_ON_SYS | DF_STOP_ON_UNC_BRANCH | DF_STOP_ON_CND_BRANCH | DF_STOP_ON_INT | DF_STOP_ON_CMOV) + +/* Indicates the instruction is not a flow-control instruction. */ +#define FC_NONE 0 +/* Indicates the instruction is one of: CALL, CALL FAR. */ +#define FC_CALL 1 +/* Indicates the instruction is one of: RET, IRET, RETF. */ +#define FC_RET 2 +/* Indicates the instruction is one of: SYSCALL, SYSRET, SYSENTER, SYSEXIT. 
*/ +#define FC_SYS 3 +/* Indicates the instruction is one of: JMP, JMP FAR. */ +#define FC_UNC_BRANCH 4 +/* + * Indicates the instruction is one of: + * JCXZ, JO, JNO, JB, JAE, JZ, JNZ, JBE, JA, JS, JNS, JP, JNP, JL, JGE, JLE, JG, LOOP, LOOPZ, LOOPNZ. + */ +#define FC_CND_BRANCH 5 +/* Indicates the instruction is one of: INT, INT1, INT 3, INTO, UD2. */ +#define FC_INT 6 +/* Indicates the instruction is one of: CMOVxx. */ +#define FC_CMOV 7 + +/* Return code of the decoding function. */ +typedef enum { DECRES_NONE, DECRES_SUCCESS, DECRES_MEMORYERR, DECRES_INPUTERR, DECRES_FILTERED } _DecodeResult; + +/* Define the following interface functions only for outer projects. */ +#if !(defined(DISTORM_STATIC) || defined(DISTORM_DYNAMIC)) + +/* distorm_decode + * Input: + * offset - Origin of the given code (virtual address that is), NOT an offset in code. + * code - Pointer to the code buffer to be disassembled. + * length - Amount of bytes that should be decoded from the code buffer. + * dt - Decoding mode, 16 bits (Decode16Bits), 32 bits (Decode32Bits) or AMD64 (Decode64Bits). + * result - Array of type _DecodedInst which will be used by this function in order to return the disassembled instructions. + * maxInstructions - The maximum number of entries in the result array that you pass to this function, so it won't exceed its bound. + * usedInstructionsCount - Number of the instruction that successfully were disassembled and written to the result array. + * Output: usedInstructionsCount will hold the number of entries used in the result array + * and the result array itself will be filled with the disassembled instructions. + * Return: DECRES_SUCCESS on success (no more to disassemble), DECRES_INPUTERR on input error (null code buffer, invalid decoding mode, etc...), + * DECRES_MEMORYERR when there are not enough entries to use in the result array, BUT YOU STILL have to check for usedInstructionsCount! 
+ * Side-Effects: Even if the return code is DECRES_MEMORYERR, there might STILL be data in the + * array you passed, this function will try to use as much entries as possible! + * Notes: 1)The minimal size of maxInstructions is 15. + * 2)You will have to synchronize the offset,code and length by yourself if you pass code fragments and not a complete code block! + */ +#ifdef SUPPORT_64BIT_OFFSET + + _DecodeResult distorm_decompose64(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount); + #define distorm_decompose distorm_decompose64 + +#ifndef DISTORM_LIGHT + /* If distorm-light is defined, we won't export these text-formatting functionality. */ + _DecodeResult distorm_decode64(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount); + void distorm_format64(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result); + #define distorm_decode distorm_decode64 + #define distorm_format distorm_format64 +#endif /*DISTORM_LIGHT*/ + +#else /*SUPPORT_64BIT_OFFSET*/ + + _DecodeResult distorm_decompose32(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount); + #define distorm_decompose distorm_decompose32 + +#ifndef DISTORM_LIGHT + /* If distorm-light is defined, we won't export these text-formatting functionality. */ + _DecodeResult distorm_decode32(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount); + void distorm_format32(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result); + #define distorm_decode distorm_decode32 + #define distorm_format distorm_format32 +#endif /*DISTORM_LIGHT*/ + +#endif + +/* + * distorm_version + * Input: + * none + * + * Output: unsigned int - version of compiled library. 
+ */ +unsigned int distorm_version(); + +#endif /* DISTORM_STATIC */ + +#ifdef __cplusplus +} /* End Of Extern */ +#endif + +#endif /* DISTORM_H */ diff --git a/include/mnemonics.h b/include/mnemonics.h new file mode 100644 index 0000000..0d6e41e --- /dev/null +++ b/include/mnemonics.h @@ -0,0 +1,312 @@ +/* +mnemonics.h + +diStorm3 - Powerful disassembler for X86/AMD64 +http://ragestorm.net/distorm/ +distorm at gmail dot com +Copyright (C) 2003-2012 Gil Dabah + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/> +*/ + + +#ifndef MNEMONICS_H +#define MNEMONICS_H + +#ifdef __cplusplus + extern "C" { +#endif + +#ifndef DISTORM_LIGHT + +typedef struct WMnemonic { + unsigned char length; + unsigned char p[1]; /* p is a null terminated string, which contains 'length' characters. */ +} _WMnemonic; + +typedef struct WRegister { + unsigned int length; + unsigned char p[6]; /* p is a null terminated string. 
*/ +} _WRegister; + +extern const unsigned char _MNEMONICS[]; +extern const _WRegister _REGISTERS[]; + +#endif /* DISTORM_LIGHT */ + +#ifdef __cplusplus +} /* End Of Extern */ +#endif + +#define GET_REGISTER_NAME(r) (unsigned char*)_REGISTERS[(r)].p +#define GET_MNEMONIC_NAME(m) ((_WMnemonic*)&_MNEMONICS[(m)])->p + +typedef enum { + I_UNDEFINED = 0, I_AAA = 66, I_AAD = 389, I_AAM = 384, I_AAS = 76, I_ADC = 31, I_ADD = 11, I_ADDPD = 3110, + I_ADDPS = 3103, I_ADDSD = 3124, I_ADDSS = 3117, I_ADDSUBPD = 6394, I_ADDSUBPS = 6404, + I_AESDEC = 9209, I_AESDECLAST = 9226, I_AESENC = 9167, I_AESENCLAST = 9184, + I_AESIMC = 9150, I_AESKEYGENASSIST = 9795, I_AND = 41, I_ANDNPD = 3021, I_ANDNPS = 3013, + I_ANDPD = 2990, I_ANDPS = 2983, I_ARPL = 111, I_BLENDPD = 9372, I_BLENDPS = 9353, + I_BLENDVPD = 7619, I_BLENDVPS = 7609, I_BOUND = 104, I_BSF = 4346, I_BSR = 4358, + I_BSWAP = 960, I_BT = 872, I_BTC = 934, I_BTR = 912, I_BTS = 887, I_CALL = 456, + I_CALL_FAR = 260, I_CBW = 228, I_CDQ = 250, I_CDQE = 239, I_CLC = 492, I_CLD = 512, + I_CLFLUSH = 4329, I_CLGI = 1833, I_CLI = 502, I_CLTS = 541, I_CMC = 487, I_CMOVA = 694, + I_CMOVAE = 663, I_CMOVB = 656, I_CMOVBE = 686, I_CMOVG = 754, I_CMOVGE = 738, + I_CMOVL = 731, I_CMOVLE = 746, I_CMOVNO = 648, I_CMOVNP = 723, I_CMOVNS = 708, + I_CMOVNZ = 678, I_CMOVO = 641, I_CMOVP = 716, I_CMOVS = 701, I_CMOVZ = 671, + I_CMP = 71, I_CMPEQPD = 4449, I_CMPEQPS = 4370, I_CMPEQSD = 4607, I_CMPEQSS = 4528, + I_CMPLEPD = 4467, I_CMPLEPS = 4388, I_CMPLESD = 4625, I_CMPLESS = 4546, I_CMPLTPD = 4458, + I_CMPLTPS = 4379, I_CMPLTSD = 4616, I_CMPLTSS = 4537, I_CMPNEQPD = 4488, I_CMPNEQPS = 4409, + I_CMPNEQSD = 4646, I_CMPNEQSS = 4567, I_CMPNLEPD = 4508, I_CMPNLEPS = 4429, + I_CMPNLESD = 4666, I_CMPNLESS = 4587, I_CMPNLTPD = 4498, I_CMPNLTPS = 4419, + I_CMPNLTSD = 4656, I_CMPNLTSS = 4577, I_CMPORDPD = 4518, I_CMPORDPS = 4439, + I_CMPORDSD = 4676, I_CMPORDSS = 4597, I_CMPS = 301, I_CMPUNORDPD = 4476, I_CMPUNORDPS = 4397, + I_CMPUNORDSD = 4634, 
I_CMPUNORDSS = 4555, I_CMPXCHG = 898, I_CMPXCHG16B = 6373, + I_CMPXCHG8B = 6362, I_COMISD = 2779, I_COMISS = 2771, I_CPUID = 865, I_CQO = 255, + I_CRC32 = 9258, I_CVTDQ2PD = 6787, I_CVTDQ2PS = 3307, I_CVTPD2DQ = 6797, I_CVTPD2PI = 2681, + I_CVTPD2PS = 3233, I_CVTPH2PS = 4161, I_CVTPI2PD = 2495, I_CVTPI2PS = 2485, + I_CVTPS2DQ = 3317, I_CVTPS2PD = 3223, I_CVTPS2PH = 4171, I_CVTPS2PI = 2671, + I_CVTSD2SI = 2701, I_CVTSD2SS = 3253, I_CVTSI2SD = 2515, I_CVTSI2SS = 2505, + I_CVTSS2SD = 3243, I_CVTSS2SI = 2691, I_CVTTPD2DQ = 6776, I_CVTTPD2PI = 2614, + I_CVTTPS2DQ = 3327, I_CVTTPS2PI = 2603, I_CVTTSD2SI = 2636, I_CVTTSS2SI = 2625, + I_CWD = 245, I_CWDE = 233, I_DAA = 46, I_DAS = 56, I_DEC = 86, I_DIV = 1630, + I_DIVPD = 3499, I_DIVPS = 3492, I_DIVSD = 3513, I_DIVSS = 3506, I_DPPD = 9615, + I_DPPS = 9602, I_EMMS = 4100, I_ENTER = 340, I_EXTRACTPS = 9480, I_EXTRQ = 4136, + I_F2XM1 = 1176, I_FABS = 1107, I_FADD = 1007, I_FADDP = 1533, I_FBLD = 1585, + I_FBSTP = 1591, I_FCHS = 1101, I_FCLEX = 7289, I_FCMOVB = 1360, I_FCMOVBE = 1376, + I_FCMOVE = 1368, I_FCMOVNB = 1429, I_FCMOVNBE = 1447, I_FCMOVNE = 1438, I_FCMOVNU = 1457, + I_FCMOVU = 1385, I_FCOM = 1019, I_FCOMI = 1496, I_FCOMIP = 1607, I_FCOMP = 1025, + I_FCOMPP = 1547, I_FCOS = 1295, I_FDECSTP = 1222, I_FDIV = 1045, I_FDIVP = 1578, + I_FDIVR = 1051, I_FDIVRP = 1570, I_FEDISI = 1472, I_FEMMS = 574, I_FENI = 1466, + I_FFREE = 1511, I_FIADD = 1301, I_FICOM = 1315, I_FICOMP = 1322, I_FIDIV = 1345, + I_FIDIVR = 1352, I_FILD = 1402, I_FIMUL = 1308, I_FINCSTP = 1231, I_FINIT = 7304, + I_FIST = 1416, I_FISTP = 1422, I_FISTTP = 1408, I_FISUB = 1330, I_FISUBR = 1337, + I_FLD = 1058, I_FLD1 = 1125, I_FLDCW = 1082, I_FLDENV = 1074, I_FLDL2E = 1139, + I_FLDL2T = 1131, I_FLDLG2 = 1154, I_FLDLN2 = 1162, I_FLDPI = 1147, I_FLDZ = 1170, + I_FMUL = 1013, I_FMULP = 1540, I_FNCLEX = 7281, I_FNINIT = 7296, I_FNOP = 1095, + I_FNSAVE = 7311, I_FNSTCW = 7266, I_FNSTENV = 7249, I_FNSTSW = 7326, I_FPATAN = 1197, + I_FPREM = 1240, I_FPREM1 = 1214, 
I_FPTAN = 1190, I_FRNDINT = 1272, I_FRSTOR = 1503, + I_FSAVE = 7319, I_FSCALE = 1281, I_FSETPM = 1480, I_FSIN = 1289, I_FSINCOS = 1263, + I_FSQRT = 1256, I_FST = 1063, I_FSTCW = 7274, I_FSTENV = 7258, I_FSTP = 1068, + I_FSTSW = 7334, I_FSUB = 1032, I_FSUBP = 1563, I_FSUBR = 1038, I_FSUBRP = 1555, + I_FTST = 1113, I_FUCOM = 1518, I_FUCOMI = 1488, I_FUCOMIP = 1598, I_FUCOMP = 1525, + I_FUCOMPP = 1393, I_FXAM = 1119, I_FXCH = 1089, I_FXRSTOR = 9892, I_FXRSTOR64 = 9901, + I_FXSAVE = 9864, I_FXSAVE64 = 9872, I_FXTRACT = 1205, I_FYL2X = 1183, I_FYL2XP1 = 1247, + I_GETSEC = 633, I_HADDPD = 4181, I_HADDPS = 4189, I_HLT = 482, I_HSUBPD = 4215, + I_HSUBPS = 4223, I_IDIV = 1635, I_IMUL = 117, I_IN = 447, I_INC = 81, I_INS = 123, + I_INSERTPS = 9547, I_INSERTQ = 4143, I_INT = 367, I_INT_3 = 360, I_INT1 = 476, + I_INTO = 372, I_INVD = 555, I_INVEPT = 8284, I_INVLPG = 1711, I_INVLPGA = 1847, + I_INVPCID = 8301, I_INVVPID = 8292, I_IRET = 378, I_JA = 166, I_JAE = 147, + I_JB = 143, I_JBE = 161, I_JCXZ = 427, I_JECXZ = 433, I_JG = 202, I_JGE = 192, + I_JL = 188, I_JLE = 197, I_JMP = 462, I_JMP_FAR = 467, I_JNO = 138, I_JNP = 183, + I_JNS = 174, I_JNZ = 156, I_JO = 134, I_JP = 179, I_JRCXZ = 440, I_JS = 170, + I_JZ = 152, I_LAHF = 289, I_LAR = 522, I_LDDQU = 6994, I_LDMXCSR = 9922, I_LDS = 335, + I_LEA = 223, I_LEAVE = 347, I_LES = 330, I_LFENCE = 4265, I_LFS = 917, I_LGDT = 1687, + I_LGS = 922, I_LIDT = 1693, I_LLDT = 1652, I_LMSW = 1705, I_LODS = 313, I_LOOP = 421, + I_LOOPNZ = 406, I_LOOPZ = 414, I_LSL = 527, I_LSS = 907, I_LTR = 1658, I_LZCNT = 4363, + I_MASKMOVDQU = 7119, I_MASKMOVQ = 7109, I_MAXPD = 3559, I_MAXPS = 3552, I_MAXSD = 3573, + I_MAXSS = 3566, I_MFENCE = 4291, I_MINPD = 3439, I_MINPS = 3432, I_MINSD = 3453, + I_MINSS = 3446, I_MONITOR = 1755, I_MOV = 218, I_MOVAPD = 2459, I_MOVAPS = 2451, + I_MOVBE = 9251, I_MOVD = 3920, I_MOVDDUP = 2186, I_MOVDQ2Q = 6522, I_MOVDQA = 3946, + I_MOVDQU = 3954, I_MOVHLPS = 2151, I_MOVHPD = 2345, I_MOVHPS = 2337, I_MOVLHPS = 2328, + 
I_MOVLPD = 2168, I_MOVLPS = 2160, I_MOVMSKPD = 2815, I_MOVMSKPS = 2805, I_MOVNTDQ = 6849, + I_MOVNTDQA = 7895, I_MOVNTI = 952, I_MOVNTPD = 2556, I_MOVNTPS = 2547, I_MOVNTQ = 6841, + I_MOVNTSD = 2574, I_MOVNTSS = 2565, I_MOVQ = 3926, I_MOVQ2DQ = 6513, I_MOVS = 295, + I_MOVSD = 2110, I_MOVSHDUP = 2353, I_MOVSLDUP = 2176, I_MOVSS = 2103, I_MOVSX = 939, + I_MOVSXD = 10013, I_MOVUPD = 2095, I_MOVUPS = 2087, I_MOVZX = 927, I_MPSADBW = 9628, + I_MUL = 1625, I_MULPD = 3170, I_MULPS = 3163, I_MULSD = 3184, I_MULSS = 3177, + I_MWAIT = 1764, I_NEG = 1620, I_NOP = 581, I_NOT = 1615, I_OR = 27, I_ORPD = 3053, + I_ORPS = 3047, I_OUT = 451, I_OUTS = 128, I_PABSB = 7688, I_PABSD = 7718, I_PABSW = 7703, + I_PACKSSDW = 3849, I_PACKSSWB = 3681, I_PACKUSDW = 7916, I_PACKUSWB = 3759, + I_PADDB = 7204, I_PADDD = 7234, I_PADDQ = 6481, I_PADDSB = 6930, I_PADDSW = 6947, + I_PADDUSB = 6620, I_PADDUSW = 6639, I_PADDW = 7219, I_PALIGNR = 9410, I_PAND = 6607, + I_PANDN = 6665, I_PAUSE = 10021, I_PAVGB = 6680, I_PAVGUSB = 2078, I_PAVGW = 6725, + I_PBLENDVB = 7599, I_PBLENDW = 9391, I_PCLMULQDQ = 9647, I_PCMPEQB = 4043, + I_PCMPEQD = 4081, I_PCMPEQQ = 7876, I_PCMPEQW = 4062, I_PCMPESTRI = 9726, + I_PCMPESTRM = 9703, I_PCMPGTB = 3702, I_PCMPGTD = 3740, I_PCMPGTQ = 8087, + I_PCMPGTW = 3721, I_PCMPISTRI = 9772, I_PCMPISTRM = 9749, I_PEXTRB = 9429, + I_PEXTRD = 9446, I_PEXTRQ = 9454, I_PEXTRW = 6311, I_PF2ID = 1914, I_PF2IW = 1907, + I_PFACC = 2028, I_PFADD = 1977, I_PFCMPEQ = 2035, I_PFCMPGE = 1938, I_PFCMPGT = 1984, + I_PFMAX = 1993, I_PFMIN = 1947, I_PFMUL = 2044, I_PFNACC = 1921, I_PFPNACC = 1929, + I_PFRCP = 1954, I_PFRCPIT1 = 2000, I_PFRCPIT2 = 2051, I_PFRSQIT1 = 2010, I_PFRSQRT = 1961, + I_PFSUB = 1970, I_PFSUBR = 2020, I_PHADDD = 7375, I_PHADDSW = 7392, I_PHADDW = 7358, + I_PHMINPOSUW = 8259, I_PHSUBD = 7451, I_PHSUBSW = 7468, I_PHSUBW = 7434, I_PI2FD = 1900, + I_PI2FW = 1893, I_PINSRB = 9530, I_PINSRD = 9568, I_PINSRQ = 9576, I_PINSRW = 6294, + I_PMADDUBSW = 7411, I_PMADDWD = 7073, I_PMAXSB 
= 8174, I_PMAXSD = 8191, I_PMAXSW = 6964, + I_PMAXUB = 6648, I_PMAXUD = 8225, I_PMAXUW = 8208, I_PMINSB = 8106, I_PMINSD = 8123, + I_PMINSW = 6902, I_PMINUB = 6590, I_PMINUD = 8157, I_PMINUW = 8140, I_PMOVMSKB = 6531, + I_PMOVSXBD = 7754, I_PMOVSXBQ = 7775, I_PMOVSXBW = 7733, I_PMOVSXDQ = 7838, + I_PMOVSXWD = 7796, I_PMOVSXWQ = 7817, I_PMOVZXBD = 7982, I_PMOVZXBQ = 8003, + I_PMOVZXBW = 7961, I_PMOVZXDQ = 8066, I_PMOVZXWD = 8024, I_PMOVZXWQ = 8045, + I_PMULDQ = 7859, I_PMULHRSW = 7538, I_PMULHRW = 2061, I_PMULHUW = 6740, I_PMULHW = 6759, + I_PMULLD = 8242, I_PMULLW = 6496, I_PMULUDQ = 7054, I_POP = 22, I_POPA = 98, + I_POPCNT = 4338, I_POPF = 277, I_POR = 6919, I_PREFETCH = 1872, I_PREFETCHNTA = 2402, + I_PREFETCHT0 = 2415, I_PREFETCHT1 = 2427, I_PREFETCHT2 = 2439, I_PREFETCHW = 1882, + I_PSADBW = 7092, I_PSHUFB = 7341, I_PSHUFD = 3988, I_PSHUFHW = 3996, I_PSHUFLW = 4005, + I_PSHUFW = 3980, I_PSIGNB = 7487, I_PSIGND = 7521, I_PSIGNW = 7504, I_PSLLD = 7024, + I_PSLLDQ = 9847, I_PSLLQ = 7039, I_PSLLW = 7009, I_PSRAD = 6710, I_PSRAW = 6695, + I_PSRLD = 6451, I_PSRLDQ = 9830, I_PSRLQ = 6466, I_PSRLW = 6436, I_PSUBB = 7144, + I_PSUBD = 7174, I_PSUBQ = 7189, I_PSUBSB = 6868, I_PSUBSW = 6885, I_PSUBUSB = 6552, + I_PSUBUSW = 6571, I_PSUBW = 7159, I_PSWAPD = 2070, I_PTEST = 7629, I_PUNPCKHBW = 3780, + I_PUNPCKHDQ = 3826, I_PUNPCKHQDQ = 3895, I_PUNPCKHWD = 3803, I_PUNPCKLBW = 3612, + I_PUNPCKLDQ = 3658, I_PUNPCKLQDQ = 3870, I_PUNPCKLWD = 3635, I_PUSH = 16, + I_PUSHA = 91, I_PUSHF = 270, I_PXOR = 6981, I_RCL = 977, I_RCPPS = 2953, I_RCPSS = 2960, + I_RCR = 982, I_RDFSBASE = 9882, I_RDGSBASE = 9912, I_RDMSR = 600, I_RDPMC = 607, + I_RDRAND = 9980, I_RDTSC = 593, I_RDTSCP = 1864, I_RET = 325, I_RETF = 354, + I_ROL = 967, I_ROR = 972, I_ROUNDPD = 9296, I_ROUNDPS = 9277, I_ROUNDSD = 9334, + I_ROUNDSS = 9315, I_RSM = 882, I_RSQRTPS = 2915, I_RSQRTSS = 2924, I_SAHF = 283, + I_SAL = 997, I_SALC = 394, I_SAR = 1002, I_SBB = 36, I_SCAS = 319, I_SETA = 807, + I_SETAE = 780, I_SETB = 
774, I_SETBE = 800, I_SETG = 859, I_SETGE = 845, I_SETL = 839, + I_SETLE = 852, I_SETNO = 767, I_SETNP = 832, I_SETNS = 819, I_SETNZ = 793, + I_SETO = 761, I_SETP = 826, I_SETS = 813, I_SETZ = 787, I_SFENCE = 4321, I_SGDT = 1675, + I_SHL = 987, I_SHLD = 876, I_SHR = 992, I_SHRD = 892, I_SHUFPD = 6336, I_SHUFPS = 6328, + I_SIDT = 1681, I_SKINIT = 1839, I_SLDT = 1641, I_SMSW = 1699, I_SQRTPD = 2855, + I_SQRTPS = 2847, I_SQRTSD = 2871, I_SQRTSS = 2863, I_STC = 497, I_STD = 517, + I_STGI = 1827, I_STI = 507, I_STMXCSR = 9951, I_STOS = 307, I_STR = 1647, I_SUB = 51, + I_SUBPD = 3379, I_SUBPS = 3372, I_SUBSD = 3393, I_SUBSS = 3386, I_SWAPGS = 1856, + I_SYSCALL = 532, I_SYSENTER = 614, I_SYSEXIT = 624, I_SYSRET = 547, I_TEST = 206, + I_TZCNT = 4351, I_UCOMISD = 2742, I_UCOMISS = 2733, I_UD2 = 569, I_UNPCKHPD = 2296, + I_UNPCKHPS = 2286, I_UNPCKLPD = 2254, I_UNPCKLPS = 2244, I_VADDPD = 3139, + I_VADDPS = 3131, I_VADDSD = 3155, I_VADDSS = 3147, I_VADDSUBPD = 6414, I_VADDSUBPS = 6425, + I_VAESDEC = 9217, I_VAESDECLAST = 9238, I_VAESENC = 9175, I_VAESENCLAST = 9196, + I_VAESIMC = 9158, I_VAESKEYGENASSIST = 9812, I_VANDNPD = 3038, I_VANDNPS = 3029, + I_VANDPD = 3005, I_VANDPS = 2997, I_VBLENDPD = 9381, I_VBLENDPS = 9362, I_VBLENDVPD = 9681, + I_VBLENDVPS = 9670, I_VBROADCASTF128 = 7672, I_VBROADCASTSD = 7658, I_VBROADCASTSS = 7644, + I_VCMPEQPD = 5088, I_VCMPEQPS = 4686, I_VCMPEQSD = 5892, I_VCMPEQSS = 5490, + I_VCMPEQ_OSPD = 5269, I_VCMPEQ_OSPS = 4867, I_VCMPEQ_OSSD = 6073, I_VCMPEQ_OSSS = 5671, + I_VCMPEQ_UQPD = 5175, I_VCMPEQ_UQPS = 4773, I_VCMPEQ_UQSD = 5979, I_VCMPEQ_UQSS = 5577, + I_VCMPEQ_USPD = 5378, I_VCMPEQ_USPS = 4976, I_VCMPEQ_USSD = 6182, I_VCMPEQ_USSS = 5780, + I_VCMPFALSEPD = 5210, I_VCMPFALSEPS = 4808, I_VCMPFALSESD = 6014, I_VCMPFALSESS = 5612, + I_VCMPFALSE_OSPD = 5419, I_VCMPFALSE_OSPS = 5017, I_VCMPFALSE_OSSD = 6223, + I_VCMPFALSE_OSSS = 5821, I_VCMPGEPD = 5237, I_VCMPGEPS = 4835, I_VCMPGESD = 6041, + I_VCMPGESS = 5639, I_VCMPGE_OQPD = 5449, I_VCMPGE_OQPS = 
5047, I_VCMPGE_OQSD = 6253, + I_VCMPGE_OQSS = 5851, I_VCMPGTPD = 5247, I_VCMPGTPS = 4845, I_VCMPGTSD = 6051, + I_VCMPGTSS = 5649, I_VCMPGT_OQPD = 5462, I_VCMPGT_OQPS = 5060, I_VCMPGT_OQSD = 6266, + I_VCMPGT_OQSS = 5864, I_VCMPLEPD = 5108, I_VCMPLEPS = 4706, I_VCMPLESD = 5912, + I_VCMPLESS = 5510, I_VCMPLE_OQPD = 5295, I_VCMPLE_OQPS = 4893, I_VCMPLE_OQSD = 6099, + I_VCMPLE_OQSS = 5697, I_VCMPLTPD = 5098, I_VCMPLTPS = 4696, I_VCMPLTSD = 5902, + I_VCMPLTSS = 5500, I_VCMPLT_OQPD = 5282, I_VCMPLT_OQPS = 4880, I_VCMPLT_OQSD = 6086, + I_VCMPLT_OQSS = 5684, I_VCMPNEQPD = 5131, I_VCMPNEQPS = 4729, I_VCMPNEQSD = 5935, + I_VCMPNEQSS = 5533, I_VCMPNEQ_OQPD = 5223, I_VCMPNEQ_OQPS = 4821, I_VCMPNEQ_OQSD = 6027, + I_VCMPNEQ_OQSS = 5625, I_VCMPNEQ_OSPD = 5435, I_VCMPNEQ_OSPS = 5033, I_VCMPNEQ_OSSD = 6239, + I_VCMPNEQ_OSSS = 5837, I_VCMPNEQ_USPD = 5323, I_VCMPNEQ_USPS = 4921, I_VCMPNEQ_USSD = 6127, + I_VCMPNEQ_USSS = 5725, I_VCMPNGEPD = 5188, I_VCMPNGEPS = 4786, I_VCMPNGESD = 5992, + I_VCMPNGESS = 5590, I_VCMPNGE_UQPD = 5391, I_VCMPNGE_UQPS = 4989, I_VCMPNGE_UQSD = 6195, + I_VCMPNGE_UQSS = 5793, I_VCMPNGTPD = 5199, I_VCMPNGTPS = 4797, I_VCMPNGTSD = 6003, + I_VCMPNGTSS = 5601, I_VCMPNGT_UQPD = 5405, I_VCMPNGT_UQPS = 5003, I_VCMPNGT_UQSD = 6209, + I_VCMPNGT_UQSS = 5807, I_VCMPNLEPD = 5153, I_VCMPNLEPS = 4751, I_VCMPNLESD = 5957, + I_VCMPNLESS = 5555, I_VCMPNLE_UQPD = 5351, I_VCMPNLE_UQPS = 4949, I_VCMPNLE_UQSD = 6155, + I_VCMPNLE_UQSS = 5753, I_VCMPNLTPD = 5142, I_VCMPNLTPS = 4740, I_VCMPNLTSD = 5946, + I_VCMPNLTSS = 5544, I_VCMPNLT_UQPD = 5337, I_VCMPNLT_UQPS = 4935, I_VCMPNLT_UQSD = 6141, + I_VCMPNLT_UQSS = 5739, I_VCMPORDPD = 5164, I_VCMPORDPS = 4762, I_VCMPORDSD = 5968, + I_VCMPORDSS = 5566, I_VCMPORD_SPD = 5365, I_VCMPORD_SPS = 4963, I_VCMPORD_SSD = 6169, + I_VCMPORD_SSS = 5767, I_VCMPTRUEPD = 5257, I_VCMPTRUEPS = 4855, I_VCMPTRUESD = 6061, + I_VCMPTRUESS = 5659, I_VCMPTRUE_USPD = 5475, I_VCMPTRUE_USPS = 5073, I_VCMPTRUE_USSD = 6279, + I_VCMPTRUE_USSS = 5877, I_VCMPUNORDPD = 
5118, I_VCMPUNORDPS = 4716, I_VCMPUNORDSD = 5922, + I_VCMPUNORDSS = 5520, I_VCMPUNORD_SPD = 5308, I_VCMPUNORD_SPS = 4906, I_VCMPUNORD_SSD = 6112, + I_VCMPUNORD_SSS = 5710, I_VCOMISD = 2796, I_VCOMISS = 2787, I_VCVTDQ2PD = 6819, + I_VCVTDQ2PS = 3338, I_VCVTPD2DQ = 6830, I_VCVTPD2PS = 3274, I_VCVTPS2DQ = 3349, + I_VCVTPS2PD = 3263, I_VCVTSD2SI = 2722, I_VCVTSD2SS = 3296, I_VCVTSI2SD = 2536, + I_VCVTSI2SS = 2525, I_VCVTSS2SD = 3285, I_VCVTSS2SI = 2711, I_VCVTTPD2DQ = 6807, + I_VCVTTPS2DQ = 3360, I_VCVTTSD2SI = 2659, I_VCVTTSS2SI = 2647, I_VDIVPD = 3528, + I_VDIVPS = 3520, I_VDIVSD = 3544, I_VDIVSS = 3536, I_VDPPD = 9621, I_VDPPS = 9608, + I_VERR = 1663, I_VERW = 1669, I_VEXTRACTF128 = 9516, I_VEXTRACTPS = 9491, + I_VFMADD132PD = 8387, I_VFMADD132PS = 8374, I_VFMADD132SD = 8413, I_VFMADD132SS = 8400, + I_VFMADD213PD = 8667, I_VFMADD213PS = 8654, I_VFMADD213SD = 8693, I_VFMADD213SS = 8680, + I_VFMADD231PD = 8947, I_VFMADD231PS = 8934, I_VFMADD231SD = 8973, I_VFMADD231SS = 8960, + I_VFMADDSUB132PD = 8326, I_VFMADDSUB132PS = 8310, I_VFMADDSUB213PD = 8606, + I_VFMADDSUB213PS = 8590, I_VFMADDSUB231PD = 8886, I_VFMADDSUB231PS = 8870, + I_VFMSUB132PD = 8439, I_VFMSUB132PS = 8426, I_VFMSUB132SD = 8465, I_VFMSUB132SS = 8452, + I_VFMSUB213PD = 8719, I_VFMSUB213PS = 8706, I_VFMSUB213SD = 8745, I_VFMSUB213SS = 8732, + I_VFMSUB231PD = 8999, I_VFMSUB231PS = 8986, I_VFMSUB231SD = 9025, I_VFMSUB231SS = 9012, + I_VFMSUBADD132PD = 8358, I_VFMSUBADD132PS = 8342, I_VFMSUBADD213PD = 8638, + I_VFMSUBADD213PS = 8622, I_VFMSUBADD231PD = 8918, I_VFMSUBADD231PS = 8902, + I_VFNMADD132PD = 8492, I_VFNMADD132PS = 8478, I_VFNMADD132SD = 8520, I_VFNMADD132SS = 8506, + I_VFNMADD213PD = 8772, I_VFNMADD213PS = 8758, I_VFNMADD213SD = 8800, I_VFNMADD213SS = 8786, + I_VFNMADD231PD = 9052, I_VFNMADD231PS = 9038, I_VFNMADD231SD = 9080, I_VFNMADD231SS = 9066, + I_VFNMSUB132PD = 8548, I_VFNMSUB132PS = 8534, I_VFNMSUB132SD = 8576, I_VFNMSUB132SS = 8562, + I_VFNMSUB213PD = 8828, I_VFNMSUB213PS = 8814, 
I_VFNMSUB213SD = 8856, I_VFNMSUB213SS = 8842, + I_VFNMSUB231PD = 9108, I_VFNMSUB231PS = 9094, I_VFNMSUB231SD = 9136, I_VFNMSUB231SS = 9122, + I_VHADDPD = 4197, I_VHADDPS = 4206, I_VHSUBPD = 4231, I_VHSUBPS = 4240, I_VINSERTF128 = 9503, + I_VINSERTPS = 9557, I_VLDDQU = 7001, I_VLDMXCSR = 9941, I_VMASKMOVDQU = 7131, + I_VMASKMOVPD = 7949, I_VMASKMOVPS = 7937, I_VMAXPD = 3588, I_VMAXPS = 3580, + I_VMAXSD = 3604, I_VMAXSS = 3596, I_VMCALL = 1719, I_VMCLEAR = 9997, I_VMFUNC = 1787, + I_VMINPD = 3468, I_VMINPS = 3460, I_VMINSD = 3484, I_VMINSS = 3476, I_VMLAUNCH = 1727, + I_VMLOAD = 1811, I_VMMCALL = 1802, I_VMOVAPD = 2476, I_VMOVAPS = 2467, I_VMOVD = 3932, + I_VMOVDDUP = 2234, I_VMOVDQA = 3962, I_VMOVDQU = 3971, I_VMOVHLPS = 2195, + I_VMOVHPD = 2382, I_VMOVHPS = 2373, I_VMOVLHPS = 2363, I_VMOVLPD = 2214, I_VMOVLPS = 2205, + I_VMOVMSKPD = 2836, I_VMOVMSKPS = 2825, I_VMOVNTDQ = 6858, I_VMOVNTDQA = 7905, + I_VMOVNTPD = 2593, I_VMOVNTPS = 2583, I_VMOVQ = 3939, I_VMOVSD = 2143, I_VMOVSHDUP = 2391, + I_VMOVSLDUP = 2223, I_VMOVSS = 2135, I_VMOVUPD = 2126, I_VMOVUPS = 2117, I_VMPSADBW = 9637, + I_VMPTRLD = 9988, I_VMPTRST = 6385, I_VMREAD = 4128, I_VMRESUME = 1737, I_VMRUN = 1795, + I_VMSAVE = 1819, I_VMULPD = 3199, I_VMULPS = 3191, I_VMULSD = 3215, I_VMULSS = 3207, + I_VMWRITE = 4152, I_VMXOFF = 1747, I_VMXON = 10006, I_VORPD = 3066, I_VORPS = 3059, + I_VPABSB = 7695, I_VPABSD = 7725, I_VPABSW = 7710, I_VPACKSSDW = 3859, I_VPACKSSWB = 3691, + I_VPACKUSDW = 7926, I_VPACKUSWB = 3769, I_VPADDB = 7211, I_VPADDD = 7241, + I_VPADDQ = 6488, I_VPADDSB = 6938, I_VPADDSW = 6955, I_VPADDUSW = 6629, I_VPADDW = 7226, + I_VPALIGNR = 9419, I_VPAND = 6613, I_VPANDN = 6672, I_VPAVGB = 6687, I_VPAVGW = 6732, + I_VPBLENDVB = 9692, I_VPBLENDW = 9400, I_VPCLMULQDQ = 9658, I_VPCMPEQB = 4052, + I_VPCMPEQD = 4090, I_VPCMPEQQ = 7885, I_VPCMPEQW = 4071, I_VPCMPESTRI = 9737, + I_VPCMPESTRM = 9714, I_VPCMPGTB = 3711, I_VPCMPGTD = 3749, I_VPCMPGTQ = 8096, + I_VPCMPGTW = 3730, I_VPCMPISTRI = 9783, 
I_VPCMPISTRM = 9760, I_VPERM2F128 = 9265, + I_VPERMILPD = 7570, I_VPERMILPS = 7559, I_VPEXTRB = 9437, I_VPEXTRD = 9462, + I_VPEXTRQ = 9471, I_VPEXTRW = 6319, I_VPHADDD = 7383, I_VPHADDSW = 7401, I_VPHADDW = 7366, + I_VPHMINPOSUW = 8271, I_VPHSUBD = 7459, I_VPHSUBSW = 7477, I_VPHSUBW = 7442, + I_VPINSRB = 9538, I_VPINSRD = 9584, I_VPINSRQ = 9593, I_VPINSRW = 6302, I_VPMADDUBSW = 7422, + I_VPMADDWD = 7082, I_VPMAXSB = 8182, I_VPMAXSD = 8199, I_VPMAXSW = 6972, I_VPMAXUB = 6656, + I_VPMAXUD = 8233, I_VPMAXUW = 8216, I_VPMINSB = 8114, I_VPMINSD = 8131, I_VPMINSW = 6910, + I_VPMINUB = 6598, I_VPMINUD = 8165, I_VPMINUW = 8148, I_VPMOVMSKB = 6541, + I_VPMOVSXBD = 7764, I_VPMOVSXBQ = 7785, I_VPMOVSXBW = 7743, I_VPMOVSXDQ = 7848, + I_VPMOVSXWD = 7806, I_VPMOVSXWQ = 7827, I_VPMOVZXBD = 7992, I_VPMOVZXBQ = 8013, + I_VPMOVZXBW = 7971, I_VPMOVZXDQ = 8076, I_VPMOVZXWD = 8034, I_VPMOVZXWQ = 8055, + I_VPMULDQ = 7867, I_VPMULHRSW = 7548, I_VPMULHUW = 6749, I_VPMULHW = 6767, + I_VPMULLD = 8250, I_VPMULLW = 6504, I_VPMULUDQ = 7063, I_VPOR = 6924, I_VPSADBW = 7100, + I_VPSHUFB = 7349, I_VPSHUFD = 4014, I_VPSHUFHW = 4023, I_VPSHUFLW = 4033, + I_VPSIGNB = 7495, I_VPSIGND = 7529, I_VPSIGNW = 7512, I_VPSLLD = 7031, I_VPSLLDQ = 9855, + I_VPSLLQ = 7046, I_VPSLLW = 7016, I_VPSRAD = 6717, I_VPSRAW = 6702, I_VPSRLD = 6458, + I_VPSRLDQ = 9838, I_VPSRLQ = 6473, I_VPSRLW = 6443, I_VPSUBB = 7151, I_VPSUBD = 7181, + I_VPSUBQ = 7196, I_VPSUBSB = 6876, I_VPSUBSW = 6893, I_VPSUBUSB = 6561, I_VPSUBUSW = 6580, + I_VPSUBW = 7166, I_VPTEST = 7636, I_VPUNPCKHBW = 3791, I_VPUNPCKHDQ = 3837, + I_VPUNPCKHQDQ = 3907, I_VPUNPCKHWD = 3814, I_VPUNPCKLBW = 3623, I_VPUNPCKLDQ = 3669, + I_VPUNPCKLQDQ = 3882, I_VPUNPCKLWD = 3646, I_VPXOR = 6987, I_VRCPPS = 2967, + I_VRCPSS = 2975, I_VROUNDPD = 9305, I_VROUNDPS = 9286, I_VROUNDSD = 9343, + I_VROUNDSS = 9324, I_VRSQRTPS = 2933, I_VRSQRTSS = 2943, I_VSHUFPD = 6353, + I_VSHUFPS = 6344, I_VSQRTPD = 2888, I_VSQRTPS = 2879, I_VSQRTSD = 2906, I_VSQRTSS = 2897, + I_VSTMXCSR = 
9970, I_VSUBPD = 3408, I_VSUBPS = 3400, I_VSUBSD = 3424, I_VSUBSS = 3416, + I_VTESTPD = 7590, I_VTESTPS = 7581, I_VUCOMISD = 2761, I_VUCOMISS = 2751, + I_VUNPCKHPD = 2317, I_VUNPCKHPS = 2306, I_VUNPCKLPD = 2275, I_VUNPCKLPS = 2264, + I_VXORPD = 3095, I_VXORPS = 3087, I_VZEROALL = 4118, I_VZEROUPPER = 4106, + I_WAIT = 10028, I_WBINVD = 561, I_WRFSBASE = 9931, I_WRGSBASE = 9960, I_WRMSR = 586, + I_XADD = 946, I_XCHG = 212, I_XGETBV = 1771, I_XLAT = 400, I_XOR = 61, I_XORPD = 3080, + I_XORPS = 3073, I_XRSTOR = 4273, I_XRSTOR64 = 4281, I_XSAVE = 4249, I_XSAVE64 = 4256, + I_XSAVEOPT = 4299, I_XSAVEOPT64 = 4309, I_XSETBV = 1779, I__3DNOW = 10034 +} _InstructionType; + +typedef enum { + R_RAX, R_RCX, R_RDX, R_RBX, R_RSP, R_RBP, R_RSI, R_RDI, R_R8, R_R9, R_R10, R_R11, R_R12, R_R13, R_R14, R_R15, + R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI, R_R8D, R_R9D, R_R10D, R_R11D, R_R12D, R_R13D, R_R14D, R_R15D, + R_AX, R_CX, R_DX, R_BX, R_SP, R_BP, R_SI, R_DI, R_R8W, R_R9W, R_R10W, R_R11W, R_R12W, R_R13W, R_R14W, R_R15W, + R_AL, R_CL, R_DL, R_BL, R_AH, R_CH, R_DH, R_BH, R_R8B, R_R9B, R_R10B, R_R11B, R_R12B, R_R13B, R_R14B, R_R15B, + R_SPL, R_BPL, R_SIL, R_DIL, + R_ES, R_CS, R_SS, R_DS, R_FS, R_GS, + R_RIP, + R_ST0, R_ST1, R_ST2, R_ST3, R_ST4, R_ST5, R_ST6, R_ST7, + R_MM0, R_MM1, R_MM2, R_MM3, R_MM4, R_MM5, R_MM6, R_MM7, + R_XMM0, R_XMM1, R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7, R_XMM8, R_XMM9, R_XMM10, R_XMM11, R_XMM12, R_XMM13, R_XMM14, R_XMM15, + R_YMM0, R_YMM1, R_YMM2, R_YMM3, R_YMM4, R_YMM5, R_YMM6, R_YMM7, R_YMM8, R_YMM9, R_YMM10, R_YMM11, R_YMM12, R_YMM13, R_YMM14, R_YMM15, + R_CR0, R_UNUSED0, R_CR2, R_CR3, R_CR4, R_UNUSED1, R_UNUSED2, R_UNUSED3, R_CR8, + R_DR0, R_DR1, R_DR2, R_DR3, R_UNUSED4, R_UNUSED5, R_DR6, R_DR7 +} _RegisterType; + +#endif /* MNEMONICS_H */ diff --git a/src/config.h b/src/config.h index 6fbf7af..5676fc5 100644 --- a/src/config.h +++ b/src/config.h @@ -1,8 +1,23 @@ /* config.h -Copyright (C) 2003-2008 Gil Dabah, 
http://ragestorm.net/distorm/ -This library is licensed under the BSD license. See the file COPYING. +diStorm3 - Powerful disassembler for X86/AMD64 +http://ragestorm.net/distorm/ +distorm at gmail dot com +Copyright (C) 2003-2012 Gil Dabah + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see */ @@ -10,23 +25,36 @@ This library is licensed under the BSD license. See the file COPYING. #define CONFIG_H /* diStorm version number. */ -#define DISTORM_VER 0x01071e +#define __DISTORMV__ 0x030300 + +#include /* memset, memcpy - can be easily self implemented for libc independency. */ + +#include "../include/distorm.h" -#include /* strlen, memset, memcpy - can be easily self implemented for libc independency. */ /* * 64 bit offsets support: * This macro should be defined from compiler command line flags, e.g: -DSUPPORT_64BIT_OFFSET - * #define SUPPORT_64BIT_OFFSET * Note: make sure that the caller (library user) defines it too! */ +/* #define SUPPORT_64BIT_OFFSET */ + +/* + * If you compile diStorm as a dynamic library (.dll or .so) file, make sure you uncomment the next line. + * So the interface functions will be exported, otherwise they are useable only for static library. + * For example, this macro is being set for compiling diStorm as a .dll for Python with CTypes. + */ +/* #define DISTORM_DYNAMIC */ /* - * If you compile diStorm as a .DLL file, make sure you uncomment the next line. 
- * So the interface functions will be exported, otherwise they are useable only as a library. - * For example, the Python extension module defines this macro in its configuration. + * If DISTORM_LIGHT is defined, everything involved in formatting the instructions + * as text will be excluded from compilation. + * distorm_decode(..) and distorm_format(..) will not be available. + * This will decrease the size of the executable and leave you with decomposition functionality only. + * + * Note: it should be either set in the preprocessor definitions manually or in command line -D switch. + * #define DISTORM_LIGHT */ -/* #define _DLL */ /* * diStorm now supports little/big endian CPU's. @@ -39,12 +67,11 @@ This library is licensed under the BSD license. See the file COPYING. #include -#define _PACKED_ __attribute__((__packed__)) #define _DLLEXPORT_ #define _FASTCALL_ -#define _INLINE_ static __inline__ +#define _INLINE_ static /* GCC ignores this directive... */ -/*#define _FASTCALL_ __attribute__((__fastcall__)) */ +/*#define _FASTCALL_ __attribute__((__fastcall__))*/ /* Set endianity (supposed to be LE though): */ #ifdef __BIG_ENDIAN__ @@ -57,7 +84,6 @@ This library is licensed under the BSD license. See the file COPYING. #include -#define _PACKED_ #define _DLLEXPORT_ #define _FASTCALL_ #define _INLINE_ __inline @@ -68,10 +94,9 @@ This library is licensed under the BSD license. See the file COPYING. #include -#define _PACKED_ #define _DLLEXPORT_ #define _FASTCALL_ -#define _INLINE_ static __inline +#define _INLINE_ __inline /* End of __DMC__ */ @@ -79,42 +104,31 @@ This library is licensed under the BSD license. See the file COPYING. 
#include -#define _PACKED_ #define _DLLEXPORT_ #define _FASTCALL_ -#define _INLINE_ static +#define _INLINE_ /* End of __TINYC__ */ #elif _MSC_VER -/* Since MSVC isn't shipped with stdint.h, we will have our own: */ -typedef signed __int64 int64_t; -typedef unsigned __int64 uint64_t; -typedef signed __int32 int32_t; -typedef unsigned __int32 uint32_t; -typedef signed __int16 int16_t; -typedef unsigned __int16 uint16_t; -typedef signed __int8 int8_t; -typedef unsigned __int8 uint8_t; - -#define _PACKED_ +/* stdint alternative is defined in distorm.h */ + #define _DLLEXPORT_ __declspec(dllexport) #define _FASTCALL_ __fastcall -#define _INLINE_ static __inline +#define _INLINE_ __inline /* Set endianity (supposed to be LE though): */ -#ifndef _M_IX86 +#if !defined(_M_IX86) && !defined(_M_X64) #define BE_SYSTEM #endif #endif /* #elif _MSC_VER */ -/* 32 or 64 bits integer for instruction offset. */ -#ifdef SUPPORT_64BIT_OFFSET - #define OFFSET_INTEGER uint64_t -#else - #define OFFSET_INTEGER uint32_t +/* If the library isn't compiled as a dynamic library don't export any functions. */ +#ifndef DISTORM_DYNAMIC +#undef _DLLEXPORT_ +#define _DLLEXPORT_ #endif #ifndef FALSE @@ -124,40 +138,44 @@ typedef unsigned __int8 uint8_t; #define TRUE 1 #endif -/* If the library isn't compiled as a .DLL don't export functions. */ -#ifndef _DLL -#undef _DLLEXPORT_ -#define _DLLEXPORT_ -#endif - /* Define stream read functions for big endian systems. */ #ifdef BE_SYSTEM /* * These functions can read from the stream safely! * Swap endianity of input to little endian. 
*/ -_INLINE_ int16_t RSHORT(const uint8_t *s) +static _INLINE_ int16_t RSHORT(const uint8_t *s) { return s[0] | (s[1] << 8); } -_INLINE_ uint16_t RUSHORT(const uint8_t *s) +static _INLINE_ uint16_t RUSHORT(const uint8_t *s) { return s[0] | (s[1] << 8); } -_INLINE_ int32_t RLONG(const uint8_t *s) +static _INLINE_ int32_t RLONG(const uint8_t *s) { return s[0] | (s[1] << 8) | (s[2] << 16) | (s[3] << 24); } -_INLINE_ uint32_t RULONG(const uint8_t *s) +static _INLINE_ uint32_t RULONG(const uint8_t *s) { return s[0] | (s[1] << 8) | (s[2] << 16) | (s[3] << 24); } +static _INLINE_ int64_t RLLONG(const uint8_t *s) +{ + return s[0] | (s[1] << 8) | (s[2] << 16) | ((uint64_t)s[3] << 24) | ((uint64_t)s[4] << 32) | ((uint64_t)s[5] << 40) | ((uint64_t)s[6] << 48) | ((uint64_t)s[7] << 56); /* s[3] is widened before shifting: as a promoted int, a set top bit would sign-extend into bits 32-63. */ +} +static _INLINE_ uint64_t RULLONG(const uint8_t *s) +{ + return s[0] | (s[1] << 8) | (s[2] << 16) | ((uint64_t)s[3] << 24) | ((uint64_t)s[4] << 32) | ((uint64_t)s[5] << 40) | ((uint64_t)s[6] << 48) | ((uint64_t)s[7] << 56); /* s[3] is widened before shifting: as a promoted int, a set top bit would sign-extend into bits 32-63. */ +} #else /* Little endian macro's will just make the cast. */ #define RSHORT(x) *(int16_t *)x #define RUSHORT(x) *(uint16_t *)x #define RLONG(x) *(int32_t *)x #define RULONG(x) *(uint32_t *)x +#define RLLONG(x) *(int64_t *)x +#define RULLONG(x) *(uint64_t *)x #endif #endif /* CONFIG_H */ diff --git a/src/decoder.h b/src/decoder.h index d9c46c6..baea228 100644 --- a/src/decoder.h +++ b/src/decoder.h @@ -1,8 +1,23 @@ /* decoder.h -Copyright (C) 2003-2008 Gil Dabah, http://ragestorm.net/distorm/ -This library is licensed under the BSD license. See the file COPYING. +diStorm3 - Powerful disassembler for X86/AMD64 +http://ragestorm.net/distorm/ +distorm at gmail dot com +Copyright (C) 2011 Gil Dabah + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version.
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see */ @@ -11,33 +26,8 @@ This library is licensed under the BSD license. See the file COPYING. #include "config.h" -#include "wstring.h" - typedef unsigned int _iflags; -/* DEFAULT instructions decoding mode. */ -typedef enum {Decode16Bits = 0, Decode32Bits = 1, Decode64Bits = 2} _DecodeType; - -typedef OFFSET_INTEGER _OffsetType; - -typedef struct { - _WString mnemonic; - _WString operands; - _WString instructionHex; - unsigned int size; - _OffsetType offset; -} _DecodedInst; - -typedef struct { - const uint8_t* code; - int codeLen; - _OffsetType codeOffset; -} _CodeInfo; - -typedef enum {DECRES_NONE, DECRES_SUCCESS, DECRES_MEMORYERR, DECRES_INPUTERR} _DecodeResult; -_DecodeResult internal_decode(_OffsetType codeOffset, const uint8_t* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxResultCount, unsigned int* usedEntriesCount); - -_DecodeType ADDR_SIZE_AFFECT(_DecodeType dt, _iflags totalPrefixes); -_DecodeType OP_SIZE_AFFECT(_DecodeType dt, _iflags totalPrefixes, unsigned int rex, _iflags instFlags); +_DecodeResult decode_internal(_CodeInfo* ci, int supportOldIntr, _DInst result[], unsigned int maxResultCount, unsigned int* usedInstructionsCount); #endif /* DECODER_H */ diff --git a/src/extconf_osx.rb b/src/extconf_osx.rb new file mode 100644 index 0000000..6555537 --- /dev/null +++ b/src/extconf_osx.rb @@ -0,0 +1,3 @@ +require 'mkmf' +$LDFLAGS << '-bundle -undefined suppress -flat_namespace -lruby /usr/local/lib/libdistorm3.dylib' +create_makefile('frasm') diff --git a/src/frasm.c b/src/frasm.c index c8283c9..7d28aa3 100644 --- a/src/frasm.c +++ b/src/frasm.c @@ -55,25 +55,25 @@ static VALUE 
_decode(int argc, VALUE *argv, VALUE self) } ret = rb_ary_new(); - str = (uint8_t *) RSTRING(buf)->ptr; - len = RSTRING(buf)->len; + str = (uint8_t *) RSTRING_PTR(buf); + len = RSTRING_LEN(buf); while(e != DECRES_SUCCESS) { VALUE insn = Qnil; u_char text[MAX_TEXT_SIZE*2]; int i = 0; - e = internal_decode(off, str, len, dect, insns, MAX_INSTRUCTIONS, &icnt); + e = distorm_decode(off, str, len, dect, insns, MAX_INSTRUCTIONS, &icnt); if((e == DECRES_MEMORYERR) && (icnt == 0)) break; for(i = 0; i < icnt; i++) { - if(insns[i].mnemonic.pos > 0) { - memcpy(text, insns[i].mnemonic.p, insns[i].mnemonic.pos + 1); - if(insns[i].operands.pos > 0) - text[insns[i].mnemonic.pos] = SP_CHR; - memcpy(&text[insns[i].mnemonic.pos+1], insns[i].operands.p, insns[i].operands.pos + 1); - text[insns[i].mnemonic.pos+1+insns[i].operands.pos+1] = 0; + if(insns[i].mnemonic.length > 0) { + memcpy(text, insns[i].mnemonic.p, insns[i].mnemonic.length + 1); + if(insns[i].operands.length > 0) + text[insns[i].mnemonic.length] = SP_CHR; + memcpy(&text[insns[i].mnemonic.length+1], insns[i].operands.p, insns[i].operands.length+ 1); + text[insns[i].mnemonic.length+1+insns[i].operands.length+1] = 0; } else text[0] = 0; diff --git a/src/instructions.h b/src/instructions.h index b7dab76..d6a85a2 100644 --- a/src/instructions.h +++ b/src/instructions.h @@ -1,8 +1,23 @@ /* instructions.h -Copyright (C) 2003-2008 Gil Dabah, http://ragestorm.net/distorm/ -This library is licensed under the BSD license. See the file COPYING. +diStorm3 - Powerful disassembler for X86/AMD64 +http://ragestorm.net/distorm/ +distorm at gmail dot com +Copyright (C) 2003-2012 Gil Dabah + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see */ @@ -10,10 +25,9 @@ This library is licensed under the BSD license. See the file COPYING. #define INSTRUCTIONS_H #include "config.h" - -#include "decoder.h" #include "prefix.h" + /* * Operand type possibilities: * Note "_FULL" suffix indicates to decode the operand as 16 bits or 32 bits depends on DecodeType - @@ -33,12 +47,21 @@ typedef enum OpType { /* Read a double-word(32 bits) immediate */ OT_IMM32, - /* Special immediate for two instructions, AAM, AAD which will output the byte only if it's not 0xa (base 10) */ - OT_IMM_AADM, - /* Read a signed extended byte(8 bits) immediate */ OT_SEIMM8, + /* + * Special immediates for instructions which have more than one immediate, + * which is an exception from standard instruction format. + * As to version v1.0: ENTER, INSERTQ, EXTRQ are the only problematic ones. + */ + /* 16 bits immediate using the first imm-slot */ + OT_IMM16_1, + /* 8 bits immediate using the first imm-slot */ + OT_IMM8_1, + /* 8 bits immediate using the second imm-slot */ + OT_IMM8_2, + /* Use a 8bit register */ OT_REG8, /* Use a 16bit register */ @@ -47,15 +70,11 @@ typedef enum OpType { OT_REG_FULL, /* Use a 32bit register */ OT_REG32, - /* MOVSXD uses 64 bits register */ - OT_REG64, /* * If used with REX the reg operand size becomes 64 bits, otherwise 32 bits. * VMX instructions are promoted automatically without a REX prefix. */ OT_REG32_64, - /* Extract a 32bit register from the RM field, used for instructions with register operands only */ - OT_REG32_RM, /* Used only by MOV CR/DR(n). Promoted with REX onlly. 
*/ OT_FREG32_64_RM, @@ -65,8 +84,6 @@ typedef enum OpType { OT_RM16, /* Use or read a 16/32/64bit register or immediate word/dword/qword */ OT_RM_FULL, - /* Use or read a 32bit register or immediate dword */ - OT_RM32, /* * 32 or 64 bits (with REX) operand size indirection memory operand. * Some instructions are promoted automatically without a REX prefix. @@ -149,6 +166,8 @@ typedef enum OpType { /* Use general memory indirection, with varying sizes: */ OT_MEM, + /* Used when a memory indirection is required, but if the mod field is 11, this operand will be ignored. */ + OT_MEM_OPT, OT_MEM32, /* Memory dereference for MOVNTI, either 32 or 64 bits (with REX). */ OT_MEM32_64, @@ -157,8 +176,9 @@ typedef enum OpType { /* Used for cmpxchg8b/16b. */ OT_MEM64_128, - /* Read an immediate as an absolute address, size is known by instruction, used by MOV (offset) only */ - OT_MOFFS, + /* Read an immediate as an absolute address, size is known by instruction, used by MOV (memory offset) only */ + OT_MOFFS8, + OT_MOFFS_FULL, /* Use an immediate of 1, as for SHR R/M, 1 */ OT_CONST1, /* Use CL, as for SHR R/M, CL */ @@ -170,8 +190,6 @@ typedef enum OpType { * Use a 8bit register */ OT_IB_RB, - /* Use a 32 or 64bit (with REX) register, used by BSWAP */ - OT_IB_R_DW_QW, /* Use a 16/32/64bit register */ OT_IB_R_FULL, @@ -217,25 +235,52 @@ typedef enum OpType { /* Implied XMM0 register as operand, used in SSE4. */ OT_REGXMM0, - /* - * DUMMY for cases like CALL WORD [BX+DI], we would like to omit this "WORD". It's useless, - * because the DWORD/WORD/BYTE mechanism is being done automatically, we need some way to disable it in such cases... - */ - OT_DUMMY + /* AVX operands: */ + + /* ModR/M for 32 bits. */ + OT_RM32, + /* Reg32/Reg64 (prefix width) or Mem8. */ + OT_REG32_64_M8, + /* Reg32/Reg64 (prefix width) or Mem16. */ + OT_REG32_64_M16, + /* Reg32/Reg 64 depends on prefix width only. */ + OT_WREG32_64, + /* RM32/RM64 depends on prefix width only. 
*/ + OT_WRM32_64, + /* XMM or Mem32/Mem64 depends on prefix width only. */ + OT_WXMM32_64, + /* XMM is encoded in VEX.VVVV. */ + OT_VXMM, + /* XMM is encoded in the high nibble of an immediate byte. */ + OT_XMM_IMM, + /* YMM/XMM is dependent on VEX.L. */ + OT_YXMM, + /* YMM/XMM (depends on prefix length) is encoded in the high nibble of an immediate byte. */ + OT_YXMM_IMM, + /* YMM is encoded in reg. */ + OT_YMM, + /* YMM or Mem256. */ + OT_YMM256, + /* YMM is encoded in VEX.VVVV. */ + OT_VYMM, + /* YMM/XMM is dependent on VEX.L, and encoded in VEX.VVVV. */ + OT_VYXMM, + /* YMM/XMM or Mem64/Mem256 is dependent on VEX.L. */ + OT_YXMM64_256, + /* YMM/XMM or Mem128/Mem256 is dependent on VEX.L. */ + OT_YXMM128_256, + /* XMM or Mem64/Mem256 is dependent on VEX.L. */ + OT_LXMM64_128, + /* Mem128/Mem256 is dependent on VEX.L. */ + OT_LMEM128_256 } _OpType; /* Flags for instruction: */ /* Empty flags indicator: */ -#define INST_FLAGS_NONE ((_iflags)-1) - -/* - * Explicitly define that the instruction doesn't require a ModRM byte. - * NOTE its value is 0! you can't do much with it, it is used for instructions that for sure don't use the ModR/M byte. - */ -#define INST_EXCLUDE_MODRM (0) -/* The instruction we are going to decode has a ModR/M byte. */ -#define INST_INCLUDE_MODRM (1) +#define INST_FLAGS_NONE (0) +/* The instruction we are going to decode requires ModR/M encoding. */ +#define INST_MODRM_REQUIRED (1) /* Special treatment for instructions which are in the divided-category but still needs the whole byte for ModR/M... */ #define INST_NOT_DIVIDED (1 << 1) /* @@ -246,7 +291,7 @@ typedef enum OpType { /* If the opcode is supported by 80286 and upper models (16/32 bits).
*/ #define INST_32BITS (1 << 3) /* - * Prefix flags (4 types: lock/rep, seg override, addr-size, oper-size) + * Prefix flags (6 types: lock/rep, seg override, addr-size, oper-size, REX, VEX) * There are several specific instructions that can follow LOCK prefix, * note that they must be using a memory operand form, otherwise they generate an exception. */ @@ -280,16 +325,16 @@ typedef enum OpType { /* Use fourth operand, means it's an _InstInfoEx structure, which contains another operand for special instructions. */ #define INST_USE_OP4 (1 << 18) /* The instruction's mnemonic depends on the mod value of the ModR/M byte (mod=11, mod!=11). */ -#define INST_MODRM_BASED (1 << 19) +#define INST_MNEMONIC_MODRM_BASED (1 << 19) /* The instruction uses a ModR/M byte which the MOD must be 11 (for registers operands only). */ -#define INST_MODRR (1 << 20) +#define INST_MODRR_REQUIRED (1 << 20) /* The way of 3DNow! instructions are built, we have to handle their locating specially. Suffix imm8 tells which instruction it is. */ #define INST_3DNOW_FETCH (1 << 21) /* The instruction needs two suffixes, one for the comparison type (imm8) and the second for its operation size indication (second mnemonic). */ #define INST_PSEUDO_OPCODE (1 << 22) /* Invalid instruction at 64 bits decoding mode. */ #define INST_INVALID_64BITS (1 << 23) -/* Specific instruction is can be promoted to 64 bits (without REX it is promoted automatically). */ +/* Specific instruction can be promoted to 64 bits (without REX, it is promoted automatically). */ #define INST_64BITS (1 << 24) /* Indicates the instruction must be REX prefixed in order to use 64 bits operands. */ #define INST_PRE_REX (1 << 25) @@ -297,68 +342,72 @@ typedef enum OpType { #define INST_USE_EXMNEMONIC2 (1 << 26) /* Instruction is only valid in 64 bits decoding mode. */ #define INST_64BITS_FETCH (1 << 27) +/* Forces that the ModRM-REG/Opcode field will be 0. (For EXTRQ). 
*/ +#define INST_FORCE_REG0 (1 << 28) +/* Indicates that instruction is encoded with a VEX prefix. */ +#define INST_PRE_VEX (1 << 29) +/* Indicates that the instruction is encoded with a ModRM byte (REG field specifically). */ +#define INST_MODRM_INCLUDED (1 << 30) +/* Indicates that the first (/destination) operand of the instruction is writable. */ +#define INST_DST_WR (1 << 31) #define INST_PRE_REPS (INST_PRE_REPNZ | INST_PRE_REP) #define INST_PRE_LOKREP_MASK (INST_PRE_LOCK | INST_PRE_REPNZ | INST_PRE_REP) -#define INST_PRE_SEGOVRD_MASK (INST_PRE_CS | INST_PRE_SS | INST_PRE_DS | INST_PRE_ES | INST_PRE_FS | INST_PRE_GS) - -/* Instructions Set classes: */ -/* Indicates the instruction belongs to the General Integer set. */ -#define ISCT_INTEGER 1 -/* Indicates the instruction belongs to the 387 FPU set. */ -#define ISCT_FPU 2 -/* Indicates the instruction belongs to the P6 set. */ -#define ISCT_P6 3 -/* Indicates the instruction belongs to the MMX set. */ -#define ISCT_MMX 4 -/* Indicates the instruction belongs to the SSE set. */ -#define ISCT_SSE 5 -/* Indicates the instruction belongs to the SSE2 set. */ -#define ISCT_SSE2 6 -/* Indicates the instruction belongs to the SSE3 set. */ -#define ISCT_SSE3 7 -/* Indicates the instruction belongs to the SSSE3 set. */ -#define ISCT_SSSE3 8 -/* Indicates the instruction belongs to the SSE4.1 set. */ -#define ISCT_SSE4_1 9 -/* Indicates the instruction belongs to the SSE4.2 set. */ -#define ISCT_SSE4_2 10 -/* Indicates the instruction belongs to the AMD's SSE4.A set. */ -#define ISCT_SSE4_A 11 -/* Indicates the instruction belongs to the 3DNow! set. */ -#define ISCT_3DNOW 12 -/* Indicates the instruction belongs to the 3DNow! Extensions set. */ -#define ISCT_3DNOWEXT 13 -/* Indicates the instruction belongs to the VMX (Intel) set. */ -#define ISCT_VMX 14 -/* Indicates the instruction belongs to the SVM (AMD) set. 
*/ -#define ISCT_SVM 15 +#define INST_PRE_SEGOVRD_MASK32 (INST_PRE_CS | INST_PRE_SS | INST_PRE_DS | INST_PRE_ES) +#define INST_PRE_SEGOVRD_MASK64 (INST_PRE_FS | INST_PRE_GS) +#define INST_PRE_SEGOVRD_MASK (INST_PRE_SEGOVRD_MASK32 | INST_PRE_SEGOVRD_MASK64) + +/* Extended flags for VEX: */ +/* Indicates that the instruction might have VEX.L encoded. */ +#define INST_VEX_L (1) +/* Indicates that the instruction might have VEX.W encoded. */ +#define INST_VEX_W (1 << 1) +/* Indicates that the mnemonic of the instruction is based on the VEX.W bit. */ +#define INST_MNEMONIC_VEXW_BASED (1 << 2) +/* Indicates that the mnemonic of the instruction is based on the VEX.L bit. */ +#define INST_MNEMONIC_VEXL_BASED (1 << 3) +/* Forces the instruction to be encoded with VEX.L, otherwise it's undefined. */ +#define INST_FORCE_VEXL (1 << 4) +/* + * Indicates that the instruction is based on the MOD field of the ModRM byte. + * (MOD==11: got the right instruction, else skip +4 in prefixed table for the correct instruction). + */ +#define INST_MODRR_BASED (1 << 5) +/* Indicates that the instruction doesn't use the VVVV field of the VEX prefix, if it does then it's undecodable. */ +#define INST_VEX_V_UNUSED (1 << 6) /* * Indicates which operand is being decoded. * Destination (1st), Source (2nd), op3 (3rd), op4 (4th). - * Its main purpose to help the decode-operands function know whether its the first operand (+ it's indirection + there's a lock prefix). + * Used to set the operands' fields in the _DInst structure! */ -typedef enum {ONT_NONE = -1, ONT_1, ONT_2, ONT_3, ONT_4} _OperandNumberType; - -#define MAX_MNEMONIC_LENGTH (32) +typedef enum {ONT_NONE = -1, ONT_1 = 0, ONT_2 = 1, ONT_3 = 2, ONT_4 = 3} _OperandNumberType; -#ifdef _MSC_VER - #pragma pack(push, 1) -#endif +/* + * In order to save more space for storing the DB statically, + * I came up with another level of shared info. + * Because I saw that most of the information that instructions use repeats itself. 
+ * + * Info about the instruction, source/dest types, meta and flags. + * _InstInfo points to a table of _InstSharedInfo. + */ +typedef struct { + uint8_t flagsIndex; /* An index into FlagsTables */ + uint8_t s, d; /* OpType. */ + uint8_t meta; /* Hi 5 bits = Instruction set class | Lo 3 bits = flow control flags. */ + /* The following are CPU flag masks that the instruction changes. */ + uint8_t modifiedFlags; + uint8_t testedFlags; + uint8_t undefinedFlags; +} _InstSharedInfo; /* - * Info about the instruction, source/dest types, its name in text and flags. * This structure is used for the instructions DB and NOT for the disassembled result code! * This is the BASE structure, there are extentions to this structure below. */ - -typedef struct _PACKED_ { - uint8_t type; - uint8_t isc; - uint8_t s, d; /* OpType */ - int8_t* mnemonic; - _iflags flags; +typedef struct { + uint16_t sharedIndex; /* An index into the SharedInfoTable. */ + uint16_t opcodeId; /* The opcodeId is really a byte-offset into the mnemonics table. */ } _InstInfo; /* @@ -371,55 +420,35 @@ typedef struct _PACKED_ { * therefore, I decided to make the extended structure contain all extra info in the same structure. * There are a few instructions (SHLD/SHRD/IMUL and SSE too) which use third operand (or a fourth). * A flag will indicate it uses a third/fourth operand. - * */ -typedef struct _PACKED_ { - uint8_t type; - uint8_t isc; - uint8_t s, d; /* OpType */ - int8_t* mnemonic; - _iflags flags; - uint8_t op3, op4; /* OpType */ - int8_t* mnemonic2; - int8_t* mnemonic3; +typedef struct { + /* Base structure (doesn't get accessed directly from code). */ + _InstInfo BASE; + + /* Extended starts here. */ + uint8_t flagsEx; /* 8 bits are enough, in the future we might make it a bigger integer. */ + uint8_t op3, op4; /* OpType. */ + uint16_t opcodeId2, opcodeId3; } _InstInfoEx; /* Trie data structure node type: */ typedef enum { - INT_NOTEXISTS = -1, /* Not exists (this is used for a return code only). 
*/ - INT_NONE, /* No instruction info or list set. */ - INT_INFO, /* It's an instruction info. */ + INT_NOTEXISTS = 0, /* Not exists. */ + INT_INFO = 1, /* It's an instruction info. */ + INT_INFOEX, INT_LIST_GROUP, INT_LIST_FULL, - INT_LIST_DIVIDED + INT_LIST_DIVIDED, + INT_LIST_PREFIXED } _InstNodeType; -/* - * A node in the instructions DB; - * Can be both a node or an info, depends on type. - */ -typedef struct _PACKED_ InstNode{ - uint8_t type; - uint8_t* ids; - _InstInfo** list; /* The second level might point to _InstNode, this is determined by type in runtime. */ -} _InstNode; - -#ifdef _MSC_VER - #pragma pack(pop) -#endif - -typedef enum {OPERAND_SIZE_NONE = 0, OPERAND_SIZE8, OPERAND_SIZE16, OPERAND_SIZE32, OPERAND_SIZE64, OPERAND_SIZE80, OPERAND_SIZE128} _OperandSizeType; - -/* - * Used for letting the extract operand know the type of operands without knowing the - * instruction itself yet, because of the way those instructions work. - */ -extern _InstInfo II_3dnow; +/* Used to check instType < INT_INFOS, means we got an inst-info. Cause it has to be only one of them. */ +#define INT_INFOS (INT_LIST_GROUP) -_InstInfo* locate_inst(const uint8_t** code, int* codeLen, _OffsetType* codeOffset, _WString* instructionHex, _PrefixState* ps, _DecodeType dt); -_InstInfo* locate_3dnow_inst(_CodeInfo* ci, _WString* instructionHex); +/* Instruction node is treated as { int index:13; int type:3; } */ +typedef uint16_t _InstNode; -/* Concatenates a text describing the size used for indirections form. (MOV *WORD* [BX], 0x12) when it's not cleared from operands. 
*/ -void str_indirection_text(_WString* s, _OperandSizeType opSize); +_InstInfo* inst_lookup(_CodeInfo* ci, _PrefixState* ps); +_InstInfo* inst_lookup_3dnow(_CodeInfo* ci); #endif /* INSTRUCTIONS_H */ diff --git a/src/insts.h b/src/insts.h index 0d4b862..d985e48 100644 --- a/src/insts.h +++ b/src/insts.h @@ -1,8 +1,23 @@ /* insts.h -Copyright (C) 2003-2008 Gil Dabah, http://ragestorm.net/distorm/ -This library is licensed under the BSD license. See the file COPYING. +diStorm3 - Powerful disassembler for X86/AMD64 +http://ragestorm.net/distorm/ +distorm at gmail dot com +Copyright (C) 2003-2012 Gil Dabah + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see */ @@ -11,15 +26,45 @@ This library is licensed under the BSD license. See the file COPYING. #include "instructions.h" + +/* Flags Table */ +extern _iflags FlagsTable[]; + /* Root Trie DB */ -extern _InstNode Instructions; +extern _InstSharedInfo InstSharedInfoTable[]; +extern _InstInfo InstInfos[]; +extern _InstInfoEx InstInfosEx[]; +extern _InstNode InstructionsTree[]; + /* 3DNow! Trie DB */ extern _InstNode Table_0F_0F; -/* NOP/XCHG instruction. */ -extern _InstInfo II_90; -/* LEA instruction. */ -extern _InstInfo II_8D; +/* AVX related: */ +extern _InstNode Table_0F, Table_0F_38, Table_0F_3A; + +/* + * The inst_lookup will return on of these two instructions according to the specified decoding mode. + * ARPL or MOVSXD on 64 bits is one byte instruction at index 0x63. 
+ */ +extern _InstInfo II_ARPL; +extern _InstInfo II_MOVSXD; + +/* + * The NOP instruction can be prefixed by REX in 64bits, therefore we have to decide in runtime whether it's an XCHG or NOP instruction. + * If 0x90 is prefixed by a useable REX it will become XCHG, otherwise it will become a NOP. + * Also note that if it's prefixed by 0xf3, it becomes a Pause. + */ +extern _InstInfo II_NOP; +extern _InstInfo II_PAUSE; + +/* + * Used for letting the extract operand know the type of operands without knowing the + * instruction itself yet, because of the way those instructions work. + * See function instructions.c!inst_lookup_3dnow. + */ +extern _InstInfo II_3DNOW; -/* See instructions.cpp for more info. */ +/* Helper tables for pesudo compare mnemonics. */ +extern uint16_t CmpMnemonicOffsets[8]; /* SSE */ +extern uint16_t VCmpMnemonicOffsets[32]; /* AVX */ #endif /* INSTS_H */ diff --git a/src/operands.h b/src/operands.h index e9b5c70..f0ae04f 100644 --- a/src/operands.h +++ b/src/operands.h @@ -1,8 +1,23 @@ /* operands.h -Copyright (C) 2003-2008 Gil Dabah, http://ragestorm.net/distorm/ -This library is licensed under the BSD license. See the file COPYING. +diStorm3 - Powerful disassembler for X86/AMD64 +http://ragestorm.net/distorm/ +distorm at gmail dot com +Copyright (C) 2003-2012 Gil Dabah + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see */ @@ -10,18 +25,16 @@ This library is licensed under the BSD license. See the file COPYING. 
#define OPERANDS_H #include "config.h" - #include "decoder.h" #include "prefix.h" -#include "wstring.h" #include "instructions.h" -/* Return codes from extract_operand. */ -typedef enum {EO_HALT, EO_CONTINUE, EO_SUFFIX} _ExOpRCType; -_ExOpRCType extract_operand(_CodeInfo* ci, - _WString* instructionHex, _WString* operandText, _OpType type, _OpType op2, - _OperandNumberType opNum, _iflags instFlags, unsigned int modrm, - _PrefixState* ps, _DecodeType dt, int* lockableInstruction); +extern uint16_t _REGISTERTORCLASS[]; + +int operands_extract(_CodeInfo* ci, _DInst* di, _InstInfo* ii, + _iflags instFlags, _OpType type, _OperandNumberType opNum, + unsigned int modrm, _PrefixState* ps, _DecodeType effOpSz, + _DecodeType effAdrSz, int* lockableInstruction); #endif /* OPERANDS_H */ diff --git a/src/prefix.h b/src/prefix.h index b917ecb..ac65905 100644 --- a/src/prefix.h +++ b/src/prefix.h @@ -1,8 +1,23 @@ /* prefix.h -Copyright (C) 2003-2008 Gil Dabah, http://ragestorm.net/distorm/ -This library is licensed under the BSD license. See the file COPYING. +diStorm3 - Powerful disassembler for X86/AMD64 +http://ragestorm.net/distorm/ +distorm at gmail dot com +Copyright (C) 2003-2012 Gil Dabah + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see */ @@ -10,41 +25,52 @@ This library is licensed under the BSD license. See the file COPYING. 
#define PREFIX_H #include "config.h" - #include "decoder.h" -#include "wstring.h" + + +/* Specifies the type of the extension prefix, such as: REX, 2 bytes VEX, 3 bytes VEX. */ +typedef enum {PET_NONE = 0, PET_REX, PET_VEX2BYTES, PET_VEX3BYTES} _PrefixExtType; + +/* Specifies an index into a table of prefixes by their type. */ +typedef enum {PFXIDX_NONE = -1, PFXIDX_REX, PFXIDX_LOREP, PFXIDX_SEG, PFXIDX_OP_SIZE, PFXIDX_ADRS, PFXIDX_MAX} _PrefixIndexer; /* * This holds the prefixes state for the current instruction we decode. -* totalPrefixes includes all specific prefixes that the instruction got. +* decodedPrefixes includes all specific prefixes that the instruction got. * start is a pointer to the first prefix to take into account. * last is a pointer to the last byte we scanned. * Other pointers are used to keep track of prefixes positions and help us know if they appeared already and where. */ typedef struct { - unsigned int unusedCount, specialPrefixesSize, isREXPrefixValid; - _iflags totalPrefixes, usedPrefixes; - const uint8_t *lokrepPos, *segovrdPos, *opsizePos, *addrsizePos, *rexpos, *start, *last; -} _PrefixState; + _iflags decodedPrefixes, usedPrefixes; + const uint8_t *start, *last, *vexPos, *rexPos; + _PrefixExtType prefixExtType; + uint16_t unusedPrefixesMask; + /* Indicates whether the operand size prefix (0x66) was used as a mandatory prefix. */ + int isOpSizeMandatory; + /* If VEX prefix is used, store the VEX.vvvv field. */ + unsigned int vexV; + /* The fields B/X/R/W/L of REX and VEX are stored together in this byte. */ + unsigned int vrex; -/* Describe the type of the prefix and which one it was in a flag manner. */ -typedef enum {PRE_NONE = -1, PRE_LOKREP, PRE_SEGOVRD, PRE_OPSIZE, PRE_ADDRSIZE, PRE_REX} _PrefixType; + /* !! Make sure pfxIndexer is LAST! Otherwise memset won't work well with it. !! */ -typedef struct { - _iflags flag; - _PrefixType type; -} _PrefixInfo; + /* Holds the offset to the prefix byte by its type. 
*/ + int pfxIndexer[PFXIDX_MAX]; +} _PrefixState; /* -* Intel supports 4 types of prefixes, whereas AMD supports 5 types (lock is seperated from rep/nz). +* Intel supports 6 types of prefixes, whereas AMD supports 5 types (lock is seperated from rep/nz). * REX is the fifth prefix type, this time I'm based on AMD64. +* VEX is the 6th, though it can't be repeated. */ #define MAX_PREFIXES (5) -int is_prefix(unsigned int ch, _DecodeType dt); -void decode_prefixes(const uint8_t* code, int codeLen, _PrefixState* ps, _DecodeType dt); -void get_unused_prefixes_list(uint8_t unusedList[MAX_PREFIXES], _PrefixState* ps); - -void str_seg_text(_WString* s, _PrefixState* ps, _DecodeType dt); +int prefixes_is_valid(unsigned int ch, _DecodeType dt); +void prefixes_ignore(_PrefixState* ps, _PrefixIndexer pi); +void prefixes_ignore_all(_PrefixState* ps); +uint16_t prefixes_set_unused_mask(_PrefixState* ps); +void prefixes_decode(const uint8_t* code, int codeLen, _PrefixState* ps, _DecodeType dt); +void prefixes_use_segment(_iflags defaultSeg, _PrefixState* ps, _DecodeType dt, _DInst* di); #endif /* PREFIX_H */ diff --git a/src/textdefs.h b/src/textdefs.h index 3e6704e..7a8f9e1 100644 --- a/src/textdefs.h +++ b/src/textdefs.h @@ -1,8 +1,23 @@ /* textdefs.h -Copyright (C) 2003-2008 Gil Dabah, http://ragestorm.net/distorm/ -This library is licensed under the BSD license. See the file COPYING. +diStorm3 - Powerful disassembler for X86/AMD64 +http://ragestorm.net/distorm/ +distorm at gmail dot com +Copyright (C) 2003-2012 Gil Dabah + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see */ @@ -10,18 +25,14 @@ This library is licensed under the BSD license. See the file COPYING. #define TEXTDEFS_H #include "config.h" - #include "wstring.h" +#ifndef DISTORM_LIGHT + #define PLUS_DISP_CHR '+' #define MINUS_DISP_CHR '-' -/* CHR0 is used for FPU, ST(i) register */ -#define OPEN_CHR0 '(' -#define CLOSE_CHR0 ')' #define OPEN_CHR '[' #define CLOSE_CHR ']' -extern int8_t SEP_STR[3]; -#define SEP_CHR ',' #define SP_CHR ' ' #define SEG_OFF_CHR ':' @@ -35,12 +46,10 @@ Naming Convention: * code - means the function is used for disassembled instruction - Big Endian output. * off - means the function is used for 64bit offset - Big Endian output. -* sp - space character in front of the string. * h - '0x' in front of the string. * b - byte -* w - word -* dw - double word +* dw - double word (can be used for word also) * qw - quad word * all numbers are in HEX. @@ -48,28 +57,15 @@ Naming Convention: extern int8_t TextBTable[256][4]; -_INLINE_ int8_t* get_hex_b(unsigned int x) -{ - return &TextBTable[x & 255][1]; /* Skip space character. 
*/ -} - void _FASTCALL_ str_hex_b(_WString* s, unsigned int x); void _FASTCALL_ str_code_hb(_WString* s, unsigned int x); - -void _FASTCALL_ str_hex_sp_b(_WString* s, unsigned int x); -void _FASTCALL_ str_code_sp_hb(_WString* s, unsigned int x); - -void _FASTCALL_ str_hex_sp_w(_WString* s, unsigned int x); -void _FASTCALL_ str_code_hw(_WString* s, unsigned int x); - -void _FASTCALL_ str_hex_sp_dw(_WString* s, uint32_t x); void _FASTCALL_ str_code_hdw(_WString* s, uint32_t x); - -void _FASTCALL_ str_hex_sp_qw(_WString* s, uint8_t src[8]); void _FASTCALL_ str_code_hqw(_WString* s, uint8_t src[8]); #ifdef SUPPORT_64BIT_OFFSET void _FASTCALL_ str_off64(_WString* s, OFFSET_INTEGER x); #endif +#endif /* DISTORM_LIGHT */ + #endif /* TEXTDEFS_H */ diff --git a/src/wstring.h b/src/wstring.h index 067c4a8..81d3050 100644 --- a/src/wstring.h +++ b/src/wstring.h @@ -1,8 +1,23 @@ /* wstring.h -Copyright (C) 2003-2008 Gil Dabah, http://ragestorm.net/distorm/ -This library is licensed under the BSD license. See the file COPYING. +diStorm3 - Powerful disassembler for X86/AMD64 +http://ragestorm.net/distorm/ +distorm at gmail dot com +Copyright (C) 2003-2012 Gil Dabah + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see */ @@ -11,38 +26,22 @@ This library is licensed under the BSD license. See the file COPYING. #include "config.h" -/* Make sure the buffer isn't overflowed. 
*/ -#define MAX_TEXT_SIZE (60) +#ifndef DISTORM_LIGHT -typedef struct { - unsigned int pos; - int8_t p[MAX_TEXT_SIZE]; -} _WString; +void strclear_WS(_WString* s); +void chrcat_WS(_WString* s, uint8_t ch); +void strcpylen_WS(_WString* s, const int8_t* buf, unsigned int len); +void strcatlen_WS(_WString* s, const int8_t* buf, unsigned int len); +void strcat_WS(_WString* s, const _WString* s2); /* * Warning, this macro should be used only when the compiler knows the size of string in advance! * This macro is used in order to spare the call to strlen when the strings are known already. * Note: sizeof includes NULL terminated character. */ -#define strcat_WSN(s, t) strcatlen_WS((s), (t), sizeof((t))-1) -#define strcpy_WSN(s, t) strcpylen_WS((s), (t), sizeof((t))-1) - -void _FASTCALL_ strcpy_WS(_WString* s, const int8_t* buf); -void _FASTCALL_ strcpylen_WS(_WString* s, const int8_t* buf, unsigned int len); -void _FASTCALL_ strcatlen_WS(_WString* s, const int8_t* buf, unsigned int len); - -_INLINE_ void strclear_WS(_WString* s) -{ - s->p[0] = '\0'; - s->pos = 0; -} - -_INLINE_ void chrcat_WS(_WString* s, uint8_t ch) -{ - s->p[s->pos] = ch; - s->p[s->pos + 1] = '\0'; - s->pos += 1; -} +#define strcat_WSN(s, t) strcatlen_WS((s), ((const int8_t*)t), sizeof((t))-1) +#define strcpy_WSN(s, t) strcpylen_WS((s), ((const int8_t*)t), sizeof((t))-1) +#endif /* DISTORM_LIGHT */ #endif /* WSTRING_H */ diff --git a/src/x86defs.h b/src/x86defs.h index 22e9679..8e36633 100644 --- a/src/x86defs.h +++ b/src/x86defs.h @@ -1,100 +1,59 @@ /* x86defs.h -Copyright (C) 2003-2008 Gil Dabah, http://ragestorm.net/distorm/ -This library is licensed under the BSD license. See the file COPYING. 
+diStorm3 - Powerful disassembler for X86/AMD64 +http://ragestorm.net/distorm/ +distorm at gmail dot com +Copyright (C) 2003-2012 Gil Dabah + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see */ #ifndef X86DEFS_H #define X86DEFS_H -#include "config.h" - -#include "textdefs.h" -#include "insts.h" #define SEG_REGS_MAX (6) #define CREGS_MAX (9) #define DREGS_MAX (8) -/* - * Definitions use this structure for faster copying. - * str_x86def relies on this size, copying aligned dwords. 
- */ -#define DEF_TEXT_SIZE (8) -typedef struct { - unsigned int size; - int8_t p[DEF_TEXT_SIZE]; -} _DefText; - -extern _DefText _CONDITIONS[16]; -extern _DefText _CONDITIONS_PSEUDO[8]; -extern _DefText _CREGS[CREGS_MAX]; -extern _DefText _DREGS[DREGS_MAX]; -extern _DefText _SREGS[SEG_REGS_MAX]; -extern _DefText _REGS8[16]; -extern _DefText _REGS8_REX[16]; -extern _DefText _REGS16[16]; -extern _DefText _REGS32[16]; -extern _DefText _REGS64[16]; -extern _DefText _REGSMMX[8]; -extern _DefText _REGSSSE[16]; -extern _DefText _MODS16[8]; -extern _DefText _MODS32[16]; -extern _DefText _MODS64[16]; -extern _DefText _BASE32[16]; -extern _DefText _BASE64[16]; -extern _DefText _INDEX32[16]; -extern _DefText _INDEX64[16]; - -extern _DefText _SCALE32[4]; - -extern int8_t FPU_STACK_TEXT[3]; -extern int8_t ONE_CONST_TEXT[4]; -extern int8_t REG_RIP_TEXT[4]; - -extern int8_t BYTE_UNDEFINED[3]; -extern int8_t TEXT_8_BITS[6]; -extern int8_t TEXT_16_BITS[6]; -extern int8_t TEXT_32_BITS[7]; -extern int8_t TEXT_64_BITS[7]; -extern int8_t TEXT_80_BITS[7]; -extern int8_t TEXT_128_BITS[8]; - -extern int8_t PREFIX_LOCK_TEXT[6]; -extern int8_t PREFIX_REP_TEXT[5]; -extern int8_t PREFIX_REPNZ_TEXT[7]; -extern int8_t PREFIX_CS_TEXT[3]; -extern int8_t PREFIX_SS_TEXT[3]; -extern int8_t PREFIX_DS_TEXT[3]; -extern int8_t PREFIX_ES_TEXT[3]; -extern int8_t PREFIX_FS_TEXT[3]; -extern int8_t PREFIX_GS_TEXT[3]; - -extern int8_t SUFFIX_SIZE_BYTE; -extern int8_t SUFFIX_SIZE_WORD; -extern int8_t SUFFIX_SIZE_DWORD; -extern int8_t SUFFIX_SIZE_QWORD; - -extern int8_t SHORT_OPERAND[7]; -extern int8_t SMALL_OPERAND[7]; -extern int8_t LARGE_OPERAND[7]; - /* Maximum instruction size, including prefixes */ #define INST_MAXIMUM_SIZE (15) -/* Maximum range of imm8 (comparison type) of special SSE instructions. */ +/* Maximum range of imm8 (comparison type) of special SSE CMP instructions. 
*/ #define INST_CMP_MAX_RANGE (8) -/* Wait instruction byte code */ -#define WAIT_INSTRUCTION_CODE (0x9b) -extern int8_t WAIT_INSTRUCTION_MNEMONIC[5]; +/* Maximum range of imm8 (comparison type) of special AVX VCMP instructions. */ +#define INST_VCMP_MAX_RANGE (32) + +/* Wait instruction byte code. */ +#define INST_WAIT_INDEX (0x9b) + +/* Lea instruction byte code. */ +#define INST_LEA_INDEX (0x8d) + +/* NOP/XCHG instruction byte code. */ +#define INST_NOP_INDEX (0x90) + +/* ARPL/MOVSXD instruction byte code. */ +#define INST_ARPL_INDEX (0x63) /* -* Minimal MODR/M value of divided instructions. -* It's 0xc0, two msb bits set, which indicates a general purpose register is used too. -*/ + * Minimal MODR/M value of divided instructions. + * It's 0xc0, two MSBs set, which indicates a general purpose register is used too. + */ #define INST_DIVIDED_MODRM (0xc0) /* This is the escape byte value used for 3DNow! instructions. */ @@ -111,47 +70,25 @@ extern int8_t WAIT_INSTRUCTION_MNEMONIC[5]; #define PREFIX_GS (0x65) #define PREFIX_OP_SIZE (0x66) #define PREFIX_ADDR_SIZE (0x67) +#define PREFIX_VEX2b (0xc5) +#define PREFIX_VEX3b (0xc4) /* REX prefix value range, 64 bits mode decoding only. */ #define PREFIX_REX_LOW (0x40) #define PREFIX_REX_HI (0x4f) -/* In otder to use the extended GPR's we have to add 8 to the Modr/M info values. */ -#define REX_GPR_BASE (8) +/* In order to use the extended GPR's we have to add 8 to the Modr/M info values. */ +#define EX_GPR_BASE (8) -/* Mask for REX features: */ +/* Mask for REX and VEX features: */ /* Base */ -#define PREFIX_REX_B (1) +#define PREFIX_EX_B (1) /* Index */ -#define PREFIX_REX_X (2) +#define PREFIX_EX_X (2) /* Register */ -#define PREFIX_REX_R (4) +#define PREFIX_EX_R (4) /* Operand Width */ -#define PREFIX_REX_W (8) - -/* - * The locate_inst will return on of these two instructions according to the specified decoding mode. - * ARPL or MOVSXD on 64 bits is one byte instruction at index 0x63. 
- */ -#define INST_ARPL_INDEX (0x63) -extern _InstInfo II_arpl; -extern _InstInfoEx II_movsxd; - -/* - * The NOP instruction can be prefixed by REX in 64bits, therefore we have to decide in runtime whether it's an XCHG or NOP instruction. - * If 0x90 is prefixed by a useable REX it will become XCHG, otherwise it will become a NOP. - */ -#define INST_NOP_INDEX (0x90) -extern _InstInfo II_nop; - -_INLINE_ void str_x86def(_WString* s, _DefText* d) -{ - /* - * Copy 2 aligned dwords to speed up things. - * _WString should have that extra space, most of the times it will simply copy null-termianting characters. - */ - *(int32_t*)&s->p[s->pos] = *(int32_t*)d->p; - *(int32_t*)&s->p[s->pos + sizeof(int32_t)] = *(int32_t*)&d->p[sizeof(int32_t)]; - s->pos += d->size; -} +#define PREFIX_EX_W (8) +/* Vector Lengh */ +#define PREFIX_EX_L (0x10) #endif /* X86DEFS_H */ From 35a5d00666becccc5e52428516f58e56e2937385 Mon Sep 17 00:00:00 2001 From: "ronp@winter" Date: Thu, 22 Nov 2012 12:00:10 +0200 Subject: [PATCH 2/2] update to build correctly on linux as well; updated sample makefiles and readme files --- README | 8 ++++---- linux/Makefile | 2 +- osx/Makefile | 2 +- src/extconf.rb | 1 + src/extconf_osx.rb | 3 --- 5 files changed, 7 insertions(+), 9 deletions(-) delete mode 100644 src/extconf_osx.rb diff --git a/README b/README index 7f2d8a5..46d4d42 100644 --- a/README +++ b/README @@ -4,21 +4,21 @@ The frasm binding was originally written by Tom Ptacek and has since been update frasm requires you have built and installed libdistorm -Distorm comes with pystorm by default so when you create distorm64.{so,bundle,dll} be sure to compile like so: +Distorm comes with pystorm by default so when you create distorm3.{so,bundle,dll} be sure to compile like so: 'make clib' otherwise ruby will throw an error due to undefined python symbols -make sure you have the distorm3.so/distorm3.dylib under your search path (the OSX generated makefile looks for /usr/local/lib/distorm3.dylib) +make sure 
you have the libdistorm3.so (linux) / distorm3.dylib (mac) under your search path --------------- Compiling Frasm --------------- cd src/ -ruby extconf.rb (for OSX: ruby extconf_osx.rb) +ruby extconf.rb make make install (may require sudo) -The directories linux/ osx/ and win32 also have manually created Makefiles if extconf.rb fails you +The directories linux/ osx/ and win32 also have manually created Makefiles if extconf.rb fails you, will need tweaking to work with your local paths and rubies ------------------- Example frasm usage diff --git a/linux/Makefile b/linux/Makefile index fb7ba56..69d4a15 100644 --- a/linux/Makefile +++ b/linux/Makefile @@ -1,5 +1,5 @@ CC=gcc -LDFLAGS=-shared -lruby1.8 /usr/local/lib/libdistorm64.so +LDFLAGS=-shared -lruby1.8 /usr/local/lib/libdistorm3.so CFLAGS=-ggdb -Wall -DSUPPORT_64BIT_OFFSET -I/usr/lib/ruby/1.8/i486-linux/ OBJECTS=../src/frasm.o diff --git a/osx/Makefile b/osx/Makefile index 628df1d..4f70253 100644 --- a/osx/Makefile +++ b/osx/Makefile @@ -1,5 +1,5 @@ CC=gcc -LDFLAGS=-bundle -undefined suppress -flat_namespace -lruby /usr/local/lib/libdistorm64.dylib +LDFLAGS=-bundle -undefined suppress -flat_namespace -lruby /usr/local/lib/libdistorm3.dylib CFLAGS=-ggdb -Wall -DSUPPORT_64BIT_OFFSET -I/opt/local/lib/ruby/1.8/i686-darwin9/ OBJECTS=../src/frasm.o diff --git a/src/extconf.rb b/src/extconf.rb index 8018a8d..8efeb77 100644 --- a/src/extconf.rb +++ b/src/extconf.rb @@ -1,2 +1,3 @@ require 'mkmf' +have_library('distorm3') create_makefile('frasm') diff --git a/src/extconf_osx.rb b/src/extconf_osx.rb deleted file mode 100644 index 6555537..0000000 --- a/src/extconf_osx.rb +++ /dev/null @@ -1,3 +0,0 @@ -require 'mkmf' -$LDFLAGS << '-bundle -undefined suppress -flat_namespace -lruby /usr/local/lib/libdistorm3.dylib' -create_makefile('frasm')