/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * @author Alexander V. Astapchuk */ #include "enc_base.h" //#include #include #define USE_ENCODER_DEFINES #include "enc_prvt.h" #include //#define JET_PROTO #ifdef JET_PROTO #include "dec_base.h" #include "jvmti_dasm.h" #endif ENCODER_NAMESPACE_START /** * @file * @brief Main encoding routines and structures. */ #ifndef _WIN32 #define strcmpi strcasecmp #endif int EncoderBase::dummy = EncoderBase::buildTable(); const unsigned char EncoderBase::size_hash[OpndSize_64+1] = { // 0xFF, // OpndSize_Null = 0, 3, // OpndSize_8 = 0x1, 2, // OpndSize_16 = 0x2, 0xFF, // 0x3 1, // OpndSize_32 = 0x4, 0xFF, // 0x5 0xFF, // 0x6 0xFF, // 0x7 0, // OpndSize_64 = 0x8, // }; const unsigned char EncoderBase::kind_hash[OpndKind_Mem+1] = { // //gp reg -> 000 = 0 //memory -> 001 = 1 //immediate -> 010 = 2 //xmm reg -> 011 = 3 //segment regs -> 100 = 4 //fp reg -> 101 = 5 //mmx reg -> 110 = 6 // 0xFF, // 0 OpndKind_Null=0, 0<<2, // 1 OpndKind_GPReg = // OpndKind_MinRegKind=0x1, 4<<2, // 2 OpndKind_SReg=0x2, #ifdef _HAVE_MMX_ 6<<2, // 3 #else 0xFF, // 3 #endif 5<<2, // 4 OpndKind_FPReg=0x4, 0xFF, 0xFF, 0xFF, // 5, 6, 7 3<<2, // OpndKind_XMMReg=0x8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 9, 0xA, 0xB, 0xC, 0xD, // 0xE, 0xF 0xFF, // OpndKind_MaxRegKind = // OpndKind_StatusReg = // OpndKind_OtherReg=0x10, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x11-0x18 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x19-0x1F 2<<2, // OpndKind_Immediate=0x20, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x21-0x28 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x29-0x30 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x31-0x38 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x39-0x3F 1<<2, // OpndKind_Memory=0x40 }; char * EncoderBase::curRelOpnd[3]; char* EncoderBase::encode_aux(char* stream, unsigned aux, const Operands& opnds, const OpcodeDesc * odesc, unsigned * pargsCount, Rex * prex) { const unsigned byte = aux; OpcodeByteKind kind = (OpcodeByteKind)(byte & OpcodeByteKind_KindMask); // The '>>' here is to force the switch to be table-based) instead of // set of CMP+Jcc. if (*pargsCount >= COUNTOF(opnds)) { assert(false); return stream; } switch(kind>>8) { case OpcodeByteKind_SlashR>>8: // /r - Indicates that the ModR/M byte of the instruction contains // both a register operand and an r/m operand. { assert(opnds.count() > 1); // not true anymore for MOVQ xmm<->r //assert((odesc->opnds[0].kind & OpndKind_Mem) || // (odesc->opnds[1].kind & OpndKind_Mem)); unsigned memidx = odesc->opnds[0].kind & OpndKind_Mem ? 0 : 1; unsigned regidx = memidx == 0 ? 1 : 0; memidx += *pargsCount; regidx += *pargsCount; ModRM& modrm = *(ModRM*)stream; if (memidx >= COUNTOF(opnds) || regidx >= COUNTOF(opnds)) { assert(false); break; } if (opnds[memidx].is_mem()) { stream = encodeModRM(stream, opnds, memidx, odesc, prex); } else { modrm.mod = 3; // 11 modrm.rm = getHWRegIndex(opnds[memidx].reg()); #ifdef _EM64T_ if (opnds[memidx].need_rex() && needs_rex_r(opnds[memidx].reg())) { prex->b = 1; } #endif ++stream; } modrm.reg = getHWRegIndex(opnds[regidx].reg()); #ifdef _EM64T_ if (opnds[regidx].need_rex() && needs_rex_r(opnds[regidx].reg())) { prex->r = 1; } #endif *pargsCount += 2; } break; case OpcodeByteKind_SlashNum>>8: // /digit - A digit between 0 and 7 indicates that the // ModR/M byte of the instruction uses only the r/m // (register or memory) operand. The reg field contains // the digit that provides an extension to the instruction's // opcode. { const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask); assert(lowByte <= 7); ModRM& modrm = *(ModRM*)stream; unsigned idx = *pargsCount; assert(opnds[idx].is_mem() || opnds[idx].is_reg()); if (opnds[idx].is_mem()) { stream = encodeModRM(stream, opnds, idx, odesc, prex); } else { modrm.mod = 3; // 11 modrm.rm = getHWRegIndex(opnds[idx].reg()); #ifdef _EM64T_ if (opnds[idx].need_rex() && needs_rex_r(opnds[idx].reg())) { prex->b = 1; } #endif ++stream; } modrm.reg = (char)lowByte; *pargsCount += 1; } break; case OpcodeByteKind_plus_i>>8: // +i - A number used in floating-point instructions when one // of the operands is ST(i) from the FPU register stack. The // number i (which can range from 0 to 7) is added to the // hexadecimal byte given at the left of the plus sign to form // a single opcode byte. { unsigned idx = *pargsCount; const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask); *stream = (char)lowByte + getHWRegIndex(opnds[idx].reg()); ++stream; *pargsCount += 1; } break; case OpcodeByteKind_ib>>8: case OpcodeByteKind_iw>>8: case OpcodeByteKind_id>>8: #ifdef _EM64T_ case OpcodeByteKind_io>>8: #endif //_EM64T_ // ib, iw, id - A 1-byte (ib), 2-byte (iw), or 4-byte (id) // immediate operand to the instruction that follows the // opcode, ModR/M bytes or scale-indexing bytes. The opcode // determines if the operand is a signed value. All words // and double words are given with the low-order byte first. { unsigned idx = *pargsCount; *pargsCount += 1; assert(opnds[idx].is_imm()); if (kind == OpcodeByteKind_ib) { *(unsigned char*)stream = (unsigned char)opnds[idx].imm(); curRelOpnd[idx] = stream; stream += 1; } else if (kind == OpcodeByteKind_iw) { *(unsigned short*)stream = (unsigned short)opnds[idx].imm(); curRelOpnd[idx] = stream; stream += 2; } else if (kind == OpcodeByteKind_id) { *(unsigned*)stream = (unsigned)opnds[idx].imm(); curRelOpnd[idx] = stream; stream += 4; } #ifdef _EM64T_ else { assert(kind == OpcodeByteKind_io); *(long long*)stream = (long long)opnds[idx].imm(); curRelOpnd[idx] = stream; stream += 8; } #else else { assert(false); } #endif } break; case OpcodeByteKind_cb>>8: assert(opnds[*pargsCount].is_imm()); *(unsigned char*)stream = (unsigned char)opnds[*pargsCount].imm(); curRelOpnd[*pargsCount]= stream; stream += 1; *pargsCount += 1; break; case OpcodeByteKind_cw>>8: assert(opnds[*pargsCount].is_imm()); *(unsigned short*)stream = (unsigned short)opnds[*pargsCount].imm(); curRelOpnd[*pargsCount]= stream; stream += 2; *pargsCount += 1; break; case OpcodeByteKind_cd>>8: assert(opnds[*pargsCount].is_imm()); *(unsigned*)stream = (unsigned)opnds[*pargsCount].imm(); curRelOpnd[*pargsCount]= stream; stream += 4; *pargsCount += 1; break; //OpcodeByteKind_cp = 0x0B00, //OpcodeByteKind_co = 0x0C00, //OpcodeByteKind_ct = 0x0D00, case OpcodeByteKind_rb>>8: case OpcodeByteKind_rw>>8: case OpcodeByteKind_rd>>8: // +rb, +rw, +rd - A register code, from 0 through 7, // added to the hexadecimal byte given at the left of // the plus sign to form a single opcode byte. assert(opnds.count() > 0); assert(opnds[*pargsCount].is_reg()); { const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask); *(unsigned char*)stream = (unsigned char)lowByte + getHWRegIndex(opnds[*pargsCount].reg()); #ifdef _EM64T_ if (opnds[*pargsCount].need_rex() && needs_rex_r(opnds[*pargsCount].reg())) { prex->b = 1; } #endif ++stream; *pargsCount += 1; } break; default: assert(false); break; } return stream; } char * EncoderBase::encode(char * stream, Mnemonic mn, const Operands& opnds) { #ifdef _DEBUG if (opnds.count() > 0) { if (opnds[0].is_mem()) { assert(getRegKind(opnds[0].base()) != OpndKind_SReg); } else if (opnds.count() >1 && opnds[1].is_mem()) { assert(getRegKind(opnds[1].base()) != OpndKind_SReg); } } #endif #ifdef JET_PROTO char* saveStream = stream; #endif const OpcodeDesc * odesc = lookup(mn, opnds); #if !defined(_EM64T_) bool copy_opcode = true; Rex *prex = NULL; #else // We need rex if // either of registers used as operand or address form is new extended register // it's explicitly specified by opcode // So, if we don't have REX in opcode but need_rex, then set rex here // otherwise, wait until opcode is set, and then update REX bool copy_opcode = true; unsigned char _1st = odesc->opcode[0]; Rex *prex = (Rex*)stream; if (opnds.need_rex() && ((_1st == 0x66) || (_1st == 0xF2 || _1st == 0xF3) && odesc->opcode[1] == 0x0F)) { // Special processing // copy_opcode = false; // *(unsigned char*)stream = _1st; ++stream; // prex = (Rex*)stream; prex->dummy = 4; prex->w = 0; prex->b = 0; prex->x = 0; prex->r = 0; ++stream; // memcpy(stream, &odesc->opcode[1], odesc->opcode_len-1); stream += odesc->opcode_len-1; } else if (_1st != 0x48 && opnds.need_rex()) { prex = (Rex*)stream; prex->dummy = 4; prex->w = 0; prex->b = 0; prex->x = 0; prex->r = 0; ++stream; } #endif // ifndef EM64T if (copy_opcode) { if (odesc->opcode_len==1) { unsigned char *dest = (unsigned char *) (stream); unsigned char *src = (unsigned char *) (& (odesc->opcode)); *dest = *src; } else if (odesc->opcode_len==2) { short *dest = (short *) (stream); void *ptr = (void *) (& (odesc->opcode)); short *src = (short *) (ptr); *dest = *src; } else if (odesc->opcode_len==3) { unsigned short *dest = (unsigned short *) (stream); void *ptr = (void *) (& (odesc->opcode)); unsigned short *src = (unsigned short *) (ptr); *dest = *src; //Now handle the last part unsigned char *dest2 = (unsigned char *) (stream + 2); *dest2 = odesc->opcode[2]; } else if (odesc->opcode_len==4) { unsigned int *dest = (unsigned int *) (stream); void *ptr = (void *) (& (odesc->opcode)); unsigned int *src = (unsigned int *) (ptr); *dest = *src; } stream += odesc->opcode_len; } unsigned argsCount = odesc->first_opnd; if (odesc->aux0) { stream = encode_aux(stream, odesc->aux0, opnds, odesc, &argsCount, prex); if (odesc->aux1) { stream = encode_aux(stream, odesc->aux1, opnds, odesc, &argsCount, prex); } } #ifdef JET_PROTO //saveStream Inst inst; unsigned len = DecoderBase::decode(saveStream, &inst); assert(inst.mn == mn); assert(len == (unsigned)(stream-saveStream)); if (mn == Mnemonic_CALL || mn == Mnemonic_JMP || Mnemonic_RET == mn || (Mnemonic_JO<=mn && mn<=Mnemonic_JG)) { assert(inst.argc == opnds.count()); InstructionDisassembler idi(saveStream); for (unsigned i=0; ib = 1; } return stream; } // cool, we do have SIB. ++stream; // bypass SIB in stream // {E|R}SP cannot be scaled index, however, R12 which has the same index in modrm - can assert(op.index() == RegName_Null || !equals(op.index(), REG_STACK)); // Only GPRegs can be encoded in the SIB assert(op.base() == RegName_Null || getRegKind(op.base()) == OpndKind_GPReg); assert(op.index() == RegName_Null || getRegKind(op.index()) == OpndKind_GPReg); modrm.rm = 4; // r/m = 100, means 'we have SIB here' if (op.base() == RegName_Null) { // no base. // already checked above if // the first if() //assert(op.index() != RegName_Null); modrm.mod = 0; // mod=00 - here it means 'no base, but disp32' sib.base = 5; // 101 with mod=00 ^^^ // encode at least fake disp32 to avoid having [base=ebp] *(unsigned*)stream = op.disp(); curRelOpnd[idx]= stream; stream += 4; unsigned sc = op.scale(); if (sc == 1 || sc==0) { sib.scale = 0; } // SS=00 else if (sc == 2) { sib.scale = 1; } // SS=01 else if (sc == 4) { sib.scale = 2; } // SS=10 else if (sc == 8) { sib.scale = 3; } // SS=11 sib.index = getHWRegIndex(op.index()); if (is_em64t_extra_reg(op.index())) { prex->x = 1; } return stream; } if (op.disp() == 0 && getHWRegIndex(op.base()) != getHWRegIndex(RegName_EBP)) { modrm.mod = 0; // mod=00, no disp } else if (disp_fits8) { modrm.mod = 1; // mod=01, use disp8 *(unsigned char*)stream = (unsigned char)op.disp(); curRelOpnd[idx]= stream; stream += 1; } else { modrm.mod = 2; // mod=10, use disp32 *(unsigned*)stream = (unsigned)op.disp(); curRelOpnd[idx]= stream; stream += 4; } if (op.index() == RegName_Null) { assert(op.scale() == 0); // 'scale!=0' has no meaning without index // the only reason we're here without index, is that we have {E|R}SP // or R12 as a base. Another possible reason - EBP without a disp - // is handled above by adding a fake disp8 #ifdef _EM64T_ assert(op.base() != RegName_Null && (equals(op.base(), REG_STACK) || equals(op.base(), RegName_R12))); #else // _EM64T_ assert(op.base() != RegName_Null && equals(op.base(), REG_STACK)); #endif //_EM64T_ sib.scale = 0; // SS = 00 sib.index = 4; // SS + index=100 means 'no index' } else { unsigned sc = op.scale(); if (sc == 1 || sc==0) { sib.scale = 0; } // SS=00 else if (sc == 2) { sib.scale = 1; } // SS=01 else if (sc == 4) { sib.scale = 2; } // SS=10 else if (sc == 8) { sib.scale = 3; } // SS=11 sib.index = getHWRegIndex(op.index()); if (is_em64t_extra_reg(op.index())) { prex->x = 1; } // not an error by itself, but the usage of [index*1] instead // of [base] is discouraged assert(op.base() != RegName_Null || op.scale() != 1); } sib.base = getHWRegIndex(op.base()); if (is_em64t_extra_reg(op.base())) { prex->b = 1; } return stream; } char * EncoderBase::nops(char * stream, unsigned howMany) { // Recommended multi-byte NOPs from the Intel architecture manual static const unsigned char nops[10][9] = { { 0, }, // 0, this line is dummy and not used in the loop below { 0x90, }, // 1-byte NOP { 0x66, 0x90, }, // 2 { 0x0F, 0x1F, 0x00, }, // 3 { 0x0F, 0x1F, 0x40, 0x00, }, // 4 { 0x0F, 0x1F, 0x44, 0x00, 0x00, }, // 5 { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00, }, // 6 { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00, }, // 7 { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, }, // 8 { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 9-byte NOP }; // Start from delivering the longest possible NOPs, then proceed with shorter ones for (unsigned nopSize=9; nopSize!=0; nopSize--) { while(howMany>=nopSize) { const unsigned char* nopBytes = nops[nopSize]; for (unsigned i=0; i> 24)&0xFF; if (_mn != _mn) { stat.miss(mn); return EncoderBase::NOHASH; } unsigned idx = (pack >> 16) & 0xFF; stat.hit(mn); return idx; } static void put(Mnemonic mn, unsigned hash, unsigned idx) { unsigned pack = hash | (idx<<16) | (mn << 24); unsigned key = hash % COUNTOF(subHash); subHash[key] = pack; } #endif const EncoderBase::OpcodeDesc * EncoderBase::lookup(Mnemonic mn, const Operands& opnds) { const unsigned hash = opnds.hash(); unsigned opcodeIndex = opcodesHashMap[mn][hash]; #ifdef ENCODER_USE_SUBHASH if (opcodeIndex == NOHASH) { opcodeIndex = find(mn, hash); } #endif if (opcodeIndex == NOHASH) { // fast-path did no work. try to lookup sequentially const OpcodeDesc * odesc = opcodes[mn]; int idx = -1; bool found = false; for (idx=0; !odesc[idx].last; idx++) { const OpcodeDesc& opcode = odesc[idx]; if (opcode.platf == OpcodeInfo::decoder) { continue; } if (opcode.roles.count != opnds.count()) { continue; } if (try_match(opcode, opnds, true)) { found = true; break; } } if (!found) { for (idx=0; !odesc[idx].last; idx++) { const OpcodeDesc& opcode = odesc[idx]; if (opcode.platf == OpcodeInfo::decoder) { continue; } if (opcode.roles.count != opnds.count()) { continue; } if (try_match(opcode, opnds, false)) { found = true; break; } } } assert(found); opcodeIndex = idx; #ifdef ENCODER_USE_SUBHASH put(mn, hash, opcodeIndex); #endif } assert(opcodeIndex != NOHASH); const OpcodeDesc * odesc = &opcodes[mn][opcodeIndex]; assert(!odesc->last); assert(odesc->roles.count == opnds.count()); assert(odesc->platf != OpcodeInfo::decoder); #if !defined(_EM64T_) // tuning was done for IA32 only, so no size restriction on EM64T //assert(sizeof(OpcodeDesc)==128); #endif return odesc; } char* EncoderBase::getOpndLocation(int index) { assert(index < 3); return curRelOpnd[index]; } Mnemonic EncoderBase::str2mnemonic(const char * mn_name) { for (unsigned m = 1; m