/*
 *  plex86: run multiple x86 operating systems concurrently
 *  Copyright (C) 1999  Kevin P. Lawton
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */



#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <signal.h>

#include "dt.h"

static void timer_handler(int i);

#if 0
Some notes for myself.  Some may be old.

- Invalidate linear addr range (all, PDE, PTE, arbitrary from user space)
- Invalidate physical addr range.
  This would invalidate the Liaddr to Tiaddr table entries.  Would have
  to either dump that whole table or cycle through the local meta page
  table and hash all the linear addresses to find TLB entries to
  invalidate.  Perhaps for single page range hits, this would be good,
  and for big spans, dump the table.  Dumping the table could be
  expensive since we''d have to rebuild it.
  If we store only one laddr for each phy page meta info section, then we
  have to limit to only one translation per page.  Store Metai
  in phy meta info section so we can correlate.  Especially, DMA
  writes which have no associated laddr need correlation.
- Exceptions from DT code.
  o If tAddr within range of DT buffer:
    - simple 1st effort would be to do exhaustive search of tables
      for that page.
    - could have reverse lookup table
  o If tAddr within range of handlers:
    get Giaddr from storage area

- Ways to accelerate the shim code inserted for branches and certain
  other instructions:

  o If SS naturally has access to handler storage area, then could
    make use of that to accelerate branch handling.
  o Use other segment to do the same.  Even use specific handler
    data segment and patch use of that segment in guest code. (use
    infrequently used one).
  o Make use of guest stack.

Laundry list:

- Invalidation of Hash tables.  Dynamic zeroing?
- Incrementing of PDBR reload context switch ID.
#endif


/* Hash from linear page address (LPA) to meta-table index; searched and
 * filled by dtTranslateLPAToMI().
 */
dtL2MHash_t   dtL2MHash;
/* Hash from guest offset to tcode offset; searched by dtTranslateG2T()
 * and filled by dtSetG2TPair().
 */
dtG2THash_t   dtG2THash;
/* Per-page meta information (page address, offset lookup table and
 * tcode buffer), indexed by meta index.
 */
dtMetaEntry_t dtMetaTable[DT_MetaTableElements];
/* Index of the next meta-table element to hand out; see
 * dtTranslateLPAToMI().
 */
unsigned dtMetaFreeIndex;

/* Pretend guest code segment descriptor; main() sets base and limit. */
descriptor_t CS;
/* NOTE(review): not referenced in this file; presumably the guest
 * current privilege level -- confirm against other users.
 */
unsigned     CPL;


/* Address translation and table maintenance */
static Bit32u   dtTranslateG2T(Bit32u gOff);
static void     dtSetG2TPair(unsigned hashRow, Bit32u gOff, Bit32u tOff);
static unsigned dtTranslateLPAToMI(Bit32u lpa);

static Bit32u   dtMetaLookupTCode(unsigned metaIndex, Bit32u gla);
static Bit32u   dtTranslateSequence(unsigned metaIndex, Bit32u gOff,
                                    Bit32u gla);
static void dtInitialize(void);
/* Code emitters: each writes instruction bytes at tcodePtr and returns
 * the pointer just past the emitted bytes.
 */
//static Bit8u *dtEmitPrelude(Bit8u* tcodePtr, Bit32u gOff);
static Bit8u *dtEmitPushImm32(Bit8u *tcodePtr, Bit32u imm32);
//static Bit8u *dtEmitRet(Bit8u *tcodePtr);
static Bit8u *dtEmitUseR3hESP(Bit8u *tcodePtr);
static Bit8u *dtEmitUseGuestESP(Bit8u *tcodePtr);
static Bit8u *dtEmitPushf(Bit8u *tcodePtr);
static Bit8u *dtEmitPopf(Bit8u *tcodePtr);
static Bit8u *dtEmitIDCheck(Bit8u *tcodePtr, Bit8u **offsetPtr, Bit8u **idPtr);
static Bit8u *dtEmitJmp(Bit8u *tcodePtr, Bit32u **offsetPtr);
static Bit8u *dtEmitCall(Bit8u *tcodePtr, Bit32u **offsetPtr);
//static Bit8u *dtEmitPostlude(Bit8u *tcodePtr);
static Bit8u *dtEmitPushReg(Bit8u *tcodePtr, unsigned reg);

#if DT_INSTR_G2T
/* G2T hash instrumentation counters, printed by main() at exit.  They are
 * not incremented anywhere in this file.
 * NOTE(review): presumably updated by the handler code elsewhere -- confirm.
 */
unsigned instrG2THit[DT_G2THashWidth];
unsigned instrG2TMiss;
#endif

  int
main(int argc, char *argv[])
{
  /* Test harness entry point.  Sets up the pretend ring3 handler (r3h)
   * and guest environments, then either runs the guest code through the
   * dynamic translation engine (DT_ON) or natively.  argc/argv are unused.
   * Returns 0 on normal completion and 1 if the r3h code issues an
   * unknown request.
   */
  Bit32u gOff, tcodeAddr, guest_EFLAGS, r3h_EFLAGS;

  {
  /* Hacks to pretend that the ring3 handler routines have a
   * separate set of code and data segments than the guest code
   */

  /* Get the current DS and copy it to the pretend handler SS. */
  asm volatile (
    "pushl %%ds \n\t"
    "popl  %0"
    : "=g" (r3h_DS)
    :
    : "memory"
    );
  guest_SS = r3h_DS;

  /* The handler ESP will point into our stack area. */
  r3h_ESP     = (unsigned) &r3h_stack[R3H_STACK_SIZE];
  r3h_ESP_empty = r3h_ESP;
  guest_ESP = (unsigned) &guest_stack[GUEST_STACK_SIZE];
  /* Emitted ID checks start with an inline ID of 0 (see dtEmitIDCheck),
   * so a global ID of 1 makes every first compare miss.
   */
  globalID = 1;

  /* Pretend GS is a virtualized segment register, and is the
   * data segment for the R3H code.  Really, just copy the user
   * data segment.
   */
  asm volatile (
    "pushl %ds \n\t"
    "popl  %gs \n\t"
    );
  }


  dtInitialize();
  /* Flat pretend guest code segment. */
  CS.base = 0;
  CS.limit = 0xffffffff;

  /* Defined elsewhere.
   * NOTE(review): presumably prepares the guest code pages -- confirm.
   */
  hack_guest_code();


#if DT_ON
  {
  /* Assume execution begins at beginning of guest_page0 */
  gOff = (Bit32u) &guest_page0[0];
  /* Start by translating at that location */
  printf("Priming: translating initial guest code.\n");
  tcodeAddr = dtTranslateG2T(gOff);
  /* Transfer control to the tcode.  This would be a monitor-to-ring3
   * tcode transition in the real VM.
   */
  }

  {
  /* Prime the r3h stack for an initial transition, as if the r3h code had
   * called the monitor and was executing the tail end of the
   * __r3h_branch stub.
   */
  asm volatile ( "pushfl; popl %0\n" : "=g" (guest_EFLAGS) );
  r3h_EFLAGS = guest_EFLAGS;

  /* The guest to r3h context saves */
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = guest_SS; /* DS */
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = guest_SS; /* ES */
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0; /* General registers */
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = guest_EFLAGS;

  /* Pretend return address into the r3h code, plus the tcode address
   * the r3h code should transfer to.
   */
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = (Bit32u) __r3h_prime;
  r3h_target_EIP = tcodeAddr;

  /* The r3h to monitor saves */
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = r3h_EFLAGS;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0; /* r3h general registers */
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  r3h_ESP -= 4; * ((Bit32u*) r3h_ESP) = 0;
  }

  printf("Calling initial tcode.\n");

  {
  /* Set a timer to go off @ N Hz.  The handler can then simulate things
   * that need to be done for context switches in the guest, with
   * respect to tcode.
   */
  struct sigaction sg_act;
  struct itimerval timer;

  /* Note: the return values of sigaction() and setitimer() are not
   * checked.
   */
  memset(&sg_act, 0, sizeof(sg_act));
  sg_act.sa_handler = timer_handler;
  sg_act.sa_flags = SA_RESTART;
  sigaction(SIGVTALRM,&sg_act,NULL);

  /* ITIMER_VIRTUAL delivers SIGVTALRM; first expiry and period are both
   * DT_GuestTimeslice microseconds.
   */
  memset(&timer, 0, sizeof(timer));
  timer.it_value.tv_sec  = 0;
  timer.it_value.tv_usec = DT_GuestTimeslice;
  timer.it_interval.tv_sec  = 0;
  timer.it_interval.tv_usec = DT_GuestTimeslice;
  setitimer(ITIMER_VIRTUAL, &timer, NULL);
  }

  /* Monitor loop: run the r3h/guest side until it comes back with a
   * request in r3h_request, service it, and go around again.
   */
  while (1) {
    __mon2r3h();
    switch (r3h_request) {
      case R3HToMonRequestG2T:
        /* r3h_data carries the guest offset in and the tcode address out. */
        r3h_data = dtTranslateG2T(r3h_data);
        break;

      case R3HToMonRequestTerminate:
        printf("Guest code finished execution.\n");
        goto done;
        break;

      default:
        printf("R3H Request default, data=0x%x\n", (unsigned) r3h_data);
        return 1;
        break;
      }
    }
done:

#else  /* DT_ON */

{
  /* Translation disabled: run the guest code directly. */
  extern Bit32u loop_count;

  __execute_guest_code_native();
printf("Control returns OK from native execution, count=%u.\n",
       (unsigned) loop_count);
}

#endif /* DT_ON */

#if DT_INSTR_G2T
  {
  /* Dump the G2T hash instrumentation: the hit counters, the miss
   * counter, then for each hash row whose first column is in use, the
   * indices of its in-use columns.
   */
  unsigned r, c;
  unsigned found;

  printf("G2T Hits:");
  for (r=0; r<DT_G2THashWidth; r++) {
    printf(" %u", instrG2THit[r]);
    }
  printf("\n");
  printf("G2T Misses: %u\n", instrG2TMiss);

  for (r=0; r<DT_G2THashHeight; r++) {
    found = 0;
    if (dtG2THash[r][0].tOff) {
      found = 1;
      for (c=0; c<DT_G2THashWidth; c++) {
        if (dtG2THash[r][c].tOff)
          printf("%u ", c);
        }
      }
    if (found) printf("\n");
    }
  }
#endif

  return 0;
}


#if DT_ON
  void
timer_handler(int i)
{
  /* SIGVTALRM handler installed by main().  Each timer tick advances
   * the global ID, which the ID checks emitted into tcode compare
   * against.  The signal number is not needed.
   */
  (void) i;
  globalID += 1;
}
#endif

  void
dtInitialize(void)
{
  /* Reset the DT environment: no meta elements allocated, and all of
   * the meta table and both hash tables zeroed.
   */

  dtMetaFreeIndex = 0;
  memset(&dtMetaTable, 0, sizeof dtMetaTable);
  memset(&dtG2THash, 0, sizeof dtG2THash);
  memset(&dtL2MHash, 0, sizeof dtL2MHash);
}


  Bit32u
dtTranslateG2T(Bit32u gOff)
{
  /* Translate from a guest offset to tcode offset.
   *
   * gOff is the guest code offset (relative to CS.base).  Returns the
   * address of the tcode for that offset, translating the guest
   * instruction sequence first if no tcode exists yet.
   */

  unsigned hashRow, i, metaIndex;
  Bit32u tAddr;
  Bit32u gla;

  /* Search the G2T table first, ideally the instruction will have
   * been translated already, and the translation address in there.
   *
   * A slot is in use only when its tOff is non-zero (the same convention
   * dtSetG2TPair() uses to find a free slot).  Without that test, the
   * zero-filled empty slots would match gOff==0 and hand back a tcode
   * address of 0.
   */
  hashRow = DT_G2THashSelect(gOff);
  for (i=0; i<DT_G2THashWidth; i++) {
    if ( (dtG2THash[hashRow][i].tOff != 0) &&
         (dtG2THash[hashRow][i].gOff == gOff) ) {
      return( dtG2THash[hashRow][i].tOff );
      }
    }

  /* Not in the hash: fall back to the per-page meta information. */
  gla = CS.base + gOff;
  metaIndex = dtTranslateLPAToMI( gla >> 12 );
  tAddr = dtMetaLookupTCode(metaIndex, gla);
  if ( tAddr == 0 ) {
    /* Instruction does not have associated tcode; we must translate. */
    tAddr = dtTranslateSequence(metaIndex, gOff, gla);
    }

  /* Cache the pairing whether it was freshly translated or found in the
   * meta table (e.g. after its hash entry was bumped), so the next lookup
   * takes the fast path.
   */
  dtSetG2TPair(hashRow, gOff, tAddr);
  return(tAddr);
}

  void
dtSetG2TPair(unsigned hashRow, Bit32u gOff, Bit32u tOff)
{
  /* Record a {guest offset, tcode offset} pairing in row hashRow of the
   * G2T hash table.  The first slot whose tOff is zero is used; when the
   * row has no such slot, the first entry is replaced.
   */

  unsigned column;
  unsigned target = 0;  /* +++ For now, bump first entry when row is full. */

/* +++ handle:
 *   - no entries yet
 *   - swapping current 1st entry
 *   - zeroing downstream entries
 */

  for (column = 0; column < DT_G2THashWidth; column++) {
    if ( dtG2THash[hashRow][column].tOff == 0 ) {
      target = column;
      break;
      }
    }

  dtG2THash[hashRow][target].gOff = gOff;
  dtG2THash[hashRow][target].tOff = tOff;
}

  unsigned
dtTranslateLPAToMI(Bit32u lpa)
{
  /* Translate from a Linear Page Address to a Meta Index.
   *
   * lpa is the upper 20 bits of the linear address (address >> 12).
   * Returns the index of the meta-table element for that page, allocating
   * a new element if the page has none yet.  Terminates the program if the
   * meta table is exhausted.
   */

  unsigned i, hashRow, tag, metai;

  /* Note: lpa is the upper 20 bits, the 32bit linear address >> 12 */
  hashRow = DT_LPAToMIHash(lpa);
  tag     = DT_LPAToMITag(lpa);

  for (i=0; i<DT_L2MHashWidth; i++) {
    if ( dtL2MHash[hashRow][i].tag == tag ) {
      /* The hash table is zero-filled and has no valid flag, so an empty
       * slot looks like {tag 0, metai 0}.  Only accept the hit when the
       * meta element it names is allocated and really describes this page.
       */
      metai = dtL2MHash[hashRow][i].metai;
      if ( (metai < dtMetaFreeIndex) && (dtMetaTable[metai].lpa == lpa) )
        return(metai);
      }
    }

  /* LPA not in L2M hash table.  See if there is a Meta element which
   * matches this page.
   */

  /* +++ For now, perform a brute-force lookup in the Meta table.
   * Only elements below dtMetaFreeIndex have been allocated; scanning
   * further would match lpa==0 against zero-filled, unallocated elements
   * which a later allocation would then reuse for another page.
   */
  for (i=0; i<dtMetaFreeIndex; i++) {
    if (dtMetaTable[i].lpa == lpa) {
      /* Found the Meta element corresponding to this Linear Page Address.
       * We should cache this pairing in the LPA to Mi hash table so next
       * time an efficient lookup will occur.
       */
      /* +++ Do this more intelligently */
      dtL2MHash[hashRow][0].tag = tag;
      dtL2MHash[hashRow][0].metai = i;
      return(i);
      }
    }

  /* LPA not in either hash table or meta table.  We need to create
   * a new meta table entry for this page.
   */
  /* +++ For now, simple allocation scheme */
  if (dtMetaFreeIndex >= DT_MetaTableElements) {
    printf("dtTranslateLPAToMI: free_index > table size\n");
    exit(1);
    }
  i = dtMetaFreeIndex++;
  dtMetaTable[i].lpa = lpa;
  dtMetaTable[i].offsetLookupFreeIndex = 0;
  dtMetaTable[i].tcodeBufferFreeIndex = 0;

  return(i);
}

  Bit32u
dtMetaLookupTCode(unsigned metaIndex, Bit32u gla)
{
  /* Find the tcode address registered for a guest linear address in the
   * meta element metaIndex.  Entries are keyed by the offset within the
   * page (low 12 bits of gla).  Returns 0 when no entry matches.
   */

  dtMetaEntry_t *meta = &dtMetaTable[metaIndex];
  const Bit32u pageOffset = gla & 0x00000fff;
  unsigned entry = 0;

  while (entry < meta->offsetLookupFreeIndex) {
    if (meta->offsetLookup[entry].pOff == pageOffset)
      return( meta->offsetLookup[entry].tcode );
    entry++;
    }

  return(0); /* not found */
}

  Bit32u
dtTranslateSequence(unsigned metaIndex, Bit32u gOff, Bit32u gla)
{
  /* Dynamically translate the guest code sequence at the given
   * offset and linear address
   *
   * metaIndex selects the meta-table element whose tcode buffer and
   * offset lookup table receive the result.  gOff is the guest offset of
   * the first instruction and gla its linear address, which is
   * dereferenced directly as a host pointer.
   *
   * Instructions are decoded one at a time and tcode is emitted into a
   * local scratch buffer until a sequence-ending instruction (RET,
   * JMP Jd, or the indirect jmp through a table) is reached.  The tcode
   * is then copied into the meta element, the recorded call/jmp
   * displacements are patched for the final location, and a
   * page-offset -> tcode entry is registered.
   *
   * Returns the address of the tcode in the meta element.  Any
   * unhandled instruction or full table terminates the program.
   *
   * NOTE(review): nothing here checks emission against the size of
   * tcode[] or the number of offsetPatches[] entries.
   */

  Bit32u pOff;
  Bit8u tcode[128], *tcodePtr, *iPtr;
  /* Displacement fields (addresses within tcode[]) that must be patched
   * to reach the absolute target tcodeOff once the final tcode location
   * is known.
   */
  struct {
    Bit32u *tcodePtr;
    Bit32u  tcodeOff;
    } offsetPatches[16];
  unsigned offsetPatchesN = 0;
  unsigned b0, b1, modrm, sib;
  Bit32u targetOffset32, gOffInstructionStart;
  unsigned tcodeLen;
  int tcodeBufRemain;
  unsigned freeIndex;
  Bit32u tcodeOffset;
  Bit8u *tcodeBranchAddr;
  Bit8s displ8;
#if DT_UseBackpatch
  Bit8u *idCheckOffsetPtr, *idPtr;
  Bit32u *directTcodeOffsetPtr;
#endif
  Bit32u *r3hBranchPtr;
  unsigned i;

  pOff = gla & 0x00000fff;

/* +++ page checks here */

  /* +++ Braindead instruction decoder for now */
  iPtr = (Bit8u*) gla;
  tcodePtr = tcode;

loop:
  /* Fetch the first opcode byte of the next guest instruction.  Each
   * case advances gOff by the instruction length.
   */
  b0 = *iPtr++;
  /* Assigned here but not read again within this function. */
  gOffInstructionStart = gOff;

/* +++ All instructions in stream could register addresses */

  switch (b0) {

    case 0x0f: /* 2-byte primary opcode */
      b1 = *iPtr++;
      switch (b1) {
        case 0x84: /* JZ Jd */
        case 0x85: /* JNZ Jd */
          gOff += 6;
          /* Branch target is relative to the end of the instruction. */
          targetOffset32 = gOff + * (Bit32s *) iPtr;
          iPtr += 4;

          /* Emit code */
          /* The condition is inverted so that the not-taken case jumps
           * over the branch handling code emitted below; its displacement
           * is patched once that code has been emitted.
           */
          *tcodePtr++ = 0x0f; /* Negate branch: JZ Jd */
          if (b1==0x84)
            *tcodePtr++ = 0x85;
          else
            *tcodePtr++ = 0x84;
          tcodeBranchAddr = tcodePtr; /* for patching branch offset */
          tcodePtr += 4;

#if DT_UseR3hStack
          tcodePtr = dtEmitUseR3hESP(tcodePtr);
#endif
#if DT_UseBackpatch
          tcodePtr = dtEmitPushf(tcodePtr);
          tcodePtr = dtEmitIDCheck(tcodePtr, &idCheckOffsetPtr, &idPtr);
          tcodePtr = dtEmitPopf(tcodePtr);
#if DT_UseR3hStack
          tcodePtr = dtEmitUseGuestESP(tcodePtr);
#endif
          tcodePtr = dtEmitJmp(tcodePtr, &directTcodeOffsetPtr);
          *directTcodeOffsetPtr = 0;

          /* Patch the 8-bit jump in the ID check fail case here */
          *idCheckOffsetPtr = (tcodePtr - (idCheckOffsetPtr+1));
#endif /* DT_UseBackpatch */

          /* Emit code for when branch handler must be used */
          tcodePtr = dtEmitPushImm32(tcodePtr, targetOffset32);
          tcodePtr = dtEmitCall(tcodePtr, &r3hBranchPtr);
          offsetPatches[offsetPatchesN].tcodePtr = r3hBranchPtr;
          offsetPatches[offsetPatchesN].tcodeOff = (Bit32u) __r3h_branch_static;
          offsetPatchesN++;

#if DT_UseBackpatch
          /* Calculate backpatch offsets of ID and direct branch address */
          /* idPatchDispl and jmpPatchDispl are declared outside this
           * function; they are overwritten by every branch translated.
           */
          idPatchDispl = tcodePtr - idPtr;
          jmpPatchDispl = tcodePtr - (Bit8u *) directTcodeOffsetPtr;
#endif

          /* Patch offset from negated branch here */
          tcodeOffset = tcodePtr - (tcodeBranchAddr+4);
          *(Bit32u*)tcodeBranchAddr = tcodeOffset;
          goto loop;


        default:
          printf("dtTS: opcode 0x0f 0x%02x unhandled\n", b1);
          exit(1);
        }
      break;

    case 0x01: /* ADD EvGv */
      /* b1 holds the ModRM byte here; only the register-register form
       * (mod == 11b) is handled.
       */
      b1 = *iPtr++;
      if ( (b1 & 0xc0) == 0xc0 ) {
        /* ADD Reg,Reg */
        gOff += 2;
        /* Emit code */
        *tcodePtr++ = b0;
        *tcodePtr++ = b1;
        goto loop;
        }
      else {
        printf("dtTS: ADD not Reg,Reg\n");
        exit(1);
        }

    case 0x49: /* decl %ecx */
      *tcodePtr++ = 0x49; /* pass through */
      gOff++;
      goto loop;

    case 0x51: /* pushl %ecx */
      *tcodePtr++ = 0x51; /* pass through */
      gOff++;
      goto loop;

    case 0x59: /* popl %ecx */
    case 0x60: /* pusha */
    case 0x61: /* popa */
      *tcodePtr++ = b0; /* pass through */
      gOff++;
      goto loop;

    case 0x90: /* NOP */
      *tcodePtr++ = 0x90; /* pass through */
      gOff++;
      goto loop;

    case 0xe2: /* LOOP Jb */
      gOff += 2;
      displ8 = * (Bit8s *) iPtr++;
/* +++ hack specific to add cascade loop */
      /* The displacement is copied unchanged into the tcode, so only the
       * one known displacement is accepted.
       */
      if (displ8 == -12) {
        }
      else {
        printf("LOOP: Jb not -12\n");
        exit(1);
        }

      *tcodePtr++ = 0xe2;
      *tcodePtr++ = (Bit8u) displ8;
      goto loop;

    case 0xb9: /* mov ECX,Id */
      gOff += 5;
      *tcodePtr++ = b0;
      *(Bit32u*)tcodePtr = * (Bit32s *) iPtr;
      iPtr += 4;
      tcodePtr += 4;
      goto loop;

    case 0xc3: /* RET */
      gOff++;

      /* Emit code */
      /* Ends the sequence: jump to the __r3h_ret handler. */
#if DT_UseR3hStack
      tcodePtr = dtEmitUseR3hESP(tcodePtr);
#endif
      tcodePtr = dtEmitJmp(tcodePtr, &r3hBranchPtr);
      offsetPatches[offsetPatchesN].tcodePtr = r3hBranchPtr;
      offsetPatches[offsetPatchesN].tcodeOff = (Bit32u) __r3h_ret;
      offsetPatchesN++;
      goto finish;

    case 0xe9: /* JMP_Jd */
      gOff += 5;
      /* Branch target is relative to the end of the instruction. */
      targetOffset32 = gOff + * (Bit32s *) iPtr;
      iPtr += 4;

#if DT_UseR3hStack
      tcodePtr = dtEmitUseR3hESP(tcodePtr);
#endif
#if DT_UseBackpatch
      tcodePtr = dtEmitPushf(tcodePtr);
      tcodePtr = dtEmitIDCheck(tcodePtr, &idCheckOffsetPtr, &idPtr);
      tcodePtr = dtEmitPopf(tcodePtr);
#if DT_UseR3hStack
      tcodePtr = dtEmitUseGuestESP(tcodePtr);
#endif
      tcodePtr = dtEmitJmp(tcodePtr, &directTcodeOffsetPtr);
      *directTcodeOffsetPtr = 0;

      /* Patch the 8-bit jump in the ID check fail case here */
      *idCheckOffsetPtr = (tcodePtr - (idCheckOffsetPtr+1));
#endif /* DT_UseBackpatch */

      /* Emit code for when branch handler must be used */
      tcodePtr = dtEmitPushImm32(tcodePtr, targetOffset32);
      tcodePtr = dtEmitCall(tcodePtr, &r3hBranchPtr);
      offsetPatches[offsetPatchesN].tcodePtr = r3hBranchPtr;
      offsetPatches[offsetPatchesN].tcodeOff = (Bit32u) __r3h_branch_static;
      offsetPatchesN++;

#if DT_UseBackpatch
      /* Calculate backpatch offsets of ID and direct branch address */
      idPatchDispl = tcodePtr - idPtr;
      jmpPatchDispl = tcodePtr - (Bit8u *) directTcodeOffsetPtr;
#endif
      goto finish;

    case 0xff: /* Group5 */
      modrm = *iPtr++;
      if (modrm == 0x0d) { /* DEC_Ed */
        gOff += 6;
        /* Here targetOffset32 holds the 32-bit address of the memory
         * operand, not a branch target.
         */
        targetOffset32 = * (Bit32s *) iPtr;
        iPtr += 4;

        /* Emit pass through */
        *tcodePtr++ = 0xff;
        *tcodePtr++ = 0x0d;
        *(Bit32u*) tcodePtr = targetOffset32;
        tcodePtr += 4;
        goto loop;
        }
      else if (modrm == 0x24) {
        sib = *iPtr++;
        if (sib == 0x8d) {
          /* +++ specific hack for instruction: jmp *table(,%ecx,4) */
          /* targetOffset32 holds the address of the jump table. */
          targetOffset32 = * (Bit32s *) iPtr;
          iPtr += 4;
          gOff += 7;

#if DT_UseR3hStack
          tcodePtr = dtEmitUseR3hESP(tcodePtr);
#endif
          *tcodePtr++ = 0xff; /* pushl table(,%ecx,4) */
          *tcodePtr++ = 0x34;
          *tcodePtr++ = 0x8d;
          *(Bit32u*)tcodePtr = targetOffset32;
          tcodePtr += 4;

          tcodePtr = dtEmitCall(tcodePtr, &r3hBranchPtr);
          offsetPatches[offsetPatchesN].tcodePtr = r3hBranchPtr;
          offsetPatches[offsetPatchesN].tcodeOff =
            (Bit32u) __r3h_branch_dynamic;
          offsetPatchesN++;
          goto finish;
          }
        else {
          printf("dtTS: G5 sib=0x%x unhandled\n", sib);
          exit(1);
          }
        }
      else {
        printf("dtTS: G5 modrm 0x%02x unhandled\n", modrm);
        exit(1);
        }
      break;

    default:
      printf("dtTS: opcode 0x%02x unhandled\n", b0);
      exit(1);
    }

  /* Every handled case leaves the switch via goto or exit(). */
  printf("dtTS: finished prematurely\n");
  exit(1);

finish:
  /* Add tcode sequence to tcode buffer. */

  /* +++ For now it's stored more simply in the meta array. */

  /* Find length of translated sequence */
  tcodeLen = (tcodePtr - tcode);
  /* Do we have the room? */
  freeIndex = dtMetaTable[metaIndex].tcodeBufferFreeIndex;
  tcodeBufRemain = (DT_TcodeBufferMax-1) - freeIndex;
  if ( ((int) tcodeLen) > tcodeBufRemain ) {
    printf("dtTS: tcode buffer full\n");
    exit(1);
    }
  /* Reposition pointer to the tcode buffer where we will copy tcode.
   * Copy the tcode and advance the free index.
   */
  tcodePtr = &dtMetaTable[metaIndex].tcodeBuffer[freeIndex];
  memcpy(tcodePtr, tcode, tcodeLen);
  dtMetaTable[metaIndex].tcodeBufferFreeIndex += tcodeLen;

  /* Patch branches.  Because branches use displacements relative to
   * the EIP offset, they depend on the location where the tcode is actually
   * placed.
   */
  for (i=0; i<offsetPatchesN; i++) {
    Bit32u *finalTcodeOffPtr;
    /* Translate the address of the displacement to patch, from the original
     * tcode buffer to the real tcode home
     */
    finalTcodeOffPtr = (Bit32u*) (tcodePtr +
      (((Bit32u)offsetPatches[i].tcodePtr) - (Bit32u) tcode));

    /* Now patch the displacement */
    /* Displacement = target - address of the byte following the 4-byte
     * displacement field.
     */
    *finalTcodeOffPtr = (offsetPatches[i].tcodeOff) -
        (((Bit32u)finalTcodeOffPtr)+4);
    }

  /* Add page offset -> tcode lookup entry in page-oriented tables */
  freeIndex = dtMetaTable[metaIndex].offsetLookupFreeIndex;
  if (freeIndex >= (DT_OffsetLookupMax-1)) {
    printf("dtTS: pOff -> tcode table full\n");
    exit(1);
    }
  dtMetaTable[metaIndex].offsetLookup[freeIndex].pOff = pOff;
  dtMetaTable[metaIndex].offsetLookup[freeIndex].tcode = (Bit32u) tcodePtr;
  dtMetaTable[metaIndex].offsetLookupFreeIndex++;

  return( (Bit32u) tcodePtr );
}


/* Some exit functions.  It's important that exits be done from the
 * 'guest' space (which is really the normal unix user space for this
 * test), so that the normal ESP is reloaded before we call any
 * unix calls like exit()!
 */


/* Assembly entry point __exit_ok: calls exit_ok().  No instruction
 * follows the call; exit_ok() ends the process with exit().
 */
asm (
  ".text \n\t"
  ".globl __exit_ok   \n\t"
  "__exit_ok:         \n\t"
  "  call exit_ok \n\t"
  );

/* Assembly entry point __exit_bad: calls exit_bad().  No instruction
 * follows the call; exit_bad() ends the process with exit().
 */
asm (
  ".text \n\t"
  ".globl __exit_bad  \n\t"
  "__exit_bad:        \n\t"
  "  call exit_bad\n\t"
  );

  void
exit_ok(void)
{
  /* Success exit, reached through the __exit_ok assembly stub: report on
   * stderr and terminate with status 0.
   */
  fputs("Execution returns OK.\n", stderr);
  exit(0);
}

  void
exit_bad(void)
{
  /* Failure exit, reached through the __exit_bad assembly stub: report
   * the addresses of the two guest pages on stderr and terminate with
   * status -1.
   */
  const unsigned page0Addr = (unsigned) guest_page0;
  const unsigned page1Addr = (unsigned) guest_page1;

  fputs("Exit BAD\n", stderr);
  fprintf(stderr, "guest_page0 was 0x%x\n", page0Addr);
  fprintf(stderr, "guest_page1 was 0x%x\n", page1Addr);
  exit(-1);
}


#if 0
/* Disabled code, kept for reference; its prototype is also commented out. */
  Bit8u *
dtEmitPrelude(Bit8u* tcodePtr, Bit32u gOff)
{
  /* Emit a guest-to-handler transition at tcodePtr: load the handler SS,
   * save the guest ESP and the guest source EIP (gOff), and load the
   * empty handler ESP.  Returns the pointer past the emitted bytes.
   */

  /* cs; mov  r3h_DS, %ss               Load handler SS. */
  *tcodePtr++ = 0x2e;
  *tcodePtr++ = 0x8e;
  *tcodePtr++ = 0x15;
  *(Bit32u*)tcodePtr = (Bit32u) &r3h_DS;
  tcodePtr += 4;

  /* ss; movl %esp,  guest_ESP          Save guest ESP */
  *tcodePtr++ = 0x36;
  *tcodePtr++ = 0x89;
  *tcodePtr++ = 0x25;
  *(Bit32u*)tcodePtr = (Bit32u) &guest_ESP;
  tcodePtr += 4;

  /* ss; movl $guestSrcEIP, guest_EIP   Save guest source EIP */
  *tcodePtr++ = 0x36;
  *tcodePtr++ = 0xc7;
  *tcodePtr++ = 0x05;
  *(Bit32u*)tcodePtr = (Bit32u) &guest_EIP;
  tcodePtr += 4;
  *(Bit32u*)tcodePtr = gOff;
  tcodePtr += 4;

  /* ss; movl r3h_ESP_empty, %esp       Fully loaded handler SS:ESP */
  *tcodePtr++ = 0x36;
  *tcodePtr++ = 0x8b;
  *tcodePtr++ = 0x25;
  *(Bit32u*)tcodePtr = (Bit32u) &r3h_ESP_empty;
  tcodePtr += 4;

  return(tcodePtr);
}
#endif

#if 0
/* Disabled code, kept for reference; its prototype is also commented out. */
  Bit8u *
dtEmitPostlude(Bit8u *tcodePtr)
{
  /* Emit a handler-to-guest transition at tcodePtr: reload the guest SS
   * and the guest ESP.  Returns the pointer past the emitted bytes.
   */

  /* cs; mov  guest_SS, %ss */
  *tcodePtr++ = 0x2e;
  *tcodePtr++ = 0x8e;
  *tcodePtr++ = 0x15;
  *(Bit32u*)tcodePtr = (Bit32u) &guest_SS;
  tcodePtr += 4;

  /* cs; mov guest_ESP, %esp */
  *tcodePtr++ = 0x2e;
  *tcodePtr++ = 0x8b;
  *tcodePtr++ = 0x25;
  *(Bit32u*)tcodePtr = (Bit32u) &guest_ESP;
  tcodePtr += 4;

  return(tcodePtr);
}
#endif

  Bit8u *
dtEmitPushImm32(Bit8u *tcodePtr, Bit32u imm32)
{
  /* Emit "pushl $imm32" (opcode 0x68 plus a 32-bit immediate) at
   * tcodePtr.  Returns the pointer past the 5 emitted bytes.
   */
  Bit8u *immField = tcodePtr + 1;

  tcodePtr[0] = 0x68;
  *(Bit32u*)immField = imm32;

  return(immField + 4);
}

  Bit8u *
dtEmitUseR3hESP(Bit8u *tcodePtr)
{
  /* Emit code at tcodePtr that switches from the guest stack to the
   * handler stack: save %esp in guest_ESP, then load %esp from
   * r3h_ESP_empty.  Both accesses use a GS segment override.  Returns
   * the pointer past the 14 emitted bytes.
   */

  /* gs; movl %esp, guest_ESP */
  tcodePtr[0] = 0x65;
  tcodePtr[1] = 0x89;
  tcodePtr[2] = 0x25;
  *(Bit32u*)(tcodePtr + 3) = (Bit32u) &guest_ESP;

  /* gs; movl r3h_ESP_empty, %esp */
  tcodePtr[7] = 0x65;
  tcodePtr[8] = 0x8b;
  tcodePtr[9] = 0x25;
  *(Bit32u*)(tcodePtr + 10) = (Bit32u) &r3h_ESP_empty;

  return(tcodePtr + 14);
}

  Bit8u *
dtEmitUseGuestESP(Bit8u *tcodePtr)
{
  /* Emit code at tcodePtr that switches back to the guest stack by
   * loading %esp from guest_ESP (GS segment override).  Returns the
   * pointer past the 7 emitted bytes.
   */

  /* gs; movl guest_ESP, %esp */
  tcodePtr[0] = 0x65;
  tcodePtr[1] = 0x8b;
  tcodePtr[2] = 0x25;
  *(Bit32u*)(tcodePtr + 3) = (Bit32u) &guest_ESP;

  return(tcodePtr + 7);
}

  Bit8u *
dtEmitPushf(Bit8u *tcodePtr)
{
  /* Emit a one-byte pushfl at tcodePtr; returns the pointer past it. */
  tcodePtr[0] = 0x9c; /* pushfl */
  return(tcodePtr + 1);
}

  Bit8u *
dtEmitPopf(Bit8u *tcodePtr)
{
  /* Emit a one-byte popfl at tcodePtr; returns the pointer past it. */
  tcodePtr[0] = 0x9d; /* popfl */
  return(tcodePtr + 1);
}

  Bit8u *
dtEmitIDCheck(Bit8u *tcodePtr, Bit8u **offsetPtr, Bit8u **idPtr)
{
  /* Emit a compare of globalID against an inline 32-bit ID, followed by
   * a short conditional jump (opcode 0x75) taken on mismatch.
   *
   * *idPtr receives the address of the inline ID field and *offsetPtr
   * the address of the 8-bit jump displacement, so the caller can patch
   * both.  Returns the pointer past the 12 emitted bytes.
   */
  Bit8u *idField   = tcodePtr + 6;
  Bit8u *jumpDispl = tcodePtr + 11;

  /* cmpl $inline-ID, globalID */
  tcodePtr[0] = 0x81;
  tcodePtr[1] = 0x3d;
  *(Bit32u*)(tcodePtr + 2) = (Bit32u) &globalID;

  /* Start ID at zero so the first compare creates a miss.  This is
   * dynamically patched by handler routine.
   */
  *idPtr = idField;
  *(Bit32u*)idField = 0;

  tcodePtr[10] = 0x75;
  *offsetPtr = jumpDispl;
  *jumpDispl = 0x00; /* 8bit offset patched by DT engine */

  return(tcodePtr + 12);
}

  Bit8u *
dtEmitJmp(Bit8u *tcodePtr, Bit32u **offsetPtr)
{
  /* Emit a near jmp with a zeroed 32-bit displacement at tcodePtr.
   * *offsetPtr receives the address of the displacement field so the
   * caller can patch it.  Returns the pointer past the 5 emitted bytes.
   */
  Bit32u *displField = (Bit32u*) (tcodePtr + 1);

  tcodePtr[0] = 0xe9; /* jmp Jv */
  *offsetPtr = displField;
  *displField = 0; /* Patched in by DT engine */

  return(tcodePtr + 5);
}

  Bit8u *
dtEmitCall(Bit8u *tcodePtr, Bit32u **offsetPtr)
{
  /* Emit a near call with a zeroed 32-bit displacement at tcodePtr.
   * *offsetPtr receives the address of the displacement field so the
   * caller can patch it.  Returns the pointer past the 5 emitted bytes.
   */
  Bit32u *displField = (Bit32u*) (tcodePtr + 1);

  tcodePtr[0] = 0xe8; /* call Av */
  *offsetPtr = displField;
  *displField = 0; /* Patched in by DT engine */

  return(tcodePtr + 5);
}

  Bit8u *
dtEmitPushReg(Bit8u *tcodePtr, unsigned reg)
{
  /* Emit the one-byte opcode 0x50+reg (push of a 32-bit register) at
   * tcodePtr.  reg is not range checked.  Returns the pointer past the
   * emitted byte.
   */
  tcodePtr[0] = 0x50 + reg;
  return(tcodePtr + 1);
}
