/*
 *  plex86: run multiple x86 operating systems concurrently
 *  Copyright (C) 1999-2001 Kevin P. Lawton
 *
 *  mode-nexus.c: deals with which mode to run the guest CPU in,
 *    and mapping the monitor segments according to those modes.
 *    (features available to either space)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */


#include "plex86.h"
#define IN_NEXUS_SPACE
#include "monitor.h"



/* Canonical "null" selector and descriptor values: every member is zero.
 * The selector uses the standard C99 designated-initializer form
 * (.raw = 0) in place of the obsolete GNU 'raw: 0' spelling; the
 * resulting object is identical.
 */
const selector_t nullSelector = { .raw = 0 };
const descriptor_t nullDescriptor = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };


//static selector_t getFreeGDTSlot(vm_t *vm, unsigned slot, unsigned rpl);


/* Given the values already in vm->guest_cpu, and the passed 'eflags',
 * map the monitor and guest into the VM.
 *
 * Parameters:
 *   vm         - VM state.  The guest context, nexus, GDT, IDT, TSS and
 *                page directory reached through vm->addr are written,
 *                along with several fields of *vm itself.
 *   eflags     - guest EFLAGS value, handed to write_eflags() with a
 *                mask of ~0.
 *   remapHints - not referenced by the currently compiled code.
 *
 * Returns 1.  The only paths which return 0 are inside '#if 0' regions
 * and are therefore not compiled.
 */

  unsigned
mapMonitor(vm_t *vm, Bit32u eflags, unsigned remapHints)
{
  selector_t monCsSel, monSsSel, monTssSel, monSsHackSel;
  Bit32u laddr, base;
  /* 'sreg' is only referenced from code which is currently '#if 0'ed. */
  unsigned slot, sreg;
  guest_context_t *guestContext;
  nexus_t *nexus;
  descriptor_t *gdt;

// xxx mapMonitor assumes guest_cpu holds all current selector
// xxx and descriptor info.  We should have some checks here?

  /* For convenience, some pointers */
  guestContext = vm->addr->guest_context;
  nexus        = vm->addr->nexus;
  gdt          = vm->addr->gdt;

  /* Eflags: The application oriented flags are stored in
   *   guest_context->eflags, to be used directly by the guest.
   *   Some of the system oriented flags are manipulated by the
   *   monitor for virtualization.  In those cases, the actual
   *   guest values are stored in guest_cpu.veflags.
   *
   * Both copies are reset first (0x02 is EFLAGS bit 1, which is
   * reserved and always set on x86), and only then is the passed
   * 'eflags' applied through write_eflags().
   */
  vm->guest_cpu.veflags.raw = 0;
  guestContext->eflags.raw = FLG_IF | 0x02;
  write_eflags(vm, eflags, ~0);

  /* Set the actual EFLAGS.VM flag according to the mode the
   * guest is monitored in.  The mode test below is commented out,
   * so the VM flag is currently always cleared.
   */
  //if (GetMonMode(vm) == MonModeVM)
  //  guestContext->eflags.raw |= FLG_VM;
  //else
  guestContext->eflags.raw &= ~FLG_VM;

/* +++ should zero out GDT, so prev entries do not remain */
/* +++ do that in unmap_mon */

  /*
   * Virtualized guest data selectors and shadow cache descriptors
   */

#warning "unify with similar code in mon-fault.c"

  /* =========================
   * Map in Monitor structures
   * =========================
   */

#if 0
  /* Find GDT slots for monitor system segments. */
  if ( (GetGuestMode(vm) == GuestModeRM) ||
       (GetGuestMode(vm) == GuestModeVM) ||
       vm->vOpcodeMap ) {
    /* If the guest is in RM or VM, or we need to use SIV then we
     * can simply put monitor system segments at the beginning of
     * the GDT, because we use modified selectors/descriptors and
     * just put them right after the monitor system segments.
     */
#endif
    /* Fixed GDT slots for the monitor: 1 = code, 2 = stack/data,
     * 3 = TSS, 10 = the 16-bit stack hack copy.  Since the
     * surrounding 'if' is disabled, these are always used.
     */
    monCsSel.raw     = Selector(1, 0, RPL0);
    monSsSel.raw     = Selector(2, 0, RPL0);
    monTssSel.raw    = Selector(3, 0, RPL0);
    monSsHackSel.raw = Selector(10, 0, RPL0); /* not really used */
#if 0
    }
  else {
    /* Guest is in PM and we are running code natively.  This means
     * the guest descriptors have to be placed in the descriptor tables
     * where the guest selectors expect them.  Thus, we need to look
     * at the guest selectors to find some open slots in the GDT which
     * do not cause conflict with the extra monitor segments.
     */
    monCsSel      = getFreeGDTSlot(vm, 1, RPL0);
    monSsSel      = getFreeGDTSlot(vm, monCsSel.fields.index + 1, RPL0);
    monTssSel     = getFreeGDTSlot(vm, monSsSel.fields.index + 1, RPL0);
    monSsHackSel  = getFreeGDTSlot(vm, monTssSel.fields.index + 1, RPL0);
    }
#endif

  /* Search for unused PDE for nexus PT  (fixed for now) */
  /* The upper 10 bits of the linear address (laddr >> 22) are the
   * page directory index; mon_pde_mask keeps just those bits.
   */
  laddr = 0x70000000;
  vm->mon_pde_mask = laddr & 0xffc00000;
  vm->mon_pdi      = laddr >> 22;
  base = MON_BASE_FROM_LADDR(laddr);

  /* Map nexus into monitor/guest address space */
  vm->addr->page_dir[laddr >> 22] = vm->host.nexus_pde;

  /* Monitor segments (code/data/TSS).  Put at fixed GDT location for now */
  SET_DESCRIPTOR(gdt[monCsSel.fields.index], base, 0xfffff, 
                 D_PG, D_D32, D_AVL0, D_PRESENT, D_DPL0, D_CODE | D_READ)
  SET_DESCRIPTOR(gdt[monSsSel.fields.index], base, 0xfffff, 
                 D_PG, D_D32, D_AVL0, D_PRESENT, D_DPL0, D_DATA | D_WRITE)
  SET_DESCRIPTOR(gdt[monTssSel.fields.index],
                 base + (Bit32u) vm->guest.addr.tss,
                 sizeof(tss_t)-1,
                 D_BG, 0, D_AVL0, D_PRESENT, D_DPL0, D_TSS)
  /* SS hack for returning to 16bit stacks */
  /* The hack descriptor is a plain copy of the monitor stack descriptor. */
  gdt[monSsHackSel.fields.index] =
      gdt[monSsSel.fields.index];
  nexus->SSNormal = monSsSel.raw;
  nexus->SS16BitSSHack = monSsHackSel.raw;


  /* ==========================
   * Guest selector/descriptors
   * ==========================
   * This whole section is currently disabled; only the guest CS
   * assignment following the '#endif' is compiled.
   */
#warning "Is the GDT being cleared of old values?"

#if 0
  if ( GetMonMode(vm) == MonModeVM ) {
    /* Guest VM/RM code running is monitored in VM mode.  Use guest
     * selectors as-is.  No SIV is used.  There are no guest segments
     * used - the descriptor caches are loaded according to VM
     * semantics.
     */

    guestContext->es = vm->guest_cpu.selector[SRegES].raw;
    guestContext->cs = vm->guest_cpu.selector[SRegCS].raw;
    guestContext->ss = vm->guest_cpu.selector[SRegSS].raw;
    guestContext->ds = vm->guest_cpu.selector[SRegDS].raw;
    guestContext->fs = vm->guest_cpu.selector[SRegFS].raw;
    guestContext->gs = vm->guest_cpu.selector[SRegGS].raw;
    }
  else if ( vm->vOpcodeMap ) {
    /* In any case that we are using SIV, we use a descriptor for each
     * guest segment descriptor cache.  These are not placed at the
     * location in the GDT where the guest expects them, so we can
     * just use the next set of GDT slots.
     *
     * CS/GS:
     *   Used for tcode and tcode (ring3) handlers, and
     *   are virtualized.  Thus, CS/GS selector reads/writes and segment
     *   prefixes are monitored.
     *
     * ES/SS/DS/FS:
     *   descriptors: A descriptor is created/maintained which
     *     represents the state of the shadow cache for each segment.
     *   selectors: Select the virtualized descriptors.
     *     Selector reads/writes are monitored, because they are modified.
     *   Prefixes: OK - segments can be used for memory access natively.
     */

    /* Since the guest doesn't need any segments, we can just use
     * some static ones, right after the monitor system segments.
     *
     * Note that due to previous transitions from PM, there may be
     * some descriptors loaded with invalid descriptor caches
     * (like from loading a NULL selector etc).  So we have to
     * check - if a descriptor cache is invalid, set the selector
     * to NULL.  Otherwise, an exception will be generated when we
     * attempt to return back to the guest.
     */
    if ( !vm->guest_cpu.desc_cache[SRegES].valid )
      guestContext->es = nullSelector.raw;
    else
      guestContext->es = Selector(4+SRegES, 0, RPL3);

    if ( !vm->guest_cpu.desc_cache[SRegSS].valid )
      guestContext->ss = nullSelector.raw;
    else
      guestContext->ss = Selector(4+SRegSS, 0, RPL3);

    if ( !vm->guest_cpu.desc_cache[SRegDS].valid )
      guestContext->ds = nullSelector.raw;
    else
      guestContext->ds = Selector(4+SRegDS, 0, RPL3);

    if ( !vm->guest_cpu.desc_cache[SRegFS].valid )
      guestContext->fs = nullSelector.raw;
    else
      guestContext->fs = Selector(4+SRegFS, 0, RPL3);

    /* Set the virtualized descriptors.  These represent the
     * contents of the shadow cache descriptors of each of the guest
     * segment registers.  Thus a separate one is needed, one
     * for each guest descriptor shadow cache.  For now, these
     * are stored at fixed locations in the GDT.  To virtualize,
     * just copy guest descriptor shadow caches to the GDT entries,
     * and deprivilege the DPL to 3.
     */
    for (sreg=0; sreg<6; sreg++) {
      /* CS/GS are handled separately - they are used for tcode */
      if ( (sreg==SRegCS) || (sreg==SRegGS) )
        continue;
      if (vm->guest_cpu.desc_cache[sreg].valid) {
        gdt[4+sreg] = vm->guest_cpu.desc_cache[sreg].desc;
        gdt[4+sreg].dpl = 3;
        }
      else {
        gdt[4+sreg] = nullDescriptor;
        }
      }
    /* The tcode CS/GS segments use the same values as the monitor
     * code/stack==data segments, with DPL=3.
     */
    guestContext->cs = Selector(4+SRegCS, 0, RPL3);
    guestContext->gs = Selector(4+SRegGS, 0, RPL3);
    gdt[4+SRegCS] = gdt[monCsSel.fields.index];
    gdt[4+SRegCS].dpl = 3;
    gdt[4+SRegGS] = gdt[monSsSel.fields.index];
    gdt[4+SRegGS].dpl = 3;
    }

  else {
#warning "Is the GDT being cleared of old values?"
    /* Unfinished branch: it bails out here, so the statements after
     * this 'return' are unreachable even if the region is enabled.
     */
    monprint(vm, "mapMon: code for vm->vOpcodeMap==0 unfinished.\n");
    return 0;

    /* PM guest, SIV==0.  We place the guest segments where the
     * guest expects them.  The code above makes sure that the monitor
     * system segments do not conflict with these descriptor table slots.
     */
    guestContext->es = vm->guest_cpu.selector[SRegES].raw;
    guestContext->ss = vm->guest_cpu.selector[SRegSS].raw;
    guestContext->ds = vm->guest_cpu.selector[SRegDS].raw;
    guestContext->fs = vm->guest_cpu.selector[SRegFS].raw;
    guestContext->cs = vm->guest_cpu.selector[SRegCS].raw;
    guestContext->gs = vm->guest_cpu.selector[SRegGS].raw;

    /* Initialize guest descriptors in monitor descriptor tables */
    for (sreg=0; sreg<6; sreg++) {
      slot = vm->guest_cpu.selector[sreg].fields.index;
// xxx What about NULL selectors here?
      /* NOTE(review): getFreeGDTSlot() only searches slots below 512,
       * so 'slot > 512' lets index 512 through; '>=' may have been
       * intended - confirm against the real GDT size.
       */
      if ( (slot > 512) || vm->guest_cpu.selector[sreg].fields.ti ) {
        monprint(vm, "mapMon: guest selector OOB\n");
        return 0;
        }
      if (vm->guest_cpu.desc_cache[sreg].valid) {
        gdt[slot] = vm->guest_cpu.desc_cache[sreg].desc;
        gdt[slot].dpl = 3;
        }
      else {
        gdt[slot] = nullDescriptor;
        }
      }
    }
#endif

// xxx Fix this, preGuest() needs to see that the guest was running
// xxx in ring3 or it freaks when first called.
guestContext->cs = Selector(4+SRegCS, 0, RPL3);

  /* All guest segments are now synchronized with their virtualized
   * counterparts */
  vm->segmentUpdated = 0;


  /* Fix up the selectors of all IDT entries */
  /* Every one of the 256 gates is pointed at the monitor code segment. */
  for ( slot = 0; slot < 256; slot++ )
      vm->addr->idt[slot].selector = monCsSel;

  /* The monitor GDT/IDT loading info */
  /* NOTE(review): x86 descriptor-table limits are normally (size - 1);
   * confirm whether MON_GDT_SIZE / MON_IDT_SIZE already account for that.
   */
  nexus->mon_gdt_info.base  = base + (Bit32u) vm->guest.addr.gdt;
  nexus->mon_gdt_info.limit = MON_GDT_SIZE;
  nexus->mon_idt_info.base  = base + (Bit32u) vm->guest.addr.idt;
  nexus->mon_idt_info.limit = MON_IDT_SIZE;

  /* We don't have a monitor LDT for now */
  nexus->mon_ldt_sel = 0;

  /* The monitor TSS */
  nexus->mon_tss_sel = monTssSel.raw;
#warning "deal with esp0 according to monitor mode"
  /* Only the non-VM variant is compiled: the ring0 stack pointer is
   * set to the end of the nexus page less sizeof(v86_sregs_t).
   */
  //if ( GetMonMode(vm) == MonModeVM ) {
  //  vm->addr->tss->esp0 =
  //    ((Bit32u)vm->guest.addr.nexus) + PAGESIZE;
  //  }
  //else {
    vm->addr->tss->esp0 =
      ((Bit32u)vm->guest.addr.nexus) + PAGESIZE - sizeof(v86_sregs_t);
  //  }
  vm->addr->tss->ss0  = monSsSel.raw;

  /* Monitor code and stack segments */
  nexus->mon_jmp_info.selector   = monCsSel.raw;
  nexus->mon_stack_info.selector = monSsSel.raw;

  /* Monitor PDBR */
#warning "Monitor CRx hacks"
  /* CR3 is built by shifting vm->pages.page_dir left by 12;
   * presumably page_dir holds a page frame number - confirm.
   */
  nexus->mon_cr0 = 0x80010033; /* PG/WP/NE/ET/MP/PE */
  nexus->mon_cr3 = vm->pages.page_dir << 12;
  nexus->mon_cr4 = 0x00000004; /* TSD=1 */

  /* Monitor code/data segment base */
  nexus->mon_base = base;

  /* Flag a pending request to (re)map the virtualized segments. */
  vm->modeChange |= ModeChangeRequestMapVSegs;
  return(1);
}

#if 0
/* (Disabled code.)
 * Find a GDT slot that no guest segment register currently selects.
 *
 * Starting at index 'slot' and stopping before index 512, return the
 * first index which differs from the 'index' field of all six entries
 * in vm->guest_cpu.selector[].  The result is a selector built with
 * Selector(slot, 0, rpl).
 *
 * If no such slot exists, a message is logged through monprint() and
 * a selector with raw == 0 is returned as the error indication.
 *
 * Only the selector index is compared; the selector's 'ti' field is
 * not consulted here.
 */
  selector_t
getFreeGDTSlot(vm_t *vm, unsigned slot, unsigned rpl)
{
  /* start looking at GDT slot number 'slot'. */
  unsigned sreg;
  selector_t sel;

  for (; slot<512; slot++) { /* search descriptor table slots */
    for (sreg=0; sreg<6; sreg++) { /* compare to guest selectors */
      if (vm->guest_cpu.selector[sreg].fields.index == slot)
        break; /* this slot conflicts with guest selector */
      }
    /* sreg reaches 6 only when the inner loop ran without a 'break'. */
    if (sreg>=6) {
      /* No guest selector found to use this slot, so we can use
       * it for a monitor segment.
       */
      sel.raw = Selector(slot, 0, rpl);
      return( sel );
      }
    }
  /* Reached only after the search is exhausted, i.e. with slot >= 512. */
  if (slot>=512) {
    monprint(vm, "getFreeGDTSlot: slot OOB\n");
    }
  sel.raw = 0; /* Error */
  return(sel);
}
#endif

#if 0
/* (Disabled code.)
 * Check whether the guest segment state could be reproduced by
 * real-mode style segment loads.
 *
 * For each of the six segment registers, the descriptor cache must be
 * valid, have limit_scaled == 0xffff, and have base == (selector << 4).
 * The function returns 0 as soon as one of these tests fails, and 1
 * otherwise.
 *
 * Unsynced descriptor/selector state and a mismatch in the descriptor
 * attribute bits are only reported through monprint(); they do not
 * change the return value.
 * NOTE(review): returning 1 after logging "not compat" looks
 * unintended - confirm whether a 'return 0' is missing there.
 */
  unsigned
isV86MCompatible(vm_t *vm)
{
  Bit32u desc_raw_dword1, bits, compatValue;
  unsigned sreg;
  descriptor_cache_t *cache;
 
  for (sreg=0; sreg<6; sreg++) {
    cache = &vm->guest_cpu.desc_cache[sreg];
    /* Diagnostics only: bit 'sreg' of each mask is expected to be set. */
    if ( !(vm->descriptorInEmu & (1<<sreg)) )
      monprint(vm, "isV86MCompatible: desc not synced\n");
    if ( !(vm->selectorInEmu & (1<<sreg)) )
      monprint(vm, "isV86MCompatible: sel not synced\n");
    if (cache->valid==0)
      return 0;
    if (cache->limit_scaled != 0xffff)
      return 0;
    /* Base must = selector << 4, or else a reload of the guest
     * segment will not yield the proper base */
    if ( cache->base != (vm->guest_cpu.selector[sreg].raw << 4) )
      return 0;
 
    /* Get the high dword of the descriptor */
    desc_raw_dword1 = ((Bit32u *) &cache->desc)[1];
 
    /* Make sure that important fields are compatible with RM */
    /* The mask covers bits 23-22 and 15-9 of the high dword, so the
     * lowest type bit (bit 8, the accessed bit) is ignored.  Expected
     * values: present, DPL 0, S=1, with type writable data (0x9200)
     * or readable code (0x9A00).
     */
#define RMCompatMask  0x00c0fe00 /* G/D_B/P/DPL/S/Type */
#define RMCompatValueData 0x00009200
#define RMCompatValueCode 0x00009A00
    bits = desc_raw_dword1 & RMCompatMask;
    if (sreg == SRegCS)
      compatValue = RMCompatValueCode;
    else
      compatValue = RMCompatValueData;
    if ( bits != compatValue ) {
      /* Logged only; the loop continues (see the NOTE in the header). */
      monprint(vm, "isV86MCompatible: not compat, sreg=%u, bits=0x%x\n",
               sreg, bits);
      }
    }
 
  return 1;
}
#endif

#if 0
/* (Disabled code.)
 * Decide whether protected-mode ring3 guest code meets the conditions
 * listed below for running without SIV.
 *
 * As written the function returns 0 unconditionally on its first
 * statement, so none of the checks below are reached even if this
 * region is enabled.  The remaining body documents the intended
 * conditions:
 *   - guest CPL is 3, CR0.PE is set and EFLAGS.VM is clear (asserted),
 *   - guest IOPL is less than 3,
 *   - guest IF is 1,
 *   - guest VIF and VIP are both 0,
 *   - CS and SS selectors are non-NULL,
 *   - every non-NULL selector has a valid descriptor cache whose two
 *     dwords equal the descriptor found in the guest GDT/LDT.
 *
 * The intended result is 1 when all conditions hold and 0 otherwise;
 * several inconsistencies go through monpanic() instead.
 */
  unsigned
isPMR3NativeCompatible(vm_t *vm)
{
  return( 0 );
// xxx isPMR3NativeCompatible() not complete.
// xxx Need to convert this to NEXUS space compliant code.
// xxx Was in mode-mod.c before.

  unsigned sreg;

  /* Condition: Guest CPL must equal privilege level that it
   *   is monitored at.  Otherwise, the RPL of segment selectors would
   *   be incorrect and the difference readable by unmonitored guest code.
   *   The calling function should check that the guest is running in
   *   protected mode ring3 before calling.
   */
  VM_ASSERT(vm, G_GetCPL(vm)==3);
  VM_ASSERT(vm, vm->guest_cpu.cr0.fields.pe);
  VM_ASSERT(vm, G_GetVM(vm)==0);

  /* ===================================================================
   * Condition: Guest EFLAGS.IOPL < 3.  This way we can set the monitored
   * IOPL value to the expected guest value, without allowing IOPL
   * sensitive instructions to execute unwantedly.  For example, look at
   * the STI/CLI instructions.  If we let IOPL be 3 (and we know the guest
   * is running at ring3 at this point), then STI/CLI instructions will
   * execute successfully out of control of the VM monitor.  Additionally,
   * IN/OUT instructions would do the same.  As long as we constrain that
   * IOPL < 3, we can set the monitored IOPL value as expected by the
   * guest, thus allowing PUSHF to push expected values.
   */
  if (G_GetIOPL(vm) == 3) {
    InstrNonSIVFail(InstrNonSIVFailIOPL);
    return 0; /* Not compliant */
    }

  /* ===================================================================
   * Condition: Guest EFLAGS.IF must be 1.  If we execute the guest
   * with IF==0, then control may never return to the monitor.  We
   * need to make sure that the next hardware interrupt is received
   * promptly by the monitor and redirected to the host OS.
   */
  if (G_GetIF(vm)==0) {
    InstrNonSIVFail(InstrNonSIVFailIF);
    return 0; /* Not compliant */
    }

  /* ===================================================================
   * Condition: Guest EFLAGS.{VIF,VIP} must be 0 (for now).  I haven't
   * looked into these flags much yet.  So for now, mandate they must
   * be zero.
   */
  if (G_GetVIP(vm) || G_GetVIF(vm)) {
    InstrNonSIVFail(InstrNonSIVFailVIPVIF);
    return 0; /* Not compliant */
    }

  /* ===================================================================
   * Condition: All non-null guest selectors must point to a valid ring3
   * segment descriptor.  Otherwise, the guest selector restore / IRET
   * sequence in the monitor will fail.
   *
   * Condition: The current descriptor cache values must be consistent
   * with the actual descriptor table entries.  Otherwise, the guest
   * selector restore / IRET sequence in the monitor will load values
   * into the segments which are inconsistent with values that the
   * guest has loaded into them.
   */
  /* The low six bits (one per segment register) of both masks must
   * all be set; otherwise this is treated as a panic condition.
   */
  if ( (vm->descriptorInEmu & 0x3f) != 0x3f )
    monpanic(vm, "nonSIVCC: dInEmu=0x%x\n", vm->descriptorInEmu);
  if ( (vm->selectorInEmu & 0x3f) != 0x3f )
    monpanic(vm, "nonSIVCC: sInEmu=0x%x\n", vm->selectorInEmu);

  /* CS and SS selectors must be non-NULL */
  if ( IsNullSelector(vm->guest_cpu.selector[SRegCS]) ) {
    InstrNonSIVFail(InstrNonSIVFailSRegCS);
    return 0; /* Not compliant */
    }
  if ( IsNullSelector(vm->guest_cpu.selector[SRegSS]) ) {
    InstrNonSIVFail(InstrNonSIVFailSRegSS);
    return 0; /* Not compliant */
    }
  for (sreg=0; sreg<6; sreg++) {
    selector_t selector;

    selector = vm->guest_cpu.selector[sreg];
    if ( !IsNullSelector(selector) ) {
      Bit32u descriptor_laddr;
      if ( !vm->guest_cpu.desc_cache[sreg].valid ) {
        InstrNonSIVFail(InstrNonSIVFailSReg + sreg);
        return 0; /* Not compliant */
        }
      /* We have a non-null selector, and its shadow descriptor cache
       * is marked valid.  Now fetch the corresponding guest descriptor
       * table entry.  This entry must be equivalent to the shadow cache
       * values for our assumptions to work.
       */
      /* (selector.raw | 0x0007) is the offset of the last byte of the
       * 8-byte table entry; (selector.raw & 0xfff8) is the offset of
       * its first byte.
       */
      if (selector.fields.ti == 0) { /* GDT */
        if ( (selector.raw | 0x0007) > vm->guest_cpu.gdtr.limit ) {
          InstrNonSIVFail(InstrNonSIVFailGDT);
          return 0; /* Not compliant */
          }
        descriptor_laddr = vm->guest_cpu.gdtr.base +
                           (selector.raw & 0xfff8);
        }
      else { /* LDT */
        if (vm->guest_cpu.ldtr_cache.valid == 0) {
          InstrNonSIVFail(InstrNonSIVFailLDT);
          return 0; /* Not compliant */
          }
        if ( (selector.raw | 0x0007) >
              vm->guest_cpu.ldtr_cache.limit_scaled ) {
          InstrNonSIVFail(InstrNonSIVFailLDT);
          return 0; /* Not compliant */
          }
        descriptor_laddr = vm->guest_cpu.ldtr_cache.base +
                           (selector.raw & 0xfff8);
        }

      if (vm->guest_cpu.cr0.fields.pg) {
        monpanic(vm, "nonSIVCC: PG==1 not coded yet.\n");
        }
      else {
        unsigned char *guest_page;
        Bit32u guest_paddr_index;
        descriptor_t *descriptor_p;

        /* NOTE(review): an 8-byte entry starting at page offset 0xff9
         * or 0xffa also crosses the page boundary but passes this
         * test; 0xff8 looks like the intended bound - confirm.
         */
        if ( (descriptor_laddr & 0xfff) > 0xffa ) {
          monpanic(vm, "nonSIVCC: PG==0 descriptor crosses page.\n");
          }
        guest_paddr_index = A20Addr(vm, descriptor_laddr) >> 12;
        if (guest_paddr_index >= vm->pages.guest_n_pages)
          monpanic(vm, "nonSIVCC: paddr_i OOB\n");
        guest_page = open_guest_phy_page(vm, guest_paddr_index,
                                         vm->guest.addr.tmp_phy_page0);
        descriptor_p = (descriptor_t *)
          (guest_page + (descriptor_laddr & 0xfff));

        /* Compare the low dword, then the high dword, of the guest
         * table entry against the cached descriptor.  A mismatch in
         * either one ends up at the 'failSReg' label.
         */
        if ( ((Bit32u *) descriptor_p)[0] !=
             ((Bit32u *) &vm->guest_cpu.desc_cache[sreg].desc)[0] ) {
failSReg:
          /* NOTE(review): if monpanic() does not return, the two
           * statements following it are never executed - confirm.
           */
          monprint(vm, "guest DT %x:%x\n",
            ((Bit32u *) descriptor_p)[0], ((Bit32u *) descriptor_p)[1]);
          monpanic(vm, "cache    %x:%x\n",
            ((Bit32u *) &vm->guest_cpu.desc_cache[sreg].desc)[0],
            ((Bit32u *) &vm->guest_cpu.desc_cache[sreg].desc)[1]);
          InstrNonSIVFail(InstrNonSIVFailSReg + sreg);
          return 0; /* Not compliant */
          }
        if ( ((Bit32u *) descriptor_p)[1] !=
             ((Bit32u *) &vm->guest_cpu.desc_cache[sreg].desc)[1] ) {
          goto failSReg;
          }
        }
      }
    } /* for (...) */

  /* Notes:
   *  - A20 considerations?
   *  - user code access to GDT/LDT/IDT pages?
   */

#if 0
From mon-paging.c:
    /* For guest code running with SIV on, it doesn't matter if */
    /* guest accesses a structure such as GDT, IDT, LDT, since we */
    /* virtualize segment loads and update the A bit immediately. */
    if (!vm->vOpcodeMap) {
      monpanic(vm, "...: siv==0\n");
      /* SIV off should only happen when running ring3 guest code, */
      /* which ordinarily doesn't have access to the GDT, IDT, LDT. */
      /* If we add this as a condition before turning off SIV, then */
      /* we don't need to have a check here. */
      }
#endif

  InstrNonSIVPass();
  return 1;
}
#endif
