/*
 * $COPYRIGHT$
 *
 *	$Id: xmpi_sys_snapshot.lam.cc,v 1.6 2000/11/01 22:35:59 bbarrett Exp $
 *
 *	Function:	- get LAM state info. for all MPI processes
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <app_mgmt.h>
#include <blktype.h>
#include <bfreq.h>
#include <events.h>
#include <mpisys.h>
#include <net.h>
#include <portable.h>
#include <preq.h>
#include <terror.h>
#include <typical.h>
#include <t_types.h>

#include "xmpi.h"
#include "xmpi_misc.h"
#include "xmpi_ctl.h"

/*
 * global functions
 */
int xmpi_sys_snapshot(struct _gps*, int, struct xmproc*);



/*
 * local functions
 */
static void getstateinfo(struct _gps*, int, struct xmproc*, int);

static void getmesginfo(struct _gps*, int, struct xmproc *, int);

static int addmesg(struct xmproc*, int);

static int settables(int);

static void setprocinfo(struct xmproc*, struct pstate*);

static void unpack_fyi(struct _fyiproc*, int*, int*, int*, int*, 
		       int*, int*, int*, int*, int*, int*, int*, int*);

/*
 * local macros and defines
 *
 * A packed rank word holds one value above bit 15 and another in its
 * low 16 bits; in this file the macros are applied to the fym_src and
 * fym_dest fields of a message FYI.
 */
#define	global_rank(p)	((p) >> 16)
#define local_rank(p)	((p) & 0xffff)

/*
 * local variables
 *
 * The two tables are allocated by settables() and reused across calls.
 */
static int nentries = 0;	       /* # table entries; set by settables()
					* and passed to rpstate() as the
					* process table size */

static int bfsize = 0;		       /* size of msg table, i.e. the number
					* of entries msgtbl was allocated for */

static int bfsmax = XMPI_VWBFSMAX;     /* max size of msg table; passed to
					* rbfstate() as the entry limit */

static struct pstate *proctbl = 0;     /* process state table, filled by
					* rpstate() in getstateinfo() */

static struct bfstatus *msgtbl = 0;    /* message state table, filled by
					* rbfstate() in getmesginfo() */

static char buf[128];		       /* formatting buffer for the
					* "Probing node" info text */

/*
 *	xmpi_sys_snapshot
 *
 *	Function:	- refreshes the state and message information of
 *			  every process of the application
 *			- each node is probed when the first process that
 *			  is still undefined is found on it; the probe
 *			  fills in the other processes of that node too
 *			- calls xmpi_fail() if the tables cannot be set up
 *	Accepts:	- GPS array
 *			- # of processes
 *			- XMPI process array
 *	Returns:	- 0
 */
int
xmpi_sys_snapshot(struct _gps *app, int app_n, 
		  struct xmproc *procs)
{
  int rank;			       /* index into both arrays */

  xmpi_busy();

  /* Make sure the process and message tables are large enough. */
  if (settables(app_n) != 0) {
    xmpi_fail((char*) "xmpi (xmpi_sys_snap)");
  }

  /* Mark every process as not yet probed. */
  for (rank = 0; rank < app_n; ++rank) {
    procs[rank].xmp_state = XMPI_SUNDEF;
  }

  /* Probe the node of each process that is still marked undefined. */
  for (rank = 0; rank < app_n; ++rank) {

    if (procs[rank].xmp_state != XMPI_SUNDEF) {
      continue;
    }

    sprintf(buf, "Probing node n%d", app[rank].gps_node);
    xmpi_ctl_setinfo(buf);

    /* Process state first, then the buffered messages of that node. */
    getstateinfo(app, app_n, procs, rank);
    getmesginfo(app, app_n, procs, rank);
  }

  xmpi_ctl_resetinfo();
  xmpi_unbusy();

  return 0;
}

/*
 *	getstateinfo
 *
 *	Function:	- get process state info for a node
 *			- abort if LAM error
 *	Accepts:	- GPS array
 *			- # processes
 *			- XMPI process array
 *			- process index (global rank)
 */
static void
getstateinfo(struct _gps *app, int app_n, 
	     struct xmproc *parray, int index)
{
  int np;			       /* # processes on node */

  int i, j;			       /* favourite indices */

  struct pstate *pstat;		       /* pointer to state table */

  struct _gps *pproc;		       /* process pointer */

  struct _gps *p;		       /* favourite pointer */

/*
 * Get LAM information.
 */
  pproc = app + index;

  np = rpstate(pproc->gps_node, SELECT_MPI,
	       INT4_NIL, proctbl, (int4) nentries);
  if (np < 0)
    xmpi_fail((char*) "xmpi (getstateinfo)");
/*
 * Loop over remaining MPI processes skipping those on a different node.
 */
  for (i = index, p = pproc; i < app_n; ++i, ++p) {

    if (p->gps_node != pproc->gps_node)
      continue;
/*
 * Find the process in the tables.
 */
    for (j = 0, pstat = proctbl; j < np; ++j, ++pstat) {
/*
 * Fill the proces state information.
 */
      if (pstat->ps_pid == p->gps_pid) {
	setprocinfo(parray + i, pstat);
	break;
      }
    }
  }
}

/*
 *	getmesginfo
 *
 *	Function:	- get message info for a node
 *			- abort if LAM error
 *	Accepts:	- GPS array
 *			- # processes
 *			- XMPI process array
 *			- process index
 */
static void
getmesginfo(struct _gps *app, int app_n, struct xmproc *parray, int index)
{
  struct xmproc *pproc;		       /* process pointer */

  struct xmproc *p;		       /* favourite pointer */

  int i, j;			       /* favourite indices */

  int size;			       /* # of return blocks */

  int limit;			       /* # of blocks to print */

  int tag;			       /* message tag */

  int cid;			       /* message cid */

  int ignore;			       /* ignored return parameter */

  struct bfstatus *bp;		       /* buffered message */

  struct bfparms parms;		       /* buffer parameters */

  struct _fyimsg *fyi;		       /* message FYI */

/*
 * Get message information.
 */
  pproc = parray + index;

  size = rbfstate(app[index].gps_node, msgtbl, bfsmax, &parms);
  if (size < 0)
    xmpi_fail((char*) "xmpi (getmesginfo)");

  limit = (size < bfsmax) ? size : bfsmax;
/*
 * Loop over remaining MPI processes skipping those on a different node.
 */
  for (i = index, p = pproc; i < app_n; ++i, ++p) {

    if (app[i].gps_node != app[index].gps_node)
      continue;
/*
 * Delete all previous messages.
 */
    if (p->xmp_msgs) {
      al_free(p->xmp_msgs);
      p->xmp_msgs = 0;
      p->xmp_nmsg = 0;
      p->xmp_curmsg = 0;
    }
    p->xmp_more = 0;
/*
 * Collect all messages for that process.
 */
    for (j = 0, bp = msgtbl; j < limit; ++j, ++bp) {

      if ((bp->bfs_flags & KSYNCSQL)
	  && !(bp->bfs_flags & N2ND)
	  && !(bp->bfs_event & 0x10000)) {

	fyi = (struct _fyimsg *) & bp->bfs_data[4];

	if (global_rank(fyi->fym_dest) != i)
	  continue;

	lam_unsyncsql(bp->bfs_event, bp->bfs_type, &tag,
		      &cid, &ignore, &ignore, &ignore, &ignore);
/*
 * Skip collective call messages and GER messages.
 */
	if ((cid < 0) || (tag == MPI_GER_TAG))
	  continue;

	if (addmesg(p, j))
	  xmpi_fail((char*) "xmpi (getmesginfo)");
      }
    }
/*
 * Add "more messages" entry if needed.
 */
    if (size > bfsmax) {

      if (addmesg(p, -1))
	xmpi_fail((char*)"xmpi (getmesginfo)");

      p->xmp_more = 1;
    }
/*
 * Reset the message pointer.
 */
    if (p->xmp_msgs)
      p->xmp_curmsg = (xmmsg*) al_top(p->xmp_msgs);
  }
}

/*
 *	addmesg
 *
 *	Function:	- add message to process entry
 *			- a message that agrees with an existing entry on
 *			  context ID, tag, global source rank, count and
 *			  datatype only increments that entry's count
 *			- the "more messages" marker is an entry whose
 *			  xmm_nmsg is -1 and whose other fields are zero;
 *			  it does not count towards xmp_nmsg
 *	Accepts:	- ptr to process
 *			- message index into the message table
 *			  (or -1 for "more messages")
 *	Returns:	- 0 or LAMERROR
 */
static int
addmesg(struct xmproc *pproc, int msgidx)
{
  struct xmmsg newelem;		       /* new list element */

  struct xmmsg *p;		       /* favourite pointer */

  struct bfstatus *pmsg;	       /* ptr to message entry */

  int cid;			       /* context ID */

  int tag;			       /* tag */

  int lsrc;			       /* local source rank */

  int gsrc;			       /* global source rank */

  int dtype;			       /* datatype */

  int ack;			       /* message ack field (unused) */

  int dest_idx;			       /* destination index (unused) */

  int src_node;			       /* source node (unused) */

  int src_idx;			       /* source process index (unused) */

  struct _fyimsg *fyi;		       /* message FYI */

/*
 * Create the message list if needed.
 */
  if (pproc->xmp_msgs == 0) {
    pproc->xmp_msgs = al_init((int4) sizeof(struct xmmsg), 0);
    if (pproc->xmp_msgs == 0)
      return (LAMERROR);
  }
/*
 * Start from an all-zero element so that no indeterminate bytes are
 * ever copied into the list, in particular for the marker entry which
 * only sets its count.
 */
  memset(&newelem, 0, sizeof(newelem));

  if (msgidx < 0) {
    newelem.xmm_nmsg = -1;
  } else {
    pmsg = msgtbl + msgidx;
    fyi = (struct _fyimsg *) & pmsg->bfs_data[4];

    lam_unsyncsql(pmsg->bfs_event, pmsg->bfs_type,
		  &tag, &cid, &ack, &dest_idx, &src_node, &src_idx);

    cid = lam_coll2pt(cid);

    gsrc = global_rank(fyi->fym_src);
    lsrc = local_rank(fyi->fym_src);
    dtype = fyi->fym_dtpid >> 16;
/*
 * Locate a previous message of the same type.
 * If found, increment its count.
 * Marker entries (negative count) never stand for a real message and
 * are skipped.
 */
    p = (xmmsg*) al_top(pproc->xmp_msgs);
    while (p) {
      if ((p->xmm_nmsg >= 0)
	  && (p->xmm_cid == cid) && (p->xmm_tag == tag)
	  && (p->xmm_gsrc == gsrc)
	  && (p->xmm_cnt == fyi->fym_count)
	  && (p->xmm_dtype == dtype)) {

	++(p->xmm_nmsg);
	++(pproc->xmp_nmsg);
	return (0);
      }
      p = (xmmsg*) al_next(pproc->xmp_msgs, p);
    }
/*
 * No previous message found, describe a new entry.
 */
    ++(pproc->xmp_nmsg);
    newelem.xmm_nmsg = 1;
    newelem.xmm_cid = cid;
    newelem.xmm_tag = tag;
    newelem.xmm_gsrc = gsrc;
    newelem.xmm_lsrc = lsrc;
    newelem.xmm_cnt = fyi->fym_count;
    newelem.xmm_dtype = dtype;
  }

  if (al_append(pproc->xmp_msgs, (char *) &newelem) == 0)
    return (LAMERROR);

  return (0);
}

/*
 *	settables
 *
 *	Function:	- allocate/expand process & message tables
 *			- the process table is grown when more processes
 *			  are requested than on the previous call; the
 *			  message table is grown when bfsmax exceeds bfsize
 *			- when growing a table fails, the existing table
 *			  and its recorded size are left untouched
 *	Accepts:	- # of processes
 *	Returns:	- 0 or LAMERROR
 */
static int
settables(int nprocs)
{
  struct pstate *newproc;	       /* resized process table */

  struct bfstatus *newmsg;	       /* resized message table */

/*
 * Handle the process tables.
 */
  if (proctbl == 0) {

    proctbl = (struct pstate *)
      malloc((unsigned) nprocs * sizeof(struct pstate));
    if (proctbl == 0)
      return (LAMERROR);
  } else if (nprocs > nentries) {
/*
 * Keep the old pointer until realloc() has succeeded: on failure the
 * original block is still allocated and must not be lost.
 */
    newproc = (struct pstate *) realloc((char *) proctbl,
			      (unsigned) nprocs * sizeof(struct pstate));
    if (newproc == 0)
      return (LAMERROR);

    proctbl = newproc;
  }
  nentries = nprocs;
/*
 * Handle the message table.
 * The recorded size is only updated once the allocation has succeeded.
 */
  if (msgtbl == 0) {

    msgtbl = (struct bfstatus *)
      malloc((unsigned) bfsmax * sizeof(struct bfstatus));
    if (msgtbl == 0)
      return (LAMERROR);

    bfsize = bfsmax;
  } else if (bfsmax > bfsize) {

    newmsg = (struct bfstatus *) realloc((char *) msgtbl,
			    (unsigned) bfsmax * sizeof(struct bfstatus));
    if (newmsg == 0)
      return (LAMERROR);

    msgtbl = newmsg;
    bfsize = bfsmax;
  }
  return (0);
}

/*
 *	setprocinfo
 *
 *	Function:	- fill a process entry from its state
 *			- kernel states 0 and 3 are shown as running, any
 *			  other state as blocked
 *			- the FYI words inside the state entry are
 *			  converted in place with mttoli4() before unpacking
 *	Accepts:	- ptr to process
 *			- ptr to process state
 */
static void
setprocinfo(struct xmproc *pproc, struct pstate *pstate)
{
  struct _fyiproc *info;	       /* ptr process FYI */

  /* Unpacked by unpack_fyi() but not stored in the process entry. */
  int self_global;		       /* process global rank */
  int peer_node;		       /* peer node */
  int peer_idx;			       /* peer process index */
  int root_node;		       /* root node */
  int root_idx;			       /* root process index */

  if ((pstate->ps_kernel.ks_state == 0)
      || (pstate->ps_kernel.ks_state == 3)) {
    pproc->xmp_state = XMPI_SRUN;
  } else {
    pproc->xmp_state = XMPI_SBLOCK;
  }

  info = (struct _fyiproc *) pstate->ps_kernel.ks_fyi;
  mttoli4((int4 *) info, sizeof(struct _fyiproc) / sizeof(int4));

  unpack_fyi(info,
	     &self_global, &pproc->xmp_lrank,
	     &peer_node, &peer_idx,
	     &pproc->xmp_gpeer, &pproc->xmp_lpeer,
	     &root_node, &root_idx,
	     &pproc->xmp_groot, &pproc->xmp_lroot,
	     &pproc->xmp_cid, &pproc->xmp_tag);

  /* Fields copied straight from the FYI; the wait function is cleared. */
  pproc->xmp_func = info->fyp_func;
  pproc->xmp_cnt = info->fyp_count;
  pproc->xmp_dtype = info->fyp_dtype;
  pproc->xmp_wfunc = 0;
}

/*
 *	unpack_fyi
 *
 *	Function:	- unpack process FYI into constituent elements
 *			- every packed word yields one value from above
 *			  bit 15 and one from its low 16 bits
 *			- cid, tag and both peer ranks are sign extended
 *			  from 16 bits; the other values are not
 *	Accepts:	- ptr process FYI
 *			- ptrs to locations to unpack fyi into
 */
static void
unpack_fyi(struct _fyiproc *fyi, int *psglobal, int *pslocal, 
	   int *peernode, int *peeridx, int *peerglobal, 
	   int *peerlocal, int *rootnode, int *rootidx, 
	   int *rootglobal, int *rootlocal, int *cid, int *tag)
{
  const int upper_shift = 16;		/* position of the upper value */
  const int lower_mask = 0xFFFF;	/* selects the lower value */
  const int sign_bit = 0x8000;		/* sign of a 16 bit value */
  const unsigned int sign_fill = 0xFFFF0000; /* bits set when extending */

  /* This process: global and local rank. */
  *psglobal = fyi->fyp_me >> upper_shift;
  *pslocal = fyi->fyp_me & lower_mask;

  /* Peer: node and index, then global and local rank (sign extended). */
  *peernode = fyi->fyp_peergps >> upper_shift;
  *peeridx = fyi->fyp_peergps & lower_mask;

  *peerglobal = fyi->fyp_peer >> upper_shift;
  if (*peerglobal & sign_bit) {
    *peerglobal |= sign_fill;
  }

  *peerlocal = fyi->fyp_peer & lower_mask;
  if (*peerlocal & sign_bit) {
    *peerlocal |= sign_fill;
  }

  /* Root: node and index, then global and local rank. */
  *rootnode = fyi->fyp_rootgps >> upper_shift;
  *rootidx = fyi->fyp_rootgps & lower_mask;
  *rootglobal = fyi->fyp_root >> upper_shift;
  *rootlocal = fyi->fyp_root & lower_mask;

  /* Context ID and tag share one word; both are sign extended. */
  *cid = fyi->fyp_cidtag >> upper_shift;
  if (*cid & sign_bit) {
    *cid |= sign_fill;
  }

  *tag = fyi->fyp_cidtag & lower_mask;
  if (*tag & sign_bit) {
    *tag |= sign_fill;
  }
}
