/*
 * SCPAR.C - parallel support routines
 *
 * Source Version: 2.0
 * Software Release #92-0043
 *
 */

#include "cpyright.h"

#include "score.h"

SC_THREAD_ATTR(_SC_thread_attr);

static int
 _SC_queue_next_index = 0,
 _SC_chunk_wid        = 0,
 *_SC_chunk_indx      = NULL,
 *_SC_cti             = NULL,
 _SC_ctn;

static SC_thread
 *_SC_tpa = NULL;

static SC_thread_key
 _SC_tk;

#ifdef HAVE_THREADS

static int
 SC_thread_work_size,
 SC_next_work_item,
 SC_n_work_started,
 SC_n_work_finished,
 SC_n_work_items;

SC_dynamic_array
 SC_thread_args,
 SC_thread_work;

static void
 **SC_thread_ret;

static SC_THREAD_LOCK(_SC_chunk_lock);
static SC_THREAD_LOCK(_SC_queue_lock);

#endif

SC_thread_state
 *_SC_tsa = NULL;

int
 SC_n_threads = 0,
 SC_comm_rank = 0,
 SC_comm_size = 1;

SC_THREAD_CONDV(SC_work_available_condv);
SC_THREAD_CONDV(SC_work_finished_condv);

SC_THREAD_LOCK(SC_thread_workqueue_lock);


/*--------------------------------------------------------------------------*/

/*                               SMP ROUTINES                               */

/*--------------------------------------------------------------------------*/

/* SC_CURRENT_THREAD - return the id of the current thread */

int SC_current_thread()
   {int tid;

    SC_TID(tid);

    return(tid);}

/*--------------------------------------------------------------------------*/

#ifdef HAVE_THREADS

/*--------------------------------------------------------------------------*/

/* _SC_WORK_THREAD - */

static void _SC_work_thread(x)
   void *x;
   {PFPVoid fnc;
    void *arg;
    int i, t;

    t = *(int *) x;
    SC_SET_KEY(int, _SC_tk, x);

    while (TRUE)
        {SC_LOCKON(SC_thread_workqueue_lock);
	 SC_n_work_finished++;
           
	 if (SC_n_work_started >= SC_n_work_items)
	    SC_n_work_items = 0;

	 if (SC_n_work_started == SC_n_work_finished &&
	     SC_n_work_items   == 0)
	    SC_THREAD_SIGNAL(SC_work_finished_condv);
           
	 while (SC_n_work_items == 0)
	    SC_COND_WAIT(SC_work_available_condv, SC_thread_workqueue_lock);
           
	 if (SC_n_work_started < SC_n_work_items)
	    {fnc = SC_GET_NTH_DYNAMIC(PFPVoid, SC_thread_work, SC_next_work_item);
	     arg = SC_GET_NTH_DYNAMIC(void *, SC_thread_args, SC_next_work_item);
	     i   = SC_next_work_item;
	     SC_next_work_item++;}
	 else
	    fnc = NULL;

	 SC_n_work_started++;
         SC_LOCKOFF(SC_thread_workqueue_lock);

         if (fnc != NULL)
	    {if (SC_thread_ret == NULL)
	        (*fnc)(arg);
             else
                SC_thread_ret[i+1] = (*fnc)(arg);};};

    return;}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* _SC_INIT_POOL - initialize a pool of NT threads */

static void _SC_init_pool(nt)
   int nt;
   {int i;

    if (nt > 1)
       {SC_thread_work_size = nt;
	SC_n_work_started   = nt;
	SC_n_work_finished  =  0;
	SC_n_work_items     =  0;

	SC_INIT_DYNAMIC_ARRAY(SC_thread_work, PFPVoid, "PFPVoid",
			      SC_thread_work_size);
	SC_INIT_DYNAMIC_ARRAY(SC_thread_args, void *,  "void *",
			      SC_thread_work_size);

	for (i = 1; i <= nt; i++)
	    {_SC_cti[i] = i;
	     SC_THREAD_CREATE(_SC_tpa[i], _SC_thread_attr,
			      _SC_work_thread, _SC_cti[i]);};

/* wait until all threads are initialized before returning */
	SC_LOCKON(SC_thread_workqueue_lock);

	while (SC_n_work_finished < SC_n_work_started)
	   SC_COND_WAIT(SC_work_finished_condv, SC_thread_workqueue_lock);

	SC_LOCKOFF(SC_thread_workqueue_lock);};

    return;}

/*--------------------------------------------------------------------------*/

#endif

/*--------------------------------------------------------------------------*/

/* _SC_ONE_THREAD - execute each of the given functions once
 *
 *                  n: number of entries in the following arguments
 *                fnc: array of functions to execute
 *             arg[i]: argument list (struct *) for fnc[i]
 *                ret: array to hold return values from fnc, there
 *                     needs to be enough space to handle all the
 *                     function invocations (independent of number
 *                     of threads).
 */

static void _SC_one_thread(n, fnc, arg, ret)
   int n;
   void *(*fnc[])();
   void **arg;
   void **ret;
   {int i, icounter;
    void *larg;
    PFPVoid lfnc;

    if (ret == NULL)
       {for (i = 0; i < n; i++)
	    {larg = (arg == NULL) ? NULL : arg[i];
             lfnc = fnc[i];
	     (*lfnc)((void *) larg);};}
    else
       {icounter = 0;
        for (i = 0; i < n; i++)
	    {larg = (arg == NULL) ? NULL : arg[i];
             lfnc = fnc[i];
             ret[++icounter] = (*lfnc)((void *) larg);};};

    if (_SC_cti != NULL)
       SC_SET_KEY(int, _SC_tk, _SC_cti);

    return;}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* SC_INIT_TPOOL - create a pool of NT threads
 *               - serial execution follows from NT less than 2
 */

void SC_init_tpool(nt, tid)
   int nt;
   PFVoid tid;
   {

    nt = (nt > 1) ? nt : 0;

    SC_init_threads(nt+1, tid);

    _SC_tpa  = FMAKE_N(SC_thread, nt+1, "SC_INIT_TPOOL:threads");
    _SC_tpa[0] = (nt > 1) ? SC_THREAD_SELF() : 0;

# ifdef HAVE_THREADS

    _SC_init_pool(nt);

# endif
 
    return;}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* _SC_CT - return the id of the current thread */

static void _SC_ct(tid)
   int *tid;
   {int id, *tmp;

    SC_GET_PKEY(int, _SC_tk, tmp);
    if (tmp == NULL)
       {_SC_cti[_SC_ctn] = _SC_ctn;

        SC_SET_KEY(int, _SC_tk, &_SC_cti[_SC_ctn]);

	id = _SC_ctn++;}

    else
       id = *tmp;

    id = max(id, 0);
    id = min(id, SC_n_threads);

    *tid = id;

    return;}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* SC_INIT_THREADS - initialize SCORE threads */

void SC_init_threads(nt, tid)
   int nt;
   PFVoid tid;
   {SC_thread_state *tmp;
    int i;

    SC_n_threads = nt;

    _SC_tsa = NMAKE_N(SC_thread_state, SC_n_threads,
		      "SC_INIT_THREADS:_SC_tsa");
    for (tmp = _SC_tsa, i = 0; i < SC_n_threads; i++, tmp++)
        tmp->_SC_addr_ = _SC_addr_;

    _SC_cti = NMAKE_N(int, SC_n_threads,  "SC_INIT_THREADS:_SC_cti");
    _SC_cti[0] = 0;

    SC_CREATE_KEY(_SC_tk, NULL);
    SC_SET_KEY(int, _SC_tk, _SC_cti);

    SC_tid_hook = (nt > 1) ? tid : NULL;
    if (SC_tid_hook == NULL)
       {SC_tid_hook = _SC_ct;

        _SC_ctn = 1;}

    return;}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* SC_DO_THREADS - execute the given function NT times
 *               - serial execution follows from NT equal to 1
 *
 *                  n: number of entries in the following arguments
 *              nt[i]: number of threads that should execute fnc[i]
 *                fnc: array of functions to execute
 *             arg[i]: argument list (struct *) for fnc[i]
 *                 ik: thread key
 *                ret: array to hold return values from fnc, there
 *                     needs to be enough space to handle all the
 *                     function invocations (independent of number
 *                     of threads).
 */

void SC_do_threads(n, nt, fnc, arg, ret)
   int n, *nt;
   void *(*fnc[])();
   void **arg;
   void **ret;
   {

#ifdef HAVE_THREADS

    int i, j;

    for (i = 0; i < n; i++)
        {if (nt[i] > 1)
            break;
         if (i == (n - 1))
	    {_SC_one_thread(n, fnc, arg, ret);
	     return;};};

    SC_LOCKON(SC_thread_workqueue_lock);
    SC_thread_ret = ret;

    SC_N_DYNAMIC(SC_thread_work) = 0;
    SC_N_DYNAMIC(SC_thread_args) = 0;
    SC_n_work_finished = 0;
    SC_n_work_started  = 0;
    SC_n_work_items    = 0;

    for (i = 0; i < n; i++)
        for (j = 0; j < nt[i]; j++)
	    {SC_REMEMBER_DYNAMIC(PFPVoid, fnc[i], SC_thread_work);
	     if (arg != NULL)
	        {SC_REMEMBER_DYNAMIC(void *, arg[i], SC_thread_args);}
	     else
	        {SC_REMEMBER_DYNAMIC(void *, NULL, SC_thread_args);}
	     SC_n_work_items++;};

    SC_next_work_item = 0;

    SC_THREAD_BROADCAST(SC_work_available_condv);

    while (SC_n_work_finished < SC_n_work_items)
       SC_COND_WAIT(SC_work_finished_condv, SC_thread_workqueue_lock);

    SC_LOCKOFF(SC_thread_workqueue_lock); 

#else

    _SC_one_thread(n, fnc, arg, ret);

#endif

    return;}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* SC_CHUNK_LOOP - do up the threads in a controlled diagnosable way */

void SC_chunk_loop(fnc, mn, mx, serial, argl)
   PFPByte fnc;
   int mn, mx;
   int serial;
   byte *argl;
   {int i, j, dj, n1, n2, np, nt;
    int nmn, nmx;
    int *it;
    void **ret;
    static int debug = FALSE;

    _SC_chunk_wid = 0;

    nt = (serial) ? 1 : SC_n_threads;
    nt = max(nt, 1);
    np = nt + 1;

    ret            = FMAKE_N(void *, np, "SC_CHUNK_LOOP:ret");
    _SC_chunk_indx = FMAKE_N(int, np, "SC_CHUNK_LOOP:ret");

/* compute the index range each thread is to do */
    dj = (mx - mn)/nt;

    _SC_chunk_indx[0] = mn;
    for (j = 1; j < nt; j++)
        _SC_chunk_indx[j] = _SC_chunk_indx[j-1] + dj;

    _SC_chunk_indx[j] = mx;

/* have the threads do their pieces */
    SC_do_threads(1, &nt, &fnc, &argl, ret);

/* check that each piece has been done */
    nmn = INT_MAX;
    nmx = -INT_MAX;
    for (i = 1; i < np; i++)
        {it = (int *) ret[i];
	 n1 = *it++;
	 n2 = *it++;

	 nmn = min(nmn, n1);
	 nmx = max(nmx, n2);

	 if (debug)
	    PRINT(stdout, "%3d\t%d-%d\n", i, n1, n2);};

    if ((nmn != mn) || (nmx != mx))
       PRINT(stdout,
	     "MISSED CHUNK IN PARALLEL LOOP - SC_CHUNK_LOOP");

    SFREE(ret);
    SFREE(_SC_chunk_indx);

    return;}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* SC_CHUNK_SPLIT - return the chunk of the loop for the current thread */

void SC_chunk_split(pmn, pmx, prv)
   int *pmn, *pmx;
   void **prv;
   {int *pc;

    SC_LOCKON(_SC_chunk_lock);

    _SC_chunk_wid = (_SC_chunk_wid < SC_n_threads) ? _SC_chunk_wid + 1 : 1;

    pc = _SC_chunk_indx + _SC_chunk_wid - 1;

    SC_LOCKOFF(_SC_chunk_lock);

    *pmn = pc[0];
    *pmx = pc[1];
    *prv = pc;

    return;}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* SC_QUEUE_NEXT_ITEM - return index for next item in queue */

int SC_queue_next_item(ng)
   int ng;
   {int ig;

    SC_LOCKON(_SC_queue_lock);

/* mark the sequence as ended */
    if (_SC_queue_next_index >= ng)
       ig = -1;

/* pop the next index in the sequence */
    else
       ig = _SC_queue_next_index++;

    SC_LOCKOFF(_SC_queue_lock);

    return(ig);}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

/* SC_QUEUE_WORK - execute the given function FNC on NT threads
 *               - this is to process queues of work
 *               - serial execution follows from NT equal to 1
 */

void SC_queue_work(fnc, serial, argl)
   PFPVoid fnc;
   int serial;
   byte *argl;
   {int nt;

    _SC_queue_next_index = 0;

    nt = (serial) ? 1 : SC_n_threads;
    nt = max(nt, 1);

    SC_do_threads(1, &nt, &fnc, &argl, NULL);

    return;}

/*--------------------------------------------------------------------------*/

/*                         FORTRAN API ROUTINES                             */

/*--------------------------------------------------------------------------*/

FIXNUM F77_ID(scinth_, scinth, SCINTH)(pnt, tid)
   FIXNUM *pnt;
   PFVoid tid;
   {int nt;

    nt = *pnt;

    SC_init_threads(nt, tid);

    return((FIXNUM) TRUE);}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
