#include <ncbi.h>
#include <accentr.h>
#include <gather.h>
#include <tofasta.h>
#include <urkepi.h>

/* Error code passed as the second argument of every ErrPostEx call in
   this file. */
#define TOP_ERROR 1
/* Module name string; THIS_MODULE is redefined to point at it.
   NOTE(review): presumably read by the toolkit's error-posting macros to
   label messages -- confirm against the error-handling header. */
static char _this_module[] = "epi";
#undef  THIS_MODULE
#define THIS_MODULE _this_module
/* Source file name string; THIS_FILE is redefined to point at it
   (same assumption as for THIS_MODULE above). */
static char _this_file[] = __FILE__;
#undef  THIS_FILE
#define THIS_FILE _this_file

/* User data handed to the gather callback GetProteinBioseq.
   gi  - GI to look for; a value <= 0 makes the callback accept the first
         bioseq it is offered.
   bsp - set by the callback to the selected bioseq; NULL until one has
         been selected. */
typedef struct gather_Prot_Bioseq
{
  Int4      gi;
  BioseqPtr bsp;
} Gather_PBS, PNTR Gather_PBSPtr;

/* Command-line argument table passed to GetArgs in Main.  Main reads the
   entries by position, so the order below must not change without
   updating every myargs[n] reference. */
Args myargs[] =
{
  /* [0] -g : protein GI; 0 means "no GI given" */
  { "protein GI", "0", "0", "4000000", TRUE,
    'g', ARG_INT, 0.0, 0, NULL},
  /* [1] -f : FastA input file, used when no GI is given */
  { "FastA file", NULL, NULL, NULL, TRUE,
    'f', ARG_STRING, 0.0, 0, NULL },
  /* [2] -w : copied to EpiDat.window */
  { "epi window", "16", "6", "64", TRUE,
    'w', ARG_INT, 0.0, 0, NULL},
  /* [3] -c : copied to EpiDat.percentcut */
  { "percent cutoff", "5.0", "0.0", "100.0", TRUE,
    'c', ARG_FLOAT, 0.0, 0, NULL},
  /* [4] -l : copied to EpiDat.linker */
  { "linker", "5", "0", "32", TRUE,
    'l', ARG_INT, 0.0, 0, NULL},
  /* [5] -p : passed to FilterEpi as its Boolean flag */
  { "high pass filter", "FALSE", "FALSE", "TRUE", TRUE,
    'p', ARG_BOOLEAN, 0.0, 0, NULL },
  /* [6] -x : replace predicted regions with 'x' instead of lower case */
  { "X-out output for blast", "FALSE", "FALSE", "TRUE", TRUE,
    'x', ARG_BOOLEAN, 0.0, 0, NULL },
  /* [7] -n : print start/stop pairs instead of a masked sequence */
  { "numeric output", "FALSE", "FALSE", "TRUE", TRUE,
    'n', ARG_BOOLEAN, 0.0, 0, NULL }
};

/* Gather callback that selects one bioseq for the caller.
 *
 * gcp->userdata must point at a Gather_PBS.  Once a bioseq has been stored
 * there, every later invocation is a no-op.  When the requested gi is
 * positive, only a bioseq whose id resolves to that gi (via GetGIForSeqId)
 * is stored; otherwise the first bioseq offered is stored.
 *
 * Returns FALSE when gcp or its userdata is NULL, TRUE in every other case.
 */
static Boolean GetProteinBioseq (GatherContextPtr gcp)
{
  Gather_PBSPtr  target;
  BioseqPtr      candidate;

  if (gcp == NULL)
    return FALSE;

  target = (Gather_PBSPtr) gcp->userdata;
  if (target == NULL)
    return FALSE;

  /* Nothing to do once a bioseq is selected, or for non-bioseq items. */
  if (target->bsp != NULL || gcp->thistype != OBJ_BIOSEQ)
    return TRUE;

  candidate = (BioseqPtr) (gcp->thisitem);
  if (candidate == NULL)
    return TRUE;

  /* The gi lookup only happens when a specific gi was requested. */
  if (target->gi <= 0 || target->gi == GetGIForSeqId (candidate->id))
    target->bsp = candidate;

  return TRUE;
}

Int2 Main ()
{
  Int2        argcount;
  Boolean     flagHaveNet;

  Int4        gi;
  SeqEntryPtr sep;
  EpiDatPtr   epip;
  Int4        i;
  SeqLocPtr   slp, slpn;
  SeqPortPtr  spp;
  Int4        start, stop;
  Uint1Ptr    sequence;

  FILE        *fiop;
  Char        fastafile[256], title[256];
  ValNodePtr  desc;

  static GatherScope  gs;
  GatherScopePtr      gsp;
  static Gather_PBS   gpbs;
  Gather_PBSPtr       gpbsp;

  argcount = sizeof (myargs) / sizeof (Args);
  if (!GetArgs ("Epi", argcount, myargs))
    return 1;

  gsp = &gs;
  gpbsp = &gpbs;

  MemSet ((Pointer) gsp, 0, sizeof (GatherScope));
  MemSet ((Pointer) gsp->ignore, (int) (TRUE),
          (size_t) (OBJ_MAX * sizeof (Boolean)));
  gsp->ignore[OBJ_BIOSEQ] = FALSE;

  gpbsp->bsp = NULL;

  if (myargs[0].intvalue == 0 && myargs[1].strvalue == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 100,
               "No gi or FastA file given :: for help :   epi -");
    ErrShow ();
    exit (1);
  }

  gi = myargs[0].intvalue;
  if (myargs[1].strvalue != NULL)
    StrCpy (fastafile, myargs[1].strvalue);
  else
    fastafile[0] = '\0';

  if (gi > 0)
  {
    if (!EntrezInit ("Epi", FALSE, &flagHaveNet))
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
                 "Entrez init failed");
      ErrShow ();
      exit (1);
    }
  }

  if (gi > 0)
  {
    sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_BIOSEQ);
  }
  else
  {
    if ((fiop = FileOpen (fastafile, "r")) == NULL)
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                 "Failed to open FastA file");
      ErrShow ();
      exit (1);
    }
    sep = FastaToSeqEntry (fiop, FALSE);
  }

  if (sep == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
               "No seqentry found");
    ErrShow ();
    exit (1);
  }

  while (sep != NULL)
  {
    gpbsp->gi = gi;
    GatherSeqEntry (sep, (Pointer) gpbsp, GetProteinBioseq,
                         (Pointer) gsp);

    if (gpbsp->bsp != NULL)
    {
      if (ISA_aa (gpbsp->bsp->mol))
      {
        epip = EpiDatNew ();
        epip->window = myargs[2].intvalue;
        epip->percentcut = myargs[3].floatvalue;
        epip->linker = myargs[4].intvalue;
        epip->score = PredictEpiBioseq (gpbsp->bsp, 0, gpbsp->bsp->length-1,
                                        epip);
        if (epip->score != NULL)
        {
/* raw scores
          for (i = 0; i < gpbsp->bsp->length; i++)
            printf ("%lf\n", (double) epip->score[i]);
*/
          slpn = slp = FilterEpi (epip, gpbsp->bsp->length, gpbsp->bsp->id,
                                  (Boolean) myargs[5].intvalue);
          if (myargs[7].intvalue == TRUE)
          {
            slp = slpn;
            while (slp != NULL)
            {
              start = SeqLocStart (slp) + 1;
              stop = SeqLocStop (slp) + 1;
              printf ("%9ld %9ld\n", (long) start, (long) stop);
              slp = slp->next;
            }
          }
          else
          {
            sequence = (Uint1Ptr) MemNew ((size_t) (sizeof (Uint1) *
                                                    gpbsp->bsp->length+1));
            spp = SeqPortNew (gpbsp->bsp, 0, gpbsp->bsp->length-1, 0,
                              Seq_code_iupacna);
            SeqPortSeek (spp, 0, SEEK_SET);

            i = 0;
            while ((sequence[i] = SeqPortGetResidue (spp)) != SEQPORT_EOF)
            {
              if (('a' <= (Char) sequence[i] && (Char) sequence[i] <= 'z') ||
                  ('A' <= (Char) sequence[i] && (Char) sequence[i] <= 'Z'))
                i++;
            }
            sequence[i] = 0;

            i = 0;
            while (sequence[i] != 0)
            {
              sequence[i] = (Uint1) TO_UPPER ((Char) sequence[i]);
              i++;
            }

            slp = slpn;
            while (slp != NULL)
            {
              start = SeqLocStart (slp);
              stop = SeqLocStop (slp);
              for (i = start; i <= stop; i++)
              {
                if (myargs[6].intvalue == TRUE)
                  sequence[i] = (Uint1) 'x';
                else
                  sequence[i] = (Uint1) TO_LOWER ((Char) sequence[i]);
              }
              slp = slp->next;
            }

            if (gi > 0)
              sprintf (title, ">%ld\n", (long) gi);
            else
              sprintf (title, ">%s\n", fastafile);

            desc = gpbsp->bsp->descr;
            while (desc != NULL)
            {
              if (desc->choice == Seq_descr_title)
              {
                sprintf (title, ">%s\n", (CharPtr) desc->data.ptrvalue);
                break;
              }
              desc = desc->next;
            }

            printf ("%s", title);
            i = 0;
            while (sequence[i] != 0)
            {
              printf ("%c", (Char) sequence[i]);
              i++;
              if (i % 50 == 0)
              {
                if (myargs[6].intvalue == TRUE)
                  printf ("\n");
                else
                  printf (" %8ld\n", (long) i);
              }
            }
            if (i % 50 != 0)
              printf ("\n");

            SeqPortFree (spp);
            MemFree (sequence);
          }
          slp = slpn;
          while (slp != NULL)
          {
            slpn = slp->next;
            SeqLocFree (slp);
            slp = slpn;
          }
        }
        else
        {
          ErrPostEx (SEV_ERROR, TOP_ERROR, 107,
                     "No epi predictions made");
          ErrShow ();
          exit (1);
        }
        EpiDatFree (epip);
      }
      else
      {
        ErrPostEx (SEV_ERROR, TOP_ERROR, 106,
                   "Not a protein bioseq");
        ErrShow ();
        exit (1);
      }
    }
    else
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 105,
                 "No bioseq found");
      ErrShow ();
      exit (1);
    }
    sep = SeqEntryFree (sep);
    if (gi <= 0)
    {
      sep = FastaToSeqEntry (fiop, FALSE);
      gpbsp->bsp = NULL;
    }
  }

  if (gi > 0)
    EntrezFini ();
  else
    FileClose (fiop);

  return 0;
}
