/*
    Theseus - maximum likelihood superpositioning of macromolecular structures

    Copyright (C) 2004-2009 Douglas L. Theobald

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the:

    Free Software Foundation, Inc.,
    59 Temple Place, Suite 330,
    Boston, MA  02111-1307  USA

    -/_|:|_|_\-
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <float.h>

#include "CovMat.h"
#include "DLTutils.h"
#include "DLTmath.h"
#include "Error.h"
#include "PCAstats.h"
#include "pdbIO.h"
#include "pdbMalloc.h"
#include "Coords.h"
#include "PDBCoords.h"
#include "pdbStats.h"
#include "pdbUtils.h"
#include "lapack_dlt.h"


#if defined(__APPLE__)
    #include <CoreServices/CoreServices.h>
    #include <time.h>
    #include <sys/time.h>
    #include <sys/resource.h>
    #include <sys/types.h>
    #include <mach/mach_time.h>

    /* Apple builds only: timestamps taken with seconds() immediately before
       and after the eigen-decomposition in CalcPCA() and Calc3NPCA().  They
       are used solely to print the elapsed time in milliseconds. */
    static double           start_time, end_time;
#endif


/*
 * CalcPCA
 *
 * Principal components analysis of the vlen x vlen matrix cdsA->CovMat
 * (vlen = cdsA->vlen).
 *
 * In order, the function:
 *   - clamps the requested number of PCs (cdsA->algo->pca) to vlen and to
 *     cnum - 1, writing the clamped value back into cdsA->algo->pca;
 *   - dumps the matrix with PrintCovMatGnuPlot() to the file named by
 *     mystrcat(rootname, "_cov.mat"); when cdsA->algo->cormat == 1 it then
 *     runs CovMat2CorMat() on cdsA->CovMat itself (no copy is made here) and
 *     dumps the result under the "_cor.mat" suffix;
 *   - calls dsyevr_dest() for the eigenpairs with inclusive indices
 *     lower..upper, storing them in freshly allocated cdsA->pcamat
 *     (pcanum x vlen) and cdsA->pcavals (vlen doubles);
 *   - writes the "_pcstats.txt" and "_pcvecs.txt" tables;
 *   - for every PC, stores the eigenvector (rescaled so that its largest
 *     absolute component is 99.99) in cdsA->pdbA->avecoords->tempFactor and
 *     writes it with WriteAvePDBCoordsFile() to "<...>_pc<N>_ave.pdb";
 *   - finally multiplies each row of cdsA->pcamat by sqrt(eigenvalue).
 *
 * Any file-open, allocation or file-name-length failure prints a message
 * and terminates the program with exit(EXIT_FAILURE).
 */
void
CalcPCA(CoordsArray *cdsA)
{
    int                      i, j, namelen;
    long int                 vlen = (long int) cdsA->vlen;
    long int                 upper, lower, pcanum;
    double                 **CovMat = cdsA->CovMat;
    double                   sum, runsum;
    PDBCoords               *pdbave;
    char                     pcafile_name[256];
    FILE                    *pcavecs_fp, *pcastats_fp;
    double                   biggest, bstick;
    char                     aster;

    #if defined(__APPLE__)
        double               milliseconds;
        start_time = seconds();
    #endif

    pdbave = cdsA->pdbA->avecoords;
    pcanum = cdsA->algo->pca;

    /* never ask for more PCs than there are variables ... */
    if (pcanum > cdsA->vlen)
        pcanum = cdsA->algo->pca = cdsA->vlen;

    /* ... nor more than (number of structures - 1) */
    if (pcanum > cdsA->cnum - 1)
        pcanum = cdsA->algo->pca = cdsA->cnum - 1;

    PrintCovMatGnuPlot((const double **) CovMat, vlen, mystrcat(cdsA->algo->rootname, "_cov.mat"));

    /* convert it to a correlation matrix */
    if (cdsA->algo->cormat == 1)
    {
        CovMat2CorMat(CovMat, vlen);
        PrintCovMatGnuPlot((const double **) CovMat, vlen, mystrcat(cdsA->algo->rootname, "_cor.mat"));
    }

    /* find the total variance (trace of the matrix being decomposed) */
    sum = 0.0;
    for (i = 0; i < vlen; ++i)
       sum += CovMat[i][i];

    lower = vlen - pcanum + 1; /* careful -- inclusive indices */
    upper = vlen - 0;
    cdsA->pcamat = MatInit(pcanum, vlen);
    cdsA->pcavals = malloc(vlen * sizeof(double));

    if (cdsA->pcavals == NULL && vlen > 0)
    {
        fprintf(stderr, "\n  ERROR: Could not allocate memory for PCA eigenvalues \n");
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    /* LAPACK DSYEVR() computes selected eigenvalues, and optionally, eigenvectors of a
       real symmetric matrix.  Only the eigenpairs with indices lower..upper are requested.
       The pcanum eigenvalues are in the first pcanum elements of the w[] vector,
       ordered smallest to biggest.  Weird and horrible, but true (and in fact
       makes sense if you think about it). */
    dsyevr_dest(CovMat, vlen, lower, upper, cdsA->pcavals, cdsA->pcamat, 1e-8);

    #if defined(__APPLE__)
        end_time = seconds();
        milliseconds = (double) (end_time - start_time) / 0.001;
        printf("    PCs calculated in %.3f ms\n", milliseconds);
    #endif

    pcavecs_fp = myfopen(mystrcat(cdsA->algo->rootname, "_pcvecs.txt"), "w");
    pcastats_fp = myfopen(mystrcat(cdsA->algo->rootname, "_pcstats.txt"), "w");

    if (pcavecs_fp == NULL || pcastats_fp == NULL)
    {
        fprintf(stderr, "\n  ERROR: Could not open PCA files \n");
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    /* statistics table, largest eigenvalue first (stored last, hence the
       descending index) */
    runsum = 0.0;
    fprintf(pcastats_fp, "eigenv   bstick     raw    raw_%%  cumul_%% \n");
    for (i = pcanum - 1; i >= 0; --i)
    {
        /* partial harmonic sum 1/(vlen-i) + ... + 1/vlen, reported in the
           "bstick" column */
        bstick = 0.0;
        for (j = vlen - i; j <= vlen; ++j)
            bstick += (1.0 / (double) j);

        /* flag eigenvalues that exceed both the bstick value and 1.0 */
        if (bstick < cdsA->pcavals[i] && cdsA->pcavals[i] > 1.0)
            aster = '*';
        else
            aster = ' ';

        runsum += cdsA->pcavals[i];
        fprintf(pcastats_fp, "%-6li %8.3f %8.3f %8.3f %8.3f %c\n",
                pcanum - i, bstick, cdsA->pcavals[i],
                cdsA->pcavals[i] * 100.0 / sum, runsum * 100.0 / sum, aster);
    }
    fputc('\n', pcastats_fp);

    /* eigenvector table: one row per atom, one column per PC, each component
       multiplied by sqrt(eigenvalue) */
    fprintf(pcavecs_fp, "atom ");
    for (j = 1; j <= pcanum; ++j)
        fprintf(pcavecs_fp, "     %3d ", j);
    fputc('\n', pcavecs_fp);

    for (i = 0; i < vlen; ++i)
    {
        fprintf(pcavecs_fp, "%-4d ", i+1);

        for (j = 1; j <= pcanum; ++j)
            fprintf(pcavecs_fp, "%8.3f ", sqrt(cdsA->pcavals[pcanum - j]) * cdsA->pcamat[pcanum - j][i]);

        fputc('\n', pcavecs_fp);
    }

    CopyCoords2PDB(pdbave, cdsA->avecoords);

    for (i = 0; i < pcanum; ++i)
    {
        /* find largest absolute value in the eigenvector PCA */
        biggest = -DBL_MAX;
        for (j = 0; j < vlen; ++j)
            if (biggest < fabs(cdsA->pcamat[pcanum - 1 - i][j]))
                biggest = fabs(cdsA->pcamat[pcanum - 1 - i][j]);

        /* rescale (for rasmol really) so that the largest eigenvector component
           is = 99.99, i.e. the largest value allowable in the b-value column
           of a PDB file */
        for (j = 0; j < vlen; ++j)
            cdsA->pdbA->avecoords->tempFactor[j] = cdsA->pcamat[pcanum - 1 - i][j] * (99.99 / biggest);

        /* Build "<root>_pc<N>_ave.pdb".  snprintf bounds the write to the
           buffer, where the former strcpy/strncat chain could run past it. */
        namelen = snprintf(pcafile_name, sizeof(pcafile_name), "%s%d_ave.pdb",
                           mystrcat(cdsA->algo->rootname, "_pc"), i + 1);
        if (namelen < 0 || (size_t) namelen >= sizeof(pcafile_name))
        {
            fprintf(stderr, "\n  ERROR: PCA output file name is too long (limit %d characters) \n",
                    (int) sizeof(pcafile_name) - 1);
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }

        WriteAvePDBCoordsFile(cdsA->pdbA, pcafile_name);
    }

    /* leave the stored eigenvectors scaled by sqrt(eigenvalue) */
    for (i = 0; i < vlen; ++i)
        for (j = 0; j < pcanum; ++j)
            cdsA->pcamat[j][i] *= sqrt(cdsA->pcavals[j]);

    /* pcamat is pcanum x vlen, so it is only dumped as a vlen x vlen matrix
       when it really is square */
    if (pcanum == vlen)
        PrintCovMatGnuPlot((const double **) cdsA->pcamat, vlen, mystrcat(cdsA->algo->rootname, "_pcvecs.mat"));

    fclose(pcastats_fp);
    fclose(pcavecs_fp);
}


/*
 * Calc3NPCA
 *
 * Principal components analysis of the 3N x 3N matrix cdsA->FullCovMat,
 * where N = cdsA->vlen.
 *
 * The matrix is copied into a scratch matrix (optionally converted to a
 * correlation matrix when cdsA->algo->cormat == 1), decomposed with
 * dsyevr_opt_dest(), and the results are written to the fixed file names
 * "pcavecs.txt" and "pcastats.txt" plus one PDB file per PC.  The number of
 * PCs (cdsA->algo->pca) is clamped to cnum - 1 and written back.
 *
 * On return cdsA->pcamat and cdsA->pcavals point at the eigenvector matrix
 * (pcanum x 3N) and eigenvalue array allocated here; the pointers are simply
 * assigned, whatever they previously held is not released by this function.
 *
 * Any file-open, allocation or file-name-length failure prints a message
 * and terminates the program with exit(EXIT_FAILURE).
 */
void
Calc3NPCA(CoordsArray *cdsA)
{
    int                      i, j, namelen;
    long int                 vlen = (long int) 3 * cdsA->vlen;
    double                 **mat = NULL;
    long int                 upper, lower, pcanum;
    double                 **evecs = NULL, *evals = NULL;
    double                   sum, runsum;
    PDBCoords               *pdbave = NULL;
    char                     pcafile_name[256];
    FILE                    *pcavecs_fp = NULL, *pcastats_fp = NULL;
    double                   biggest, bstick;
    char                     aster;

    #if defined(__APPLE__)
        double               milliseconds;
        start_time = seconds();
    #endif

    pdbave = cdsA->pdbA->avecoords;
    mat = MatInit(vlen, vlen);

    if (cdsA->algo->pca > cdsA->cnum - 1)
        pcanum = cdsA->algo->pca = cdsA->cnum - 1;
    else
        pcanum = cdsA->algo->pca;

    /* copy over the covariance matrix; the single memcpy relies on both
       matrices storing their vlen * vlen elements contiguously from [0][0] */
    memcpy(&mat[0][0], &cdsA->FullCovMat[0][0], vlen * vlen * sizeof(double));

    /* convert it to a correlation matrix */
    if (cdsA->algo->cormat == 1)
        CovMat2CorMat(mat, vlen);

    /* find the total variance */
    sum = 0.0;
    for (i = 0; i < vlen; ++i)
       sum += mat[i][i];

    lower = vlen - pcanum + 1; /* careful -- inclusive indices */
    upper = vlen - 0;
    evecs = MatInit(pcanum, vlen);
    evals = malloc(vlen * sizeof(double));

    if (evals == NULL && vlen > 0)
    {
        fprintf(stderr, "\n  ERROR: Could not allocate memory for PCA eigenvalues \n");
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    /* LAPACK DSYEVR() computes selected eigenvalues, and optionally, eigenvectors of a
       real symmetric matrix.  Only the eigenpairs with indices lower..upper are requested.
       The pcanum eigenvalues are in the first pcanum elements of the w[] vector,
       ordered smallest to biggest.  Weird and horrible, but true (and in fact
       makes sense if you think about it). */
    dsyevr_opt_dest(mat, vlen, lower, upper, evals, evecs, 1e-8);

    /* evecs has only pcanum rows; dump it as a vlen x vlen matrix only when
       it is square, the same guard CalcPCA() applies to its own dump.
       NOTE(review): assumes PrintCovMatGnuPlot() reads dim x dim entries --
       confirm against its definition. */
    if (pcanum == vlen)
        PrintCovMatGnuPlot((const double **) evecs, vlen, "evecs.mat");

    #if defined(__APPLE__)
        end_time = seconds();
        milliseconds = (double) (end_time - start_time) / 0.001;
        printf("  PCs calculated in %.3f ms (Apple CoreServices)\n", milliseconds);
    #endif

    pcavecs_fp = fopen("pcavecs.txt", "w");
    pcastats_fp = fopen("pcastats.txt", "w");
    if (pcavecs_fp == NULL || pcastats_fp == NULL)
    {
        fprintf(stderr, "\n  ERROR1000: Could not open PCA files \n");
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    /* statistics table, largest eigenvalue first */
    runsum = 0.0;
    fprintf(pcastats_fp, "eigenv   bstick     raw    raw_%%  cumul_%% \n");
    for (i = pcanum - 1; i >= 0; --i)
    {
        /* partial harmonic sum 1/(vlen-i) + ... + 1/vlen */
        bstick = 0.0;
        for (j = vlen - i; j <= vlen; ++j)
            bstick += (1.0 / (double) j);

        if (bstick < evals[i] && evals[i] > 1.0)
            aster = '*';
        else
            aster = ' ';

        runsum += evals[i];
        fprintf(pcastats_fp, "%-6li %8.3f %8.3f %8.3f %8.3f %c\n",
                pcanum - i, bstick, evals[i], evals[i] * 100.0 / sum, runsum * 100.0 / sum, aster);
    }
    fputc('\n', pcastats_fp);

    /* eigenvector table: one row per variable, one column per PC */
    fprintf(pcavecs_fp, "atom ");
    for (j = 1; j <= pcanum; ++j)
        fprintf(pcavecs_fp, "     %3d ", j);
    fputc('\n', pcavecs_fp);

    for (i = 0; i < vlen; ++i)
    {
        fprintf(pcavecs_fp, "%-4d ", i+1);

        for (j = 1; j <= pcanum; ++j)
            fprintf(pcavecs_fp, "%8.3f ", sqrt(evals[pcanum - j]) * evecs[pcanum - j][i]);

        fputc('\n', pcavecs_fp);
    }

    CopyCoords2PDB(pdbave, cdsA->avecoords);

    for (i = 0; i < pcanum; ++i)
    {
        /* find largest absolute value in the eigenvector PCA */
        biggest = -DBL_MAX;
        for (j = 0; j < vlen; ++j)
            if (biggest < fabs(evecs[pcanum - 1 - i][j]))
                biggest = fabs(evecs[pcanum - 1 - i][j]);

        /* rescale (for rasmol really) so that the largest eigenvector component
           is = 99.99, i.e. the largest value allowable in the b-value column
           of a PDB file */
        /* NOTE(review): j runs to 3 * cdsA->vlen here, while CalcPCA() indexes
           the same tempFactor array only up to cdsA->vlen.  If tempFactor holds
           one entry per atom this writes past its end -- confirm the intended
           per-atom value before changing it. */
        for (j = 0; j < vlen; ++j)
            cdsA->pdbA->avecoords->tempFactor[j] = evecs[pcanum - 1 - i][j] * (99.99 / biggest);

        /* Build "<root>_pc<N>".  The name used to be cut to 11 characters,
           which dropped the "_pc" tag (and made different roots collide) for
           any root longer than 8 characters; it is now limited only by the
           size of the buffer. */
        namelen = snprintf(pcafile_name, sizeof(pcafile_name), "%s%d",
                           mystrcat(cdsA->algo->rootname, "_pc"), i + 1);
        if (namelen < 0 || (size_t) namelen >= sizeof(pcafile_name))
        {
            fprintf(stderr, "\n  ERROR: PCA output file name is too long (limit %d characters) \n",
                    (int) sizeof(pcafile_name) - 1);
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }

        WriteAvePDBCoordsFile(cdsA->pdbA, pcafile_name);
    }

    cdsA->pcamat = evecs; /* DLT debug -- this should be copied or dealt with better */
    cdsA->pcavals = evals;

    fclose(pcastats_fp);
    fclose(pcavecs_fp);

    MatDestroy(&mat);
}


/*
 * CalcStructPCA
 *
 * Principal components analysis over structures (models): decomposes the
 * cnum x cnum matrix cdsA->SCovMat, where cnum = cdsA->cnum.
 *
 * cdsA->SCovMat is allocated here if it is NULL and then filled by
 * CalcStructCovMat().  A copy of it (converted to a correlation matrix when
 * cdsA->algo->cormat == 1) is handed to LAPACK DSYEV, which overwrites the
 * copy with the eigenvectors and fills w[] with the eigenvalues.  The
 * results are written to the files named by
 * mystrcat(rootname, "_struct_pcstats.txt") and
 * mystrcat(rootname, "_struct_pcvecs.txt"), largest eigenvalue first.
 *
 * On return cdsA->modpcamat and cdsA->modpcavals point at the eigenvector
 * matrix and eigenvalue array allocated here.
 *
 * Allocation failures, a non-zero DSYEV info code, or a failure to open the
 * output files print a message and terminate with exit(EXIT_FAILURE).
 */
void
CalcStructPCA(CoordsArray *cdsA)
{
    int                      i, j;
    long int                 cnum = (long int) cdsA->cnum;
    double                 **mat = MatInit(cnum, cnum);
    long int                 info = 0;
    char                     jobz_v = 'V';
    char                     uplo_u = 'U';
    long int                 lwork, negone = -1;
    double                  *work = NULL, *w = NULL;
    double                   sum, runsum;
    FILE                    *pcavecs_fp = NULL, *pcastats_fp = NULL;

    if (cdsA->SCovMat == NULL)
        cdsA->SCovMat = MatInit(cnum, cnum);

    CalcStructCovMat(cdsA);

    /* copy over the covariance matrix; the single memcpy relies on both
       matrices storing their cnum * cnum elements contiguously from [0][0] */
    memcpy(&mat[0][0], &cdsA->SCovMat[0][0], cnum * cnum * sizeof(double));

    /* convert it to a correlation matrix */
    if (cdsA->algo->cormat == 1)
        CovMat2CorMat(mat, cnum);

    /* find the total variance */
    sum = 0.0;
    for (i = 0; i < cnum; ++i)
       sum += mat[i][i];

    w = malloc(cnum * sizeof(double));
    work = malloc(sizeof(double));

    if ((w == NULL && cnum > 0) || work == NULL)
    {
        fprintf(stderr, "\n  ERROR: Could not allocate memory for structure PCA \n");
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    /* first DSYEV() call with lwork = -1 is a workspace query: it only
       stores the optimal workspace size in work[0] */
    DSYEV(&jobz_v, &uplo_u, &cnum, &mat[0][0], &cnum, w, work, &negone, &info);

    /* only trust work[0] when the query itself succeeded; a failed query
       falls through to the info checks below */
    if (info == 0)
    {
        lwork = (long int) work[0];
        free(work);
        work = malloc(lwork * sizeof(double));

        if (work == NULL)
        {
            fprintf(stderr, "\n  ERROR: Could not allocate memory for structure PCA \n");
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }

        DSYEV(&jobz_v, &uplo_u, &cnum, &mat[0][0], &cnum, w, work, &lwork, &info);
    }

    if (info > 0)
    {
        fprintf(stderr, "\n  ERROR117: LAPACK DSYEV() complete eigenvalue/eigenvector solving choked; \n");
        fprintf(stderr, "            the algorithm failed to converge; %ld \n", info);
        fprintf(stderr, "            off-diagonal elements of an intermediate tridiagonal \n");
        fprintf(stderr, "            form did not converge to zero \n");
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }
    else if (info < 0)
    {
        /* LAPACK reports a bad i-th argument as info = -i, so negate it to
           print the argument position */
        fprintf(stderr, "\n  ERROR118: the %ld-th argument had an illegal value \n", -info);
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    pcavecs_fp = myfopen(mystrcat(cdsA->algo->rootname, "_struct_pcvecs.txt"), "w");
    pcastats_fp = myfopen(mystrcat(cdsA->algo->rootname, "_struct_pcstats.txt"), "w");
    if (pcavecs_fp == NULL || pcastats_fp == NULL)
    {
        fprintf(stderr, "\n  ERROR: Could not open PCA files \n");
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    /* statistics table; w[] is walked from its last element to its first so
       that the last-stored eigenvalue is listed as number 1 */
    runsum = 0.0;
    fprintf(pcastats_fp, "eigenv   raw    raw_%%  cumul_%% \n");
    for (i = cnum - 1; i >= 0; --i)
    {
        runsum += w[i];
        fprintf(pcastats_fp, "%-6li %8.3f %8.3f %8.3f\n",
                cnum - i, w[i], w[i] * 100.0 / sum, runsum * 100.0 / sum);
    }
    fputc('\n', pcastats_fp);

    /* eigenvector table: one row per model, one column per PC, each
       component multiplied by sqrt(eigenvalue) */
    fprintf(pcavecs_fp, "model ");
    for (j = 1; j <= cnum; ++j)
        fprintf(pcavecs_fp, "    %3d ", j);
    fputc('\n', pcavecs_fp);

    for (i = 0; i < cnum; ++i)
    {
        fprintf(pcavecs_fp, "%-4d ", i+1);

        for (j = 1; j <= cnum; ++j)
            fprintf(pcavecs_fp, "%8.3f ", sqrt(w[cnum - j]) * mat[cnum - j][i]);

        fputc('\n', pcavecs_fp);
    }

    /* hand the results over to cdsA; only the LAPACK workspace is freed here */
    cdsA->modpcamat = mat;
    cdsA->modpcavals = w;

    fclose(pcastats_fp);
    fclose(pcavecs_fp);
    free(work);
}


/*
 * WritePCAFile
 *
 * For each of the cdsA->algo->pca principal components in cdsA->pcamat,
 * writes a multi-model PDB file "<outfile_root>_pc<N>.pdb" in which the
 * B-factor (tempFactor) column of every model in parray carries that PC.
 *
 * parray       full PDB coordinate sets; their tempFactor arrays are
 *              overwritten
 * cdsA         supplies the PCs (pcamat), the selected atoms (coords[0]) and
 *              the algo flags pca, atoms and cormat; PCADestroy(cdsA) is
 *              called before returning
 * outfile_root prefix of every output file name
 *
 * Each PC is rescaled so that its largest absolute component becomes 99.99.
 * When cdsA->algo->atoms == 0 every atom of parray whose chainID and resSeq
 * match selected atom j receives PC component j; otherwise component j is
 * written straight to atom j.
 *
 * A file that cannot be opened, or a file name that does not fit the name
 * buffer, prints a message and terminates with exit(EXIT_FAILURE).
 */
void
WritePCAFile(PDBCoordsArray *parray, CoordsArray *cdsA, const char *outfile_root)
{
    FILE           *pdbfile = NULL;
    char            pcafile_name[256];
    int             i, j, k, m, namelen;
    double          biggest;
    const double  **mat = (const double **) cdsA->pcamat;
    const double   *pc = NULL;
    const int       cvlen = cdsA->vlen;
    const int       pvlen = parray->vlen;
    const int       pcanum = cdsA->algo->pca;
    double          tempFactor;
    /* Chosen as a whole string.  The previous strncpy(.., "covariance", 10)
       over "correlation" left the trailing 'n' behind and printed
       "covariancen". */
    const char     *covcor_str = (cdsA->algo->cormat == 0) ? "covariance" : "correlation";

    for (i = 0; i < pcanum; ++i)
    {
        /* PCs are written in reverse row order of pcamat */
        pc = mat[pcanum - 1 - i];

        /* find largest absolute value in the eigenvector PCA */
        biggest = -DBL_MAX;
        for (j = 0; j < cvlen; ++j)
        {
            if (biggest < fabs(pc[j]))
                biggest = fabs(pc[j]);
        }

        /* rescale (for rasmol really) so that the largest eigenvector component
           is = 99.99, i.e. the largest value allowable in the b-value column
           of a PDB file; from here on 'biggest' is the scale factor */
        biggest = 99.99 / biggest;

        if (cdsA->algo->atoms == 0)
        {
            m = 0;
            for (j = 0; j < cvlen; ++j)
            {
                /* skip initial PDBCoords that may have been selected out;
                   m is bounds-checked before every array access */ /* DLT debug fix */
                while (m < pvlen &&
                       (cdsA->coords[0]->chainID[j] != parray->coords[0]->chainID[m] ||
                        cdsA->coords[0]->resSeq[j] != parray->coords[0]->resSeq[m]))
                {
                    ++m;
                }

                /* while they match, set the B-factor according to the given PC */
                while (m < pvlen &&
                       cdsA->coords[0]->chainID[j] == parray->coords[0]->chainID[m] &&
                       cdsA->coords[0]->resSeq[j] == parray->coords[0]->resSeq[m])
                {
                    tempFactor = pc[j] * biggest;

                    for (k = 0; k < parray->cnum; ++k)
                        parray->coords[k]->tempFactor[m] = tempFactor;

                    ++m;
                }

                /* ran off the end of the PDB atoms: nothing left to match */
                if (m >= pvlen)
                    break;
            }
        }
        else
        {
            for (j = 0; j < cvlen; ++j)
            {
                tempFactor = pc[j] * biggest;

                for (k = 0; k < parray->cnum; ++k)
                    parray->coords[k]->tempFactor[j] = tempFactor;
            }
        }

        /* snprintf keeps the name inside the buffer whatever the length of
           outfile_root */
        namelen = snprintf(pcafile_name, sizeof(pcafile_name), "%s_pc%d.pdb", outfile_root, i + 1);
        if (namelen < 0 || (size_t) namelen >= sizeof(pcafile_name))
        {
            fprintf(stderr, "\n  ERROR: PCA output file name is too long (limit %d characters) \n",
                    (int) sizeof(pcafile_name) - 1);
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }

        pdbfile = fopen(pcafile_name, "w");
        if (pdbfile == NULL)
        {
            perror("\n  ERROR");
            fprintf(stderr,
                    "\n  ERROR99: could not open file '%s' for writing. \n", pcafile_name);
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }

        fprintf(pdbfile, "REMARK ===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-=\n");
        fprintf(pdbfile, "REMARK + File made by Douglas Theobald's THESEUS program\n");
        fprintf(pdbfile, "REMARK + Multiple maximum likelihood superpositioning\n");
        fprintf(pdbfile, "REMARK + Principal component %3d of %s matrix in B-factor column\n", i+1, covcor_str);
        fprintf(pdbfile, "REMARK + All B-factors scaled by %12.3f\n", biggest);
        fprintf(pdbfile, "REMARK + dtheobald@brandeis.edu\n");
        fprintf(pdbfile, "REMARK =-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===\n");

        for (j = 0; j < parray->cnum; ++j)
        {
            fprintf(pdbfile, "MODEL %8d\n", j+1);
            PrintPDBCoords(pdbfile, parray->coords[j]);
            fprintf(pdbfile, "ENDMDL\n");
        }
        fprintf(pdbfile, "END\n");

        fclose(pdbfile);
    }

    PCADestroy(cdsA);
}


/*
 * WriteModelPCAFile
 *
 * For each of the cdsA->cnum structure-wise principal components in
 * cdsA->modpcamat, writes a multi-model PDB file
 * "<outfile_root>_mod_pca<N>.pdb".  Within one file every atom of model j
 * carries component j of the PC in its B-factor (tempFactor) column, after
 * the PC has been rescaled so its largest absolute component is 99.99.
 *
 * parray       PDB coordinate sets; their tempFactor arrays are overwritten
 * cdsA         supplies modpcamat, cnum and the cormat flag; PCADestroy(cdsA)
 *              is called before returning
 * outfile_root prefix of every output file name
 *
 * A file that cannot be opened, or a file name that does not fit the name
 * buffer, prints a message and terminates with exit(EXIT_FAILURE).
 */
void
WriteModelPCAFile(PDBCoordsArray *parray, CoordsArray *cdsA, const char *outfile_root)
{
    FILE           *pdbfile = NULL;
    char            pcafile_name[256];
    int             i, j, k, namelen;
    double          biggest;
    const double  **mat = (const double **) cdsA->modpcamat;
    const double   *pc = NULL;
    const int       pvlen = parray->vlen;
    const int       cnum = cdsA->cnum;
    const int       pcanum = cdsA->cnum;  /* one PC per structure */
    double          tempFactor;
    /* Chosen as a whole string.  The previous strncpy(.., "covariance", 10)
       over "correlation" left the trailing 'n' behind and printed
       "covariancen". */
    const char     *covcor_str = (cdsA->algo->cormat == 0) ? "covariance" : "correlation";

    for (i = 0; i < pcanum; ++i)
    {
        /* PCs are written in reverse row order of modpcamat */
        pc = mat[pcanum - 1 - i];

        /* find largest absolute value in the eigenvector PCA */
        biggest = -DBL_MAX;
        for (j = 0; j < cnum; ++j)
        {
            if (biggest < fabs(pc[j]))
                biggest = fabs(pc[j]);
        }

        /* rescale (for rasmol really) so that the largest eigenvector component
           is = 99.99, i.e. the largest value allowable in the b-value column
           of a PDB file; from here on 'biggest' is the scale factor */
        biggest = 99.99 / biggest;

        /* model j gets the same value on all of its atoms */
        for (j = 0; j < cnum; ++j)
        {
            tempFactor = pc[j] * biggest;

            for (k = 0; k < pvlen; ++k)
                parray->coords[j]->tempFactor[k] = tempFactor;
        }

        /* snprintf keeps the name inside the buffer whatever the length of
           outfile_root */
        namelen = snprintf(pcafile_name, sizeof(pcafile_name), "%s_mod_pca%d.pdb", outfile_root, i + 1);
        if (namelen < 0 || (size_t) namelen >= sizeof(pcafile_name))
        {
            fprintf(stderr, "\n  ERROR: PCA output file name is too long (limit %d characters) \n",
                    (int) sizeof(pcafile_name) - 1);
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }

        pdbfile = fopen(pcafile_name, "w");
        if (pdbfile == NULL)
        {
            perror("\n  ERROR");
            fprintf(stderr,
                    "\n  ERROR99: could not open file '%s' for writing. \n\n", pcafile_name);
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }

        fprintf(pdbfile, "REMARK ===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-=\n");
        fprintf(pdbfile, "REMARK + File made by Douglas Theobald's THESEUS program\n");
        fprintf(pdbfile, "REMARK + Multiple maximum likelihood superpositioning\n");
        fprintf(pdbfile, "REMARK + Principal component %3d of %s matrix in B-factor column\n", i+1, covcor_str);
        fprintf(pdbfile, "REMARK + All B-factors scaled by %12.3f\n", biggest);
        fprintf(pdbfile, "REMARK + dtheobald@brandeis.edu\n");
        fprintf(pdbfile, "REMARK =-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===\n");

        for (j = 0; j < parray->cnum; ++j)
        {
            fprintf(pdbfile, "MODEL %8d\n", j+1);
            PrintPDBCoords(pdbfile, parray->coords[j]);
            fprintf(pdbfile, "ENDMDL\n");
        }
        fprintf(pdbfile, "END\n");

        fclose(pdbfile);
    }

    PCADestroy(cdsA);
}


/*
 * WritePCAMorphFile
 *
 * Writes out PDB format files with the PC eigenvector in the coordinate field.
 * Used by CNS/XPLOR in my morph script to incrementally add the PC to the
 * median PDB model.
 *
 * One file "<outfile_root>_pca<N>_morph.pdb" is written per principal
 * component.  The atom records are a copy of parray->coords[0] whose x/y/z
 * fields start at zero and receive the PC's three components for every atom
 * that matches a selected atom of cdsA->coords[0] by resName (first three
 * characters), chainID and resSeq.
 *
 * Side effects on cdsA: algo->pca is clamped to cnum - 1, CalcFullCovMat()
 * is called, the rows of cdsA->pcamat are rescaled in place, and
 * PCADestroy(cdsA) is called before returning.
 *
 * A file that cannot be opened, or a file name that does not fit the name
 * buffer, prints a message and terminates with exit(EXIT_FAILURE).
 */
void
WritePCAMorphFile(PDBCoordsArray *parray, CoordsArray *cdsA, const char *outfile_root)
{
    FILE           *pdbfile = NULL;
    char            pcafile_name[256];
    int             i, j, m, pcanum, namelen;
    double        **vecs = cdsA->pcamat;
    double         *vals = cdsA->pcavals;
    const int       cvlen = 3 * cdsA->vlen;
    const int       pvlen = parray->vlen;
    PDBCoords      *pcacoords;
    /* Chosen as a whole string.  The previous strncpy(.., "covariance", 10)
       over "correlation" left the trailing 'n' behind and printed
       "covariancen". */
    const char     *covcor_str = (cdsA->algo->cormat == 0) ? "covariance" : "correlation";

    /* template atom records: everything from the first PDB model except the
       coordinates, which start out as zero */
    pcacoords = PDBCoordsInit();
    PDBCoordsAlloc(pcacoords, pvlen);
    PDBCoordsCopyAll(pcacoords, parray->coords[0]);

    memset(pcacoords->x, 0, pvlen * sizeof(double));
    memset(pcacoords->y, 0, pvlen * sizeof(double));
    memset(pcacoords->z, 0, pvlen * sizeof(double));

    if (cdsA->algo->pca > cdsA->cnum - 1)
        pcanum = cdsA->algo->pca = cdsA->cnum - 1;
    else
        pcanum = cdsA->algo->pca;

    CalcFullCovMat(cdsA);

    /* Rescale each PC in place.  For a correlation-matrix PCA each component
       is multiplied by sqrt of the corresponding variance (the diagonal of
       FullCovMat) to get back into std deviation space; for a
       covariance-matrix PCA each PC is multiplied by sqrt of its eigenvalue. */
    if (cdsA->algo->cormat == 1)
    {
        for (i = 0; i < pcanum; ++i)
            for (j = 0; j < cvlen; ++j)
                vecs[i][j] *= sqrt(cdsA->FullCovMat[j][j]);
    }
    else if (cdsA->algo->cormat == 0)
    {
        for (i = 0; i < pcanum; ++i)
            for (j = 0; j < cvlen; ++j)
                vecs[i][j] *= sqrt(vals[i]);
    }

    for (i = 0; i < pcanum; ++i)
    {
        m = 0;
        for (j = 0; j < cvlen; j += 3)
        {
            /* skip initial PDBCoords that may have been selected out; the
               m < pvlen test stops the scan at the end of the PDB atoms
               instead of reading past the arrays when nothing matches */
            while (m < pvlen &&
                   (strncmp(cdsA->coords[0]->resName[j/3], parray->coords[0]->resName[m], 3) != 0 ||
                    cdsA->coords[0]->chainID[j/3] != parray->coords[0]->chainID[m] ||
                    cdsA->coords[0]->resSeq[j/3] != parray->coords[0]->resSeq[m]))
            {
                ++m;
            }

            /* while they match, copy the x, y, z components of this PC */
            while (m < pvlen &&
                   strncmp(cdsA->coords[0]->resName[j/3], parray->coords[0]->resName[m], 3) == 0 &&
                   cdsA->coords[0]->chainID[j/3] == parray->coords[0]->chainID[m] &&
                   cdsA->coords[0]->resSeq[j/3] == parray->coords[0]->resSeq[m])
            {
                pcacoords->x[m] = vecs[pcanum - 1 - i][j+0];
                pcacoords->y[m] = vecs[pcanum - 1 - i][j+1];
                pcacoords->z[m] = vecs[pcanum - 1 - i][j+2];

                ++m;
            }

            if (m >= pvlen)
                break;
        }

        /* snprintf keeps the name inside the buffer whatever the length of
           outfile_root */
        namelen = snprintf(pcafile_name, sizeof(pcafile_name), "%s_pca%d_morph.pdb", outfile_root, i + 1);
        if (namelen < 0 || (size_t) namelen >= sizeof(pcafile_name))
        {
            fprintf(stderr, "\n  ERROR: PCA output file name is too long (limit %d characters) \n",
                    (int) sizeof(pcafile_name) - 1);
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }

        pdbfile = fopen(pcafile_name, "w");
        if (pdbfile == NULL)
        {
            perror("\n  ERROR");
            fprintf(stderr,
                    "\n  ERROR99: could not open file '%s' for writing. \n", pcafile_name);
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }

        fprintf(pdbfile, "REMARK ===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-=\n");
        fprintf(pdbfile, "REMARK + File made by Douglas Theobald's THESEUS program \n");
        fprintf(pdbfile, "REMARK + Multiple maximum likelihood superpositioning \n");
        fprintf(pdbfile, "REMARK + Principal component #%d of %s matrix, one SD in coords fields \n", i+1, covcor_str);
        fprintf(pdbfile, "REMARK + dtheobald@brandeis.edu \n");
        fprintf(pdbfile, "REMARK =-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===\n");
        PrintPDBCoords(pdbfile, pcacoords);
        fprintf(pdbfile, "END\n");

        fclose(pdbfile);
    }

    PDBCoordsDestroy(pcacoords);
    PCADestroy(cdsA);
}


/*
 * WritePCAProjections
 *
 * For every principal component and for every displacement c from -3.0 to
 * +3.0 in steps of 0.2, writes a PDB file
 * "<outfile_root>_pca<N>_<c>.pdb" (c printed with "%+3.1f") whose
 * coordinates are those of model cdsA->cnum / 2 of parray displaced by
 * c times the rescaled PC.  Only atoms that match a selected atom of
 * cdsA->coords[0] by resName (first three characters), chainID and resSeq
 * are displaced; the others keep the coordinates copied from
 * parray->coords[0].
 *
 * Side effects on cdsA: algo->pca is clamped to cnum - 1, CalcFullCovMat()
 * is called, the rows of cdsA->pcamat are rescaled in place, and
 * PCADestroy(cdsA) is called before returning.
 *
 * A file that cannot be opened, or a file name that does not fit the name
 * buffer, prints a message and terminates with exit(EXIT_FAILURE).
 */
void
WritePCAProjections(PDBCoordsArray *parray, CoordsArray *cdsA, const char *outfile_root)
{
    FILE           *pdbfile = NULL;
    char            pcafile_name[256];
    int             i, j, m, pcanum, step, namelen;
    double          c;
    double        **vecs = cdsA->pcamat;
    double         *vals = cdsA->pcavals;
    const int       cvlen = 3 * cdsA->vlen;
    const int       pvlen = parray->vlen;
    PDBCoords      *pcacoords;

    pcacoords = PDBCoordsInit();
    PDBCoordsAlloc(pcacoords, pvlen);
    PDBCoordsCopyAll(pcacoords, parray->coords[0]);

    if (cdsA->algo->pca > cdsA->cnum - 1)
        pcanum = cdsA->algo->pca = cdsA->cnum - 1;
    else
        pcanum = cdsA->algo->pca;

    CalcFullCovMat(cdsA);

    /* Multiply each PCA by the sqrt of the corresponding eigenvalue.
       If correlation matrix was used, we need to get back into std deviation space,
       so multiply by the sqrt of the corresponding variance as well */
    if (cdsA->algo->cormat == 1)
    {
        for (i = 0; i < pcanum; ++i)
            for (j = 0; j < cvlen; ++j)
                vecs[i][j] *= sqrt(cdsA->FullCovMat[j][j] * vals[i]);
    }
    else if (cdsA->algo->cormat == 0)
    {
        for (i = 0; i < pcanum; ++i)
            for (j = 0; j < cvlen; ++j)
                vecs[i][j] *= sqrt(vals[i]);
    }

    for (i = 0; i < pcanum; ++i)
    {
        /* Integer step counter: c = -3.0, -2.8, ..., +3.0.  Adding 0.2 to a
           double repeatedly accumulates rounding error, which can decide
           whether the final step runs and can give the centre file a
           negative-zero label. */
        for (step = -15; step <= 15; ++step)
        {
            c = step / 5.0;

            m = 0;
            for (j = 0; j < cvlen; j += 3)
            {
                /* skip initial PDBCoords that may have been selected out; the
                   m < pvlen test stops the scan at the end of the PDB atoms
                   instead of reading past the arrays when nothing matches */
                while (m < pvlen &&
                       (strncmp(cdsA->coords[0]->resName[j/3], parray->coords[0]->resName[m], 3) != 0 ||
                        cdsA->coords[0]->chainID[j/3] != parray->coords[0]->chainID[m] ||
                        cdsA->coords[0]->resSeq[j/3] != parray->coords[0]->resSeq[m]))
                {
                    ++m;
                }

                /* while they match, displace the middle model along this PC */
                while (m < pvlen &&
                       strncmp(cdsA->coords[0]->resName[j/3], parray->coords[0]->resName[m], 3) == 0 &&
                       cdsA->coords[0]->chainID[j/3] == parray->coords[0]->chainID[m] &&
                       cdsA->coords[0]->resSeq[j/3] == parray->coords[0]->resSeq[m])
                {
                    pcacoords->x[m] = parray->coords[cdsA->cnum/2]->x[m] +
                                      c * vecs[pcanum - 1 - i][j+0];
                    pcacoords->y[m] = parray->coords[cdsA->cnum/2]->y[m] +
                                      c * vecs[pcanum - 1 - i][j+1];
                    pcacoords->z[m] = parray->coords[cdsA->cnum/2]->z[m] +
                                      c * vecs[pcanum - 1 - i][j+2];

                    ++m;
                }

                if (m >= pvlen)
                    break;
            }

            /* One bounded snprintf builds the whole name.  The displacement
               tag "_+0.2" is five characters plus the terminator, one byte
               more than the char[5] it used to be sprintf'ed into. */
            namelen = snprintf(pcafile_name, sizeof(pcafile_name), "%s_pca%d_%+3.1f.pdb",
                               outfile_root, i + 1, c);
            if (namelen < 0 || (size_t) namelen >= sizeof(pcafile_name))
            {
                fprintf(stderr, "\n  ERROR: PCA output file name is too long (limit %d characters) \n",
                        (int) sizeof(pcafile_name) - 1);
                PrintTheseusTag();
                exit(EXIT_FAILURE);
            }

            pdbfile = fopen(pcafile_name, "w");
            if (pdbfile == NULL)
            {
                perror("\n  ERROR");
                fprintf(stderr,
                        "\n  ERROR99: could not open file '%s' for writing. \n", pcafile_name);
                PrintTheseusTag();
                exit(EXIT_FAILURE);
            }

            fprintf(pdbfile, "REMARK ===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-=\n");
            fprintf(pdbfile, "REMARK + File made by Douglas Theobald's THESEUS program \n");
            fprintf(pdbfile, "REMARK + Multiple maximum likelihood superpositioning \n");
            fprintf(pdbfile, "REMARK + Principal component %d of correlation matrix in B-factor column \n", i+1);
            fprintf(pdbfile, "REMARK + dtheobald@brandeis.edu \n");
            fprintf(pdbfile, "REMARK =-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===\n");
            PrintPDBCoords(pdbfile, pcacoords);
            fprintf(pdbfile, "END\n");

            fclose(pdbfile);
        }
    }

    PDBCoordsDestroy(pcacoords);
    PCADestroy(cdsA);
}
