/*
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Library General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <math.h>
#include <glib.h>
#ifdef MMX_DETECTION
#include <mmx.h>
#endif


#include "config.h"
#include "compute.h"
#include "infconfig.h"


/* Integer (x, y) pair; used as the loop position while walking the
 * vector-field grid. */
typedef struct t_coord {
    int x;
    int y;
} t_coord;

/* Floating-point (x, y) pair; fct() takes and returns points of this type. */
typedef struct t_complex {
    float x;
    float y;
} t_complex;


/* Screen geometry: filled in by compute_init(); compute_resize() updates
 * the width and height. */
static t_screen_parameters scr_par;

/* Pair of pixel buffers.  The surface routines read from surface1, write
 * the new frame into surface2 and then swap the two pointers. */
static byte *surface1;
static byte *surface2;



/* Rotate p about the origin by `angle` radians. */
static inline t_complex fct_rotate (t_complex p, float angle)
{
    const float co = cos (angle);
    const float si = sin (angle);
    t_complex r;

    r.x = co * p.x - si * p.y;
    r.y = si * p.x + co * p.y;
    return r;
}

/* Multiply both components of p by `fact`. */
static inline t_complex fct_scale (t_complex p, float fact)
{
    p.x = p.x * fact;
    p.y = p.y * fact;
    return p;
}

/* Scale factor 1 +/- (|p| - circle_size) / speed; the sign of the second
 * term is negative when `negate` is non-zero. */
static inline float fct_radial_fact (t_complex p, float circle_size,
                                     float speed, int negate)
{
    double dist = sqrt (p.x * p.x + p.y * p.y) - circle_size;

    if (negate)
        dist = -dist;
    return dist / speed + 1;
}

/*
 * Apply deformation number `n` to the screen point `a` and return the
 * transformed point, clamped to [0, width-1] x [0, height-1].
 *
 *   a       point in screen coordinates
 *   n       deformation selector; 0..6 are implemented, any other value
 *           leaves the point where it is
 *   p1, p2  tuning parameters (0-4); p1 alters the rotation angle for
 *           n = 0 and 1, p2 alters the radial speed for n = 0, 1 and 2
 *
 * The transformation is computed relative to the screen centre
 * (scr_par.width / 2, scr_par.height / 2, integer division).
 */
static inline t_complex fct (t_complex a, int n, int p1, int p2)   /* p1 and p2: 0-4 */
{
    t_complex b;
    float fact;

    a.x -= scr_par.width/2;
    a.y -= scr_par.height/2;

    switch (n) {
    case 0:
        b = fct_rotate (a, 0.025 * (p1 - 2) + 0.002);
        fact = fct_radial_fact (b, scr_par.height * 0.25, 2000 + p2 * 500, 1);
        b = fct_scale (b, fact);
        break;
    case 1:
        b = fct_rotate (a, 0.015 * (p1 - 2) + 0.002);
        fact = fct_radial_fact (b, scr_par.height * 0.45, 4000 + p2 * 1000, 0);
        b = fct_scale (b, fact);
        break;
    case 2:
        b = fct_rotate (a, 0.002);
        fact = fct_radial_fact (b, scr_par.height * 0.25, 400 + p2 * 100, 1);
        b = fct_scale (b, fact);
        break;
    case 3:
        /* The rotation angle oscillates with the distance from the centre. */
        b = fct_rotate (a, (sin (sqrt (a.x * a.x + a.y * a.y) / 20) / 20) + 0.002);
        fact = fct_radial_fact (b, scr_par.height * 0.25, 4000, 1);
        b = fct_scale (b, fact);
        break;
    case 4:
        /* The radial speed oscillates with the (pre-rotation) distance
         * from the centre. */
        b = fct_rotate (a, 0.002);
        fact = fct_radial_fact (b, scr_par.height * 0.25,
                                sin (sqrt (a.x * a.x + a.y * a.y) / 5) * 3000 + 4000, 1);
        b = fct_scale (b, fact);
        break;
    case 5:
        /* Plain 2% scaling about the centre, no rotation. */
        b.x = a.x * 1.02;
        b.y = a.y * 1.02;
        break;
    case 6:
        /* The scale factor depends on the polar angle of the point; the
         * small constant avoids a division by zero when a.y is 0. */
        fact = 1 + cos (atan (a.x / (a.y + 0.00001)) * 6) * 0.02;
        b = fct_scale (fct_rotate (a, 0.002), fact);
        break;
    default:
        /* Unknown selector: identity mapping, so that b is never read
         * uninitialised. */
        b = a;
        break;
    }

    b.x += scr_par.width/2;
    b.y += scr_par.height/2;

    /* Keep the result inside the screen. */
    if (b.x < 0)
        b.x = 0;
    if (b.y < 0)
        b.y = 0;
    if (b.x > scr_par.width - 1)
        b.x = scr_par.width - 1;
    if (b.y > scr_par.height - 1)
        b.y = scr_par.height - 1;
    return b;
}


/*
 * Fill rows [debut, debut + step) -- cut off at the field height -- of
 * plane `g` of the vector field, using deformation `f` with parameters
 * p1 and p2.
 *
 * For every destination pixel the entry stores:
 *   coord   integer source position, x in the upper 16 bits and y in the
 *           lower 16 bits
 *   weight  four 8-bit interpolation weights packed as w1|w2|w3|w4 from
 *           the most to the least significant byte; they add up to
 *           prop_transmitted (249)
 */
static inline void compute_generate_sector (int g, int f, int p1, int p2,
                                            int debut, int step, vector_field_t *vector_field)
{
    const int width = vector_field->width;
    const int height = vector_field->height;
    const int prop_transmitted = 249;
    const int plane_start = g * width * height;
    t_interpol *vector = vector_field->vector;
    int last_row = debut + step;
    t_coord pos;

    if (last_row > height)
        last_row = height;

    for (pos.y = debut; pos.y < last_row; pos.y++) {
        for (pos.x = 0; pos.x < width; pos.x++) {
            const int cell = plane_start + pos.x + pos.y * width;
            t_complex src;
            float frac_y;
            int right_w, left_w;
            unsigned int src_x, src_y;
            unsigned int w1, w2, w3, w4;

            src.x = (float) pos.x;
            src.y = (float) pos.y;
            src = fct (src, f, p1, p2);

            /* Integer part: which source pixel to fetch. */
            src_x = (int) (src.x);
            src_y = (int) (src.y);
            vector[cell].coord = (src_x << 16) | src_y;

            /* Fractional part: split prop_transmitted between the source
             * pixel and its right / lower / lower-right neighbours. */
            frac_y = src.y - floor (src.y);
            right_w = (int) ((src.x - floor (src.x)) * prop_transmitted);
            left_w = prop_transmitted - right_w;
            w4 = (int) (frac_y * right_w);
            w2 = right_w - w4;
            w3 = (int) (frac_y * left_w);
            w1 = left_w - w3;
            vector[cell].weight =
                (w1 << 24) | (w2 << 16) | (w3 << 8) | w4;
        }
    }
}



/*--------------------------------*/
/*        Public functions        */
/*--------------------------------*/


void compute_init (void)
{
    scr_par.width = config_get_xres();
    scr_par.height = config_get_yres();
    scr_par.scale = config_get_sres();

    surface1 = (byte*) g_malloc (scr_par.width * scr_par.height);
    surface2 = (byte*) g_malloc (scr_par.width * scr_par.height);
}

void compute_resize (int width, int height)
{
    scr_par.width = width;
    scr_par.height = height;
    g_free (surface1);
    g_free (surface2);
    surface1 = (byte*) g_malloc (scr_par.width * scr_par.height);
    surface2 = (byte*) g_malloc (scr_par.width * scr_par.height);
}


vector_field_t *compute_vector_field_new (int width, int height)
{
    t_interpol *interpol;
    vector_field_t *field;

    interpol = g_new0 (t_interpol, width * height * NB_FCT);
    field = g_new0 (vector_field_t, 1);
    field->vector = interpol;
    field->width = width;
    field->height = height;
    return field;
}


/*
 * Free a vector field together with its entry array.
 *
 *   vector_field  field to release; must not be NULL (checked with
 *                 g_assert)
 *
 * The pointer is passed by value, so the caller's copy is left dangling
 * after this call and must not be used again.
 */
void compute_vector_field_destroy (vector_field_t *vector_field)
{
    g_assert (vector_field);
    g_free (vector_field->vector);
    g_free (vector_field);
}


/*
 * Fill every plane of the vector field: plane number `effect` receives
 * deformation number `effect`, generated with p1 = p2 = 2 in sectors of
 * ten rows at a time.
 */
void compute_generate_vector_field (vector_field_t *vector_field)
{
    int effect;

    for (effect = 0; effect < NB_FCT; effect++) {
        int first_row = 0;

        while (first_row < vector_field->height) {
            compute_generate_sector (effect, effect, 2, 2,
                                     first_row, 10, vector_field);
            first_row += 10;
        }
    }
}


/*
 * Build the next frame from the current one.
 *
 *   vector         one plane of a vector field: width * height entries
 *   width, height  dimensions of that plane and of the surfaces
 *
 * For every destination pixel the entry gives a source position (coord:
 * x in the upper 16 bits, y in the lower 16 bits) and four 8-bit weights
 * (weight, most significant byte first) applied to the source pixel, its
 * right neighbour, the pixel below and the pixel below-right.  The
 * weighted sum is divided by 256 and capped at 255.
 *
 * Reads surface1, writes surface2, then swaps the two pointers.
 * Returns the surface that was just written (now surface1).
 *
 * This function is called from other translation units, so it is defined
 * without `inline`: a plain `inline` definition is not guaranteed to
 * produce an external symbol under C99/C11 inline semantics.
 *
 * NOTE(review): for a source pixel on the last row or column the
 * neighbour reads reach up to width + 1 bytes past width * height; the
 * surfaces must be allocated with that much slack.
 */
byte *compute_surface (t_interpol* vector, int width, int height)
{
    int i, j;
    int add_dest = 0;
    byte *ptr_swap;

    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            const t_interpol *interpol = &vector[add_dest];
            const int add_src = (interpol->coord & 0xFFFF) * width
                              + (interpol->coord >> 16);
            const byte *ptr_pix = &((byte*)surface1)[add_src];
            int color;

            color = (*(ptr_pix) * (interpol->weight >> 24)
                   + *(ptr_pix+1) * ((interpol->weight & 0xFFFFFF) >> 16)
                   + *(ptr_pix+width) * ((interpol->weight & 0xFFFF) >> 8)
                   + *(ptr_pix+width+1) * (interpol->weight & 0xFF)) >> 8;

            if (color > 255)
                surface2[add_dest] = 255;
            else
                surface2[add_dest] = color;
            add_dest++;
        }
    }

    /* The frame just written becomes the current one. */
    ptr_swap = surface2;
    surface2 = surface1;
    surface1 = ptr_swap;
    return surface1;
}


#ifdef MMX_DETECTION
/*
 * MMX variant of compute_surface(): builds the next frame from the
 * current one.
 *
 *   vector         one plane of a vector field: width * height entries
 *   width, height  dimensions of that plane and of the surfaces
 *
 * Reads surface1, writes surface2, then swaps the two pointers and
 * returns the surface that was just written (now surface1).
 *
 * The instruction macros come from <mmx.h>, which is not shown here; the
 * per-line remarks below go by the macro names and the usual semantics
 * of the MMX instructions they are named after -- TODO confirm against
 * the header.
 *
 * NOTE(review): this externally visible function is defined with a bare
 * `inline`; under C99/C11 inline semantics that may not produce an
 * external symbol, depending on how the header declares it -- confirm.
 */
inline byte *compute_surface_mmx (t_interpol* vector, int width, int height)
{
    mmx_t mm0, mm1, mm2;
    mmx_t offsets, r;
    t_interpol *interpol;
    int i, j, color;
    int add_dest = 0;
    int add_src;
    register byte* ptr_pix;
    byte* ptr_swap;
    
    for (j = 0; j < height; j++)
	for (i = 0; i < width; i++) {
	    interpol = &vector[add_dest];
	    /* coord holds the source x in its upper 16 bits and the source y
	     * in its lower 16 bits. */
	    add_src = (interpol->coord & 0xFFFF) * width + (interpol->coord >> 16);
	    ptr_pix = &((byte*)surface1)[add_src];
	    /* MMX mode entry */
	    /* Presumably: load the four packed 8-bit weights into mm1, clear
	     * mm0, then widen the weights to four 16-bit words. */
	    movd_m2r(interpol->weight, mm1);
	    pxor_r2r(mm0, mm0);
	    punpcklbw_m2r(mm0, mm1);
	    /* The four source pixels as 16-bit words, lower-right first,
	     * upper-left last; this is the same pixel/weight pairing that
	     * compute_surface() uses, assuming the lowest weight byte ends up
	     * in word 0. */
	    ((unsigned short*)&offsets)[0] = (unsigned short)*(ptr_pix+width+1);
	    ((unsigned short*)&offsets)[1] = (unsigned short)*(ptr_pix+width);
	    ((unsigned short*)&offsets)[2] = (unsigned short)*(ptr_pix+1);
	    ((unsigned short*)&offsets)[3] = (unsigned short)*(ptr_pix);
	    movq_m2r(offsets, mm2);
	    /*pmullw_r2r(mm1, mm2);*/
	    /* Presumably PMADDWD: multiplies the words pairwise and adds
	     * adjacent products, leaving two 32-bit partial sums. */
	    pmaddwd_r2r(mm1, mm2);
	    movq_r2m(mm2, r);
	    emms();
	    /* MMX mode exit */
	    /* Add the two partial sums, divide by 256 and cap at 255. */
	    color = (((int*)&r)[0] + ((int*)&r)[1] ) >> 8;
	    if (color > 255)
		surface2[add_dest] = 255;
	    else
		surface2[add_dest] = color;
	    add_dest++;
	}
    /* The frame just written becomes the current one. */
    ptr_swap = surface1;
    surface1 = surface2;
    surface2 = ptr_swap;
    return surface1;
}
#endif /* MMX_DETECTION */


