/************************************************************************

  Copyright
  Alessandro MIRONE
  mirone@esrf.fr

  Copyright 2002  by European Synchrotron Radiation Facility, Grenoble, 
                  France

                               ----------
 
                           All Rights Reserved
 
                               ----------

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of European Synchrotron
Radiation Facility or ESRF or SCISOFT not be used in advertising or 
publicity pertaining to distribution of the software without specific, 
written prior permission.

EUROPEAN SYNCHROTRON RADIATION FACILITY DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL EUROPEAN SYNCHROTRON
RADIATION FACILITY OR ESRF BE LIABLE FOR ANY SPECIAL, INDIRECT OR 
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

**************************************************************************/
/***************************************************************************
                          Sparsa.cc  -  description
                             -------------------
    begin                : Tue Feb 1 2000
    copyright            : (C) 2000 by Alessandro MIRONE
    email                : mirone@lure.u-psud.fr
 ***************************************************************************/

#define  _FILE_OFFSET_BITS 64
#define  _LARGEFILE_SOURCE

#
#include<string.h>
#include<string.h>
#include<stdio.h>
#include <stdlib.h>
#include<iostream.h>
#include<math.h>
#include<complex.h>

#include<Sparsa.h>

#include <sys/types.h>
#include <unistd.h>



// Function-like maximum of two values; each argument may be evaluated twice.
#define Max(a,b) ( ((a)>(b))?  (a):(b) )

// The MPI header is pulled in only when the file is built with SPARSAMPI defined.
#ifdef SPARSAMPI
#include"mpi.h"
#endif

// DEBUG(a) expands to nothing; the commented-out alternative below would
// expand to its argument instead.
#define DEBUG(a)
// #define DEBUG(a) a

// Function-like minimum of two values; each argument may be evaluated twice.
#define Min(a,b) (((a)<(b))? (a):(b)  )

// Only NO_UNROLL_ is defined here. The kernels below test _UNROLL_, which this
// file never defines, so their #else branches are the ones compiled unless
// _UNROLL_ is supplied from outside this file.
#define NO_UNROLL_


/**
 * Builds an empty sparse matrix in coordinate (triplet) storage.
 *
 * Room for 1000 entries is reserved up front, no entries are stored (n == 0)
 * and both extents are zero. The Gmin/Gmax buffers stay unallocated
 * (nG == 0) until gohersch() is called.
 */
Sparsa3A::Sparsa3A() {
  n = 0;
  dim = 0;
  dim2 = 0;  // previously left uninitialized, although Moltiplica() and inizializza() read it
  nsize = 1000;
  coeff = new double[nsize];
  col = new int[nsize];
  row = new int[nsize];
  nG = 0;
  Gmin = 0;
  Gmax = 0;
}

/**
 * Builds a sparse matrix from N triplets.
 *
 * @param N number of entries in c, i and j
 * @param c coefficient values, copied into coeff
 * @param i indices copied into col; dim becomes the largest value + 1
 * @param j indices copied into row; dim2 becomes the largest value + 1
 *
 * The internal buffers are sized N + 1000 so that entries can be appended
 * later without an immediate reallocation.
 */
Sparsa3A::Sparsa3A(int N, double *c, int *i, int *j) {
  // Allocate storage: the supplied entries plus 1000 spare slots.
  nsize = N + 1000;
  coeff = new double[nsize];
  col = new int[nsize];
  row = new int[nsize];

  // Derive both extents from the largest index found in each list.
  n = N;
  dim = 0;
  dim2 = 0;
  for (int e = 0; e < N; ++e) {
    const int iExtent = i[e] + 1;
    const int jExtent = j[e] + 1;
    if (iExtent > dim) {
      dim = iExtent;
    }
    if (jExtent > dim2) {
      dim2 = jExtent;
    }
  }

  memcpy(coeff, c, N * sizeof(double));
  memcpy(col, i, N * sizeof(int));
  memcpy(row, j, N * sizeof(int));

  // No Gmin/Gmax buffers until gohersch() is called.
  nG = 0;
  Gmin = 0;
  Gmax = 0;
}

void Sparsa3A::pulisci() {
  delete 	coeff ;
  delete 	col   ;
  delete 	row   ;

  n=0;
  dim=0;
  nsize=1000;

  coeff=new double[nsize];
  col=new int[nsize]   ;
  row=new int[nsize]   ;
};


/**
 * Releases the triplet buffers and, if gohersch() allocated them, the
 * Gmin/Gmax buffers. Prints a trace line every time a matrix is destroyed.
 */
Sparsa3A::~Sparsa3A() {
  cout << " DELETE di una sparsa\n";

  // Everything below was allocated with new[], so delete[] is required.
  if (nsize) {
    delete[] coeff;
    delete[] col;
    delete[] row;
  }

  if (nG) {
    delete[] Gmin;
    delete[] Gmax;
  }
}


/**
 * Makes this matrix a deep copy of `a`: entry count, both extents, capacity
 * and the full contents of the three triplet buffers.
 *
 * The Gmin/Gmax buffers are not copied.
 *
 * @param a matrix to copy from; passing this object itself is a no-op
 */
void Sparsa3A::inizializza( Sparsa3A &a ) {
  // Copying from ourselves would free the buffers and then read them back.
  if (&a == this) {
    return;
  }

  // The buffers come from new[], so they must be released with delete[].
  delete[] coeff;
  delete[] col;
  delete[] row;

  n = a.n;
  dim = a.dim;
  dim2 = a.dim2;
  nsize = a.nsize;

  coeff = new double[nsize];
  col = new int[nsize];
  row = new int[nsize];

  // The whole capacity is copied, not just the first n entries.
  memcpy(coeff, a.coeff, nsize * sizeof(double));
  memcpy(col, a.col, nsize * sizeof(int));
  memcpy(row, a.row, nsize * sizeof(int));
}







void Sparsa3A::aggiungimemoria() {
  double *newcoeff;
  int    *newcol;
  int    *newrow;

  newcoeff =  new double [ nsize+1000]      ;
  newcol   =  new int    [ nsize+1000]      ;
  newrow   =  new int    [ nsize+1000]      ;
  
  memcpy(newcoeff, coeff, nsize*sizeof(double));
  memcpy(newcol,   col  , nsize*sizeof(int));
  memcpy(newrow,   row  , nsize*sizeof(int));

  delete col;
  delete row;
  delete coeff;

  col = newcol;
  row = newrow;
  coeff = newcoeff;

  nsize = nsize + 1000;
}


// Argument-order adapter: same dot product as scalare(a, b, n), with the
// length passed first.
double scalare( int n, double * a, double *b) {
  return scalare(a, b,  n);
}

// Square root of the dot product of the first n elements of a and b.
double sqrtscalare(double * a, double *b, int n) {
  return sqrt(scalare(a, b, n));
}

/**
 * Dot product of the first n elements of a and b.
 *
 * @return sum of a[i]*b[i] for i in [0, n); 0.0 when n <= 0
 *
 * When _UNROLL_ is defined the sum is accumulated four products at a time,
 * followed by a scalar tail; otherwise a plain element-by-element loop is used.
 */
double scalare(double * a, double *b, int n) {
  double total = 0.0;
#ifdef _UNROLL_
  const int quads = n / 4;
  int base = 0;

  for (int q = 0; q < quads; ++q) {
    const double x0 = a[base];
    const double x1 = a[base + 1];
    const double x2 = a[base + 2];
    const double x3 = a[base + 3];

    const double y0 = b[base];
    const double y1 = b[base + 1];
    const double y2 = b[base + 2];
    const double y3 = b[base + 3];

    const double p0 = x0 * y0;
    const double p1 = x1 * y1;
    const double p2 = x2 * y2;
    const double p3 = x3 * y3;

    total = total + p0 + p1 + p2 + p3;
    base += 4;
  }

  // Leftover elements that do not fill a group of four.
  for (int k = 4 * quads; k < n; ++k) {
    total += a[k] * b[k];
  }
#else
  int k = 0;
  while (k < n) {
    total += a[k] * b[k];
    ++k;
  }
#endif
  return total;
}




// Divides the first n elements of a, in place, by sqrt(scalare(a, a, n)).
// No check is made for a zero norm.
void normalizzaauto(double * a,  int n) {
  double b=sqrt(scalare(a,a,n));
  normalizza(a,b,n);
}

/**
 * Divides the first n elements of a by b, in place.
 *
 * No check is made for b == 0. When _UNROLL_ is defined the division is done
 * four elements at a time followed by a scalar tail.
 */
void normalizza(double * a, double b, int n) {
#ifdef _UNROLL_
  const int quads = n / 4;
  int base = 0;

  for (int q = 0; q < quads; ++q) {
    a[base] /= b;
    a[base + 1] /= b;
    a[base + 2] /= b;
    a[base + 3] /= b;
    base += 4;
  }

  // Leftover elements that do not fill a group of four.
  for (int k = 4 * quads; k < n; ++k) {
    a[k] /= b;
  }
#else
  int k = 0;
  while (k < n) {
    a[k] /= b;
    ++k;
  }
#endif
}


// Array overload: forwards the raw buffers of ris and vect to
// Moltiplica(double*, double*).
void Sparsa3A::Moltiplica(Array *ris, Array *vect  ) {
  this->Moltiplica(ris->dataAddress(), vect->dataAddress() );
}
// Array overload: forwards the raw buffers of ris and vect to
// MoltiplicaMinus(double*, double*).
void Sparsa3A::MoltiplicaMinus(Array *ris, Array *vect  ) {
  this->MoltiplicaMinus(ris->dataAddress(), vect->dataAddress() );
}
// Array overload: forwards the raw buffers of ris and vect to
// MoltiplicaDiag(double*, double*).
void Sparsa3A::MoltiplicaDiag(Array *ris, Array *vect  ) {
   this->MoltiplicaDiag(ris->dataAddress(), vect->dataAddress() );
}
// Array overload: forwards the raw buffers of ris and vect to
// MoltiplicaDiagMinus(double*, double*).
void  Sparsa3A::MoltiplicaDiagMinus(Array * ris, Array*vect  ){
  this->MoltiplicaDiagMinus(ris->dataAddress(), vect->dataAddress() );
}

/**
 * Accumulates the sparse matrix-vector product into ris:
 * for every stored entry e, ris[row[e]] += coeff[e] * vect[col[e]].
 *
 * ris is NOT cleared first; the caller decides its initial contents.
 */
void Sparsa3A::Moltiplica(double *ris, double *vect  )
{
#ifdef _UNROLL_
  const int quads = n / 4;
  int base = 0;

  for (int q = 0; q < quads; ++q) {
    // All four loads happen before any store, as in the scalar formulation
    // of each group.
    const double x0 = vect[col[base    ]];
    const double x1 = vect[col[base + 1]];
    const double x2 = vect[col[base + 2]];
    const double x3 = vect[col[base + 3]];

    const double k0 = coeff[base    ];
    const double k1 = coeff[base + 1];
    const double k2 = coeff[base + 2];
    const double k3 = coeff[base + 3];

    const double t0 = k0 * x0;
    const double t1 = k1 * x1;
    const double t2 = k2 * x2;
    const double t3 = k3 * x3;

    ris[row[base    ]] += t0;
    ris[row[base + 1]] += t1;
    ris[row[base + 2]] += t2;
    ris[row[base + 3]] += t3;

    base += 4;
  }

  // Leftover entries that do not fill a group of four.
  for (int e = 4 * quads; e < n; ++e) {
    ris[row[e]] += coeff[e] * vect[col[e]];
  }
#else
  // The symmetrized variant is switched off by the constant 0 in this test.
  if (!(0 && dim == dim2)) {
    for (int e = 0; e < n; ++e) {
      ris[row[e]] += coeff[e] * vect[col[e]];
    }
    return;
  }

  printf(" simmetrizzato \n");
  for (int e = 0; e < n; ++e) {
    ris[row[e]] += 0.5 * coeff[e] * vect[col[e]];
  }
  for (int e = 0; e < n; ++e) {
    ris[col[e]] += 0.5 * coeff[e] * vect[row[e]];
  }
#endif
}





/**
 * Subtracts the sparse matrix-vector product from ris:
 * for every stored entry e, ris[row[e]] -= coeff[e] * vect[col[e]].
 *
 * ris is NOT cleared first; the caller decides its initial contents.
 */
void Sparsa3A::MoltiplicaMinus(double *ris, double *vect  )
{
#ifdef _UNROLL_
  const int quads = n / 4;
  int base = 0;

  for (int q = 0; q < quads; ++q) {
    // All four loads happen before any store within a group.
    const double x0 = vect[col[base    ]];
    const double x1 = vect[col[base + 1]];
    const double x2 = vect[col[base + 2]];
    const double x3 = vect[col[base + 3]];

    const double k0 = coeff[base    ];
    const double k1 = coeff[base + 1];
    const double k2 = coeff[base + 2];
    const double k3 = coeff[base + 3];

    const double t0 = k0 * x0;
    const double t1 = k1 * x1;
    const double t2 = k2 * x2;
    const double t3 = k3 * x3;

    ris[row[base    ]] -= t0;
    ris[row[base + 1]] -= t1;
    ris[row[base + 2]] -= t2;
    ris[row[base + 3]] -= t3;

    base += 4;
  }

  // Leftover entries that do not fill a group of four.
  for (int e = 4 * quads; e < n; ++e) {
    ris[row[e]] -= coeff[e] * vect[col[e]];
  }
#else
  // The symmetrized variant is switched off by the constant 0 in this test.
  if (!(0 && dim == dim2)) {
    for (int e = 0; e < n; ++e) {
      ris[row[e]] -= coeff[e] * vect[col[e]];
    }
    return;
  }

  printf(" simmetrizzato \n");
  for (int e = 0; e < n; ++e) {
    ris[row[e]] -= 0.5 * coeff[e] * vect[col[e]];
  }
  for (int e = 0; e < n; ++e) {
    ris[col[e]] -= 0.5 * coeff[e] * vect[row[e]];
  }
#endif
}




/**
 * Accumulates only the diagonal part of the product into ris:
 * for every stored entry with row == col, ris[row] += coeff * vect[col].
 */
void Sparsa3A::MoltiplicaDiag(double *ris, double *vect  )
{
  for (int e = 0; e < n; ++e) {
    if (row[e] != col[e]) {
      continue;  // off-diagonal entries are ignored
    }
    ris[row[e]] += coeff[e] * vect[col[e]];
  }
}


/**
 * Subtracts only the diagonal part of the product from ris:
 * for every stored entry with row == col, ris[row] -= coeff * vect[col].
 */
void Sparsa3A::MoltiplicaDiagMinus(double *ris, double *vect  )
{
  for (int e = 0; e < n; ++e) {
    if (row[e] != col[e]) {
      continue;  // off-diagonal entries are ignored
    }
    ris[row[e]] -= coeff[e] * vect[col[e]];
  }
}








/**
 * Same accumulation as Moltiplica(ris, vect), restricted to the stored
 * entries with index in [start, end):
 * ris[row[e]] += coeff[e] * vect[col[e]].
 */
void Sparsa3A::Moltiplica(double *ris, double *vect , int start, int end )
{
#ifdef _UNROLL_
  const int quads = (end - start) / 4;
  int base = start;

  for (int q = 0; q < quads; ++q) {
    // All four loads happen before any store within a group.
    const double x0 = vect[col[base    ]];
    const double x1 = vect[col[base + 1]];
    const double x2 = vect[col[base + 2]];
    const double x3 = vect[col[base + 3]];

    const double k0 = coeff[base    ];
    const double k1 = coeff[base + 1];
    const double k2 = coeff[base + 2];
    const double k3 = coeff[base + 3];

    const double t0 = k0 * x0;
    const double t1 = k1 * x1;
    const double t2 = k2 * x2;
    const double t3 = k3 * x3;

    ris[row[base    ]] += t0;
    ris[row[base + 1]] += t1;
    ris[row[base + 2]] += t2;
    ris[row[base + 3]] += t3;

    base += 4;
  }

  // Leftover entries, continuing where the grouped loop stopped.
  for (int e = base; e < end; ++e) {
    ris[row[e]] += coeff[e] * vect[col[e]];
  }
#else
  int e = start;
  while (e < end) {
    ris[row[e]] += coeff[e] * vect[col[e]];
    ++e;
  }
#endif
}



/**
 * (Re)computes the per-index bounds Gmin and Gmax.
 *
 * Both buffers are allocated with 2*dim entries and zeroed. Each stored
 * diagonal entry sets Gmin and Gmax at its index to the coefficient; each
 * off-diagonal entry then lowers Gmin[col] and raises Gmax[col] by the
 * absolute value of its coefficient (Gershgorin-style interval per index).
 */
void Sparsa3A::gohersch()
{
  // Buffers from an earlier call were allocated with new[].
  if (nG) {
    delete[] Gmin;
    delete[] Gmax;
  }
  nG = 2 * dim;
  Gmin = new double[nG];
  Gmax = new double[nG];

  for (int i = 0; i < nG; i++) {
    Gmin[i] = Gmax[i] = 0.0;
  }

  // Diagonal entries give the centre of each interval.
  for (int i = 0; i < n; i++) {
    if (row[i] == col[i]) {
      Gmin[col[i]] = Gmax[col[i]] = coeff[i];
    }
  }
  // Off-diagonal entries widen the interval on both sides.
  for (int i = 0; i < n; i++) {
    if (row[i] != col[i]) {
      Gmin[col[i]] -= fabs( coeff[i] );
      Gmax[col[i]] += fabs( coeff[i] );
    }
  }
}

// Smallest value stored in Gmin[0..nG); returns 0 when nG is not positive.
double Sparsa3A::goherschMin()
{
  if (nG <= 0) {
    return 0;
  }
  double lowest = Gmin[0];
  for (int k = 1; k < nG; ++k) {
    if (lowest > Gmin[k]) {
      lowest = Gmin[k];
    }
  }
  return lowest;
}

// Largest value stored in Gmax[0..nG); returns 0 when nG is not positive.
double Sparsa3A::goherschMax()
{
  if (nG <= 0) {
    return 0;
  }
  double highest = Gmax[0];
  for (int k = 1; k < nG; ++k) {
    if (highest < Gmax[k]) {
      highest = Gmax[k];
    }
  }
  return highest;
}

/**
 * Scales every stored coefficient by `fattore` and adds `addendo` once to
 * each of the `dim` diagonal positions.
 *
 * If a diagonal position has several stored entries, only the first one
 * receives `addendo`. Diagonal positions with no stored entry get a new
 * entry whose value is `addendo`, growing the buffers when they are full.
 *
 * @param fattore multiplicative factor applied to every stored coefficient
 * @param addendo value added once per diagonal index in [0, dim)
 */
void Sparsa3A::trasforma(double fattore, double addendo) {
  // fatto[i] != 0 once diagonal index i has received addendo.
  int *fatto = new int[dim];
  for (int i = 0; i < dim; i++) {
    fatto[i] = 0;
  }

  for (int i = 0; i < n; i++) {
    coeff[i] = fattore * coeff[i];
    if (col[i] == row[i] && fatto[col[i]] == 0) {
      coeff[i] += addendo;
      fatto[col[i]] = 1;
    }
  }

  // Append an explicit entry for every diagonal position that had none.
  // The new index i is always below dim, so dim never needs to grow here.
  for (int i = 0; i < dim; i++) {
    if (!fatto[i]) {
      if (n == nsize) aggiungimemoria();
      row[n] = i;
      col[n] = i;
      coeff[n] = addendo;
      n++;
    }
  }

  delete[] fatto;  // allocated with new[]
}

  
// Prototypes for externally provided routines with C linkage; every
// argument is passed by pointer.
extern "C" {
  // Used by diagonalizza4py() with jobz='V', uplo='U' to diagonalize a
  // symmetric matrix in place; w receives the eigenvalues and info the status.
  void dsyev_( char *jobz, char *uplo, int *N, double *a, int *lda, double *w,
	       double *work, int *lwork, int *info );

  // Used by Array::mat_mult() for a dense matrix-matrix product.
  void dgemm_( char *transa, char *transb, int *m, int *n, int *k, 
	       double *alpha, double *a, int *Ida, double *b, int *Idb, 
	       double *beta, double* c, int *Idc );
  

}

/**
 * Builds an m x m symmetric matrix from alpha/beta and diagonalizes it with
 * dsyev_.
 *
 * Matrix layout:
 *  - alpha[i] on the diagonal for i in [0, m);
 *  - beta[i] at (k,i) and (i,k) for i in [0, k)  ("arrow" part);
 *  - beta[i] at (i,i+1) and (i+1,i) for i in [k, m-1)  (tridiagonal part).
 *
 * @param k      size of the arrow part; NOTE(review): the code writes row k,
 *               so it appears to require k < m whenever k > 0 — confirm
 * @param m      matrix order; nothing is done when m <= 0
 * @param alpha  m diagonal values
 * @param beta   off-diagonal values, indexed as described above
 * @param pevect m*m output, receives the matrix exactly as dsyev_ leaves it
 * @param peval  m*m output, zeroed and then filled with the eigenvalues on
 *               its diagonal
 */
void diagonalizza4py(int k, int m, double *alpha,  double *beta, double *pevect, double *peval)
{
  // A non-positive order would request a negative workspace size below.
  if (m <= 0) {
    return;
  }

  double * A = new double [ m*m];

  memset( pevect, 0, m*m*sizeof(double) );
  memset( peval , 0, m*m*sizeof(double) );
  memset( A , 0, m*m*sizeof(double) );

#define A(i,j)    A[ (i)*m+(j)]
#define peval(i,j)    peval[ (i)*m+(j)]
#define pevect(i,j)    pevect[ (i)*m+(j)]

  for(int i=0; i<m;i++)
    {
      A(i,i)=alpha[i];
    }
  for(int i=0;  i<k ; i++)
    {
      A(k,i)=A(i,k)=beta[i];
    }
  for(int i=k; i<m-1; i++)
    {
      A(i,i+1)=A(i+1,i)=beta[i];
    }

  char jobz = 'V';
  char uplo = 'U';
  int N = m;
  int lda = m;
  int lwork = 3*N-1;
  double *work = new double[lwork];
  double *w = new double[N];   // heap buffer instead of a non-standard VLA
  int info = 0;

  dsyev_( &jobz, &uplo, &N, A, &lda, w,
          work, &lwork, &info );

  if (info != 0) {
    // The outputs are not meaningful in this case; make the failure visible.
    fprintf(stderr, "diagonalizza4py: dsyev_ failed with info = %d\n", info);
  }

  for (int i = 0; i < N; i++ )
    {
      peval(i,i) = w[i];
    }

  // The transposed copy that used to be here is disabled; the matrix is
  // copied verbatim.
  memcpy( pevect , A , m*m*sizeof(double));

  delete[] w;
  delete[] work;
  delete[] A;   // was leaked

#undef A
#undef peval
#undef pevect
}






// Free-function alias for scalare(a, b, n); the Array methods of the same
// base name call this one.
double scalareA( double *a , double *b, int n ) {
  return scalare(a,b,n);
}



// Free-function alias for sqrtscalare(a, b, n); called by Array::sqrtscalare.
double sqrtscalareA( double *a , double *b, int n ) {
  return sqrtscalare(a,b,n);
}

// Free-function alias for normalizza(a, b, c): divides the first c elements
// of a by b. Called by Array::normalizza.
void normalizzaA(double *a, double b, int c) {
  normalizza(a,b,c);
}
// Free-function alias for normalizzaauto(a, c). Called by
// Array::normalizzaauto.
void normalizzaautoA(double *a,  int c) {
  normalizzaauto(a,c);
}



// Divides all `size` elements of this array's data by norm, in place.
void Array::normalizza( double norm ){
  normalizzaA(data, norm, size   );
}
// Divides all `size` elements of this array's data, in place, by the square
// root of the data's dot product with itself.
void Array::normalizzaauto(  ){
  normalizzaautoA(data,  size   );
}

/**
 * Dot product of two arrays over a.len() elements.
 *
 * @throws std::out_of_range when a.size and b.size differ
 */
double Array::scalare(Array &a, Array &b){
  const bool aligned = (a.size == b.size);
  if (!aligned) {
    throw std::out_of_range("frames are not aligned");
  }
  return scalareA(a.dataAddress(), b.dataAddress(), a.len());
}

/**
 * Element-wise in-place division: data[i] /= b.data[i] for every element.
 *
 * @throws std::out_of_range when the two arrays differ in size
 */
void  Array::dividebyarray( Array &b){
  if (!(this->size == b.size)) {
    throw std::out_of_range("frames are not aligned");
  }
  int k = 0;
  while (k < this->size) {
    this->data[k] /= b.data[k];
    ++k;
  }
}


/**
 * Gathers into this array: element i becomes values[indices[i]].
 *
 * @param N       number of elements; must equal len()
 * @param indices N positions to read from in `values`
 * @param values  source buffer
 * @throws std::out_of_range when N differs from len()
 */
void  Array::set_indices(int N, int *indices, double *values){
  if (!(N == len())) {
    throw std::out_of_range("   int  void  Array::set_indices(int N, int indices, double *values)");
  }
  double *dest = dataAddress();
  int k = 0;
  while (k < N) {
    dest[k] = values[indices[k]];
    ++k;
  }
}



/**
 * Scatters from this array: values[indices[i]] becomes element i.
 * This is the inverse of the gather done by set_indices().
 *
 * @param N       number of elements; must equal len()
 * @param indices N positions to write to in `values`
 * @param values  destination buffer
 * @throws std::out_of_range when N differs from len()
 */
void  Array::set_indices_inv(int N, int *indices, double *values){
  if(  N != len()) {
    // The message used to name set_indices (copy-paste), which pointed
    // whoever read the error at the wrong function.
    throw std::out_of_range("Array::set_indices_inv(int N, int *indices, double *values): N does not match the array length");
  }
  double *d=  dataAddress();
  for (int i=0; i<N; i++) {
    values[indices[i]]=d[i];
  }
}



/**
 * Square root of the dot product of two arrays over a.len() elements.
 *
 * @throws std::out_of_range when a.size and b.size differ
 */
double Array::sqrtscalare(Array &a, Array &b){
  const bool aligned = (a.size == b.size);
  if (!aligned) {
    throw std::out_of_range("frames are not aligned");
  }
  return sqrtscalareA(a.dataAddress(), b.dataAddress(), a.len());
}


// Overwrites b.data with the dense product a.data * mat, computed by dgemm_
// with no transposition, alpha = 1 and beta = 0.
// dgemm_ is given: a.data as a dim x m matrix (leading dimension dim), mat as
// an m x k matrix (leading dimension m) and b.data as the dim x k result,
// where dim = a.size / a.firstdimension, m = a.firstdimension - 1 and
// k = b.firstdimension.
// NOTE(review): m is one less than a.firstdimension, so the last block of a
// does not take part in the product — presumably intentional; confirm with
// the caller. No size checks are made on mat or b.
void  Array::mat_mult(Array & b, double * mat,Array & a) {
  int m=a.firstdimension-1;
  int k=b.firstdimension;
  int dim = a.size/a.firstdimension;

  // uMatrix<double> E(m, k,  mat);
//   uMatrix<double> A(dim, m, a.data );
//   uMatrix<double> B(dim, k, b.data );
  
//   ::dumptofile(mat, m*k, "bloccovectpy");
   
  char transa='N', transb='N';
  double alpha = double(1.0);
  double beta = double(0.0);
  
  dgemm_( &transa, &transb, &dim, &k, &m, &alpha, a.data, &dim, 
           mat, &m, &beta, b.data, &dim );
   
}

// Stores in ArrayFLOAT a pointer into this array's own buffer, N elements
// past the beginning of data. Nothing is copied.
// NOTE(review): `start` is never used and the offset applied is N. If callers
// pass (start, length), the view presumably should begin at data+start —
// confirm against the callers before relying on this.
void  Array::get_numarrayview(int start, int  N,  double *&ArrayFLOAT) {
  ArrayFLOAT =  this->data+N ;
}


#ifdef SPARSAMPI
#define MSGLEN 20
 // Clears the Sparsa3AP and VectorP registries, then splits by MPI rank:
 // rank 0 returns from the constructor, every other rank stays in the
 // command loop below and never returns (it leaves through exit(0)).
 inizializzaMPI::inizializzaMPI() {
  for (int i=0; i< MAXNSPARSA3AP; i++) {
    Sparsa3AP::items[i]=0;
  }
  for (int i=0; i<MAXNVECTORP ; i++) {
    VectorP::items[i]=0;
  }
  int myid, numprocs;
  MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD,&myid);
  if(myid!=0) {
    // Worker loop: wait for a command string from rank 0 (tag 99), execute
    // it, then meet the other ranks at a barrier.
    while(1) {
      char buffer[MSGLEN];
      MPI_Status status;
      int id2dest;
      printf("aspetto il comando \n");
      MPI_Recv(buffer,MSGLEN , MPI_CHAR, 0 , 99, MPI_COMM_WORLD,&status);
      printf("nel processo %d il comando est %s \n",myid ,  buffer);

      if( strcmp( buffer,"creaS")==0 ) {

	// The object is not stored here: its constructor calls crea(), which
	// registers it in Sparsa3AP::items.
	new Sparsa3AP;


      } else if( strcmp( buffer,"distruggiS")==0 ) {

	// The id of the item to destroy follows on tag 98.
	MPI_Recv(&id2dest,1 , MPI_INT, 0 , 98, MPI_COMM_WORLD,&status);
	delete Sparsa3AP::items[id2dest];
	Sparsa3AP::items[id2dest] = 0 ;
	printf("nel processo %d ho distrutto l' item  %d il comando era %s \n",myid , id2dest, buffer );

  
      } else if( strcmp( buffer,"FINE")==0) {
	// Shutdown: synchronize once, then terminate this process.
	MPI_Barrier(MPI_COMM_WORLD );
	std::cout << "sclavo esce \n";
	int ierr;
	exit(0);

      } else if( strcmp( buffer,"caricaArraysS")==0) {
	

	// The id of the target item follows on tag 98; the item then receives
	// its data itself in riceviArrays().
	MPI_Recv(&id2dest,1 , MPI_INT, 0 , 98, MPI_COMM_WORLD,&status);
	Sparsa3AP::items[id2dest]->riceviArrays();



	printf(" carica arrays terminato con id = %d nel processo %d  \n", id2dest , myid);

      } else {
	// Unknown command: report it and terminate this process.
	printf("comando %s sconosciuto in  inizializzaMPI \n", buffer );
	
	exit(0);
      }
      
      // Reached after every command that did not exit.
      MPI_Barrier(MPI_COMM_WORLD );

    }
  }
}
// On rank 0, sends the "FINE" command (tag 99) to every other rank; then
// every rank that gets here waits at a barrier and prints a closing message.
inizializzaMPI::~inizializzaMPI() {
  int myid, numprocs;
  MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD,&myid);

  if (myid == 0) {
    char buffer[MSGLEN];
    sprintf(buffer,"FINE");
    std:: cout << "mando la parola fine \n";

    int proc = 1;
    while (proc < numprocs) {
      MPI_Send(buffer, strlen(buffer)+1, MPI_CHAR, proc, 99, MPI_COMM_WORLD);
      ++proc;
    }
  }

  MPI_Barrier(MPI_COMM_WORLD );
  std::cout << "master esce \n";
}

 // Construction is delegated entirely to crea().
 Sparsa3AP:: Sparsa3AP() {
   this->crea();
 }

/**
 * Initializes a distributed matrix object.
 *
 * On rank 0 the "creaS" command (tag 99) is sent to every other rank,
 * followed by a barrier. On every rank the object is then registered in the
 * first free slot of Sparsa3AP::items, and that slot index becomes item_id.
 * The process exits when the registry is full.
 */
void Sparsa3AP::crea() {
  MPI_Comm_size(MPI_COMM_WORLD,&n_procs);
  MPI_Comm_rank(MPI_COMM_WORLD,&proc_id);

  if (proc_id == 0) {
    char buffer[MSGLEN];
    for (int proc = 1; proc < n_procs; proc++) {
      sprintf(buffer,"creaS");
      MPI_Send(buffer, strlen(buffer)+1, MPI_CHAR, proc, 99, MPI_COMM_WORLD);
    }
    MPI_Barrier(MPI_COMM_WORLD );
  }

  // Look for the first unused registry slot.
  item_id = 0;
  while (item_id < MAXNSPARSA3AP && Sparsa3AP::items[item_id] != 0) {
    ++item_id;
  }

  if (item_id < MAXNSPARSA3AP) {
    Sparsa3AP::items[item_id] = this;
    printf(" creata una matrice con id = %d nel processo %d  \n", item_id, proc_id);
  } else {
    printf(" raggiunto massino MAXNSPARSA3AP \n");
    exit(0);
  }

  // A null Nelspp marks "no arrays loaded yet".
  Nelspp = 0;
}


#define DBP 1

/**
 * Rank-0 side of loading the matrix: distributes the triplets to the
 * worker ranks. Does nothing on any other rank.
 *
 * Every worker first gets the "caricaArraysS" command (tag 99) and this
 * object's item_id (tag 98). The entries are then handed out in consecutive
 * runs: worker `proc` receives the entries, starting where the previous
 * worker stopped, whose `from` index is below dim_from*proc/(n_procs-1).
 * NOTE(review): this only partitions correctly if `from` is sorted in
 * non-decreasing order — confirm with the caller.
 *
 * Per worker, in order: dim_from (tag 97), dim_to (96), run length (95),
 * from (94), to (93), coeffs (92). A barrier closes the operation.
 */
void Sparsa3AP::caricaArrays(int Nels, int * from, int * to, double * coeffs,int dim_from, int dim_to) {
  if (proc_id != 0) {
    return;
  }

  char buffer[MSGLEN];
  for (int proc = 1; proc < n_procs; proc++) {
    sprintf(buffer,"caricaArraysS");
    MPI_Send(buffer, strlen(buffer)+1, MPI_CHAR, proc, 99, MPI_COMM_WORLD);
    MPI_Send(&item_id, 1, MPI_INT, proc, 98, MPI_COMM_WORLD);
  }

  int pos = 0;
  for (int proc = 1; proc < n_procs; proc++) {
    const int limite = (dim_from*proc)/(n_procs-1);

    // Length of the run of entries that belongs to this worker.
    int len = 0;
    for (; pos+len < Nels && from[pos+len] < limite; ++len) {
    }

    MPI_Send(&dim_from, 1, MPI_INT, proc, 97, MPI_COMM_WORLD);
    MPI_Send(&dim_to, 1, MPI_INT, proc, 96, MPI_COMM_WORLD);
    MPI_Send(&len, 1, MPI_INT, proc, 95, MPI_COMM_WORLD);

    MPI_Send(from + pos, len, MPI_INT, proc, 94, MPI_COMM_WORLD);
    MPI_Send(to + pos, len, MPI_INT, proc, 93, MPI_COMM_WORLD);
    MPI_Send(coeffs + pos, len, MPI_DOUBLE, proc, 92, MPI_COMM_WORLD);

    pos += len;
  }
  MPI_Barrier(MPI_COMM_WORLD );
}
/**
 * Worker side of caricaArrays(): receives this rank's share of the triplets
 * from rank 0 and splits it by destination rank.
 *
 * Steps:
 *  1. Receive dim_from (tag 97), dim_to (96), the entry count (95) and the
 *     from/to/coeffs arrays (94/93/92) from rank 0.
 *  2. Shift `from` so it is relative to the start of this rank's range.
 *  3. Assign each entry to the rank owning its `to` index
 *     (to*(n_procs-1)/dim_to + 1) and build, per destination rank, the
 *     entry lists frompp/topp/coeffspp plus a compact local numbering of
 *     the `to` indices used (loc2glob: local -> global; dims2: count).
 *  4. Exchange the dims2/loc2glob tables with the neighbouring ranks.
 *  5. Dump everything to the text file "dopo_ricevi_<rank>" for inspection.
 */
void Sparsa3AP::riceviArrays() {
  MPI_Status status;

  MPI_Recv(&dim_from ,1 , MPI_INT, 0 , 97, MPI_COMM_WORLD,&status);
  MPI_Recv(&dim_to   ,1 , MPI_INT, 0 , 96, MPI_COMM_WORLD,&status);

  int Nels;
  MPI_Recv(&Nels     ,1 , MPI_INT, 0 , 95, MPI_COMM_WORLD,&status);

  int *from = new int [ Nels ] ;
  int *to   = new int [ Nels ] ;
  double *coeffs = new double [ Nels];

  MPI_Recv(from    ,Nels , MPI_INT    , 0 , 94, MPI_COMM_WORLD,&status);
  MPI_Recv(to      ,Nels , MPI_INT    , 0 , 93, MPI_COMM_WORLD,&status);
  MPI_Recv(coeffs  ,Nels , MPI_DOUBLE , 0 , 92, MPI_COMM_WORLD,&status);

  // Make `from` relative to the first index owned by this rank.
  int pos =  (dim_from*(proc_id-1))/(n_procs-1);
  for(int i=0; i<Nels; i++) {
    from[i]-=pos;
  }

  // Destination rank (1..n_procs-1) of every entry, from its `to` index.
  int * goes2 = new int [Nels];
  for(int i=0; i<Nels; i++) {
    goes2[i] = (to[i]*(n_procs-1))/dim_to +1 ;
  }

  Nelspp= new int [ n_procs];
  topp  = new int *[ n_procs];
  frompp  = new int *[ n_procs];
  coeffspp  = new double *[ n_procs];

  // Count the entries going to each rank.
  memset( Nelspp, 0, n_procs*sizeof(int));
  for(int i=0; i<Nels; i++) {
    Nelspp[goes2[i]]++;
  }

  dims2 = new int [n_procs];
  memset( dims2, 0, n_procs*sizeof(int));

  loc2glob = new int * [n_procs] ;

  // Scratch table over the global `to` indices: first a "used" flag, then
  // overwritten with the local index assigned to each used entry.
  int * used   = new int [ dim_to] ;

  for(int proc=1; proc< n_procs; proc++) {
    int nels = Nelspp[proc] ;

    topp[proc]=new int [nels ];
    frompp[proc]=new int [nels ];
    coeffspp[proc]=new double [nels ];

    memset(used, 0, dim_to*sizeof(int));
    for(int i=0; i< Nels; i++) {
      if( goes2[i]==proc ) {
        used[ to[i]]=1;
      }
    }
    dims2[proc]=0;
    for(int i=0; i< dim_to; i++) {
      if( used[i] ) {
        dims2[proc]++;
      }
    }

    // Number the used indices consecutively; each flag is replaced by the
    // local index only after it has been tested, so index 0 is safe.
    loc2glob[proc]= new int [ dims2[proc] ];
    int slot=0;
    for(int i=0; i< dim_to; i++) {
      if( used[i] ) {
        loc2glob[proc][slot]=i;
        used[i]=slot;
        slot++;
      }
    }

    int ne=0;
    for(int i=0; i<Nels; i++) {
      if(proc == goes2[i]){
        frompp[proc][ne]=from[i];
        topp  [proc][ne]=used[to[i]];
        coeffspp[proc][ne] = coeffs[i];
        ne++;
      }
    }
  }

  others_loc2glob = new int * [n_procs] ;
  others_dims2 = new int [n_procs];
  memset(others_dims2 , 0, n_procs*sizeof(int));
  for(int proc=0; proc< n_procs; proc++) {
    others_loc2glob[proc]=0;   // slots not filled by the exchange stay null
  }

  // Ring exchange among the worker ranks 1..n_procs-1: at distance Dproc,
  // send the tables built for proc_to and receive those that proc_from
  // built for this rank.
  for(int Dproc=1; Dproc< 1+(n_procs-1)/2; Dproc++) {
    int proc_to=proc_id+Dproc;
    if(proc_to>=n_procs) {
      proc_to=proc_to-n_procs +1;
    }
    int proc_from=proc_id - Dproc;
    if(proc_from<1) {
      proc_from=proc_from + n_procs-1;
    }

    MPI_Sendrecv(dims2+proc_to, 1, MPI_INT , proc_to, 1,
                 others_dims2+proc_from ,  1,  MPI_INT ,  proc_from,  1,  MPI_COMM_WORLD, &status );

    others_loc2glob[proc_from] = new int [others_dims2[proc_from]];

    // Exchange the index tables themselves. The buffers must be the int
    // arrays loc2glob[proc_to] / others_loc2glob[proc_from]; the previous
    // code passed addresses inside the pointer tables, so pointer bytes were
    // sent and the pointer table was overwritten on receipt.
    MPI_Sendrecv( loc2glob[proc_to], dims2[proc_to] , MPI_INT , proc_to, 89,
                  others_loc2glob[proc_from], others_dims2[proc_from] , MPI_INT , proc_from, 89,
                  MPI_COMM_WORLD, &status);
  }

  // Diagnostic dump of everything computed above. The file is flushed after
  // each section so that a partial dump survives a later crash.
  {
    char nfpp[80];
    sprintf(nfpp,"dopo_ricevi_%d", proc_id);
    FILE *fpp=fopen(nfpp,"w");
    if(fpp) {
      for(int i=0; i<Nels; i++ ) {
        fprintf(fpp," to[%d] = %d  goes2[%d]= %d\n", i, to[i],i,goes2[i] );
      }
      fflush(fpp);
    }
    for(int proc=1; proc<n_procs; proc++) {
      if(fpp) {
        fprintf(fpp," per processo %d\n", proc);
        int nel=Nelspp[proc];
        fprintf(fpp," numero elementi  %d\n", nel);
        for(int i=0; i<nel; i++) {
          fprintf(fpp," %d %d %e\n", topp[proc][i], frompp[proc][i], coeffspp[proc][i]);
        }
        fflush(fpp);

        fprintf(fpp,"la dimensione utile est  %d\n", dims2[proc]);
        for(int i=0; i<dims2[proc]; i++) {
          fprintf(fpp,"%d %d\n", i, loc2glob[proc][i]);
        }
        fflush(fpp);

        fprintf(fpp,"invece dal processo %d la  dimensione utile est  %d\n", proc ,others_dims2[proc]);
        for(int i=0; i<others_dims2[proc]; i++) {
          fprintf(fpp,"%d %d\n", i, others_loc2glob[proc][i]);
        }
        fflush(fpp);
      }

      std::cout << " ############# il processo " << proc_id << " a finito un giro  \n";
    }
    if(fpp) {
      fclose(fpp);
    }
  }
  std::cout << " ############# il processo " << proc_id << " est uscito \n";

  // All temporaries were allocated with new[].
  delete[] used;
  delete[] coeffs;
  delete[] to;
  delete[] from;
  delete[] goes2;
}


// On rank 0: clears this object's registry slot, sends every other rank the
// "distruggiS" command (tag 99) followed by item_id (tag 98), then waits at
// a barrier. On any other rank the destructor returns immediately.
Sparsa3AP::~Sparsa3AP() {
  MPI_Comm_size(MPI_COMM_WORLD,&n_procs);
  MPI_Comm_rank(MPI_COMM_WORLD,&proc_id);
  if( proc_id==0) {
    items[item_id]=0;
    char buffer[MSGLEN];
    int status;
    for ( int proc =1; proc< n_procs ; proc++) {
      sprintf(buffer,"distruggiS");
      MPI_Send(buffer, strlen(buffer)+1, MPI_CHAR, proc, 99, MPI_COMM_WORLD);
      MPI_Send(&item_id, 1, MPI_INT, proc, 98, MPI_COMM_WORLD);
    }
    MPI_Barrier(MPI_COMM_WORLD );


  } else {
    // NOTE(review): this early return makes the cleanup below unreachable,
    // so the buffers allocated by riceviArrays() are never released on the
    // worker ranks. Do not simply remove it: the cleanup uses scalar delete
    // on memory obtained with new[], and riceviArrays() only assigns
    // others_loc2glob[] for some ranks, so other slots may hold
    // uninitialized pointers.
    return ;
    if(Nelspp) {
      for(int proc=1; proc< n_procs; proc++) {
	delete topp[proc];
	delete frompp[proc];
	delete coeffspp[proc];
	delete loc2glob[proc];
	delete others_loc2glob[proc];
      }
      delete Nelspp;
      delete dims2;
      delete others_dims2;
    }
  }
};

// Registries of live Sparsa3AP / VectorP objects, indexed by item id.
// The trailing () value-initializes the arrays, so every slot starts as a
// null pointer: crea() looks for a slot equal to 0, which was otherwise only
// guaranteed after an inizializzaMPI object had been constructed.
Sparsa3AP ** Sparsa3AP::items = new Sparsa3AP * [MAXNSPARSA3AP]();
VectorP ** VectorP::items = new VectorP * [MAXNVECTORP]();

#endif
