xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision acdf5bf487cadfa3a8e514db08e260b3198fbf76)
179bdfe76SSatish Balay #ifndef lint
2*acdf5bf4SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.5 1996/06/24 22:18:52 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
579bdfe76SSatish Balay #include "mpibaij.h"
679bdfe76SSatish Balay 
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
1357b952d6SSatish Balay 
1457b952d6SSatish Balay /* local utility routine that creates a mapping from the global column
1557b952d6SSatish Balay number to the local number in the off-diagonal part of the local
1657b952d6SSatish Balay storage of the matrix.  This is done in a non scable way since the
1757b952d6SSatish Balay length of colmap equals the global matrix length.
1857b952d6SSatish Balay */
1957b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2057b952d6SSatish Balay {
2157b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2257b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
2357b952d6SSatish Balay   int        nbs = B->nbs,i;
2457b952d6SSatish Balay 
2557b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
2657b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
2757b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
2857b952d6SSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i;
2957b952d6SSatish Balay   return 0;
3057b952d6SSatish Balay }
3157b952d6SSatish Balay 
3257b952d6SSatish Balay 
33*acdf5bf4SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
3457b952d6SSatish Balay {
3557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3657b952d6SSatish Balay   int         ierr;
3757b952d6SSatish Balay   if (baij->size == 1) {
3857b952d6SSatish Balay     ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr);
3957b952d6SSatish Balay   } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel");
4057b952d6SSatish Balay   return 0;
4157b952d6SSatish Balay }
4257b952d6SSatish Balay 
4357b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4457b952d6SSatish Balay {
4557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4657b952d6SSatish Balay   Scalar      value;
4757b952d6SSatish Balay   int         ierr,i,j, rstart = baij->rstart, rend = baij->rend;
4857b952d6SSatish Balay   int         cstart = baij->cstart, cend = baij->cend,row,col;
4957b952d6SSatish Balay   int         roworiented = baij->roworiented,rstart_orig,rend_orig;
5057b952d6SSatish Balay   int         cstart_orig,cend_orig,bs=baij->bs;
5157b952d6SSatish Balay 
5257b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
5357b952d6SSatish Balay     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
5457b952d6SSatish Balay   }
5557b952d6SSatish Balay   baij->insertmode = addv;
5657b952d6SSatish Balay   rstart_orig = rstart*bs;
5757b952d6SSatish Balay   rend_orig   = rend*bs;
5857b952d6SSatish Balay   cstart_orig = cstart*bs;
5957b952d6SSatish Balay   cend_orig   = cend*bs;
6057b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
6157b952d6SSatish Balay     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
6257b952d6SSatish Balay     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
6357b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
6457b952d6SSatish Balay       row = im[i] - rstart_orig;
6557b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
6657b952d6SSatish Balay         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");
6757b952d6SSatish Balay         if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");
6857b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
6957b952d6SSatish Balay           col = in[j] - cstart_orig;
7057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
7157b952d6SSatish Balay           ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
7257b952d6SSatish Balay         }
7357b952d6SSatish Balay         else {
7457b952d6SSatish Balay           if (mat->was_assembled) {
7557b952d6SSatish Balay             if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);}
7657b952d6SSatish Balay             col = baij->colmap[in[j]];
7757b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
7857b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
7957b952d6SSatish Balay               col =  in[j];
8057b952d6SSatish Balay             }
8157b952d6SSatish Balay           }
8257b952d6SSatish Balay           else col = in[j];
8357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
8457b952d6SSatish Balay           ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
8557b952d6SSatish Balay         }
8657b952d6SSatish Balay       }
8757b952d6SSatish Balay     }
8857b952d6SSatish Balay     else {
8957b952d6SSatish Balay       if (roworiented) {
9057b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
9157b952d6SSatish Balay       }
9257b952d6SSatish Balay       else {
9357b952d6SSatish Balay         row = im[i];
9457b952d6SSatish Balay         for ( j=0; j<n; j++ ) {
9557b952d6SSatish Balay           ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
9657b952d6SSatish Balay         }
9757b952d6SSatish Balay       }
9857b952d6SSatish Balay     }
9957b952d6SSatish Balay   }
10057b952d6SSatish Balay   return 0;
10157b952d6SSatish Balay }
10257b952d6SSatish Balay 
103d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
104d6de1c52SSatish Balay {
105d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
106d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
107d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
108d6de1c52SSatish Balay 
109d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
110d6de1c52SSatish Balay     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row");
111d6de1c52SSatish Balay     if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large");
112d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
113d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
114d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
115d6de1c52SSatish Balay         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column");
116d6de1c52SSatish Balay         if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large");
117d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
118d6de1c52SSatish Balay           col = idxn[j] - bscstart;
119d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
120d6de1c52SSatish Balay         }
121d6de1c52SSatish Balay         else {
122*acdf5bf4SSatish Balay           if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);}
123d6de1c52SSatish Balay           col = baij->colmap[idxn[j]/bs]*bs + col%bs;
124d6de1c52SSatish Balay           ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
125d6de1c52SSatish Balay         }
126d6de1c52SSatish Balay       }
127d6de1c52SSatish Balay     }
128d6de1c52SSatish Balay     else {
129d6de1c52SSatish Balay       SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported");
130d6de1c52SSatish Balay     }
131d6de1c52SSatish Balay   }
132d6de1c52SSatish Balay   return 0;
133d6de1c52SSatish Balay }
134d6de1c52SSatish Balay 
135d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
136d6de1c52SSatish Balay {
137d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
138d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
139*acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
140d6de1c52SSatish Balay   double     sum = 0.0;
141d6de1c52SSatish Balay   Scalar     *v;
142d6de1c52SSatish Balay 
143d6de1c52SSatish Balay   if (baij->size == 1) {
144d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
145d6de1c52SSatish Balay   } else {
146d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
147d6de1c52SSatish Balay       v = amat->a;
148d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
149d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
150d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
151d6de1c52SSatish Balay #else
152d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
153d6de1c52SSatish Balay #endif
154d6de1c52SSatish Balay       }
155d6de1c52SSatish Balay       v = bmat->a;
156d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
157d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
158d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
159d6de1c52SSatish Balay #else
160d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
161d6de1c52SSatish Balay #endif
162d6de1c52SSatish Balay       }
163d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
164d6de1c52SSatish Balay       *norm = sqrt(*norm);
165d6de1c52SSatish Balay     }
166*acdf5bf4SSatish Balay     else
167*acdf5bf4SSatish Balay       SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
168d6de1c52SSatish Balay   }
169d6de1c52SSatish Balay   return 0;
170d6de1c52SSatish Balay }
17157b952d6SSatish Balay 
17257b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
17357b952d6SSatish Balay {
17457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
17557b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
17657b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
17757b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
17857b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
17957b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
18057b952d6SSatish Balay   InsertMode  addv;
18157b952d6SSatish Balay   Scalar      *rvalues,*svalues;
18257b952d6SSatish Balay 
18357b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
18457b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
18557b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
18657b952d6SSatish Balay     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
18757b952d6SSatish Balay   }
18857b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
18957b952d6SSatish Balay 
19057b952d6SSatish Balay   /*  first count number of contributors to each processor */
19157b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
19257b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
19357b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
19457b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
19557b952d6SSatish Balay     idx = baij->stash.idx[i];
19657b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
19757b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
19857b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
19957b952d6SSatish Balay       }
20057b952d6SSatish Balay     }
20157b952d6SSatish Balay   }
20257b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
20357b952d6SSatish Balay 
20457b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
20557b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
20657b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
20757b952d6SSatish Balay   nreceives = work[rank];
20857b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
20957b952d6SSatish Balay   nmax = work[rank];
21057b952d6SSatish Balay   PetscFree(work);
21157b952d6SSatish Balay 
21257b952d6SSatish Balay   /* post receives:
21357b952d6SSatish Balay        1) each message will consist of ordered pairs
21457b952d6SSatish Balay      (global index,value) we store the global index as a double
21557b952d6SSatish Balay      to simplify the message passing.
21657b952d6SSatish Balay        2) since we don't know how long each individual message is we
21757b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
21857b952d6SSatish Balay      this is a lot of wasted space.
21957b952d6SSatish Balay 
22057b952d6SSatish Balay 
22157b952d6SSatish Balay        This could be done better.
22257b952d6SSatish Balay   */
22357b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
22457b952d6SSatish Balay   CHKPTRQ(rvalues);
22557b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
22657b952d6SSatish Balay   CHKPTRQ(recv_waits);
22757b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
22857b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
22957b952d6SSatish Balay               comm,recv_waits+i);
23057b952d6SSatish Balay   }
23157b952d6SSatish Balay 
23257b952d6SSatish Balay   /* do sends:
23357b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
23457b952d6SSatish Balay          the ith processor
23557b952d6SSatish Balay   */
23657b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
23757b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
23857b952d6SSatish Balay   CHKPTRQ(send_waits);
23957b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
24057b952d6SSatish Balay   starts[0] = 0;
24157b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
24257b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
24357b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
24457b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
24557b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
24657b952d6SSatish Balay   }
24757b952d6SSatish Balay   PetscFree(owner);
24857b952d6SSatish Balay   starts[0] = 0;
24957b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
25057b952d6SSatish Balay   count = 0;
25157b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
25257b952d6SSatish Balay     if (procs[i]) {
25357b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
25457b952d6SSatish Balay                 comm,send_waits+count++);
25557b952d6SSatish Balay     }
25657b952d6SSatish Balay   }
25757b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
25857b952d6SSatish Balay 
25957b952d6SSatish Balay   /* Free cache space */
26057b952d6SSatish Balay   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n);
26157b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
26257b952d6SSatish Balay 
26357b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
26457b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
26557b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
26657b952d6SSatish Balay   baij->rmax       = nmax;
26757b952d6SSatish Balay 
26857b952d6SSatish Balay   return 0;
26957b952d6SSatish Balay }
27057b952d6SSatish Balay 
27157b952d6SSatish Balay 
27257b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
27357b952d6SSatish Balay {
27457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
27557b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
27657b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
27757b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
27857b952d6SSatish Balay   Scalar      *values,val;
27957b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
28057b952d6SSatish Balay 
28157b952d6SSatish Balay   /*  wait on receives */
28257b952d6SSatish Balay   while (count) {
28357b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
28457b952d6SSatish Balay     /* unpack receives into our local space */
28557b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
28657b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
28757b952d6SSatish Balay     n = n/3;
28857b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
28957b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
29057b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
29157b952d6SSatish Balay       val = values[3*i+2];
29257b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
29357b952d6SSatish Balay         col -= baij->cstart*bs;
29457b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
29557b952d6SSatish Balay       }
29657b952d6SSatish Balay       else {
29757b952d6SSatish Balay         if (mat->was_assembled) {
29857b952d6SSatish Balay           if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);}
29957b952d6SSatish Balay           col = baij->colmap[col/bs]*bs + col%bs;
30057b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
30157b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
30257b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
30357b952d6SSatish Balay           }
30457b952d6SSatish Balay         }
30557b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
30657b952d6SSatish Balay       }
30757b952d6SSatish Balay     }
30857b952d6SSatish Balay     count--;
30957b952d6SSatish Balay   }
31057b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
31157b952d6SSatish Balay 
31257b952d6SSatish Balay   /* wait on sends */
31357b952d6SSatish Balay   if (baij->nsends) {
31457b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
31557b952d6SSatish Balay     CHKPTRQ(send_status);
31657b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
31757b952d6SSatish Balay     PetscFree(send_status);
31857b952d6SSatish Balay   }
31957b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
32057b952d6SSatish Balay 
32157b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
32257b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
32357b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
32457b952d6SSatish Balay 
32557b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
32657b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
32757b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
32857b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
32957b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
33057b952d6SSatish Balay   }
33157b952d6SSatish Balay 
33257b952d6SSatish Balay   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
33357b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
33457b952d6SSatish Balay   }
33557b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
33657b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
33757b952d6SSatish Balay 
33857b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
33957b952d6SSatish Balay   return 0;
34057b952d6SSatish Balay }
34157b952d6SSatish Balay 
34257b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
34357b952d6SSatish Balay {
34457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
34557b952d6SSatish Balay   int          ierr;
34657b952d6SSatish Balay 
34757b952d6SSatish Balay   if (baij->size == 1) {
34857b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
34957b952d6SSatish Balay   }
35057b952d6SSatish Balay   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
35157b952d6SSatish Balay   return 0;
35257b952d6SSatish Balay }
35357b952d6SSatish Balay 
35457b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
35557b952d6SSatish Balay {
35657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
357cee3aa6bSSatish Balay   int          ierr, format,rank,bs=baij->bs;
35857b952d6SSatish Balay   FILE         *fd;
35957b952d6SSatish Balay   ViewerType   vtype;
36057b952d6SSatish Balay 
36157b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
36257b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
36357b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
36457b952d6SSatish Balay     if (format == ASCII_FORMAT_INFO_DETAILED) {
36557b952d6SSatish Balay       int nz, nzalloc, mem;
36657b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
36757b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
36857b952d6SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
36957b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
37057b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
37157b952d6SSatish Balay               rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem);
37257b952d6SSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
37357b952d6SSatish Balay       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs);
37457b952d6SSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
37557b952d6SSatish Balay       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs);
37657b952d6SSatish Balay       fflush(fd);
37757b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
37857b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
37957b952d6SSatish Balay       return 0;
38057b952d6SSatish Balay     }
38157b952d6SSatish Balay     else if (format == ASCII_FORMAT_INFO) {
38257b952d6SSatish Balay       return 0;
38357b952d6SSatish Balay     }
38457b952d6SSatish Balay   }
38557b952d6SSatish Balay 
38657b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
38757b952d6SSatish Balay     Draw       draw;
38857b952d6SSatish Balay     PetscTruth isnull;
38957b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
39057b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
39157b952d6SSatish Balay   }
39257b952d6SSatish Balay 
39357b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
39457b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
39557b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
39657b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
39757b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
39857b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
39957b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
40057b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
40157b952d6SSatish Balay     fflush(fd);
40257b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
40357b952d6SSatish Balay   }
40457b952d6SSatish Balay   else {
40557b952d6SSatish Balay     int size = baij->size;
40657b952d6SSatish Balay     rank = baij->rank;
40757b952d6SSatish Balay     if (size == 1) {
40857b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
40957b952d6SSatish Balay     }
41057b952d6SSatish Balay     else {
41157b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
41257b952d6SSatish Balay       Mat         A;
41357b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
41457b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
41557b952d6SSatish Balay       int         mbs=baij->mbs;
41657b952d6SSatish Balay       Scalar      *a;
41757b952d6SSatish Balay 
41857b952d6SSatish Balay       if (!rank) {
419cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
42057b952d6SSatish Balay         CHKERRQ(ierr);
42157b952d6SSatish Balay       }
42257b952d6SSatish Balay       else {
423cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
42457b952d6SSatish Balay         CHKERRQ(ierr);
42557b952d6SSatish Balay       }
42657b952d6SSatish Balay       PLogObjectParent(mat,A);
42757b952d6SSatish Balay 
42857b952d6SSatish Balay       /* copy over the A part */
42957b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
43057b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
43157b952d6SSatish Balay       row = baij->rstart;
43257b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
43357b952d6SSatish Balay 
43457b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
43557b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
43657b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
43757b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
43857b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
43957b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
440cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
441cee3aa6bSSatish Balay             col++; a += bs;
44257b952d6SSatish Balay           }
44357b952d6SSatish Balay         }
44457b952d6SSatish Balay       }
44557b952d6SSatish Balay       /* copy over the B part */
44657b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
44757b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
44857b952d6SSatish Balay       row = baij->rstart*bs;
44957b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
45057b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
45157b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
45257b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
45357b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
45457b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
455cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
456cee3aa6bSSatish Balay             col++; a += bs;
45757b952d6SSatish Balay           }
45857b952d6SSatish Balay         }
45957b952d6SSatish Balay       }
46057b952d6SSatish Balay       PetscFree(rvals);
46157b952d6SSatish Balay       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
46257b952d6SSatish Balay       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
46357b952d6SSatish Balay       if (!rank) {
46457b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
46557b952d6SSatish Balay       }
46657b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
46757b952d6SSatish Balay     }
46857b952d6SSatish Balay   }
46957b952d6SSatish Balay   return 0;
47057b952d6SSatish Balay }
47157b952d6SSatish Balay 
47257b952d6SSatish Balay 
47357b952d6SSatish Balay 
47457b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
47557b952d6SSatish Balay {
47657b952d6SSatish Balay   Mat         mat = (Mat) obj;
47757b952d6SSatish Balay   int         ierr;
47857b952d6SSatish Balay   ViewerType  vtype;
47957b952d6SSatish Balay 
48057b952d6SSatish Balay   if (!viewer) {
48157b952d6SSatish Balay     viewer = STDOUT_VIEWER_SELF;
48257b952d6SSatish Balay   }
48357b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
48457b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
48557b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
48657b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
48757b952d6SSatish Balay   }
48857b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
48957b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
49057b952d6SSatish Balay   }
49157b952d6SSatish Balay   return 0;
49257b952d6SSatish Balay }
49357b952d6SSatish Balay 
49479bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
49579bdfe76SSatish Balay {
49679bdfe76SSatish Balay   Mat         mat = (Mat) obj;
49779bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
49879bdfe76SSatish Balay   int         ierr;
49979bdfe76SSatish Balay 
50079bdfe76SSatish Balay #if defined(PETSC_LOG)
50179bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
50279bdfe76SSatish Balay #endif
50379bdfe76SSatish Balay 
50479bdfe76SSatish Balay   PetscFree(baij->rowners);
50579bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
50679bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
50779bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
50879bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
50979bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
51079bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
51179bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
51279bdfe76SSatish Balay   PetscFree(baij);
51379bdfe76SSatish Balay   PLogObjectDestroy(mat);
51479bdfe76SSatish Balay   PetscHeaderDestroy(mat);
51579bdfe76SSatish Balay   return 0;
51679bdfe76SSatish Balay }
51779bdfe76SSatish Balay 
518cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
519cee3aa6bSSatish Balay {
520cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
521cee3aa6bSSatish Balay   int        ierr;
522cee3aa6bSSatish Balay 
523cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
524cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
525cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
526cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
527cee3aa6bSSatish Balay   return 0;
528cee3aa6bSSatish Balay }
529cee3aa6bSSatish Balay 
530cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
531cee3aa6bSSatish Balay {
532cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
533cee3aa6bSSatish Balay   int        ierr;
534cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
535cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
536cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
537cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
538cee3aa6bSSatish Balay   return 0;
539cee3aa6bSSatish Balay }
540cee3aa6bSSatish Balay 
541cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
542cee3aa6bSSatish Balay {
543cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
544cee3aa6bSSatish Balay   int        ierr;
545cee3aa6bSSatish Balay 
546cee3aa6bSSatish Balay   /* do nondiagonal part */
547cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
548cee3aa6bSSatish Balay   /* send it on its way */
549cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
550cee3aa6bSSatish Balay                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
551cee3aa6bSSatish Balay   /* do local part */
552cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
553cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
554cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
555cee3aa6bSSatish Balay   /* but is not perhaps always true. */
556cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
557cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
558cee3aa6bSSatish Balay   return 0;
559cee3aa6bSSatish Balay }
560cee3aa6bSSatish Balay 
561cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
562cee3aa6bSSatish Balay {
563cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
564cee3aa6bSSatish Balay   int        ierr;
565cee3aa6bSSatish Balay 
566cee3aa6bSSatish Balay   /* do nondiagonal part */
567cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
568cee3aa6bSSatish Balay   /* send it on its way */
569cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
570cee3aa6bSSatish Balay                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
571cee3aa6bSSatish Balay   /* do local part */
572cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
573cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
574cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
575cee3aa6bSSatish Balay   /* but is not perhaps always true. */
576cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
577cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
578cee3aa6bSSatish Balay   return 0;
579cee3aa6bSSatish Balay }
580cee3aa6bSSatish Balay 
581cee3aa6bSSatish Balay /*
582cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
583cee3aa6bSSatish Balay    diagonal block
584cee3aa6bSSatish Balay */
585cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
586cee3aa6bSSatish Balay {
587cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
588cee3aa6bSSatish Balay   if (a->M != a->N)
589cee3aa6bSSatish Balay     SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block");
590cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
591cee3aa6bSSatish Balay }
592cee3aa6bSSatish Balay 
593cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
594cee3aa6bSSatish Balay {
595cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
596cee3aa6bSSatish Balay   int        ierr;
597cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
598cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
599cee3aa6bSSatish Balay   return 0;
600cee3aa6bSSatish Balay }
60157b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
60257b952d6SSatish Balay {
60357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
60457b952d6SSatish Balay   *m = mat->M; *n = mat->N;
60557b952d6SSatish Balay   return 0;
60657b952d6SSatish Balay }
60757b952d6SSatish Balay 
60857b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
60957b952d6SSatish Balay {
61057b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
61157b952d6SSatish Balay   *m = mat->m; *n = mat->N;
61257b952d6SSatish Balay   return 0;
61357b952d6SSatish Balay }
61457b952d6SSatish Balay 
61557b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
61657b952d6SSatish Balay {
61757b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
61857b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
61957b952d6SSatish Balay   return 0;
62057b952d6SSatish Balay }
62157b952d6SSatish Balay 
622*acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
623*acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
624*acdf5bf4SSatish Balay 
625*acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
626*acdf5bf4SSatish Balay {
627*acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
628*acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
629*acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
630*acdf5bf4SSatish Balay   int        nztot, nzA, nzB, lrow, rstart = mat->rstart*bs, rend = mat->rend*bs;
631*acdf5bf4SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart*bs;
632*acdf5bf4SSatish Balay 
633*acdf5bf4SSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active");
634*acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
635*acdf5bf4SSatish Balay 
636*acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
637*acdf5bf4SSatish Balay     /*
638*acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
639*acdf5bf4SSatish Balay     */
640*acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
641*acdf5bf4SSatish Balay     int     max = 1,nbs = mat->nbs,tmp;
642*acdf5bf4SSatish Balay     for ( i=0; i<nbs; i++ ) {
643*acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
644*acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
645*acdf5bf4SSatish Balay     }
646*acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
647*acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
648*acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
649*acdf5bf4SSatish Balay   }
650*acdf5bf4SSatish Balay 
651*acdf5bf4SSatish Balay 
652*acdf5bf4SSatish Balay   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows")
653*acdf5bf4SSatish Balay   lrow = row - rstart;
654*acdf5bf4SSatish Balay 
655*acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
656*acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
657*acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
658*acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
659*acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
660*acdf5bf4SSatish Balay   nztot = nzA + nzB;
661*acdf5bf4SSatish Balay 
662*acdf5bf4SSatish Balay   cmap  = mat->garray;
663*acdf5bf4SSatish Balay   if (v  || idx) {
664*acdf5bf4SSatish Balay     if (nztot) {
665*acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
666*acdf5bf4SSatish Balay       int imark = -1;
667*acdf5bf4SSatish Balay       if (v) {
668*acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
669*acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
670*acdf5bf4SSatish Balay           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
671*acdf5bf4SSatish Balay           else break;
672*acdf5bf4SSatish Balay         }
673*acdf5bf4SSatish Balay         imark = i;
674*acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
675*acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
676*acdf5bf4SSatish Balay       }
677*acdf5bf4SSatish Balay       if (idx) {
678*acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
679*acdf5bf4SSatish Balay         if (imark > -1) {
680*acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
681*acdf5bf4SSatish Balay             idx_p[i] = cmap[cworkB[i]];
682*acdf5bf4SSatish Balay           }
683*acdf5bf4SSatish Balay         } else {
684*acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
685*acdf5bf4SSatish Balay             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
686*acdf5bf4SSatish Balay             else break;
687*acdf5bf4SSatish Balay           }
688*acdf5bf4SSatish Balay           imark = i;
689*acdf5bf4SSatish Balay         }
690*acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
691*acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
692*acdf5bf4SSatish Balay       }
693*acdf5bf4SSatish Balay     }
694*acdf5bf4SSatish Balay     else {*idx = 0; *v=0;}
695*acdf5bf4SSatish Balay   }
696*acdf5bf4SSatish Balay   *nz = nztot;
697*acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
698*acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
699*acdf5bf4SSatish Balay   return 0;
700*acdf5bf4SSatish Balay }
701*acdf5bf4SSatish Balay 
702*acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
703*acdf5bf4SSatish Balay {
704*acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
705*acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
706*acdf5bf4SSatish Balay     SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called");
707*acdf5bf4SSatish Balay   }
708*acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
709*acdf5bf4SSatish Balay   return 0;
710*acdf5bf4SSatish Balay }
711*acdf5bf4SSatish Balay 
71279bdfe76SSatish Balay /* -------------------------------------------------------------------*/
71379bdfe76SSatish Balay static struct _MatOps MatOps = {
714cee3aa6bSSatish Balay   MatSetValues_MPIBAIJ,0,0,MatMult_MPIBAIJ,
715cee3aa6bSSatish Balay   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
71679bdfe76SSatish Balay   0,0,0,0,
71779bdfe76SSatish Balay   0,0,0,0,
718*acdf5bf4SSatish Balay   0,MatGetDiagonal_MPIBAIJ,0,MatNorm_MPIBAIJ,
71957b952d6SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,0,
72057b952d6SSatish Balay   0,0,MatGetReordering_MPIBAIJ,0,
72157b952d6SSatish Balay   0,0,0,MatGetSize_MPIBAIJ,
72257b952d6SSatish Balay   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
72379bdfe76SSatish Balay   0,0,0,0,
72479bdfe76SSatish Balay   0,0,0,0,
72579bdfe76SSatish Balay   0,0,0,0,
726*acdf5bf4SSatish Balay   0,MatGetValues_MPIBAIJ,0,0,
727cee3aa6bSSatish Balay   MatScale_MPIBAIJ,0,0};
72879bdfe76SSatish Balay 
72979bdfe76SSatish Balay 
73079bdfe76SSatish Balay /*@C
73179bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
73279bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
73379bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
73479bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
73579bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
73679bdfe76SSatish Balay 
73779bdfe76SSatish Balay    Input Parameters:
73879bdfe76SSatish Balay .  comm - MPI communicator
73979bdfe76SSatish Balay .  bs   - size of blockk
74079bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
74179bdfe76SSatish Balay .  n - number of local columns (or PETSC_DECIDE to have calculated
74279bdfe76SSatish Balay            if N is given)
74379bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
74479bdfe76SSatish Balay .  N - number of global columns (or PETSC_DECIDE to have calculated
74579bdfe76SSatish Balay            if n is given)
74679bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
74779bdfe76SSatish Balay            submatrix  (same for all local rows)
74879bdfe76SSatish Balay .  d_nzz - number of block nonzeros per block row in diagonal portion of local
74979bdfe76SSatish Balay            submatrix or null (possibly different for each row).  You must leave
75079bdfe76SSatish Balay            room for the diagonal entry even if it is zero.
75179bdfe76SSatish Balay .  o_nz  - number of block nonzeros per block row in off-diagonal portion of local
75279bdfe76SSatish Balay            submatrix (same for all local rows).
75379bdfe76SSatish Balay .  o_nzz - number of block nonzeros per block row in off-diagonal portion of local
75479bdfe76SSatish Balay            submatrix or null (possibly different for each row).
75579bdfe76SSatish Balay 
75679bdfe76SSatish Balay    Output Parameter:
75779bdfe76SSatish Balay .  A - the matrix
75879bdfe76SSatish Balay 
75979bdfe76SSatish Balay    Notes:
76079bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
76179bdfe76SSatish Balay    (possibly both).
76279bdfe76SSatish Balay 
76379bdfe76SSatish Balay    Storage Information:
76479bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
76579bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
76679bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
76779bdfe76SSatish Balay    local matrix (a rectangular submatrix).
76879bdfe76SSatish Balay 
76979bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
77079bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
77179bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
77279bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
77379bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
77479bdfe76SSatish Balay 
77579bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
77679bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
77779bdfe76SSatish Balay 
77879bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
77979bdfe76SSatish Balay $         -------------------
78079bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
78179bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
78279bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
78379bdfe76SSatish Balay $         -------------------
78479bdfe76SSatish Balay $
78579bdfe76SSatish Balay 
78679bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
78779bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
78879bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
78957b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
79079bdfe76SSatish Balay 
79179bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
79279bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
79379bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
79479bdfe76SSatish Balay    one expects d_nz >> o_nz.   For additional details, see the users manual
79579bdfe76SSatish Balay    chapter on matrices and the file $(PETSC_DIR)/Performance.
79679bdfe76SSatish Balay 
79779bdfe76SSatish Balay .keywords: matrix, aij, compressed row, sparse, parallel
79879bdfe76SSatish Balay 
79979bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
80079bdfe76SSatish Balay @*/
80179bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
80279bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
80379bdfe76SSatish Balay {
80479bdfe76SSatish Balay   Mat          B;
80579bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
806cee3aa6bSSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE;
80779bdfe76SSatish Balay 
80879bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
80979bdfe76SSatish Balay   *A = 0;
81079bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
81179bdfe76SSatish Balay   PLogObjectCreate(B);
81279bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
81379bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
81479bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
81579bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
81679bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
81757b952d6SSatish Balay 
81879bdfe76SSatish Balay   B->factor     = 0;
81979bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
82079bdfe76SSatish Balay 
82179bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
82279bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
82379bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
82479bdfe76SSatish Balay 
82579bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
82657b952d6SSatish Balay     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
827cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified");
828cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified");
829cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
830cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
83179bdfe76SSatish Balay 
83279bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
83379bdfe76SSatish Balay     work[0] = m; work[1] = n;
83479bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
83579bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
83679bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
83779bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
83879bdfe76SSatish Balay   }
83979bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
84079bdfe76SSatish Balay     Mbs = M/bs;
84179bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
84279bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
84379bdfe76SSatish Balay     m   = mbs*bs;
84479bdfe76SSatish Balay   }
84579bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
84679bdfe76SSatish Balay     Nbs = N/bs;
84779bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
84879bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
84979bdfe76SSatish Balay     n   = nbs*bs;
85079bdfe76SSatish Balay   }
851cee3aa6bSSatish Balay   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
85279bdfe76SSatish Balay 
85379bdfe76SSatish Balay   b->m = m; B->m = m;
85479bdfe76SSatish Balay   b->n = n; B->n = n;
85579bdfe76SSatish Balay   b->N = N; B->N = N;
85679bdfe76SSatish Balay   b->M = M; B->M = M;
85779bdfe76SSatish Balay   b->bs  = bs;
85879bdfe76SSatish Balay   b->bs2 = bs*bs;
85979bdfe76SSatish Balay   b->mbs = mbs;
86079bdfe76SSatish Balay   b->nbs = nbs;
86179bdfe76SSatish Balay   b->Mbs = Mbs;
86279bdfe76SSatish Balay   b->Nbs = Nbs;
86379bdfe76SSatish Balay 
86479bdfe76SSatish Balay   /* build local table of row and column ownerships */
86579bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
86679bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
86779bdfe76SSatish Balay   b->cowners = b->rowners + b->size + 1;
86879bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
86979bdfe76SSatish Balay   b->rowners[0] = 0;
87079bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
87179bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
87279bdfe76SSatish Balay   }
87379bdfe76SSatish Balay   b->rstart = b->rowners[b->rank];
87479bdfe76SSatish Balay   b->rend   = b->rowners[b->rank+1];
87579bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
87679bdfe76SSatish Balay   b->cowners[0] = 0;
87779bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
87879bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
87979bdfe76SSatish Balay   }
88079bdfe76SSatish Balay   b->cstart = b->cowners[b->rank];
88179bdfe76SSatish Balay   b->cend   = b->cowners[b->rank+1];
88279bdfe76SSatish Balay 
88379bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
88479bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
88579bdfe76SSatish Balay   PLogObjectParent(B,b->A);
88679bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
88779bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
88879bdfe76SSatish Balay   PLogObjectParent(B,b->B);
88979bdfe76SSatish Balay 
89079bdfe76SSatish Balay   /* build cache for off array entries formed */
89179bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
89279bdfe76SSatish Balay   b->colmap      = 0;
89379bdfe76SSatish Balay   b->garray      = 0;
89479bdfe76SSatish Balay   b->roworiented = 1;
89579bdfe76SSatish Balay 
89679bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
89779bdfe76SSatish Balay   b->lvec      = 0;
89879bdfe76SSatish Balay   b->Mvctx     = 0;
89979bdfe76SSatish Balay 
90079bdfe76SSatish Balay   /* stuff for MatGetRow() */
90179bdfe76SSatish Balay   b->rowindices   = 0;
90279bdfe76SSatish Balay   b->rowvalues    = 0;
90379bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
90479bdfe76SSatish Balay 
90579bdfe76SSatish Balay   *A = B;
90679bdfe76SSatish Balay   return 0;
90779bdfe76SSatish Balay }
90857b952d6SSatish Balay 
90957b952d6SSatish Balay #include "sys.h"
91057b952d6SSatish Balay 
91157b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
91257b952d6SSatish Balay {
91357b952d6SSatish Balay   Mat          A;
91457b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
91557b952d6SSatish Balay   Scalar       *vals,*buf;
91657b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
91757b952d6SSatish Balay   MPI_Status   status;
918cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
91957b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
92057b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
92157b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
92257b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
92357b952d6SSatish Balay 
92457b952d6SSatish Balay 
92557b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
92657b952d6SSatish Balay   bs2  = bs*bs;
92757b952d6SSatish Balay 
92857b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
92957b952d6SSatish Balay   if (!rank) {
93057b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
93157b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
932cee3aa6bSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object");
93357b952d6SSatish Balay   }
93457b952d6SSatish Balay 
93557b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
93657b952d6SSatish Balay   M = header[1]; N = header[2];
93757b952d6SSatish Balay 
93857b952d6SSatish Balay 
93957b952d6SSatish Balay   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
94057b952d6SSatish Balay 
94157b952d6SSatish Balay   /*
94257b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
94357b952d6SSatish Balay      divisible by the blocksize
94457b952d6SSatish Balay   */
94557b952d6SSatish Balay   Mbs        = M/bs;
94657b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
94757b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
94857b952d6SSatish Balay   else                  Mbs++;
94957b952d6SSatish Balay   if (extra_rows &&!rank) {
95057b952d6SSatish Balay     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
95157b952d6SSatish Balay   }
95257b952d6SSatish Balay   /* determine ownership of all rows */
95357b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
95457b952d6SSatish Balay   m   = mbs * bs;
955cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
956cee3aa6bSSatish Balay   browners = rowners + size + 1;
95757b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
95857b952d6SSatish Balay   rowners[0] = 0;
959cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
960cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
96157b952d6SSatish Balay   rstart = rowners[rank];
96257b952d6SSatish Balay   rend   = rowners[rank+1];
96357b952d6SSatish Balay 
96457b952d6SSatish Balay   /* distribute row lengths to all processors */
96557b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
96657b952d6SSatish Balay   if (!rank) {
96757b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
96857b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
96957b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
97057b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
971cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
972cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
97357b952d6SSatish Balay     PetscFree(sndcounts);
97457b952d6SSatish Balay   }
97557b952d6SSatish Balay   else {
97657b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
97757b952d6SSatish Balay   }
97857b952d6SSatish Balay 
97957b952d6SSatish Balay   if (!rank) {
98057b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
98157b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
98257b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
98357b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
98457b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
98557b952d6SSatish Balay         procsnz[i] += rowlengths[j];
98657b952d6SSatish Balay       }
98757b952d6SSatish Balay     }
98857b952d6SSatish Balay     PetscFree(rowlengths);
98957b952d6SSatish Balay 
99057b952d6SSatish Balay     /* determine max buffer needed and allocate it */
99157b952d6SSatish Balay     maxnz = 0;
99257b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
99357b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
99457b952d6SSatish Balay     }
99557b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
99657b952d6SSatish Balay 
99757b952d6SSatish Balay     /* read in my part of the matrix column indices  */
99857b952d6SSatish Balay     nz = procsnz[0];
99957b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
100057b952d6SSatish Balay     mycols = ibuf;
1001cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
100257b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1003cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1004cee3aa6bSSatish Balay 
100557b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
100657b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
100757b952d6SSatish Balay       nz = procsnz[i];
100857b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
100957b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
101057b952d6SSatish Balay     }
101157b952d6SSatish Balay     /* read in the stuff for the last proc */
101257b952d6SSatish Balay     if ( size != 1 ) {
101357b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
101457b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
101557b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1016cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
101757b952d6SSatish Balay     }
101857b952d6SSatish Balay     PetscFree(cols);
101957b952d6SSatish Balay   }
102057b952d6SSatish Balay   else {
102157b952d6SSatish Balay     /* determine buffer space needed for message */
102257b952d6SSatish Balay     nz = 0;
102357b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
102457b952d6SSatish Balay       nz += locrowlens[i];
102557b952d6SSatish Balay     }
102657b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
102757b952d6SSatish Balay     mycols = ibuf;
102857b952d6SSatish Balay     /* receive message of column indices*/
102957b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
103057b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1031cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
103257b952d6SSatish Balay   }
103357b952d6SSatish Balay 
103457b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1035cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1036cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
103757b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1038cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
103957b952d6SSatish Balay   masked1 = mask    + Mbs;
104057b952d6SSatish Balay   masked2 = masked1 + Mbs;
104157b952d6SSatish Balay   rowcount = 0; nzcount = 0;
104257b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
104357b952d6SSatish Balay     dcount  = 0;
104457b952d6SSatish Balay     odcount = 0;
104557b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
104657b952d6SSatish Balay       kmax = locrowlens[rowcount];
104757b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
104857b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
104957b952d6SSatish Balay         if (!mask[tmp]) {
105057b952d6SSatish Balay           mask[tmp] = 1;
105157b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
105257b952d6SSatish Balay           else masked1[dcount++] = tmp;
105357b952d6SSatish Balay         }
105457b952d6SSatish Balay       }
105557b952d6SSatish Balay       rowcount++;
105657b952d6SSatish Balay     }
1057cee3aa6bSSatish Balay 
105857b952d6SSatish Balay     dlens[i]  = dcount;
105957b952d6SSatish Balay     odlens[i] = odcount;
1060cee3aa6bSSatish Balay 
106157b952d6SSatish Balay     /* zero out the mask elements we set */
106257b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
106357b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
106457b952d6SSatish Balay   }
1065cee3aa6bSSatish Balay 
106657b952d6SSatish Balay   /* create our matrix */
106757b952d6SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
106857b952d6SSatish Balay   A = *newmat;
106957b952d6SSatish Balay   MatSetOption(A,COLUMNS_SORTED);
107057b952d6SSatish Balay 
107157b952d6SSatish Balay   if (!rank) {
107257b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
107357b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
107457b952d6SSatish Balay     nz = procsnz[0];
107557b952d6SSatish Balay     vals = buf;
1076cee3aa6bSSatish Balay     mycols = ibuf;
1077cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
107857b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1079cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
108057b952d6SSatish Balay     /* insert into matrix */
108157b952d6SSatish Balay     jj      = rstart*bs;
108257b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
108357b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
108457b952d6SSatish Balay       mycols += locrowlens[i];
108557b952d6SSatish Balay       vals   += locrowlens[i];
108657b952d6SSatish Balay       jj++;
108757b952d6SSatish Balay     }
108857b952d6SSatish Balay     /* read in other processors( except the last one) and ship out */
108957b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
109057b952d6SSatish Balay       nz = procsnz[i];
109157b952d6SSatish Balay       vals = buf;
109257b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
109357b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
109457b952d6SSatish Balay     }
109557b952d6SSatish Balay     /* the last proc */
109657b952d6SSatish Balay     if ( size != 1 ){
109757b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1098cee3aa6bSSatish Balay       vals = buf;
109957b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
110057b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1101cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
110257b952d6SSatish Balay     }
110357b952d6SSatish Balay     PetscFree(procsnz);
110457b952d6SSatish Balay   }
110557b952d6SSatish Balay   else {
110657b952d6SSatish Balay     /* receive numeric values */
110757b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
110857b952d6SSatish Balay 
110957b952d6SSatish Balay     /* receive message of values*/
111057b952d6SSatish Balay     vals = buf;
1111cee3aa6bSSatish Balay     mycols = ibuf;
111257b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
111357b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1114cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
111557b952d6SSatish Balay 
111657b952d6SSatish Balay     /* insert into matrix */
111757b952d6SSatish Balay     jj      = rstart*bs;
1118cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
111957b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
112057b952d6SSatish Balay       mycols += locrowlens[i];
112157b952d6SSatish Balay       vals   += locrowlens[i];
112257b952d6SSatish Balay       jj++;
112357b952d6SSatish Balay     }
112457b952d6SSatish Balay   }
112557b952d6SSatish Balay   PetscFree(locrowlens);
112657b952d6SSatish Balay   PetscFree(buf);
112757b952d6SSatish Balay   PetscFree(ibuf);
112857b952d6SSatish Balay   PetscFree(rowners);
112957b952d6SSatish Balay   PetscFree(dlens);
1130cee3aa6bSSatish Balay   PetscFree(mask);
113157b952d6SSatish Balay   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
113257b952d6SSatish Balay   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
113357b952d6SSatish Balay   return 0;
113457b952d6SSatish Balay }
113557b952d6SSatish Balay 
113657b952d6SSatish Balay 
1137