xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision cee3aa6b5ab154541f10d9839bfb45104b86798d)
179bdfe76SSatish Balay #ifndef lint
2*cee3aa6bSSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.3 1996/06/19 23:03:32 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
579bdfe76SSatish Balay #include "mpibaij.h"
679bdfe76SSatish Balay 
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
1357b952d6SSatish Balay 
1457b952d6SSatish Balay /* local utility routine that creates a mapping from the global column
1557b952d6SSatish Balay number to the local number in the off-diagonal part of the local
1657b952d6SSatish Balay storage of the matrix.  This is done in a non scable way since the
1757b952d6SSatish Balay length of colmap equals the global matrix length.
1857b952d6SSatish Balay */
1957b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2057b952d6SSatish Balay {
2157b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2257b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
2357b952d6SSatish Balay   int        nbs = B->nbs,i;
2457b952d6SSatish Balay 
2557b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
2657b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
2757b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
2857b952d6SSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i;
2957b952d6SSatish Balay   return 0;
3057b952d6SSatish Balay }
3157b952d6SSatish Balay 
3257b952d6SSatish Balay 
3357b952d6SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
3457b952d6SSatish Balay {
3557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3657b952d6SSatish Balay   int         ierr;
3757b952d6SSatish Balay   if (baij->size == 1) {
3857b952d6SSatish Balay     ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr);
3957b952d6SSatish Balay   } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel");
4057b952d6SSatish Balay   return 0;
4157b952d6SSatish Balay }
4257b952d6SSatish Balay 
4357b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4457b952d6SSatish Balay {
4557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4657b952d6SSatish Balay   Scalar      value;
4757b952d6SSatish Balay   int         ierr,i,j, rstart = baij->rstart, rend = baij->rend;
4857b952d6SSatish Balay   int         cstart = baij->cstart, cend = baij->cend,row,col;
4957b952d6SSatish Balay   int         roworiented = baij->roworiented,rstart_orig,rend_orig;
5057b952d6SSatish Balay   int         cstart_orig,cend_orig,bs=baij->bs;
5157b952d6SSatish Balay 
5257b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
5357b952d6SSatish Balay     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
5457b952d6SSatish Balay   }
5557b952d6SSatish Balay   baij->insertmode = addv;
5657b952d6SSatish Balay   rstart_orig = rstart*bs;
5757b952d6SSatish Balay   rend_orig   = rend*bs;
5857b952d6SSatish Balay   cstart_orig = cstart*bs;
5957b952d6SSatish Balay   cend_orig   = cend*bs;
6057b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
6157b952d6SSatish Balay     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
6257b952d6SSatish Balay     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
6357b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
6457b952d6SSatish Balay       row = im[i] - rstart_orig;
6557b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
6657b952d6SSatish Balay         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");
6757b952d6SSatish Balay         if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");
6857b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
6957b952d6SSatish Balay           col = in[j] - cstart_orig;
7057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
7157b952d6SSatish Balay           ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
7257b952d6SSatish Balay         }
7357b952d6SSatish Balay         else {
7457b952d6SSatish Balay           if (mat->was_assembled) {
7557b952d6SSatish Balay             if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);}
7657b952d6SSatish Balay             col = baij->colmap[in[j]];
7757b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
7857b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
7957b952d6SSatish Balay               col =  in[j];
8057b952d6SSatish Balay             }
8157b952d6SSatish Balay           }
8257b952d6SSatish Balay           else col = in[j];
8357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
8457b952d6SSatish Balay           ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
8557b952d6SSatish Balay         }
8657b952d6SSatish Balay       }
8757b952d6SSatish Balay     }
8857b952d6SSatish Balay     else {
8957b952d6SSatish Balay       if (roworiented) {
9057b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
9157b952d6SSatish Balay       }
9257b952d6SSatish Balay       else {
9357b952d6SSatish Balay         row = im[i];
9457b952d6SSatish Balay         for ( j=0; j<n; j++ ) {
9557b952d6SSatish Balay           ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
9657b952d6SSatish Balay         }
9757b952d6SSatish Balay       }
9857b952d6SSatish Balay     }
9957b952d6SSatish Balay   }
10057b952d6SSatish Balay   return 0;
10157b952d6SSatish Balay }
10257b952d6SSatish Balay 
10357b952d6SSatish Balay 
10457b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
10557b952d6SSatish Balay {
10657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
10757b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
10857b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
10957b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
11057b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
11157b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
11257b952d6SSatish Balay   InsertMode  addv;
11357b952d6SSatish Balay   Scalar      *rvalues,*svalues;
11457b952d6SSatish Balay 
11557b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
11657b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
11757b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
11857b952d6SSatish Balay     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
11957b952d6SSatish Balay   }
12057b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
12157b952d6SSatish Balay 
12257b952d6SSatish Balay   /*  first count number of contributors to each processor */
12357b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
12457b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
12557b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
12657b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
12757b952d6SSatish Balay     idx = baij->stash.idx[i];
12857b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
12957b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
13057b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
13157b952d6SSatish Balay       }
13257b952d6SSatish Balay     }
13357b952d6SSatish Balay   }
13457b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
13557b952d6SSatish Balay 
13657b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
13757b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
13857b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
13957b952d6SSatish Balay   nreceives = work[rank];
14057b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
14157b952d6SSatish Balay   nmax = work[rank];
14257b952d6SSatish Balay   PetscFree(work);
14357b952d6SSatish Balay 
14457b952d6SSatish Balay   /* post receives:
14557b952d6SSatish Balay        1) each message will consist of ordered pairs
14657b952d6SSatish Balay      (global index,value) we store the global index as a double
14757b952d6SSatish Balay      to simplify the message passing.
14857b952d6SSatish Balay        2) since we don't know how long each individual message is we
14957b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
15057b952d6SSatish Balay      this is a lot of wasted space.
15157b952d6SSatish Balay 
15257b952d6SSatish Balay 
15357b952d6SSatish Balay        This could be done better.
15457b952d6SSatish Balay   */
15557b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
15657b952d6SSatish Balay   CHKPTRQ(rvalues);
15757b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
15857b952d6SSatish Balay   CHKPTRQ(recv_waits);
15957b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
16057b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
16157b952d6SSatish Balay               comm,recv_waits+i);
16257b952d6SSatish Balay   }
16357b952d6SSatish Balay 
16457b952d6SSatish Balay   /* do sends:
16557b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
16657b952d6SSatish Balay          the ith processor
16757b952d6SSatish Balay   */
16857b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
16957b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
17057b952d6SSatish Balay   CHKPTRQ(send_waits);
17157b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
17257b952d6SSatish Balay   starts[0] = 0;
17357b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
17457b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
17557b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
17657b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
17757b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
17857b952d6SSatish Balay   }
17957b952d6SSatish Balay   PetscFree(owner);
18057b952d6SSatish Balay   starts[0] = 0;
18157b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
18257b952d6SSatish Balay   count = 0;
18357b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
18457b952d6SSatish Balay     if (procs[i]) {
18557b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
18657b952d6SSatish Balay                 comm,send_waits+count++);
18757b952d6SSatish Balay     }
18857b952d6SSatish Balay   }
18957b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
19057b952d6SSatish Balay 
19157b952d6SSatish Balay   /* Free cache space */
19257b952d6SSatish Balay   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n);
19357b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
19457b952d6SSatish Balay 
19557b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
19657b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
19757b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
19857b952d6SSatish Balay   baij->rmax       = nmax;
19957b952d6SSatish Balay 
20057b952d6SSatish Balay   return 0;
20157b952d6SSatish Balay }
20257b952d6SSatish Balay 
20357b952d6SSatish Balay 
20457b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
20557b952d6SSatish Balay {
20657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
20757b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
20857b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
20957b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
21057b952d6SSatish Balay   Scalar      *values,val;
21157b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
21257b952d6SSatish Balay 
21357b952d6SSatish Balay   /*  wait on receives */
21457b952d6SSatish Balay   while (count) {
21557b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
21657b952d6SSatish Balay     /* unpack receives into our local space */
21757b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
21857b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
21957b952d6SSatish Balay     n = n/3;
22057b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
22157b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
22257b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
22357b952d6SSatish Balay       val = values[3*i+2];
22457b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
22557b952d6SSatish Balay         col -= baij->cstart*bs;
22657b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
22757b952d6SSatish Balay       }
22857b952d6SSatish Balay       else {
22957b952d6SSatish Balay         if (mat->was_assembled) {
23057b952d6SSatish Balay           if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);}
23157b952d6SSatish Balay           col = baij->colmap[col/bs]*bs + col%bs;
23257b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
23357b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
23457b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
23557b952d6SSatish Balay           }
23657b952d6SSatish Balay         }
23757b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
23857b952d6SSatish Balay       }
23957b952d6SSatish Balay     }
24057b952d6SSatish Balay     count--;
24157b952d6SSatish Balay   }
24257b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
24357b952d6SSatish Balay 
24457b952d6SSatish Balay   /* wait on sends */
24557b952d6SSatish Balay   if (baij->nsends) {
24657b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
24757b952d6SSatish Balay     CHKPTRQ(send_status);
24857b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
24957b952d6SSatish Balay     PetscFree(send_status);
25057b952d6SSatish Balay   }
25157b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
25257b952d6SSatish Balay 
25357b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
25457b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
25557b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
25657b952d6SSatish Balay 
25757b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
25857b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
25957b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
26057b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
26157b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
26257b952d6SSatish Balay   }
26357b952d6SSatish Balay 
26457b952d6SSatish Balay   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
26557b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
26657b952d6SSatish Balay   }
26757b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
26857b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
26957b952d6SSatish Balay 
27057b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
27157b952d6SSatish Balay   return 0;
27257b952d6SSatish Balay }
27357b952d6SSatish Balay 
27457b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
27557b952d6SSatish Balay {
27657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
27757b952d6SSatish Balay   int          ierr;
27857b952d6SSatish Balay 
27957b952d6SSatish Balay   if (baij->size == 1) {
28057b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
28157b952d6SSatish Balay   }
28257b952d6SSatish Balay   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
28357b952d6SSatish Balay   return 0;
28457b952d6SSatish Balay }
28557b952d6SSatish Balay 
28657b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
28757b952d6SSatish Balay {
28857b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
289*cee3aa6bSSatish Balay   int          ierr, format,rank,bs=baij->bs;
29057b952d6SSatish Balay   FILE         *fd;
29157b952d6SSatish Balay   ViewerType   vtype;
29257b952d6SSatish Balay 
29357b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
29457b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
29557b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
29657b952d6SSatish Balay     if (format == ASCII_FORMAT_INFO_DETAILED) {
29757b952d6SSatish Balay       int nz, nzalloc, mem;
29857b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
29957b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
30057b952d6SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
30157b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
30257b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
30357b952d6SSatish Balay               rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem);
30457b952d6SSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
30557b952d6SSatish Balay       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs);
30657b952d6SSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
30757b952d6SSatish Balay       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs);
30857b952d6SSatish Balay       fflush(fd);
30957b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
31057b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
31157b952d6SSatish Balay       return 0;
31257b952d6SSatish Balay     }
31357b952d6SSatish Balay     else if (format == ASCII_FORMAT_INFO) {
31457b952d6SSatish Balay       return 0;
31557b952d6SSatish Balay     }
31657b952d6SSatish Balay   }
31757b952d6SSatish Balay 
31857b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
31957b952d6SSatish Balay     Draw       draw;
32057b952d6SSatish Balay     PetscTruth isnull;
32157b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
32257b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
32357b952d6SSatish Balay   }
32457b952d6SSatish Balay 
32557b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
32657b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
32757b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
32857b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
32957b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
33057b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
33157b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
33257b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
33357b952d6SSatish Balay     fflush(fd);
33457b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
33557b952d6SSatish Balay   }
33657b952d6SSatish Balay   else {
33757b952d6SSatish Balay     int size = baij->size;
33857b952d6SSatish Balay     rank = baij->rank;
33957b952d6SSatish Balay     if (size == 1) {
34057b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
34157b952d6SSatish Balay     }
34257b952d6SSatish Balay     else {
34357b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
34457b952d6SSatish Balay       Mat         A;
34557b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
34657b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
34757b952d6SSatish Balay       int         mbs=baij->mbs;
34857b952d6SSatish Balay       Scalar      *a;
34957b952d6SSatish Balay 
35057b952d6SSatish Balay       if (!rank) {
351*cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
35257b952d6SSatish Balay         CHKERRQ(ierr);
35357b952d6SSatish Balay       }
35457b952d6SSatish Balay       else {
355*cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
35657b952d6SSatish Balay         CHKERRQ(ierr);
35757b952d6SSatish Balay       }
35857b952d6SSatish Balay       PLogObjectParent(mat,A);
35957b952d6SSatish Balay 
36057b952d6SSatish Balay       /* copy over the A part */
36157b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
36257b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
36357b952d6SSatish Balay       row = baij->rstart;
36457b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
36557b952d6SSatish Balay 
36657b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
36757b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
36857b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
36957b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
37057b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
37157b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
372*cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
373*cee3aa6bSSatish Balay             col++; a += bs;
37457b952d6SSatish Balay           }
37557b952d6SSatish Balay         }
37657b952d6SSatish Balay       }
37757b952d6SSatish Balay       /* copy over the B part */
37857b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
37957b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
38057b952d6SSatish Balay       row = baij->rstart*bs;
38157b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
38257b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
38357b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
38457b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
38557b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
38657b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
387*cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
388*cee3aa6bSSatish Balay             col++; a += bs;
38957b952d6SSatish Balay           }
39057b952d6SSatish Balay         }
39157b952d6SSatish Balay       }
39257b952d6SSatish Balay       PetscFree(rvals);
39357b952d6SSatish Balay       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
39457b952d6SSatish Balay       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
39557b952d6SSatish Balay       if (!rank) {
39657b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
39757b952d6SSatish Balay       }
39857b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
39957b952d6SSatish Balay     }
40057b952d6SSatish Balay   }
40157b952d6SSatish Balay   return 0;
40257b952d6SSatish Balay }
40357b952d6SSatish Balay 
40457b952d6SSatish Balay 
40557b952d6SSatish Balay 
40657b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
40757b952d6SSatish Balay {
40857b952d6SSatish Balay   Mat         mat = (Mat) obj;
40957b952d6SSatish Balay   int         ierr;
41057b952d6SSatish Balay   ViewerType  vtype;
41157b952d6SSatish Balay 
41257b952d6SSatish Balay   if (!viewer) {
41357b952d6SSatish Balay     viewer = STDOUT_VIEWER_SELF;
41457b952d6SSatish Balay   }
41557b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
41657b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
41757b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
41857b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
41957b952d6SSatish Balay   }
42057b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
42157b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
42257b952d6SSatish Balay   }
42357b952d6SSatish Balay   return 0;
42457b952d6SSatish Balay }
42557b952d6SSatish Balay 
42679bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
42779bdfe76SSatish Balay {
42879bdfe76SSatish Balay   Mat         mat = (Mat) obj;
42979bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
43079bdfe76SSatish Balay   int         ierr;
43179bdfe76SSatish Balay 
43279bdfe76SSatish Balay #if defined(PETSC_LOG)
43379bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
43479bdfe76SSatish Balay #endif
43579bdfe76SSatish Balay 
43679bdfe76SSatish Balay   PetscFree(baij->rowners);
43779bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
43879bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
43979bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
44079bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
44179bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
44279bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
44379bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
44479bdfe76SSatish Balay   PetscFree(baij);
44579bdfe76SSatish Balay   PLogObjectDestroy(mat);
44679bdfe76SSatish Balay   PetscHeaderDestroy(mat);
44779bdfe76SSatish Balay   return 0;
44879bdfe76SSatish Balay }
44979bdfe76SSatish Balay 
450*cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
451*cee3aa6bSSatish Balay {
452*cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
453*cee3aa6bSSatish Balay   int        ierr;
454*cee3aa6bSSatish Balay 
455*cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
456*cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
457*cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
458*cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
459*cee3aa6bSSatish Balay   return 0;
460*cee3aa6bSSatish Balay }
461*cee3aa6bSSatish Balay 
462*cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
463*cee3aa6bSSatish Balay {
464*cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
465*cee3aa6bSSatish Balay   int        ierr;
466*cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
467*cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
468*cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
469*cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
470*cee3aa6bSSatish Balay   return 0;
471*cee3aa6bSSatish Balay }
472*cee3aa6bSSatish Balay 
473*cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
474*cee3aa6bSSatish Balay {
475*cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
476*cee3aa6bSSatish Balay   int        ierr;
477*cee3aa6bSSatish Balay 
478*cee3aa6bSSatish Balay   /* do nondiagonal part */
479*cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
480*cee3aa6bSSatish Balay   /* send it on its way */
481*cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
482*cee3aa6bSSatish Balay                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
483*cee3aa6bSSatish Balay   /* do local part */
484*cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
485*cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
486*cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
487*cee3aa6bSSatish Balay   /* but is not perhaps always true. */
488*cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
489*cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
490*cee3aa6bSSatish Balay   return 0;
491*cee3aa6bSSatish Balay }
492*cee3aa6bSSatish Balay 
493*cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
494*cee3aa6bSSatish Balay {
495*cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
496*cee3aa6bSSatish Balay   int        ierr;
497*cee3aa6bSSatish Balay 
498*cee3aa6bSSatish Balay   /* do nondiagonal part */
499*cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
500*cee3aa6bSSatish Balay   /* send it on its way */
501*cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
502*cee3aa6bSSatish Balay                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
503*cee3aa6bSSatish Balay   /* do local part */
504*cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
505*cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
506*cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
507*cee3aa6bSSatish Balay   /* but is not perhaps always true. */
508*cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
509*cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
510*cee3aa6bSSatish Balay   return 0;
511*cee3aa6bSSatish Balay }
512*cee3aa6bSSatish Balay 
513*cee3aa6bSSatish Balay /*
514*cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
515*cee3aa6bSSatish Balay    diagonal block
516*cee3aa6bSSatish Balay */
517*cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
518*cee3aa6bSSatish Balay {
519*cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
520*cee3aa6bSSatish Balay   if (a->M != a->N)
521*cee3aa6bSSatish Balay     SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block");
522*cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
523*cee3aa6bSSatish Balay }
524*cee3aa6bSSatish Balay 
525*cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
526*cee3aa6bSSatish Balay {
527*cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
528*cee3aa6bSSatish Balay   int        ierr;
529*cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
530*cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
531*cee3aa6bSSatish Balay   return 0;
532*cee3aa6bSSatish Balay }
53357b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
53457b952d6SSatish Balay {
53557b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
53657b952d6SSatish Balay   *m = mat->M; *n = mat->N;
53757b952d6SSatish Balay   return 0;
53857b952d6SSatish Balay }
53957b952d6SSatish Balay 
54057b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
54157b952d6SSatish Balay {
54257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
54357b952d6SSatish Balay   *m = mat->m; *n = mat->N;
54457b952d6SSatish Balay   return 0;
54557b952d6SSatish Balay }
54657b952d6SSatish Balay 
54757b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
54857b952d6SSatish Balay {
54957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
55057b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
55157b952d6SSatish Balay   return 0;
55257b952d6SSatish Balay }
55357b952d6SSatish Balay 
55479bdfe76SSatish Balay /* -------------------------------------------------------------------*/
55579bdfe76SSatish Balay static struct _MatOps MatOps = {
556*cee3aa6bSSatish Balay   MatSetValues_MPIBAIJ,0,0,MatMult_MPIBAIJ,
557*cee3aa6bSSatish Balay   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
55879bdfe76SSatish Balay   0,0,0,0,
55979bdfe76SSatish Balay   0,0,0,0,
560*cee3aa6bSSatish Balay   0,MatGetDiagonal_MPIBAIJ,0,0,
56157b952d6SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,0,
56257b952d6SSatish Balay   0,0,MatGetReordering_MPIBAIJ,0,
56357b952d6SSatish Balay   0,0,0,MatGetSize_MPIBAIJ,
56457b952d6SSatish Balay   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
56579bdfe76SSatish Balay   0,0,0,0,
56679bdfe76SSatish Balay   0,0,0,0,
56779bdfe76SSatish Balay   0,0,0,0,
56879bdfe76SSatish Balay   0,0,0,0,
569*cee3aa6bSSatish Balay   MatScale_MPIBAIJ,0,0};
57079bdfe76SSatish Balay 
57179bdfe76SSatish Balay 
57279bdfe76SSatish Balay /*@C
57379bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
57479bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
57579bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
57679bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
57779bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
57879bdfe76SSatish Balay 
57979bdfe76SSatish Balay    Input Parameters:
58079bdfe76SSatish Balay .  comm - MPI communicator
58179bdfe76SSatish Balay .  bs   - size of blockk
58279bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
58379bdfe76SSatish Balay .  n - number of local columns (or PETSC_DECIDE to have calculated
58479bdfe76SSatish Balay            if N is given)
58579bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
58679bdfe76SSatish Balay .  N - number of global columns (or PETSC_DECIDE to have calculated
58779bdfe76SSatish Balay            if n is given)
58879bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
58979bdfe76SSatish Balay            submatrix  (same for all local rows)
59079bdfe76SSatish Balay .  d_nzz - number of block nonzeros per block row in diagonal portion of local
59179bdfe76SSatish Balay            submatrix or null (possibly different for each row).  You must leave
59279bdfe76SSatish Balay            room for the diagonal entry even if it is zero.
59379bdfe76SSatish Balay .  o_nz  - number of block nonzeros per block row in off-diagonal portion of local
59479bdfe76SSatish Balay            submatrix (same for all local rows).
59579bdfe76SSatish Balay .  o_nzz - number of block nonzeros per block row in off-diagonal portion of local
59679bdfe76SSatish Balay            submatrix or null (possibly different for each row).
59779bdfe76SSatish Balay 
59879bdfe76SSatish Balay    Output Parameter:
59979bdfe76SSatish Balay .  A - the matrix
60079bdfe76SSatish Balay 
60179bdfe76SSatish Balay    Notes:
60279bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
60379bdfe76SSatish Balay    (possibly both).
60479bdfe76SSatish Balay 
60579bdfe76SSatish Balay    Storage Information:
60679bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
60779bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
60879bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
60979bdfe76SSatish Balay    local matrix (a rectangular submatrix).
61079bdfe76SSatish Balay 
61179bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
61279bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
61379bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
61479bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
61579bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
61679bdfe76SSatish Balay 
61779bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
61879bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
61979bdfe76SSatish Balay 
62079bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
62179bdfe76SSatish Balay $         -------------------
62279bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
62379bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
62479bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
62579bdfe76SSatish Balay $         -------------------
62679bdfe76SSatish Balay $
62779bdfe76SSatish Balay 
62879bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
62979bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
63079bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
63157b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
63279bdfe76SSatish Balay 
63379bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
63479bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
63579bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
63679bdfe76SSatish Balay    one expects d_nz >> o_nz.   For additional details, see the users manual
63779bdfe76SSatish Balay    chapter on matrices and the file $(PETSC_DIR)/Performance.
63879bdfe76SSatish Balay 
63979bdfe76SSatish Balay .keywords: matrix, aij, compressed row, sparse, parallel
64079bdfe76SSatish Balay 
64179bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
64279bdfe76SSatish Balay @*/
64379bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
64479bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
64579bdfe76SSatish Balay {
64679bdfe76SSatish Balay   Mat          B;
64779bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
648*cee3aa6bSSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE;
64979bdfe76SSatish Balay 
65079bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
65179bdfe76SSatish Balay   *A = 0;
65279bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
65379bdfe76SSatish Balay   PLogObjectCreate(B);
65479bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
65579bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
65679bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
65779bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
65879bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
65957b952d6SSatish Balay 
66079bdfe76SSatish Balay   B->factor     = 0;
66179bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
66279bdfe76SSatish Balay 
66379bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
66479bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
66579bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
66679bdfe76SSatish Balay 
66779bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
66857b952d6SSatish Balay     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
669*cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified");
670*cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified");
671*cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
672*cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
67379bdfe76SSatish Balay 
67479bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
67579bdfe76SSatish Balay     work[0] = m; work[1] = n;
67679bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
67779bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
67879bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
67979bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
68079bdfe76SSatish Balay   }
68179bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
68279bdfe76SSatish Balay     Mbs = M/bs;
68379bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
68479bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
68579bdfe76SSatish Balay     m   = mbs*bs;
68679bdfe76SSatish Balay   }
68779bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
68879bdfe76SSatish Balay     Nbs = N/bs;
68979bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
69079bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
69179bdfe76SSatish Balay     n   = nbs*bs;
69279bdfe76SSatish Balay   }
693*cee3aa6bSSatish Balay   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
69479bdfe76SSatish Balay 
69579bdfe76SSatish Balay   b->m = m; B->m = m;
69679bdfe76SSatish Balay   b->n = n; B->n = n;
69779bdfe76SSatish Balay   b->N = N; B->N = N;
69879bdfe76SSatish Balay   b->M = M; B->M = M;
69979bdfe76SSatish Balay   b->bs  = bs;
70079bdfe76SSatish Balay   b->bs2 = bs*bs;
70179bdfe76SSatish Balay   b->mbs = mbs;
70279bdfe76SSatish Balay   b->nbs = nbs;
70379bdfe76SSatish Balay   b->Mbs = Mbs;
70479bdfe76SSatish Balay   b->Nbs = Nbs;
70579bdfe76SSatish Balay 
70679bdfe76SSatish Balay   /* build local table of row and column ownerships */
70779bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
70879bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
70979bdfe76SSatish Balay   b->cowners = b->rowners + b->size + 1;
71079bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
71179bdfe76SSatish Balay   b->rowners[0] = 0;
71279bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
71379bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
71479bdfe76SSatish Balay   }
71579bdfe76SSatish Balay   b->rstart = b->rowners[b->rank];
71679bdfe76SSatish Balay   b->rend   = b->rowners[b->rank+1];
71779bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
71879bdfe76SSatish Balay   b->cowners[0] = 0;
71979bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
72079bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
72179bdfe76SSatish Balay   }
72279bdfe76SSatish Balay   b->cstart = b->cowners[b->rank];
72379bdfe76SSatish Balay   b->cend   = b->cowners[b->rank+1];
72479bdfe76SSatish Balay 
72579bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
72679bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
72779bdfe76SSatish Balay   PLogObjectParent(B,b->A);
72879bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
72979bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
73079bdfe76SSatish Balay   PLogObjectParent(B,b->B);
73179bdfe76SSatish Balay 
73279bdfe76SSatish Balay   /* build cache for off array entries formed */
73379bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
73479bdfe76SSatish Balay   b->colmap      = 0;
73579bdfe76SSatish Balay   b->garray      = 0;
73679bdfe76SSatish Balay   b->roworiented = 1;
73779bdfe76SSatish Balay 
73879bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
73979bdfe76SSatish Balay   b->lvec      = 0;
74079bdfe76SSatish Balay   b->Mvctx     = 0;
74179bdfe76SSatish Balay 
74279bdfe76SSatish Balay   /* stuff for MatGetRow() */
74379bdfe76SSatish Balay   b->rowindices   = 0;
74479bdfe76SSatish Balay   b->rowvalues    = 0;
74579bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
74679bdfe76SSatish Balay 
74779bdfe76SSatish Balay   *A = B;
74879bdfe76SSatish Balay   return 0;
74979bdfe76SSatish Balay }
75057b952d6SSatish Balay 
75157b952d6SSatish Balay #include "sys.h"
75257b952d6SSatish Balay 
75357b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
75457b952d6SSatish Balay {
75557b952d6SSatish Balay   Mat          A;
75657b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
75757b952d6SSatish Balay   Scalar       *vals,*buf;
75857b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
75957b952d6SSatish Balay   MPI_Status   status;
760*cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
76157b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
76257b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
76357b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
76457b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
76557b952d6SSatish Balay 
76657b952d6SSatish Balay 
76757b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
76857b952d6SSatish Balay   bs2  = bs*bs;
76957b952d6SSatish Balay 
77057b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
77157b952d6SSatish Balay   if (!rank) {
77257b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
77357b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
774*cee3aa6bSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object");
77557b952d6SSatish Balay   }
77657b952d6SSatish Balay 
77757b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
77857b952d6SSatish Balay   M = header[1]; N = header[2];
77957b952d6SSatish Balay 
78057b952d6SSatish Balay 
78157b952d6SSatish Balay   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
78257b952d6SSatish Balay 
78357b952d6SSatish Balay   /*
78457b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
78557b952d6SSatish Balay      divisible by the blocksize
78657b952d6SSatish Balay   */
78757b952d6SSatish Balay   Mbs        = M/bs;
78857b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
78957b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
79057b952d6SSatish Balay   else                  Mbs++;
79157b952d6SSatish Balay   if (extra_rows &&!rank) {
79257b952d6SSatish Balay     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
79357b952d6SSatish Balay   }
79457b952d6SSatish Balay   /* determine ownership of all rows */
79557b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
79657b952d6SSatish Balay   m   = mbs * bs;
797*cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
798*cee3aa6bSSatish Balay   browners = rowners + size + 1;
79957b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
80057b952d6SSatish Balay   rowners[0] = 0;
801*cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
802*cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
80357b952d6SSatish Balay   rstart = rowners[rank];
80457b952d6SSatish Balay   rend   = rowners[rank+1];
80557b952d6SSatish Balay 
80657b952d6SSatish Balay   /* distribute row lengths to all processors */
80757b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
80857b952d6SSatish Balay   if (!rank) {
80957b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
81057b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
81157b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
81257b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
813*cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
814*cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
81557b952d6SSatish Balay     PetscFree(sndcounts);
81657b952d6SSatish Balay   }
81757b952d6SSatish Balay   else {
81857b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
81957b952d6SSatish Balay   }
82057b952d6SSatish Balay 
82157b952d6SSatish Balay   if (!rank) {
82257b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
82357b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
82457b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
82557b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
82657b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
82757b952d6SSatish Balay         procsnz[i] += rowlengths[j];
82857b952d6SSatish Balay       }
82957b952d6SSatish Balay     }
83057b952d6SSatish Balay     PetscFree(rowlengths);
83157b952d6SSatish Balay 
83257b952d6SSatish Balay     /* determine max buffer needed and allocate it */
83357b952d6SSatish Balay     maxnz = 0;
83457b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
83557b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
83657b952d6SSatish Balay     }
83757b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
83857b952d6SSatish Balay 
83957b952d6SSatish Balay     /* read in my part of the matrix column indices  */
84057b952d6SSatish Balay     nz = procsnz[0];
84157b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
84257b952d6SSatish Balay     mycols = ibuf;
843*cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
84457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
845*cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
846*cee3aa6bSSatish Balay 
84757b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
84857b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
84957b952d6SSatish Balay       nz = procsnz[i];
85057b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
85157b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
85257b952d6SSatish Balay     }
85357b952d6SSatish Balay     /* read in the stuff for the last proc */
85457b952d6SSatish Balay     if ( size != 1 ) {
85557b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
85657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
85757b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
858*cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
85957b952d6SSatish Balay     }
86057b952d6SSatish Balay     PetscFree(cols);
86157b952d6SSatish Balay   }
86257b952d6SSatish Balay   else {
86357b952d6SSatish Balay     /* determine buffer space needed for message */
86457b952d6SSatish Balay     nz = 0;
86557b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
86657b952d6SSatish Balay       nz += locrowlens[i];
86757b952d6SSatish Balay     }
86857b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
86957b952d6SSatish Balay     mycols = ibuf;
87057b952d6SSatish Balay     /* receive message of column indices*/
87157b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
87257b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
873*cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
87457b952d6SSatish Balay   }
87557b952d6SSatish Balay 
87657b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
877*cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
878*cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
87957b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
880*cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
88157b952d6SSatish Balay   masked1 = mask    + Mbs;
88257b952d6SSatish Balay   masked2 = masked1 + Mbs;
88357b952d6SSatish Balay   rowcount = 0; nzcount = 0;
88457b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
88557b952d6SSatish Balay     dcount  = 0;
88657b952d6SSatish Balay     odcount = 0;
88757b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
88857b952d6SSatish Balay       kmax = locrowlens[rowcount];
88957b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
89057b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
89157b952d6SSatish Balay         if (!mask[tmp]) {
89257b952d6SSatish Balay           mask[tmp] = 1;
89357b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
89457b952d6SSatish Balay           else masked1[dcount++] = tmp;
89557b952d6SSatish Balay         }
89657b952d6SSatish Balay       }
89757b952d6SSatish Balay       rowcount++;
89857b952d6SSatish Balay     }
899*cee3aa6bSSatish Balay 
90057b952d6SSatish Balay     dlens[i]  = dcount;
90157b952d6SSatish Balay     odlens[i] = odcount;
902*cee3aa6bSSatish Balay 
90357b952d6SSatish Balay     /* zero out the mask elements we set */
90457b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
90557b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
90657b952d6SSatish Balay   }
907*cee3aa6bSSatish Balay 
90857b952d6SSatish Balay   /* create our matrix */
90957b952d6SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
91057b952d6SSatish Balay   A = *newmat;
91157b952d6SSatish Balay   MatSetOption(A,COLUMNS_SORTED);
91257b952d6SSatish Balay 
91357b952d6SSatish Balay   if (!rank) {
91457b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
91557b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
91657b952d6SSatish Balay     nz = procsnz[0];
91757b952d6SSatish Balay     vals = buf;
918*cee3aa6bSSatish Balay     mycols = ibuf;
919*cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
92057b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
921*cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
92257b952d6SSatish Balay     /* insert into matrix */
92357b952d6SSatish Balay     jj      = rstart*bs;
92457b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
92557b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
92657b952d6SSatish Balay       mycols += locrowlens[i];
92757b952d6SSatish Balay       vals   += locrowlens[i];
92857b952d6SSatish Balay       jj++;
92957b952d6SSatish Balay     }
93057b952d6SSatish Balay     /* read in other processors( except the last one) and ship out */
93157b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
93257b952d6SSatish Balay       nz = procsnz[i];
93357b952d6SSatish Balay       vals = buf;
93457b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
93557b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
93657b952d6SSatish Balay     }
93757b952d6SSatish Balay     /* the last proc */
93857b952d6SSatish Balay     if ( size != 1 ){
93957b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
940*cee3aa6bSSatish Balay       vals = buf;
94157b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
94257b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
943*cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
94457b952d6SSatish Balay     }
94557b952d6SSatish Balay     PetscFree(procsnz);
94657b952d6SSatish Balay   }
94757b952d6SSatish Balay   else {
94857b952d6SSatish Balay     /* receive numeric values */
94957b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
95057b952d6SSatish Balay 
95157b952d6SSatish Balay     /* receive message of values*/
95257b952d6SSatish Balay     vals = buf;
953*cee3aa6bSSatish Balay     mycols = ibuf;
95457b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
95557b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
956*cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
95757b952d6SSatish Balay 
95857b952d6SSatish Balay     /* insert into matrix */
95957b952d6SSatish Balay     jj      = rstart*bs;
960*cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
96157b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
96257b952d6SSatish Balay       mycols += locrowlens[i];
96357b952d6SSatish Balay       vals   += locrowlens[i];
96457b952d6SSatish Balay       jj++;
96557b952d6SSatish Balay     }
96657b952d6SSatish Balay   }
96757b952d6SSatish Balay   PetscFree(locrowlens);
96857b952d6SSatish Balay   PetscFree(buf);
96957b952d6SSatish Balay   PetscFree(ibuf);
97057b952d6SSatish Balay   PetscFree(rowners);
97157b952d6SSatish Balay   PetscFree(dlens);
972*cee3aa6bSSatish Balay   PetscFree(mask);
97357b952d6SSatish Balay   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
97457b952d6SSatish Balay   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
97557b952d6SSatish Balay   return 0;
97657b952d6SSatish Balay }
97757b952d6SSatish Balay 
97857b952d6SSatish Balay 
979