xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c351683db805ec9f67f2fea2a48fb8c3a14cecd1)
1cb512458SBarry Smith #ifndef lint
2*c351683dSSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.172 1996/11/01 23:25:33 balay Exp balay $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
7d9942c19SSatish Balay #include "src/inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
140a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat)
159e25ed09SBarry Smith {
1644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18905e6a2fSBarry Smith   int        n = B->n,i;
19dbb450caSBarry Smith 
200452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
22cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23905e6a2fSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
249e25ed09SBarry Smith   return 0;
259e25ed09SBarry Smith }
269e25ed09SBarry Smith 
272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
282493cbb0SBarry Smith 
293b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
303b2fbd54SBarry Smith                            PetscTruth *done)
31299609e3SLois Curfman McInnes {
32299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
33299609e3SLois Curfman McInnes   int        ierr;
3417699dbbSLois Curfman McInnes   if (aij->size == 1) {
353b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
363b2fbd54SBarry Smith   } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel");
373b2fbd54SBarry Smith   return 0;
383b2fbd54SBarry Smith }
393b2fbd54SBarry Smith 
403b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
413b2fbd54SBarry Smith                                PetscTruth *done)
423b2fbd54SBarry Smith {
433b2fbd54SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
443b2fbd54SBarry Smith   int        ierr;
453b2fbd54SBarry Smith   if (aij->size == 1) {
463b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
473b2fbd54SBarry Smith   } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel");
48299609e3SLois Curfman McInnes   return 0;
49299609e3SLois Curfman McInnes }
50299609e3SLois Curfman McInnes 
510a198c4cSBarry Smith extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
520a198c4cSBarry Smith 
534b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
548a729477SBarry Smith {
5544a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
564b0e389bSBarry Smith   Scalar     value;
571eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
581eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
59905e6a2fSBarry Smith   int        roworiented = aij->roworiented;
608a729477SBarry Smith 
610a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
6264119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
6348d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
648a729477SBarry Smith   }
650a198c4cSBarry Smith #endif
661eb62cbbSBarry Smith   aij->insertmode = addv;
678a729477SBarry Smith   for ( i=0; i<m; i++ ) {
680a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
694b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
704b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
710a198c4cSBarry Smith #endif
724b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
734b0e389bSBarry Smith       row = im[i] - rstart;
741eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
754b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
764b0e389bSBarry Smith           col = in[j] - cstart;
774b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
780a198c4cSBarry Smith           ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
791eb62cbbSBarry Smith         }
800a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
810a198c4cSBarry Smith         else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");}
820a198c4cSBarry Smith         else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");}
830a198c4cSBarry Smith #endif
841eb62cbbSBarry Smith         else {
85227d817aSBarry Smith           if (mat->was_assembled) {
86905e6a2fSBarry Smith             if (!aij->colmap) {
87905e6a2fSBarry Smith               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
88905e6a2fSBarry Smith             }
89905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
90ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
912493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
924b0e389bSBarry Smith               col =  in[j];
93d6dfbf8fSBarry Smith             }
949e25ed09SBarry Smith           }
954b0e389bSBarry Smith           else col = in[j];
964b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
970a198c4cSBarry Smith           ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
981eb62cbbSBarry Smith         }
991eb62cbbSBarry Smith       }
1001eb62cbbSBarry Smith     }
1011eb62cbbSBarry Smith     else {
1024b0e389bSBarry Smith       if (roworiented) {
1034b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
1044b0e389bSBarry Smith       }
1054b0e389bSBarry Smith       else {
1064b0e389bSBarry Smith         row = im[i];
1074b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
1084b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
1094b0e389bSBarry Smith         }
1104b0e389bSBarry Smith       }
1111eb62cbbSBarry Smith     }
1128a729477SBarry Smith   }
1138a729477SBarry Smith   return 0;
1148a729477SBarry Smith }
1158a729477SBarry Smith 
116b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
117b49de8d1SLois Curfman McInnes {
118b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
119b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
120b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
121b49de8d1SLois Curfman McInnes 
122b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
123b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
124b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
125b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
126b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
127b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
128b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
129b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
130b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
131b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
132b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
133b49de8d1SLois Curfman McInnes         }
134b49de8d1SLois Curfman McInnes         else {
135905e6a2fSBarry Smith           if (!aij->colmap) {
136905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
137905e6a2fSBarry Smith           }
138905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
139e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
140d9d09a02SSatish Balay           else {
141b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
142b49de8d1SLois Curfman McInnes           }
143b49de8d1SLois Curfman McInnes         }
144b49de8d1SLois Curfman McInnes       }
145d9d09a02SSatish Balay     }
146b49de8d1SLois Curfman McInnes     else {
147b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
148b49de8d1SLois Curfman McInnes     }
149b49de8d1SLois Curfman McInnes   }
150b49de8d1SLois Curfman McInnes   return 0;
151b49de8d1SLois Curfman McInnes }
152b49de8d1SLois Curfman McInnes 
153fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1548a729477SBarry Smith {
15544a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
156d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
15717699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
15817699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1591eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1606abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1611eb62cbbSBarry Smith   InsertMode  addv;
1621eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1631eb62cbbSBarry Smith 
1641eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
165d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
166dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
167bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1681eb62cbbSBarry Smith   }
1691eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1701eb62cbbSBarry Smith 
1711eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1720452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
173cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1740452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1751eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1761eb62cbbSBarry Smith     idx = aij->stash.idx[i];
17717699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1781eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1791eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1808a729477SBarry Smith       }
1818a729477SBarry Smith     }
1828a729477SBarry Smith   }
18317699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1841eb62cbbSBarry Smith 
1851eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1860452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
18717699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
18817699dbbSLois Curfman McInnes   nreceives = work[rank];
18917699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
19017699dbbSLois Curfman McInnes   nmax = work[rank];
1910452661fSBarry Smith   PetscFree(work);
1921eb62cbbSBarry Smith 
1931eb62cbbSBarry Smith   /* post receives:
1941eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1951eb62cbbSBarry Smith      (global index,value) we store the global index as a double
196d6dfbf8fSBarry Smith      to simplify the message passing.
1971eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1981eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1991eb62cbbSBarry Smith      this is a lot of wasted space.
2001eb62cbbSBarry Smith 
2011eb62cbbSBarry Smith 
2021eb62cbbSBarry Smith        This could be done better.
2031eb62cbbSBarry Smith   */
2040452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
20578b31e54SBarry Smith   CHKPTRQ(rvalues);
2060452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
20778b31e54SBarry Smith   CHKPTRQ(recv_waits);
2081eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
209416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
2101eb62cbbSBarry Smith               comm,recv_waits+i);
2111eb62cbbSBarry Smith   }
2121eb62cbbSBarry Smith 
2131eb62cbbSBarry Smith   /* do sends:
2141eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
2151eb62cbbSBarry Smith          the ith processor
2161eb62cbbSBarry Smith   */
2170452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
2180452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
21978b31e54SBarry Smith   CHKPTRQ(send_waits);
2200452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
2211eb62cbbSBarry Smith   starts[0] = 0;
22217699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2231eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2241eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2251eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2261eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2271eb62cbbSBarry Smith   }
2280452661fSBarry Smith   PetscFree(owner);
2291eb62cbbSBarry Smith   starts[0] = 0;
23017699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2311eb62cbbSBarry Smith   count = 0;
23217699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2331eb62cbbSBarry Smith     if (procs[i]) {
234416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2351eb62cbbSBarry Smith                 comm,send_waits+count++);
2361eb62cbbSBarry Smith     }
2371eb62cbbSBarry Smith   }
2380452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2391eb62cbbSBarry Smith 
2401eb62cbbSBarry Smith   /* Free cache space */
2412191d07cSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
24278b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2431eb62cbbSBarry Smith 
2441eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2451eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2461eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2471eb62cbbSBarry Smith   aij->rmax       = nmax;
2481eb62cbbSBarry Smith 
2491eb62cbbSBarry Smith   return 0;
2501eb62cbbSBarry Smith }
25144a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2521eb62cbbSBarry Smith 
253fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2541eb62cbbSBarry Smith {
25544a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
2561eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
257416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
258905e6a2fSBarry Smith   int         row,col,other_disassembled;
2591eb62cbbSBarry Smith   Scalar      *values,val;
2601eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2611eb62cbbSBarry Smith 
2621eb62cbbSBarry Smith   /*  wait on receives */
2631eb62cbbSBarry Smith   while (count) {
264d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2651eb62cbbSBarry Smith     /* unpack receives into our local space */
266d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
267c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2681eb62cbbSBarry Smith     n = n/3;
2691eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
270227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
271227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2721eb62cbbSBarry Smith       val = values[3*i+2];
2731eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2741eb62cbbSBarry Smith         col -= aij->cstart;
2751eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2761eb62cbbSBarry Smith       }
2771eb62cbbSBarry Smith       else {
27855a1b374SBarry Smith         if (mat->was_assembled || mat->assembled) {
279905e6a2fSBarry Smith           if (!aij->colmap) {
280905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
281905e6a2fSBarry Smith           }
282905e6a2fSBarry Smith           col = aij->colmap[col] - 1;
283ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2842493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
285227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
286d6dfbf8fSBarry Smith           }
2879e25ed09SBarry Smith         }
2881eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2891eb62cbbSBarry Smith       }
2901eb62cbbSBarry Smith     }
2911eb62cbbSBarry Smith     count--;
2921eb62cbbSBarry Smith   }
2930452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2941eb62cbbSBarry Smith 
2951eb62cbbSBarry Smith   /* wait on sends */
2961eb62cbbSBarry Smith   if (aij->nsends) {
2970a198c4cSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
2981eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
2990452661fSBarry Smith     PetscFree(send_status);
3001eb62cbbSBarry Smith   }
3010452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
3021eb62cbbSBarry Smith 
30364119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
30478b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
30578b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
3061eb62cbbSBarry Smith 
3072493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
3082493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
309227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
310227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
3112493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
3122493cbb0SBarry Smith   }
3132493cbb0SBarry Smith 
3146d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
31578b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
3165e42470aSBarry Smith   }
31778b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
31878b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
3195e42470aSBarry Smith 
3207a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
3218a729477SBarry Smith   return 0;
3228a729477SBarry Smith }
3238a729477SBarry Smith 
3242ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3251eb62cbbSBarry Smith {
32644a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
327dbd7a890SLois Curfman McInnes   int        ierr;
32878b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
32978b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3301eb62cbbSBarry Smith   return 0;
3311eb62cbbSBarry Smith }
3321eb62cbbSBarry Smith 
3331eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3341eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3351eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3361eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3371eb62cbbSBarry Smith    routine.
3381eb62cbbSBarry Smith */
33944a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3401eb62cbbSBarry Smith {
34144a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
34217699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3436abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
34417699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3455392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
346d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
347d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3481eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3491eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3501eb62cbbSBarry Smith   IS             istmp;
3511eb62cbbSBarry Smith 
35277c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
35378b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3541eb62cbbSBarry Smith 
3551eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3560452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
357cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3580452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3591eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3601eb62cbbSBarry Smith     idx = rows[i];
3611eb62cbbSBarry Smith     found = 0;
36217699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3631eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3641eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3651eb62cbbSBarry Smith       }
3661eb62cbbSBarry Smith     }
367bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3681eb62cbbSBarry Smith   }
36917699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3701eb62cbbSBarry Smith 
3711eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3720452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
37317699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
37417699dbbSLois Curfman McInnes   nrecvs = work[rank];
37517699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
37617699dbbSLois Curfman McInnes   nmax = work[rank];
3770452661fSBarry Smith   PetscFree(work);
3781eb62cbbSBarry Smith 
3791eb62cbbSBarry Smith   /* post receives:   */
3800452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
38178b31e54SBarry Smith   CHKPTRQ(rvalues);
3820452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
38378b31e54SBarry Smith   CHKPTRQ(recv_waits);
3841eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
385416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3861eb62cbbSBarry Smith   }
3871eb62cbbSBarry Smith 
3881eb62cbbSBarry Smith   /* do sends:
3891eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3901eb62cbbSBarry Smith          the ith processor
3911eb62cbbSBarry Smith   */
3920452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3930452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
39478b31e54SBarry Smith   CHKPTRQ(send_waits);
3950452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3961eb62cbbSBarry Smith   starts[0] = 0;
39717699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3981eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3991eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
4001eb62cbbSBarry Smith   }
4011eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
4021eb62cbbSBarry Smith 
4031eb62cbbSBarry Smith   starts[0] = 0;
40417699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4051eb62cbbSBarry Smith   count = 0;
40617699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
4071eb62cbbSBarry Smith     if (procs[i]) {
408416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
4091eb62cbbSBarry Smith     }
4101eb62cbbSBarry Smith   }
4110452661fSBarry Smith   PetscFree(starts);
4121eb62cbbSBarry Smith 
41317699dbbSLois Curfman McInnes   base = owners[rank];
4141eb62cbbSBarry Smith 
4151eb62cbbSBarry Smith   /*  wait on receives */
4160452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
4171eb62cbbSBarry Smith   source = lens + nrecvs;
4181eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
4191eb62cbbSBarry Smith   while (count) {
420d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
4211eb62cbbSBarry Smith     /* unpack receives into our local space */
4221eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
423d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
424d6dfbf8fSBarry Smith     lens[imdex]  = n;
4251eb62cbbSBarry Smith     slen += n;
4261eb62cbbSBarry Smith     count--;
4271eb62cbbSBarry Smith   }
4280452661fSBarry Smith   PetscFree(recv_waits);
4291eb62cbbSBarry Smith 
4301eb62cbbSBarry Smith   /* move the data into the send scatter */
4310452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4321eb62cbbSBarry Smith   count = 0;
4331eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4341eb62cbbSBarry Smith     values = rvalues + i*nmax;
4351eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4361eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4371eb62cbbSBarry Smith     }
4381eb62cbbSBarry Smith   }
4390452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4400452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4411eb62cbbSBarry Smith 
4421eb62cbbSBarry Smith   /* actually zap the local rows */
443537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
444464493b3SBarry Smith   PLogObjectParent(A,istmp);
4450452661fSBarry Smith   PetscFree(lrows);
44678b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
44778b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
44878b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4491eb62cbbSBarry Smith 
4501eb62cbbSBarry Smith   /* wait on sends */
4511eb62cbbSBarry Smith   if (nsends) {
4520452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
45378b31e54SBarry Smith     CHKPTRQ(send_status);
4541eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4550452661fSBarry Smith     PetscFree(send_status);
4561eb62cbbSBarry Smith   }
4570452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4581eb62cbbSBarry Smith 
4591eb62cbbSBarry Smith   return 0;
4601eb62cbbSBarry Smith }
4611eb62cbbSBarry Smith 
462416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4631eb62cbbSBarry Smith {
464416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
465fbd6ef76SBarry Smith   int        ierr,nt;
466416022c9SBarry Smith 
467a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
468fbd6ef76SBarry Smith   if (nt != a->n) {
46947b4a8eaSLois Curfman McInnes     SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx");
470fbd6ef76SBarry Smith   }
47164119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
47238597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
47364119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
47438597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4751eb62cbbSBarry Smith   return 0;
4761eb62cbbSBarry Smith }
4771eb62cbbSBarry Smith 
478416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
479da3a660dSBarry Smith {
480416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
481da3a660dSBarry Smith   int        ierr;
48264119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
48338597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
48464119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
48538597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
486da3a660dSBarry Smith   return 0;
487da3a660dSBarry Smith }
488da3a660dSBarry Smith 
489416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
490da3a660dSBarry Smith {
491416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
492da3a660dSBarry Smith   int        ierr;
493da3a660dSBarry Smith 
494da3a660dSBarry Smith   /* do nondiagonal part */
49538597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
496da3a660dSBarry Smith   /* send it on its way */
497537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
498da3a660dSBarry Smith   /* do local part */
49938597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
500da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
501da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
502da3a660dSBarry Smith   /* but is not perhaps always true. */
503537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
504da3a660dSBarry Smith   return 0;
505da3a660dSBarry Smith }
506da3a660dSBarry Smith 
507416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
508da3a660dSBarry Smith {
509416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
510da3a660dSBarry Smith   int        ierr;
511da3a660dSBarry Smith 
512da3a660dSBarry Smith   /* do nondiagonal part */
51338597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
514da3a660dSBarry Smith   /* send it on its way */
515537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
516da3a660dSBarry Smith   /* do local part */
51738597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
518da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
519da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
520da3a660dSBarry Smith   /* but is not perhaps always true. */
5210a198c4cSBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
522da3a660dSBarry Smith   return 0;
523da3a660dSBarry Smith }
524da3a660dSBarry Smith 
5251eb62cbbSBarry Smith /*
5261eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5271eb62cbbSBarry Smith    diagonal block
5281eb62cbbSBarry Smith */
529416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5301eb62cbbSBarry Smith {
531416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
53244cd7ae7SLois Curfman McInnes   if (a->M != a->N)
53344cd7ae7SLois Curfman McInnes     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
5345baf8537SBarry Smith   if (a->rstart != a->cstart || a->rend != a->cend) {
5355baf8537SBarry Smith     SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition");  }
536416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5371eb62cbbSBarry Smith }
5381eb62cbbSBarry Smith 
539052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
540052efed2SBarry Smith {
541052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
542052efed2SBarry Smith   int        ierr;
543052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
544052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
545052efed2SBarry Smith   return 0;
546052efed2SBarry Smith }
547052efed2SBarry Smith 
54844a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5491eb62cbbSBarry Smith {
5501eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
55144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5521eb62cbbSBarry Smith   int        ierr;
553a5a9c739SBarry Smith #if defined(PETSC_LOG)
5546e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
555a5a9c739SBarry Smith #endif
5560452661fSBarry Smith   PetscFree(aij->rowners);
55778b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
55878b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5590452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5600452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5611eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
562a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5637a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
5640452661fSBarry Smith   PetscFree(aij);
565a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5660452661fSBarry Smith   PetscHeaderDestroy(mat);
5671eb62cbbSBarry Smith   return 0;
5681eb62cbbSBarry Smith }
5697857610eSBarry Smith #include "draw.h"
570b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
571ee50ffe9SBarry Smith 
572416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5731eb62cbbSBarry Smith {
574416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
575416022c9SBarry Smith   int         ierr;
576416022c9SBarry Smith 
57717699dbbSLois Curfman McInnes   if (aij->size == 1) {
578416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
579416022c9SBarry Smith   }
580a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
581416022c9SBarry Smith   return 0;
582416022c9SBarry Smith }
583416022c9SBarry Smith 
584d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
585416022c9SBarry Smith {
58644a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
587dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
588a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
589d636dbe3SBarry Smith   FILE        *fd;
59019bcc07fSBarry Smith   ViewerType  vtype;
591416022c9SBarry Smith 
59219bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
59319bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
59490ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
5950a198c4cSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
5964e220ebcSLois Curfman McInnes       MatInfo info;
5974e220ebcSLois Curfman McInnes       int     flg;
598a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
59990ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
6004e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
60195e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
60277c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
60395e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
6044e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
60595e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
6064e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
6074e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
6084e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
6094e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
6104e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
611a56f8943SBarry Smith       fflush(fd);
61277c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
613a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
614416022c9SBarry Smith       return 0;
615416022c9SBarry Smith     }
6160a198c4cSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
61708480c60SBarry Smith       return 0;
61808480c60SBarry Smith     }
619416022c9SBarry Smith   }
620416022c9SBarry Smith 
62119bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
62219bcc07fSBarry Smith     Draw       draw;
62319bcc07fSBarry Smith     PetscTruth isnull;
62419bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
62519bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
62619bcc07fSBarry Smith   }
62719bcc07fSBarry Smith 
62819bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
62990ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
63077c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
631d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
63217699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
6331eb62cbbSBarry Smith            aij->cend);
63478b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
63578b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
636d13ab20cSBarry Smith     fflush(fd);
63777c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
638d13ab20cSBarry Smith   }
639416022c9SBarry Smith   else {
640a56f8943SBarry Smith     int size = aij->size;
641a56f8943SBarry Smith     rank = aij->rank;
64217699dbbSLois Curfman McInnes     if (size == 1) {
64378b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
64495373324SBarry Smith     }
64595373324SBarry Smith     else {
64695373324SBarry Smith       /* assemble the entire matrix onto first processor. */
64795373324SBarry Smith       Mat         A;
648ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6492eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
65095373324SBarry Smith       Scalar      *a;
6512ee70a88SLois Curfman McInnes 
65217699dbbSLois Curfman McInnes       if (!rank) {
653b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
654c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
65595373324SBarry Smith       }
65695373324SBarry Smith       else {
657b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
658c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
65995373324SBarry Smith       }
660464493b3SBarry Smith       PLogObjectParent(mat,A);
661416022c9SBarry Smith 
66295373324SBarry Smith       /* copy over the A part */
663ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6642ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
66595373324SBarry Smith       row = aij->rstart;
666dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
66795373324SBarry Smith       for ( i=0; i<m; i++ ) {
668416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
66995373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
67095373324SBarry Smith       }
6712ee70a88SLois Curfman McInnes       aj = Aloc->j;
672dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
67395373324SBarry Smith 
67495373324SBarry Smith       /* copy over the B part */
675ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6762ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
67795373324SBarry Smith       row = aij->rstart;
6780452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
679dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
68095373324SBarry Smith       for ( i=0; i<m; i++ ) {
681416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
68295373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
68395373324SBarry Smith       }
6840452661fSBarry Smith       PetscFree(ct);
6856d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
6866d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
68717699dbbSLois Curfman McInnes       if (!rank) {
68878b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
68995373324SBarry Smith       }
69078b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
69195373324SBarry Smith     }
69295373324SBarry Smith   }
6931eb62cbbSBarry Smith   return 0;
6941eb62cbbSBarry Smith }
6951eb62cbbSBarry Smith 
696416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
697416022c9SBarry Smith {
698416022c9SBarry Smith   Mat         mat = (Mat) obj;
699416022c9SBarry Smith   int         ierr;
70019bcc07fSBarry Smith   ViewerType  vtype;
701416022c9SBarry Smith 
70219bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
70319bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
70419bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
705d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
706416022c9SBarry Smith   }
70719bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
708416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
709416022c9SBarry Smith   }
710416022c9SBarry Smith   return 0;
711416022c9SBarry Smith }
712416022c9SBarry Smith 
7131eb62cbbSBarry Smith /*
7141eb62cbbSBarry Smith     This has to provide several versions.
7151eb62cbbSBarry Smith 
7161eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
7171eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
718d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
7191eb62cbbSBarry Smith */
720fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
721dbb450caSBarry Smith                            double fshift,int its,Vec xx)
7228a729477SBarry Smith {
72344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
724d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
725ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
726c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
7276abc6512SBarry Smith   int        ierr,*idx, *diag;
728416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
7298a729477SBarry Smith 
730d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
731dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
732dbb450caSBarry Smith   ls = ls + shift;
733ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
734d6dfbf8fSBarry Smith   diag = A->diag;
735c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
736da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
73738597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
738da3a660dSBarry Smith     }
73964119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
74078b31e54SBarry Smith     CHKERRQ(ierr);
74164119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
74278b31e54SBarry Smith     CHKERRQ(ierr);
743d6dfbf8fSBarry Smith     while (its--) {
744d6dfbf8fSBarry Smith       /* go down through the rows */
745d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
746d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
747dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
748dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
749d6dfbf8fSBarry Smith         sum  = b[i];
750d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
751dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
752d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
753dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
754dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
755d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
75655a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
757d6dfbf8fSBarry Smith       }
758d6dfbf8fSBarry Smith       /* come up through the rows */
759d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
760d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
761dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
762dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
763d6dfbf8fSBarry Smith         sum  = b[i];
764d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
765dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
766d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
767dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
768dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
769d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
77055a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
771d6dfbf8fSBarry Smith       }
772d6dfbf8fSBarry Smith     }
773d6dfbf8fSBarry Smith   }
774d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
775da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
77638597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
777da3a660dSBarry Smith     }
77864119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
77978b31e54SBarry Smith     CHKERRQ(ierr);
78064119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
78178b31e54SBarry Smith     CHKERRQ(ierr);
782d6dfbf8fSBarry Smith     while (its--) {
783d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
784d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
785dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
786dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
787d6dfbf8fSBarry Smith         sum  = b[i];
788d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
789dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
790d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
791dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
792dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
793d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
79455a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
795d6dfbf8fSBarry Smith       }
796d6dfbf8fSBarry Smith     }
797d6dfbf8fSBarry Smith   }
798d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
799da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
80038597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
801da3a660dSBarry Smith     }
80264119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
80378b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
80464119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
80578b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
806d6dfbf8fSBarry Smith     while (its--) {
807d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
808d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
809dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
810dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
811d6dfbf8fSBarry Smith         sum  = b[i];
812d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
813dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
814d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
815dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
816dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
817d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
81855a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
819d6dfbf8fSBarry Smith       }
820d6dfbf8fSBarry Smith     }
821d6dfbf8fSBarry Smith   }
822c16cb8f2SBarry Smith   else {
823c16cb8f2SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
824c16cb8f2SBarry Smith   }
8258a729477SBarry Smith   return 0;
8268a729477SBarry Smith }
827a66be287SLois Curfman McInnes 
8284e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
829a66be287SLois Curfman McInnes {
830a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
831a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
8324e220ebcSLois Curfman McInnes   int        ierr;
8334e220ebcSLois Curfman McInnes   double     isend[5], irecv[5];
834a66be287SLois Curfman McInnes 
8354e220ebcSLois Curfman McInnes   info->rows_global    = (double)mat->M;
8364e220ebcSLois Curfman McInnes   info->columns_global = (double)mat->N;
8374e220ebcSLois Curfman McInnes   info->rows_local     = (double)mat->m;
8384e220ebcSLois Curfman McInnes   info->columns_local  = (double)mat->N;
8394e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
8404e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
8414e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
8424e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
8434e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
8444e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
8454e220ebcSLois Curfman McInnes   isend[3] += info->memory;  isend[4] += info->mallocs;
846a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
8474e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
8484e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
8494e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
8504e220ebcSLois Curfman McInnes     info->memory       = isend[3];
8514e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
852a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
8534e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
8544e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8554e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8564e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8574e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8584e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
859a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
8604e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
8614e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8624e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8634e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8644e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8654e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
866a66be287SLois Curfman McInnes   }
8674e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
8684e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
8694e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
8704e220ebcSLois Curfman McInnes 
871a66be287SLois Curfman McInnes   return 0;
872a66be287SLois Curfman McInnes }
873a66be287SLois Curfman McInnes 
874299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
875299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
876299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
877299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
878299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
879299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
880299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
881299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
882299609e3SLois Curfman McInnes 
883416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
884c74985f6SBarry Smith {
885c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
886c74985f6SBarry Smith 
8876d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
8886d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
8896d4a8577SBarry Smith       op == MAT_COLUMNS_SORTED ||
8906d4a8577SBarry Smith       op == MAT_ROW_ORIENTED) {
891c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
892c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
893c74985f6SBarry Smith   }
8946d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
8956d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8966d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
8976d4a8577SBarry Smith            op == MAT_YES_NEW_DIAGONALS)
89894a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
8996d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
9004b0e389bSBarry Smith     a->roworiented = 0;
9014b0e389bSBarry Smith     MatSetOption(a->A,op);
9024b0e389bSBarry Smith     MatSetOption(a->B,op);
9034b0e389bSBarry Smith   }
9046d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
9056d4a8577SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
906c0bbcb79SLois Curfman McInnes   else
9074d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
908c74985f6SBarry Smith   return 0;
909c74985f6SBarry Smith }
910c74985f6SBarry Smith 
911d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
912c74985f6SBarry Smith {
91344a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
914c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
915c74985f6SBarry Smith   return 0;
916c74985f6SBarry Smith }
917c74985f6SBarry Smith 
918d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
919c74985f6SBarry Smith {
92044a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
921b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
922c74985f6SBarry Smith   return 0;
923c74985f6SBarry Smith }
924c74985f6SBarry Smith 
925d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
926c74985f6SBarry Smith {
92744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
928c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
929c74985f6SBarry Smith   return 0;
930c74985f6SBarry Smith }
931c74985f6SBarry Smith 
9326d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
9336d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
9346d84be18SBarry Smith 
9356d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
93639e00950SLois Curfman McInnes {
937154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
93870f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
939154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
940154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
94170f0671dSBarry Smith   int        *cmap, *idx_p;
94239e00950SLois Curfman McInnes 
9437a0afa10SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
9447a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
9457a0afa10SBarry Smith 
94670f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
9477a0afa10SBarry Smith     /*
9487a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
9497a0afa10SBarry Smith     */
9507a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
951c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
952c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
9537a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
9547a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
9557a0afa10SBarry Smith     }
9567a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
9577a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
9587a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
9597a0afa10SBarry Smith   }
9607a0afa10SBarry Smith 
961b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
962abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
96339e00950SLois Curfman McInnes 
964154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
965154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
966154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
96738597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
96838597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
969154123eaSLois Curfman McInnes   nztot = nzA + nzB;
970154123eaSLois Curfman McInnes 
97170f0671dSBarry Smith   cmap  = mat->garray;
972154123eaSLois Curfman McInnes   if (v  || idx) {
973154123eaSLois Curfman McInnes     if (nztot) {
974154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
97570f0671dSBarry Smith       int imark = -1;
976154123eaSLois Curfman McInnes       if (v) {
97770f0671dSBarry Smith         *v = v_p = mat->rowvalues;
97839e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
97970f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
980154123eaSLois Curfman McInnes           else break;
981154123eaSLois Curfman McInnes         }
982154123eaSLois Curfman McInnes         imark = i;
98370f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
98470f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
985154123eaSLois Curfman McInnes       }
986154123eaSLois Curfman McInnes       if (idx) {
98770f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
98870f0671dSBarry Smith         if (imark > -1) {
98970f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
99070f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
99170f0671dSBarry Smith           }
99270f0671dSBarry Smith         } else {
993154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
99470f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
995154123eaSLois Curfman McInnes             else break;
996154123eaSLois Curfman McInnes           }
997154123eaSLois Curfman McInnes           imark = i;
99870f0671dSBarry Smith         }
99970f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
100070f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
100139e00950SLois Curfman McInnes       }
100239e00950SLois Curfman McInnes     }
10031ca473b0SSatish Balay     else {
10041ca473b0SSatish Balay       if (idx) *idx = 0;
10051ca473b0SSatish Balay       if (v)   *v   = 0;
10061ca473b0SSatish Balay     }
1007154123eaSLois Curfman McInnes   }
100839e00950SLois Curfman McInnes   *nz = nztot;
100938597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
101038597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
101139e00950SLois Curfman McInnes   return 0;
101239e00950SLois Curfman McInnes }
101339e00950SLois Curfman McInnes 
10146d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
101539e00950SLois Curfman McInnes {
10167a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
10177a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
10187a0afa10SBarry Smith     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
10197a0afa10SBarry Smith   }
10207a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
102139e00950SLois Curfman McInnes   return 0;
102239e00950SLois Curfman McInnes }
102339e00950SLois Curfman McInnes 
1024cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1025855ac2c5SLois Curfman McInnes {
1026855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1027ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1028416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1029416022c9SBarry Smith   double     sum = 0.0;
103004ca555eSLois Curfman McInnes   Scalar     *v;
103104ca555eSLois Curfman McInnes 
103217699dbbSLois Curfman McInnes   if (aij->size == 1) {
103314183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
103437fa93a5SLois Curfman McInnes   } else {
103504ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
103604ca555eSLois Curfman McInnes       v = amat->a;
103704ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
103804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
103904ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
104004ca555eSLois Curfman McInnes #else
104104ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
104204ca555eSLois Curfman McInnes #endif
104304ca555eSLois Curfman McInnes       }
104404ca555eSLois Curfman McInnes       v = bmat->a;
104504ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
104604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
104704ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
104804ca555eSLois Curfman McInnes #else
104904ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
105004ca555eSLois Curfman McInnes #endif
105104ca555eSLois Curfman McInnes       }
10526d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
105304ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
105404ca555eSLois Curfman McInnes     }
105504ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
105604ca555eSLois Curfman McInnes       double *tmp, *tmp2;
105704ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
10580452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
10590452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1060cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
106104ca555eSLois Curfman McInnes       *norm = 0.0;
106204ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
106304ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1064579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
106504ca555eSLois Curfman McInnes       }
106604ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
106704ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1068579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
106904ca555eSLois Curfman McInnes       }
10706d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
107104ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
107204ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
107304ca555eSLois Curfman McInnes       }
10740452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
107504ca555eSLois Curfman McInnes     }
107604ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1077515d9167SLois Curfman McInnes       double ntemp = 0.0;
107804ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1079dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
108004ca555eSLois Curfman McInnes         sum = 0.0;
108104ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1082cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
108304ca555eSLois Curfman McInnes         }
1084dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
108504ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1086cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
108704ca555eSLois Curfman McInnes         }
1088515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
108904ca555eSLois Curfman McInnes       }
10906d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
109104ca555eSLois Curfman McInnes     }
109204ca555eSLois Curfman McInnes     else {
1093515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
109404ca555eSLois Curfman McInnes     }
109537fa93a5SLois Curfman McInnes   }
1096855ac2c5SLois Curfman McInnes   return 0;
1097855ac2c5SLois Curfman McInnes }
1098855ac2c5SLois Curfman McInnes 
10990de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1100b7c46309SBarry Smith {
1101b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1102dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1103416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1104416022c9SBarry Smith   Mat        B;
1105b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1106b7c46309SBarry Smith   Scalar     *array;
1107b7c46309SBarry Smith 
11083638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
11093638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1110b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1111b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1112b7c46309SBarry Smith 
1113b7c46309SBarry Smith   /* copy over the A part */
1114ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1115b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1116b7c46309SBarry Smith   row = a->rstart;
1117dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1118b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1119416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1120b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1121b7c46309SBarry Smith   }
1122b7c46309SBarry Smith   aj = Aloc->j;
11234af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1124b7c46309SBarry Smith 
1125b7c46309SBarry Smith   /* copy over the B part */
1126ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1127b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1128b7c46309SBarry Smith   row = a->rstart;
11290452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1130dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1131b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1132416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1133b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1134b7c46309SBarry Smith   }
11350452661fSBarry Smith   PetscFree(ct);
11366d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11376d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11383638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
11390de55854SLois Curfman McInnes     *matout = B;
11400de55854SLois Curfman McInnes   } else {
11410de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
11420452661fSBarry Smith     PetscFree(a->rowners);
11430de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
11440de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
11450452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
11460452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
11470de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1148a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
11490452661fSBarry Smith     PetscFree(a);
1150416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
11510452661fSBarry Smith     PetscHeaderDestroy(B);
11520de55854SLois Curfman McInnes   }
1153b7c46309SBarry Smith   return 0;
1154b7c46309SBarry Smith }
1155b7c46309SBarry Smith 
11564b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1157a008b906SSatish Balay {
11584b967eb1SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
11594b967eb1SSatish Balay   Mat a = aij->A, b = aij->B;
1160a008b906SSatish Balay   int ierr,s1,s2,s3;
1161a008b906SSatish Balay 
11624b967eb1SSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
11634b967eb1SSatish Balay   if (rr) {
11644b967eb1SSatish Balay     s3 = aij->n;
11654b967eb1SSatish Balay     VecGetLocalSize_Fast(rr,s1);
11664b967eb1SSatish Balay     if (s1!=s3) SETERRQ(1,"MatDiagonalScale: right vector non-conforming local size");
11674b967eb1SSatish Balay     /* Overlap communication with computation. */
11684b967eb1SSatish Balay     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr);
1169a008b906SSatish Balay   }
11704b967eb1SSatish Balay   if (ll) {
11714b967eb1SSatish Balay     VecGetLocalSize_Fast(ll,s1);
11724b967eb1SSatish Balay     if (s1!=s2) SETERRQ(1,"MatDiagonalScale: left vector non-conforming local size");
1173*c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
11744b967eb1SSatish Balay   }
11754b967eb1SSatish Balay   /* scale  the diagonal block */
1176*c351683dSSatish Balay   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
11774b967eb1SSatish Balay 
11784b967eb1SSatish Balay   if (rr) {
11794b967eb1SSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
11804b967eb1SSatish Balay     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr);
1181*c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
11824b967eb1SSatish Balay   }
11834b967eb1SSatish Balay 
1184a008b906SSatish Balay   return 0;
1185a008b906SSatish Balay }
1186a008b906SSatish Balay 
1187a008b906SSatish Balay 
1188682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1189682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1190682d7d0cSBarry Smith {
1191682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1192682d7d0cSBarry Smith 
1193682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1194682d7d0cSBarry Smith   else return 0;
1195682d7d0cSBarry Smith }
1196682d7d0cSBarry Smith 
11975a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
11985a838052SSatish Balay {
11995a838052SSatish Balay   *bs = 1;
12005a838052SSatish Balay   return 0;
12015a838052SSatish Balay }
12025a838052SSatish Balay 
1203fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
12043d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
12052f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
12060a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
12070a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
12088a729477SBarry Smith /* -------------------------------------------------------------------*/
12092ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
121039e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
121144a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
121244a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
121336ce4990SBarry Smith        0,0,
121436ce4990SBarry Smith        0,0,
121536ce4990SBarry Smith        0,0,
121644a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1217b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1218a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1219a008b906SSatish Balay        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1220ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
12211eb62cbbSBarry Smith        0,
1222299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
122336ce4990SBarry Smith        0,0,0,0,
1224d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
122536ce4990SBarry Smith        0,0,
12263e85bedfSBarry Smith        0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
1227b49de8d1SLois Curfman McInnes        0,0,0,
1228598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1229052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
12303b2fbd54SBarry Smith        MatScale_MPIAIJ,0,0,0,
12310a198c4cSBarry Smith        MatGetBlockSize_MPIAIJ,0,0,0,0,
12320a198c4cSBarry Smith        MatFDColoringCreate_MPIAIJ};
123336ce4990SBarry Smith 
12348a729477SBarry Smith 
12351987afe7SBarry Smith /*@C
1236ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
12373a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
12383a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
12393a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
12403a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
12418a729477SBarry Smith 
12428a729477SBarry Smith    Input Parameters:
12431eb62cbbSBarry Smith .  comm - MPI communicator
12447d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
124592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
124692e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
124792e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
124892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
124992e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
12507d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
125192e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1252ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1253ff756334SLois Curfman McInnes            (same for all local rows)
12542bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
125592e8d321SLois Curfman McInnes            diagonal portion of the local submatrix (possibly different for each row)
12562bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
12572bd5e0b2SLois Curfman McInnes            it is zero.
12582bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1259ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
12602bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
12612bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
12622bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
12638a729477SBarry Smith 
1264ff756334SLois Curfman McInnes    Output Parameter:
126544cd7ae7SLois Curfman McInnes .  A - the matrix
12668a729477SBarry Smith 
1267ff756334SLois Curfman McInnes    Notes:
1268ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1269ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
12700002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
12710002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1272ff756334SLois Curfman McInnes 
1273ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1274ff756334SLois Curfman McInnes    (possibly both).
1275ff756334SLois Curfman McInnes 
12765511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
12775511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
12785511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
12795511cfe3SLois Curfman McInnes 
12805511cfe3SLois Curfman McInnes    Options Database Keys:
12815511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
12826e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
12836e62573dSLois Curfman McInnes $        (max limit=5)
12846e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
12856e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
12866e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
12875511cfe3SLois Curfman McInnes 
1288e0245417SLois Curfman McInnes    Storage Information:
1289e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1290e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1291e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1292e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1293e0245417SLois Curfman McInnes 
1294e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
12955ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
12965ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
12975ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
12985ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1299ff756334SLois Curfman McInnes 
13005511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
13015511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
13022191d07cSBarry Smith 
1303b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1304b810aeb4SBarry Smith $         -------------------
13055511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
13065511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
13075511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
13085511cfe3SLois Curfman McInnes $         -------------------
1309b810aeb4SBarry Smith $
1310b810aeb4SBarry Smith 
13115511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
13125511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
13135511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
13145511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
13155511cfe3SLois Curfman McInnes 
13165511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
13175511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
13185511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
13193d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
132092e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
13213d323bbdSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
13223a511b96SLois Curfman McInnes 
1323dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1324ff756334SLois Curfman McInnes 
1325fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
13268a729477SBarry Smith @*/
13271eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
132844cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
13298a729477SBarry Smith {
133044cd7ae7SLois Curfman McInnes   Mat          B;
133144cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
133236ce4990SBarry Smith   int          ierr, i,sum[2],work[2],size;
1333416022c9SBarry Smith 
133444cd7ae7SLois Curfman McInnes   *A = 0;
133544cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
133644cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
133744cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
133844cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
133944cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
134036ce4990SBarry Smith   MPI_Comm_size(comm,&size);
134136ce4990SBarry Smith   if (size == 1) {
134236ce4990SBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
134336ce4990SBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
134436ce4990SBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
134536ce4990SBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
134636ce4990SBarry Smith     B->ops.lufactor          = MatLUFactor_MPIAIJ;
134736ce4990SBarry Smith     B->ops.solve             = MatSolve_MPIAIJ;
134836ce4990SBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
134936ce4990SBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
135036ce4990SBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
135136ce4990SBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
135236ce4990SBarry Smith   }
135344cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
135444cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
135544cd7ae7SLois Curfman McInnes   B->factor     = 0;
135644cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
1357d6dfbf8fSBarry Smith 
135844cd7ae7SLois Curfman McInnes   b->insertmode = NOT_SET_VALUES;
135944cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
136044cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
13611eb62cbbSBarry Smith 
1362b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
13632e0155e0SLois Curfman McInnes     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
13641987afe7SBarry Smith 
1365dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
13661eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1367d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1368dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1369dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
13701eb62cbbSBarry Smith   }
137144cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
137244cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
137344cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
137444cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
137544cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
137644cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
13771eb62cbbSBarry Smith 
13781eb62cbbSBarry Smith   /* build local table of row and column ownerships */
137944cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
138044cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1381603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
138244cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
138344cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
138444cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
138544cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
13868a729477SBarry Smith   }
138744cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
138844cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
138944cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
139044cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
139144cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
139244cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
13938a729477SBarry Smith   }
139444cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
139544cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
13968a729477SBarry Smith 
13975ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
139844cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
139944cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
14007b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
140144cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
140244cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
14038a729477SBarry Smith 
14041eb62cbbSBarry Smith   /* build cache for off array entries formed */
140544cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
140644cd7ae7SLois Curfman McInnes   b->colmap      = 0;
140744cd7ae7SLois Curfman McInnes   b->garray      = 0;
140844cd7ae7SLois Curfman McInnes   b->roworiented = 1;
14098a729477SBarry Smith 
14101eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
141144cd7ae7SLois Curfman McInnes   b->lvec      = 0;
141244cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
14138a729477SBarry Smith 
14147a0afa10SBarry Smith   /* stuff for MatGetRow() */
141544cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
141644cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
141744cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
14187a0afa10SBarry Smith 
141944cd7ae7SLois Curfman McInnes   *A = B;
1420d6dfbf8fSBarry Smith   return 0;
1421d6dfbf8fSBarry Smith }
1422c74985f6SBarry Smith 
14233d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1424d6dfbf8fSBarry Smith {
1425d6dfbf8fSBarry Smith   Mat        mat;
1426416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1427a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1428d6dfbf8fSBarry Smith 
1429416022c9SBarry Smith   *newmat       = 0;
14300452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1431a5a9c739SBarry Smith   PLogObjectCreate(mat);
14320452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1433416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
143444a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
143544a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1436d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1437c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1438d6dfbf8fSBarry Smith 
143944cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
144044cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
144144cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
144244cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1443d6dfbf8fSBarry Smith 
1444416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1445416022c9SBarry Smith   a->rend         = oldmat->rend;
1446416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1447416022c9SBarry Smith   a->cend         = oldmat->cend;
144817699dbbSLois Curfman McInnes   a->size         = oldmat->size;
144917699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
145064119d58SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
1451bcd2baecSBarry Smith   a->rowvalues    = 0;
1452bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1453d6dfbf8fSBarry Smith 
1454603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1455603f58a4SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1456603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1457603f58a4SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1458416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14592ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
14600452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1461416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1462416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1463416022c9SBarry Smith   } else a->colmap = 0;
14643f41c07dSBarry Smith   if (oldmat->garray) {
14653f41c07dSBarry Smith     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
14663f41c07dSBarry Smith     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1467464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
14683f41c07dSBarry Smith     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1469416022c9SBarry Smith   } else a->garray = 0;
1470d6dfbf8fSBarry Smith 
1471416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1472416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1473a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1474416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1475416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1476416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1477416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1478416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14795dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
148025cdf11fSBarry Smith   if (flg) {
1481682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1482682d7d0cSBarry Smith   }
14838a729477SBarry Smith   *newmat = mat;
14848a729477SBarry Smith   return 0;
14858a729477SBarry Smith }
1486416022c9SBarry Smith 
148777c4ece6SBarry Smith #include "sys.h"
1488416022c9SBarry Smith 
148919bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1490416022c9SBarry Smith {
1491d65a2f8fSBarry Smith   Mat          A;
1492416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1493d65a2f8fSBarry Smith   Scalar       *vals,*svals;
149419bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1495416022c9SBarry Smith   MPI_Status   status;
149617699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1497d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
149819bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1499416022c9SBarry Smith 
150017699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
150117699dbbSLois Curfman McInnes   if (!rank) {
150290ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
150377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1504c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1505416022c9SBarry Smith   }
1506416022c9SBarry Smith 
1507416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1508416022c9SBarry Smith   M = header[1]; N = header[2];
1509416022c9SBarry Smith   /* determine ownership of all rows */
151017699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
15110452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1512416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1513416022c9SBarry Smith   rowners[0] = 0;
151417699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1515416022c9SBarry Smith     rowners[i] += rowners[i-1];
1516416022c9SBarry Smith   }
151717699dbbSLois Curfman McInnes   rstart = rowners[rank];
151817699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1519416022c9SBarry Smith 
1520416022c9SBarry Smith   /* distribute row lengths to all processors */
15210452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1522416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
152317699dbbSLois Curfman McInnes   if (!rank) {
15240452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
152577c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
15260452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
152717699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1528d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
15290452661fSBarry Smith     PetscFree(sndcounts);
1530416022c9SBarry Smith   }
1531416022c9SBarry Smith   else {
1532416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1533416022c9SBarry Smith   }
1534416022c9SBarry Smith 
153517699dbbSLois Curfman McInnes   if (!rank) {
1536416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
15370452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1538cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
153917699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1540416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1541416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1542416022c9SBarry Smith       }
1543416022c9SBarry Smith     }
15440452661fSBarry Smith     PetscFree(rowlengths);
1545416022c9SBarry Smith 
1546416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1547416022c9SBarry Smith     maxnz = 0;
154817699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
15490452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1550416022c9SBarry Smith     }
15510452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1552416022c9SBarry Smith 
1553416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1554416022c9SBarry Smith     nz = procsnz[0];
15550452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
155677c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1557d65a2f8fSBarry Smith 
1558d65a2f8fSBarry Smith     /* read in every one elses and ship off */
155917699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1560d65a2f8fSBarry Smith       nz = procsnz[i];
156177c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1562d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1563d65a2f8fSBarry Smith     }
15640452661fSBarry Smith     PetscFree(cols);
1565416022c9SBarry Smith   }
1566416022c9SBarry Smith   else {
1567416022c9SBarry Smith     /* determine buffer space needed for message */
1568416022c9SBarry Smith     nz = 0;
1569416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1570416022c9SBarry Smith       nz += ourlens[i];
1571416022c9SBarry Smith     }
15720452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1573416022c9SBarry Smith 
1574416022c9SBarry Smith     /* receive message of column indices*/
1575d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1576416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1577c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1578416022c9SBarry Smith   }
1579416022c9SBarry Smith 
1580416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1581cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1582416022c9SBarry Smith   jj = 0;
1583416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1584416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1585d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1586416022c9SBarry Smith       jj++;
1587416022c9SBarry Smith     }
1588416022c9SBarry Smith   }
1589d65a2f8fSBarry Smith 
1590d65a2f8fSBarry Smith   /* create our matrix */
1591416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1592416022c9SBarry Smith     ourlens[i] -= offlens[i];
1593416022c9SBarry Smith   }
1594d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1595d65a2f8fSBarry Smith   A = *newmat;
15966d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
1597d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1598d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1599d65a2f8fSBarry Smith   }
1600416022c9SBarry Smith 
160117699dbbSLois Curfman McInnes   if (!rank) {
16020452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1603416022c9SBarry Smith 
1604416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1605416022c9SBarry Smith     nz = procsnz[0];
160677c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1607d65a2f8fSBarry Smith 
1608d65a2f8fSBarry Smith     /* insert into matrix */
1609d65a2f8fSBarry Smith     jj      = rstart;
1610d65a2f8fSBarry Smith     smycols = mycols;
1611d65a2f8fSBarry Smith     svals   = vals;
1612d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1613d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1614d65a2f8fSBarry Smith       smycols += ourlens[i];
1615d65a2f8fSBarry Smith       svals   += ourlens[i];
1616d65a2f8fSBarry Smith       jj++;
1617416022c9SBarry Smith     }
1618416022c9SBarry Smith 
1619d65a2f8fSBarry Smith     /* read in other processors and ship out */
162017699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1621416022c9SBarry Smith       nz = procsnz[i];
162277c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1623d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1624416022c9SBarry Smith     }
16250452661fSBarry Smith     PetscFree(procsnz);
1626416022c9SBarry Smith   }
1627d65a2f8fSBarry Smith   else {
1628d65a2f8fSBarry Smith     /* receive numeric values */
16290452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1630416022c9SBarry Smith 
1631d65a2f8fSBarry Smith     /* receive message of values*/
1632d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1633d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1634c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1635d65a2f8fSBarry Smith 
1636d65a2f8fSBarry Smith     /* insert into matrix */
1637d65a2f8fSBarry Smith     jj      = rstart;
1638d65a2f8fSBarry Smith     smycols = mycols;
1639d65a2f8fSBarry Smith     svals   = vals;
1640d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1641d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1642d65a2f8fSBarry Smith       smycols += ourlens[i];
1643d65a2f8fSBarry Smith       svals   += ourlens[i];
1644d65a2f8fSBarry Smith       jj++;
1645d65a2f8fSBarry Smith     }
1646d65a2f8fSBarry Smith   }
16470452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1648d65a2f8fSBarry Smith 
16496d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
16506d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1651416022c9SBarry Smith   return 0;
1652416022c9SBarry Smith }
1653