xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision bb5a7306ba2b79be99f6d9dff5d00348384195e8)
1cb512458SBarry Smith #ifndef lint
2*bb5a7306SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.183 1997/01/01 03:37:44 bsmith Exp bsmith $";
3cb512458SBarry Smith #endif
48a729477SBarry Smith 
570f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
7d9942c19SSatish Balay #include "src/inline/spops.h"
88a729477SBarry Smith 
99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
119e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
129e25ed09SBarry Smith length of colmap equals the global matrix length.
139e25ed09SBarry Smith */
14e0f21b7aSSatish Balay #undef __FUNCTION__
15e0f21b7aSSatish Balay #define __FUNCTION__ "CreateColmap_MPIAIJ_Private"
160a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat)
179e25ed09SBarry Smith {
1844a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
19ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
20905e6a2fSBarry Smith   int        n = B->n,i;
21dbb450caSBarry Smith 
220452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
23464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
24cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
25905e6a2fSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
269e25ed09SBarry Smith   return 0;
279e25ed09SBarry Smith }
289e25ed09SBarry Smith 
292493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
302493cbb0SBarry Smith 
31e0f21b7aSSatish Balay #undef __FUNCTION__
32e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetRowIJ_MPIAIJ"
333b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
343b2fbd54SBarry Smith                            PetscTruth *done)
35299609e3SLois Curfman McInnes {
36299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
37299609e3SLois Curfman McInnes   int        ierr;
3817699dbbSLois Curfman McInnes   if (aij->size == 1) {
393b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
40e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
413b2fbd54SBarry Smith   return 0;
423b2fbd54SBarry Smith }
433b2fbd54SBarry Smith 
44e0f21b7aSSatish Balay #undef __FUNCTION__
45e0f21b7aSSatish Balay #define __FUNCTION__ "MatRestoreRowIJ_MPIAIJ"
463b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
473b2fbd54SBarry Smith                                PetscTruth *done)
483b2fbd54SBarry Smith {
493b2fbd54SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
503b2fbd54SBarry Smith   int        ierr;
513b2fbd54SBarry Smith   if (aij->size == 1) {
523b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
53e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
54299609e3SLois Curfman McInnes   return 0;
55299609e3SLois Curfman McInnes }
56299609e3SLois Curfman McInnes 
570a198c4cSBarry Smith extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
580a198c4cSBarry Smith 
59e0f21b7aSSatish Balay #undef __FUNCTION__
60e0f21b7aSSatish Balay #define __FUNCTION__ "MatSetValues_MPIAIJ"
614b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
628a729477SBarry Smith {
6344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
644b0e389bSBarry Smith   Scalar     value;
651eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
661eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
67905e6a2fSBarry Smith   int        roworiented = aij->roworiented;
688a729477SBarry Smith 
690a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
7064119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
71e3372554SBarry Smith     SETERRQ(1,0,"Cannot mix inserts and adds");
728a729477SBarry Smith   }
730a198c4cSBarry Smith #endif
741eb62cbbSBarry Smith   aij->insertmode = addv;
758a729477SBarry Smith   for ( i=0; i<m; i++ ) {
760a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
77e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
78e3372554SBarry Smith     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
790a198c4cSBarry Smith #endif
804b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
814b0e389bSBarry Smith       row = im[i] - rstart;
821eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
834b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
844b0e389bSBarry Smith           col = in[j] - cstart;
854b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
860a198c4cSBarry Smith           ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
871eb62cbbSBarry Smith         }
880a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
89e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
90e3372554SBarry Smith         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
910a198c4cSBarry Smith #endif
921eb62cbbSBarry Smith         else {
93227d817aSBarry Smith           if (mat->was_assembled) {
94905e6a2fSBarry Smith             if (!aij->colmap) {
95905e6a2fSBarry Smith               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
96905e6a2fSBarry Smith             }
97905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
98ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
992493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
1004b0e389bSBarry Smith               col =  in[j];
101d6dfbf8fSBarry Smith             }
1029e25ed09SBarry Smith           }
1034b0e389bSBarry Smith           else col = in[j];
1044b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
1050a198c4cSBarry Smith           ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
1061eb62cbbSBarry Smith         }
1071eb62cbbSBarry Smith       }
1081eb62cbbSBarry Smith     }
1091eb62cbbSBarry Smith     else {
11090f02eecSBarry Smith       if (roworiented && !aij->donotstash) {
1114b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
1124b0e389bSBarry Smith       }
1134b0e389bSBarry Smith       else {
11490f02eecSBarry Smith         if (!aij->donotstash) {
1154b0e389bSBarry Smith           row = im[i];
1164b0e389bSBarry Smith           for ( j=0; j<n; j++ ) {
1174b0e389bSBarry Smith             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
1184b0e389bSBarry Smith           }
1194b0e389bSBarry Smith         }
1201eb62cbbSBarry Smith       }
1218a729477SBarry Smith     }
12290f02eecSBarry Smith   }
1238a729477SBarry Smith   return 0;
1248a729477SBarry Smith }
1258a729477SBarry Smith 
126e0f21b7aSSatish Balay #undef __FUNCTION__
127e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetValues_MPIAIJ"
128b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
129b49de8d1SLois Curfman McInnes {
130b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
131b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
132b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
133b49de8d1SLois Curfman McInnes 
134b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
135e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
136e3372554SBarry Smith     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
137b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
138b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
139b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
140e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
141e3372554SBarry Smith         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
142b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
143b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
144b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
145b49de8d1SLois Curfman McInnes         }
146b49de8d1SLois Curfman McInnes         else {
147905e6a2fSBarry Smith           if (!aij->colmap) {
148905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
149905e6a2fSBarry Smith           }
150905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
151e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
152d9d09a02SSatish Balay           else {
153b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
154b49de8d1SLois Curfman McInnes           }
155b49de8d1SLois Curfman McInnes         }
156b49de8d1SLois Curfman McInnes       }
157d9d09a02SSatish Balay     }
158b49de8d1SLois Curfman McInnes     else {
159e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
160b49de8d1SLois Curfman McInnes     }
161b49de8d1SLois Curfman McInnes   }
162b49de8d1SLois Curfman McInnes   return 0;
163b49de8d1SLois Curfman McInnes }
164b49de8d1SLois Curfman McInnes 
165e0f21b7aSSatish Balay #undef __FUNCTION__
166e0f21b7aSSatish Balay #define __FUNCTION__ "MatAssemblyBegin_MPIAIJ"
167fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1688a729477SBarry Smith {
16944a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
170d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
17117699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
17217699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1731eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1746abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1751eb62cbbSBarry Smith   InsertMode  addv;
1761eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1771eb62cbbSBarry Smith 
1781eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
179d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
180dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
181e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
1821eb62cbbSBarry Smith   }
1831eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1841eb62cbbSBarry Smith 
1851eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1860452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
187cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1880452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1891eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1901eb62cbbSBarry Smith     idx = aij->stash.idx[i];
19117699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1921eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1931eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1948a729477SBarry Smith       }
1958a729477SBarry Smith     }
1968a729477SBarry Smith   }
19717699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1981eb62cbbSBarry Smith 
1991eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
2000452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
20117699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
20217699dbbSLois Curfman McInnes   nreceives = work[rank];
20317699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
20417699dbbSLois Curfman McInnes   nmax = work[rank];
2050452661fSBarry Smith   PetscFree(work);
2061eb62cbbSBarry Smith 
2071eb62cbbSBarry Smith   /* post receives:
2081eb62cbbSBarry Smith        1) each message will consist of ordered pairs
2091eb62cbbSBarry Smith      (global index,value) we store the global index as a double
210d6dfbf8fSBarry Smith      to simplify the message passing.
2111eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
2121eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
2131eb62cbbSBarry Smith      this is a lot of wasted space.
2141eb62cbbSBarry Smith 
2151eb62cbbSBarry Smith 
2161eb62cbbSBarry Smith        This could be done better.
2171eb62cbbSBarry Smith   */
2180452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
21978b31e54SBarry Smith   CHKPTRQ(rvalues);
2200452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
22178b31e54SBarry Smith   CHKPTRQ(recv_waits);
2221eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
223416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
2241eb62cbbSBarry Smith               comm,recv_waits+i);
2251eb62cbbSBarry Smith   }
2261eb62cbbSBarry Smith 
2271eb62cbbSBarry Smith   /* do sends:
2281eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
2291eb62cbbSBarry Smith          the ith processor
2301eb62cbbSBarry Smith   */
2310452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
2320452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
23378b31e54SBarry Smith   CHKPTRQ(send_waits);
2340452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
2351eb62cbbSBarry Smith   starts[0] = 0;
23617699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2371eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2381eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2391eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2401eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2411eb62cbbSBarry Smith   }
2420452661fSBarry Smith   PetscFree(owner);
2431eb62cbbSBarry Smith   starts[0] = 0;
24417699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2451eb62cbbSBarry Smith   count = 0;
24617699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2471eb62cbbSBarry Smith     if (procs[i]) {
248416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2491eb62cbbSBarry Smith                 comm,send_waits+count++);
2501eb62cbbSBarry Smith     }
2511eb62cbbSBarry Smith   }
2520452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2531eb62cbbSBarry Smith 
2541eb62cbbSBarry Smith   /* Free cache space */
25555908cccSLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
25678b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2571eb62cbbSBarry Smith 
2581eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2591eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2601eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2611eb62cbbSBarry Smith   aij->rmax       = nmax;
2621eb62cbbSBarry Smith 
2631eb62cbbSBarry Smith   return 0;
2641eb62cbbSBarry Smith }
26544a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2661eb62cbbSBarry Smith 
267e0f21b7aSSatish Balay #undef __FUNCTION__
268e0f21b7aSSatish Balay #define __FUNCTION__ "MatAssemblyEnd_MPIAIJ"
269fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2701eb62cbbSBarry Smith {
27144a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
2721eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
273416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
274905e6a2fSBarry Smith   int         row,col,other_disassembled;
2751eb62cbbSBarry Smith   Scalar      *values,val;
2761eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2771eb62cbbSBarry Smith 
2781eb62cbbSBarry Smith   /*  wait on receives */
2791eb62cbbSBarry Smith   while (count) {
280d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2811eb62cbbSBarry Smith     /* unpack receives into our local space */
282d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
283c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2841eb62cbbSBarry Smith     n = n/3;
2851eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
286227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
287227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2881eb62cbbSBarry Smith       val = values[3*i+2];
2891eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2901eb62cbbSBarry Smith         col -= aij->cstart;
2911eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2921eb62cbbSBarry Smith       }
2931eb62cbbSBarry Smith       else {
29455a1b374SBarry Smith         if (mat->was_assembled || mat->assembled) {
295905e6a2fSBarry Smith           if (!aij->colmap) {
296905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
297905e6a2fSBarry Smith           }
298905e6a2fSBarry Smith           col = aij->colmap[col] - 1;
299ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
3002493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
301227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
302d6dfbf8fSBarry Smith           }
3039e25ed09SBarry Smith         }
3041eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
3051eb62cbbSBarry Smith       }
3061eb62cbbSBarry Smith     }
3071eb62cbbSBarry Smith     count--;
3081eb62cbbSBarry Smith   }
3090452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
3101eb62cbbSBarry Smith 
3111eb62cbbSBarry Smith   /* wait on sends */
3121eb62cbbSBarry Smith   if (aij->nsends) {
3130a198c4cSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
3141eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
3150452661fSBarry Smith     PetscFree(send_status);
3161eb62cbbSBarry Smith   }
3170452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
3181eb62cbbSBarry Smith 
31964119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
32078b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
32178b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
3221eb62cbbSBarry Smith 
3232493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
3242493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
325227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
326227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
3272493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
3282493cbb0SBarry Smith   }
3292493cbb0SBarry Smith 
3306d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
33178b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
3325e42470aSBarry Smith   }
33378b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
33478b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
3355e42470aSBarry Smith 
3367a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
3378a729477SBarry Smith   return 0;
3388a729477SBarry Smith }
3398a729477SBarry Smith 
340e0f21b7aSSatish Balay #undef __FUNCTION__
341e0f21b7aSSatish Balay #define __FUNCTION__ "MatZeroEntries_MPIAIJ"
3422ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3431eb62cbbSBarry Smith {
34444a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
345dbd7a890SLois Curfman McInnes   int        ierr;
34678b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
34778b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3481eb62cbbSBarry Smith   return 0;
3491eb62cbbSBarry Smith }
3501eb62cbbSBarry Smith 
3511eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3521eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3531eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3541eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3551eb62cbbSBarry Smith    routine.
3561eb62cbbSBarry Smith */
357e0f21b7aSSatish Balay #undef __FUNCTION__
358e0f21b7aSSatish Balay #define __FUNCTION__ "MatZeroRows_MPIAIJ"
35944a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3601eb62cbbSBarry Smith {
36144a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
36217699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3636abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
36417699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3655392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
366d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
367d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3681eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3691eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3701eb62cbbSBarry Smith   IS             istmp;
3711eb62cbbSBarry Smith 
37277c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
37378b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3741eb62cbbSBarry Smith 
3751eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3760452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
377cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3780452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3791eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3801eb62cbbSBarry Smith     idx = rows[i];
3811eb62cbbSBarry Smith     found = 0;
38217699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3831eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3841eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3851eb62cbbSBarry Smith       }
3861eb62cbbSBarry Smith     }
387e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
3881eb62cbbSBarry Smith   }
38917699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3901eb62cbbSBarry Smith 
3911eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3920452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
39317699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
39417699dbbSLois Curfman McInnes   nrecvs = work[rank];
39517699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
39617699dbbSLois Curfman McInnes   nmax = work[rank];
3970452661fSBarry Smith   PetscFree(work);
3981eb62cbbSBarry Smith 
3991eb62cbbSBarry Smith   /* post receives:   */
4000452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
40178b31e54SBarry Smith   CHKPTRQ(rvalues);
4020452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
40378b31e54SBarry Smith   CHKPTRQ(recv_waits);
4041eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
405416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
4061eb62cbbSBarry Smith   }
4071eb62cbbSBarry Smith 
4081eb62cbbSBarry Smith   /* do sends:
4091eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
4101eb62cbbSBarry Smith          the ith processor
4111eb62cbbSBarry Smith   */
4120452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
4130452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
41478b31e54SBarry Smith   CHKPTRQ(send_waits);
4150452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
4161eb62cbbSBarry Smith   starts[0] = 0;
41717699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4181eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
4191eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
4201eb62cbbSBarry Smith   }
4211eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
4221eb62cbbSBarry Smith 
4231eb62cbbSBarry Smith   starts[0] = 0;
42417699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4251eb62cbbSBarry Smith   count = 0;
42617699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
4271eb62cbbSBarry Smith     if (procs[i]) {
428416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
4291eb62cbbSBarry Smith     }
4301eb62cbbSBarry Smith   }
4310452661fSBarry Smith   PetscFree(starts);
4321eb62cbbSBarry Smith 
43317699dbbSLois Curfman McInnes   base = owners[rank];
4341eb62cbbSBarry Smith 
4351eb62cbbSBarry Smith   /*  wait on receives */
4360452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
4371eb62cbbSBarry Smith   source = lens + nrecvs;
4381eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
4391eb62cbbSBarry Smith   while (count) {
440d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
4411eb62cbbSBarry Smith     /* unpack receives into our local space */
4421eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
443d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
444d6dfbf8fSBarry Smith     lens[imdex]  = n;
4451eb62cbbSBarry Smith     slen += n;
4461eb62cbbSBarry Smith     count--;
4471eb62cbbSBarry Smith   }
4480452661fSBarry Smith   PetscFree(recv_waits);
4491eb62cbbSBarry Smith 
4501eb62cbbSBarry Smith   /* move the data into the send scatter */
4510452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4521eb62cbbSBarry Smith   count = 0;
4531eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4541eb62cbbSBarry Smith     values = rvalues + i*nmax;
4551eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4561eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4571eb62cbbSBarry Smith     }
4581eb62cbbSBarry Smith   }
4590452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4600452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4611eb62cbbSBarry Smith 
4621eb62cbbSBarry Smith   /* actually zap the local rows */
463537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
464464493b3SBarry Smith   PLogObjectParent(A,istmp);
4650452661fSBarry Smith   PetscFree(lrows);
46678b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
46778b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
46878b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4691eb62cbbSBarry Smith 
4701eb62cbbSBarry Smith   /* wait on sends */
4711eb62cbbSBarry Smith   if (nsends) {
4720452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
47378b31e54SBarry Smith     CHKPTRQ(send_status);
4741eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4750452661fSBarry Smith     PetscFree(send_status);
4761eb62cbbSBarry Smith   }
4770452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4781eb62cbbSBarry Smith 
4791eb62cbbSBarry Smith   return 0;
4801eb62cbbSBarry Smith }
4811eb62cbbSBarry Smith 
482e0f21b7aSSatish Balay #undef __FUNCTION__
483e0f21b7aSSatish Balay #define __FUNCTION__ "MatMult_MPIAIJ"
484416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4851eb62cbbSBarry Smith {
486416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
487fbd6ef76SBarry Smith   int        ierr,nt;
488416022c9SBarry Smith 
489a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
490fbd6ef76SBarry Smith   if (nt != a->n) {
491e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and xx");
492fbd6ef76SBarry Smith   }
49343a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
49438597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
49543a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
49638597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4971eb62cbbSBarry Smith   return 0;
4981eb62cbbSBarry Smith }
4991eb62cbbSBarry Smith 
500e0f21b7aSSatish Balay #undef __FUNCTION__
501e0f21b7aSSatish Balay #define __FUNCTION__ "MatMultAdd_MPIAIJ"
502416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
503da3a660dSBarry Smith {
504416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
505da3a660dSBarry Smith   int        ierr;
50643a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
50738597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
50843a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
50938597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
510da3a660dSBarry Smith   return 0;
511da3a660dSBarry Smith }
512da3a660dSBarry Smith 
513e0f21b7aSSatish Balay #undef __FUNCTION__
514e0f21b7aSSatish Balay #define __FUNCTION__ "MatMultTrans_MPIAIJ"
515416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
516da3a660dSBarry Smith {
517416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
518da3a660dSBarry Smith   int        ierr;
519da3a660dSBarry Smith 
520da3a660dSBarry Smith   /* do nondiagonal part */
52138597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
522da3a660dSBarry Smith   /* send it on its way */
523537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
524da3a660dSBarry Smith   /* do local part */
52538597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
526da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
527da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
528da3a660dSBarry Smith   /* but is not perhaps always true. */
529537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
530da3a660dSBarry Smith   return 0;
531da3a660dSBarry Smith }
532da3a660dSBarry Smith 
533e0f21b7aSSatish Balay #undef __FUNCTION__
534e0f21b7aSSatish Balay #define __FUNCTION__ "MatMultTransAdd_MPIAIJ"
535416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
536da3a660dSBarry Smith {
537416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
538da3a660dSBarry Smith   int        ierr;
539da3a660dSBarry Smith 
540da3a660dSBarry Smith   /* do nondiagonal part */
54138597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
542da3a660dSBarry Smith   /* send it on its way */
543537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
544da3a660dSBarry Smith   /* do local part */
54538597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
546da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
547da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
548da3a660dSBarry Smith   /* but is not perhaps always true. */
5490a198c4cSBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
550da3a660dSBarry Smith   return 0;
551da3a660dSBarry Smith }
552da3a660dSBarry Smith 
5531eb62cbbSBarry Smith /*
5541eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5551eb62cbbSBarry Smith    diagonal block
5561eb62cbbSBarry Smith */
557e0f21b7aSSatish Balay #undef __FUNCTION__
558e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetDiagonal_MPIAIJ"
559416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5601eb62cbbSBarry Smith {
561416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
56244cd7ae7SLois Curfman McInnes   if (a->M != a->N)
563e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
5645baf8537SBarry Smith   if (a->rstart != a->cstart || a->rend != a->cend) {
565e3372554SBarry Smith     SETERRQ(1,0,"row partition must equal col partition");  }
566416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5671eb62cbbSBarry Smith }
5681eb62cbbSBarry Smith 
569e0f21b7aSSatish Balay #undef __FUNCTION__
570e0f21b7aSSatish Balay #define __FUNCTION__ "MatScale_MPIAIJ"
571052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
572052efed2SBarry Smith {
573052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
574052efed2SBarry Smith   int        ierr;
575052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
576052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
577052efed2SBarry Smith   return 0;
578052efed2SBarry Smith }
579052efed2SBarry Smith 
580e0f21b7aSSatish Balay #undef __FUNCTION__
581e0f21b7aSSatish Balay #define __FUNCTION__ "MatDestroy_MPIAIJ"
58244a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5831eb62cbbSBarry Smith {
5841eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
58544a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5861eb62cbbSBarry Smith   int        ierr;
587a5a9c739SBarry Smith #if defined(PETSC_LOG)
5886e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
589a5a9c739SBarry Smith #endif
5900452661fSBarry Smith   PetscFree(aij->rowners);
59178b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
59278b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5930452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5940452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5951eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
596a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5977a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
5980452661fSBarry Smith   PetscFree(aij);
59990f02eecSBarry Smith   if (mat->mapping) {
60090f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
60190f02eecSBarry Smith   }
602a5a9c739SBarry Smith   PLogObjectDestroy(mat);
6030452661fSBarry Smith   PetscHeaderDestroy(mat);
6041eb62cbbSBarry Smith   return 0;
6051eb62cbbSBarry Smith }
6067857610eSBarry Smith #include "draw.h"
607b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
608ee50ffe9SBarry Smith 
609e0f21b7aSSatish Balay #undef __FUNCTION__
610e0f21b7aSSatish Balay #define __FUNCTION__ "MatView_MPIAIJ_Binary"
611416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
6121eb62cbbSBarry Smith {
613416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
614416022c9SBarry Smith   int         ierr;
615416022c9SBarry Smith 
61617699dbbSLois Curfman McInnes   if (aij->size == 1) {
617416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
618416022c9SBarry Smith   }
619e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
620416022c9SBarry Smith   return 0;
621416022c9SBarry Smith }
622416022c9SBarry Smith 
623e0f21b7aSSatish Balay #undef __FUNCTION__
624e0f21b7aSSatish Balay #define __FUNCTION__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
625d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
626416022c9SBarry Smith {
62744a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
628dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
629a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
630d636dbe3SBarry Smith   FILE        *fd;
63119bcc07fSBarry Smith   ViewerType  vtype;
632416022c9SBarry Smith 
63319bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
63419bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
63590ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
6360a198c4cSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
6374e220ebcSLois Curfman McInnes       MatInfo info;
6384e220ebcSLois Curfman McInnes       int     flg;
639a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
64090ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
6414e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
64295e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
64377c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
64495e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
6454e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
64695e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
6474e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
6484e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
6494e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
6504e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
6514e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
652a56f8943SBarry Smith       fflush(fd);
65377c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
654a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
655416022c9SBarry Smith       return 0;
656416022c9SBarry Smith     }
6570a198c4cSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
65808480c60SBarry Smith       return 0;
65908480c60SBarry Smith     }
660416022c9SBarry Smith   }
661416022c9SBarry Smith 
66219bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
66319bcc07fSBarry Smith     Draw       draw;
66419bcc07fSBarry Smith     PetscTruth isnull;
66519bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
66619bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
66719bcc07fSBarry Smith   }
66819bcc07fSBarry Smith 
66919bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
67090ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
67177c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
672d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
67317699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
6741eb62cbbSBarry Smith            aij->cend);
67578b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
67678b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
677d13ab20cSBarry Smith     fflush(fd);
67877c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
679d13ab20cSBarry Smith   }
680416022c9SBarry Smith   else {
681a56f8943SBarry Smith     int size = aij->size;
682a56f8943SBarry Smith     rank = aij->rank;
68317699dbbSLois Curfman McInnes     if (size == 1) {
68478b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
68595373324SBarry Smith     }
68695373324SBarry Smith     else {
68795373324SBarry Smith       /* assemble the entire matrix onto first processor. */
68895373324SBarry Smith       Mat         A;
689ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6902eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
69195373324SBarry Smith       Scalar      *a;
6922ee70a88SLois Curfman McInnes 
69317699dbbSLois Curfman McInnes       if (!rank) {
694b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
695c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
69695373324SBarry Smith       }
69795373324SBarry Smith       else {
698b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
699c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
70095373324SBarry Smith       }
701464493b3SBarry Smith       PLogObjectParent(mat,A);
702416022c9SBarry Smith 
70395373324SBarry Smith       /* copy over the A part */
704ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
7052ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
70695373324SBarry Smith       row = aij->rstart;
707dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
70895373324SBarry Smith       for ( i=0; i<m; i++ ) {
709416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
71095373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
71195373324SBarry Smith       }
7122ee70a88SLois Curfman McInnes       aj = Aloc->j;
713dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
71495373324SBarry Smith 
71595373324SBarry Smith       /* copy over the B part */
716ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
7172ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
71895373324SBarry Smith       row = aij->rstart;
7190452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
720dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
72195373324SBarry Smith       for ( i=0; i<m; i++ ) {
722416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
72395373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
72495373324SBarry Smith       }
7250452661fSBarry Smith       PetscFree(ct);
7266d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7276d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
72817699dbbSLois Curfman McInnes       if (!rank) {
72978b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
73095373324SBarry Smith       }
73178b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
73295373324SBarry Smith     }
73395373324SBarry Smith   }
7341eb62cbbSBarry Smith   return 0;
7351eb62cbbSBarry Smith }
7361eb62cbbSBarry Smith 
737e0f21b7aSSatish Balay #undef __FUNCTION__
738e0f21b7aSSatish Balay #define __FUNCTION__ "MatView_MPIAIJ"
739416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
740416022c9SBarry Smith {
741416022c9SBarry Smith   Mat         mat = (Mat) obj;
742416022c9SBarry Smith   int         ierr;
74319bcc07fSBarry Smith   ViewerType  vtype;
744416022c9SBarry Smith 
74519bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
74619bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
74719bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
748d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
749416022c9SBarry Smith   }
75019bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
751416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
752416022c9SBarry Smith   }
753416022c9SBarry Smith   return 0;
754416022c9SBarry Smith }
755416022c9SBarry Smith 
7561eb62cbbSBarry Smith /*
7571eb62cbbSBarry Smith     This has to provide several versions.
7581eb62cbbSBarry Smith 
7591eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
7601eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
761d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
7621eb62cbbSBarry Smith */
763e0f21b7aSSatish Balay #undef __FUNCTION__
764e0f21b7aSSatish Balay #define __FUNCTION__ "MatRelax_MPIAIJ"
765fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
766dbb450caSBarry Smith                            double fshift,int its,Vec xx)
7678a729477SBarry Smith {
76844a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
769d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
770ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
771c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
7726abc6512SBarry Smith   int        ierr,*idx, *diag;
773416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
7748a729477SBarry Smith 
775d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
776dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
777dbb450caSBarry Smith   ls = ls + shift;
778ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
779d6dfbf8fSBarry Smith   diag = A->diag;
780c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
781da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
78238597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
783da3a660dSBarry Smith     }
78443a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
78578b31e54SBarry Smith     CHKERRQ(ierr);
78643a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
78778b31e54SBarry Smith     CHKERRQ(ierr);
788d6dfbf8fSBarry Smith     while (its--) {
789d6dfbf8fSBarry Smith       /* go down through the rows */
790d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
791d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
792dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
793dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
794d6dfbf8fSBarry Smith         sum  = b[i];
795d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
796dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
797d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
798dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
799dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
800d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
80155a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
802d6dfbf8fSBarry Smith       }
803d6dfbf8fSBarry Smith       /* come up through the rows */
804d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
805d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
806dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
807dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
808d6dfbf8fSBarry Smith         sum  = b[i];
809d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
810dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
811d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
812dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
813dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
814d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
81555a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
816d6dfbf8fSBarry Smith       }
817d6dfbf8fSBarry Smith     }
818d6dfbf8fSBarry Smith   }
819d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
820da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
82138597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
822da3a660dSBarry Smith     }
82343a90d84SBarry Smith     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
82478b31e54SBarry Smith     CHKERRQ(ierr);
82543a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
82678b31e54SBarry Smith     CHKERRQ(ierr);
827d6dfbf8fSBarry Smith     while (its--) {
828d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
829d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
830dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
831dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
832d6dfbf8fSBarry Smith         sum  = b[i];
833d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
834dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
835d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
836dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
837dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
838d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
83955a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
840d6dfbf8fSBarry Smith       }
841d6dfbf8fSBarry Smith     }
842d6dfbf8fSBarry Smith   }
843d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
844da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
84538597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
846da3a660dSBarry Smith     }
84743a90d84SBarry Smith     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
84878b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
84943a90d84SBarry Smith     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
85078b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
851d6dfbf8fSBarry Smith     while (its--) {
852d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
853d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
854dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
855dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
856d6dfbf8fSBarry Smith         sum  = b[i];
857d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
858dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
859d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
860dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
861dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
862d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
86355a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
864d6dfbf8fSBarry Smith       }
865d6dfbf8fSBarry Smith     }
866d6dfbf8fSBarry Smith   }
867c16cb8f2SBarry Smith   else {
868e3372554SBarry Smith     SETERRQ(1,0,"Option not supported");
869c16cb8f2SBarry Smith   }
8708a729477SBarry Smith   return 0;
8718a729477SBarry Smith }
872a66be287SLois Curfman McInnes 
873e0f21b7aSSatish Balay #undef __FUNCTION__
874e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetInfo_MPIAIJ"
8754e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
876a66be287SLois Curfman McInnes {
877a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
878a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
8794e220ebcSLois Curfman McInnes   int        ierr;
8804e220ebcSLois Curfman McInnes   double     isend[5], irecv[5];
881a66be287SLois Curfman McInnes 
8824e220ebcSLois Curfman McInnes   info->rows_global    = (double)mat->M;
8834e220ebcSLois Curfman McInnes   info->columns_global = (double)mat->N;
8844e220ebcSLois Curfman McInnes   info->rows_local     = (double)mat->m;
8854e220ebcSLois Curfman McInnes   info->columns_local  = (double)mat->N;
8864e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
8874e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
8884e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
8894e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
8904e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
8914e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
8924e220ebcSLois Curfman McInnes   isend[3] += info->memory;  isend[4] += info->mallocs;
893a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
8944e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
8954e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
8964e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
8974e220ebcSLois Curfman McInnes     info->memory       = isend[3];
8984e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
899a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
9004e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
9014e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9024e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9034e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9044e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9054e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
906a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
9074e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
9084e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9094e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9104e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9114e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9124e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
913a66be287SLois Curfman McInnes   }
9144e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
9154e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
9164e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
9174e220ebcSLois Curfman McInnes 
918a66be287SLois Curfman McInnes   return 0;
919a66be287SLois Curfman McInnes }
920a66be287SLois Curfman McInnes 
921299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
922299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
923299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
924299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
925299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
926299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
927299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
928299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
929299609e3SLois Curfman McInnes 
930e0f21b7aSSatish Balay #undef __FUNCTION__
931e0f21b7aSSatish Balay #define __FUNCTION__ "MatSetOption_MPIAIJ"
932416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
933c74985f6SBarry Smith {
934c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
935c74985f6SBarry Smith 
9366d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
9376d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
9386da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
939b1fbbac0SLois Curfman McInnes       op == MAT_COLUMNS_SORTED) {
940b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
941b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
942b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
943aeafbbfcSLois Curfman McInnes     a->roworiented = 1;
944c0bbcb79SLois Curfman McInnes     MatSetOption(a->A,op);
945c0bbcb79SLois Curfman McInnes     MatSetOption(a->B,op);
946b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
9476da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
9486d4a8577SBarry Smith              op == MAT_SYMMETRIC ||
9496d4a8577SBarry Smith              op == MAT_STRUCTURALLY_SYMMETRIC ||
9506d4a8577SBarry Smith              op == MAT_YES_NEW_DIAGONALS)
95194a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
9526d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
9534b0e389bSBarry Smith     a->roworiented = 0;
9544b0e389bSBarry Smith     MatSetOption(a->A,op);
9554b0e389bSBarry Smith     MatSetOption(a->B,op);
95690f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
95790f02eecSBarry Smith     a->donotstash = 1;
95890f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
959e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
960c0bbcb79SLois Curfman McInnes   else
961e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
962c74985f6SBarry Smith   return 0;
963c74985f6SBarry Smith }
964c74985f6SBarry Smith 
965e0f21b7aSSatish Balay #undef __FUNCTION__
966e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetSize_MPIAIJ"
967d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
968c74985f6SBarry Smith {
96944a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
970c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
971c74985f6SBarry Smith   return 0;
972c74985f6SBarry Smith }
973c74985f6SBarry Smith 
974e0f21b7aSSatish Balay #undef __FUNCTION__
975e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetLocalSize_MPIAIJ"
976d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
977c74985f6SBarry Smith {
97844a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
979b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
980c74985f6SBarry Smith   return 0;
981c74985f6SBarry Smith }
982c74985f6SBarry Smith 
983e0f21b7aSSatish Balay #undef __FUNCTION__
984e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetOwnershipRange_MPIAIJ"
985d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
986c74985f6SBarry Smith {
98744a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
988c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
989c74985f6SBarry Smith   return 0;
990c74985f6SBarry Smith }
991c74985f6SBarry Smith 
9926d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
9936d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
9946d84be18SBarry Smith 
995e0f21b7aSSatish Balay #undef __FUNCTION__
996e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetRow_MPIAIJ"
9976d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
99839e00950SLois Curfman McInnes {
999154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
100070f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1001154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1002154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
100370f0671dSBarry Smith   int        *cmap, *idx_p;
100439e00950SLois Curfman McInnes 
1005e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
10067a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
10077a0afa10SBarry Smith 
100870f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
10097a0afa10SBarry Smith     /*
10107a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
10117a0afa10SBarry Smith     */
10127a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1013c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
1014c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
10157a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
10167a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
10177a0afa10SBarry Smith     }
10187a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
10197a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
10207a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
10217a0afa10SBarry Smith   }
10227a0afa10SBarry Smith 
1023e3372554SBarry Smith   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
1024abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
102539e00950SLois Curfman McInnes 
1026154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1027154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
1028154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
102938597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
103038597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1031154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1032154123eaSLois Curfman McInnes 
103370f0671dSBarry Smith   cmap  = mat->garray;
1034154123eaSLois Curfman McInnes   if (v  || idx) {
1035154123eaSLois Curfman McInnes     if (nztot) {
1036154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
103770f0671dSBarry Smith       int imark = -1;
1038154123eaSLois Curfman McInnes       if (v) {
103970f0671dSBarry Smith         *v = v_p = mat->rowvalues;
104039e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
104170f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1042154123eaSLois Curfman McInnes           else break;
1043154123eaSLois Curfman McInnes         }
1044154123eaSLois Curfman McInnes         imark = i;
104570f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
104670f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1047154123eaSLois Curfman McInnes       }
1048154123eaSLois Curfman McInnes       if (idx) {
104970f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
105070f0671dSBarry Smith         if (imark > -1) {
105170f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
105270f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
105370f0671dSBarry Smith           }
105470f0671dSBarry Smith         } else {
1055154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
105670f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1057154123eaSLois Curfman McInnes             else break;
1058154123eaSLois Curfman McInnes           }
1059154123eaSLois Curfman McInnes           imark = i;
106070f0671dSBarry Smith         }
106170f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
106270f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
106339e00950SLois Curfman McInnes       }
106439e00950SLois Curfman McInnes     }
10651ca473b0SSatish Balay     else {
10661ca473b0SSatish Balay       if (idx) *idx = 0;
10671ca473b0SSatish Balay       if (v)   *v   = 0;
10681ca473b0SSatish Balay     }
1069154123eaSLois Curfman McInnes   }
107039e00950SLois Curfman McInnes   *nz = nztot;
107138597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
107238597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
107339e00950SLois Curfman McInnes   return 0;
107439e00950SLois Curfman McInnes }
107539e00950SLois Curfman McInnes 
1076e0f21b7aSSatish Balay #undef __FUNCTION__
1077e0f21b7aSSatish Balay #define __FUNCTION__ "MatRestoreRow_MPIAIJ"
10786d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
107939e00950SLois Curfman McInnes {
10807a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
10817a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
1082e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
10837a0afa10SBarry Smith   }
10847a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
108539e00950SLois Curfman McInnes   return 0;
108639e00950SLois Curfman McInnes }
108739e00950SLois Curfman McInnes 
1088e0f21b7aSSatish Balay #undef __FUNCTION__
1089e0f21b7aSSatish Balay #define __FUNCTION__ "MatNorm_MPIAIJ"
1090cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1091855ac2c5SLois Curfman McInnes {
1092855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1093ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1094416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1095416022c9SBarry Smith   double     sum = 0.0;
109604ca555eSLois Curfman McInnes   Scalar     *v;
109704ca555eSLois Curfman McInnes 
109817699dbbSLois Curfman McInnes   if (aij->size == 1) {
109914183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
110037fa93a5SLois Curfman McInnes   } else {
110104ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
110204ca555eSLois Curfman McInnes       v = amat->a;
110304ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
110404ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
110504ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
110604ca555eSLois Curfman McInnes #else
110704ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
110804ca555eSLois Curfman McInnes #endif
110904ca555eSLois Curfman McInnes       }
111004ca555eSLois Curfman McInnes       v = bmat->a;
111104ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
111204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
111304ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
111404ca555eSLois Curfman McInnes #else
111504ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
111604ca555eSLois Curfman McInnes #endif
111704ca555eSLois Curfman McInnes       }
11186d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
111904ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
112004ca555eSLois Curfman McInnes     }
112104ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
112204ca555eSLois Curfman McInnes       double *tmp, *tmp2;
112304ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
11240452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
11250452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1126cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
112704ca555eSLois Curfman McInnes       *norm = 0.0;
112804ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
112904ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1130579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
113104ca555eSLois Curfman McInnes       }
113204ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
113304ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1134579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
113504ca555eSLois Curfman McInnes       }
11366d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
113704ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
113804ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
113904ca555eSLois Curfman McInnes       }
11400452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
114104ca555eSLois Curfman McInnes     }
114204ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1143515d9167SLois Curfman McInnes       double ntemp = 0.0;
114404ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1145dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
114604ca555eSLois Curfman McInnes         sum = 0.0;
114704ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1148cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
114904ca555eSLois Curfman McInnes         }
1150dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
115104ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1152cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
115304ca555eSLois Curfman McInnes         }
1154515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
115504ca555eSLois Curfman McInnes       }
11566d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
115704ca555eSLois Curfman McInnes     }
115804ca555eSLois Curfman McInnes     else {
1159e3372554SBarry Smith       SETERRQ(1,0,"No support for two norm");
116004ca555eSLois Curfman McInnes     }
116137fa93a5SLois Curfman McInnes   }
1162855ac2c5SLois Curfman McInnes   return 0;
1163855ac2c5SLois Curfman McInnes }
1164855ac2c5SLois Curfman McInnes 
1165e0f21b7aSSatish Balay #undef __FUNCTION__
1166e0f21b7aSSatish Balay #define __FUNCTION__ "MatTranspose_MPIAIJ"
11670de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1168b7c46309SBarry Smith {
1169b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1170dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1171416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1172416022c9SBarry Smith   Mat        B;
1173b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1174b7c46309SBarry Smith   Scalar     *array;
1175b7c46309SBarry Smith 
11763638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
1177e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
1178b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1179b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1180b7c46309SBarry Smith 
1181b7c46309SBarry Smith   /* copy over the A part */
1182ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1183b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1184b7c46309SBarry Smith   row = a->rstart;
1185dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1186b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1187416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1188b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1189b7c46309SBarry Smith   }
1190b7c46309SBarry Smith   aj = Aloc->j;
11914af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1192b7c46309SBarry Smith 
1193b7c46309SBarry Smith   /* copy over the B part */
1194ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1195b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1196b7c46309SBarry Smith   row = a->rstart;
11970452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1198dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1199b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1200416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1201b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1202b7c46309SBarry Smith   }
12030452661fSBarry Smith   PetscFree(ct);
12046d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12056d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12063638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
12070de55854SLois Curfman McInnes     *matout = B;
12080de55854SLois Curfman McInnes   } else {
12090de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
12100452661fSBarry Smith     PetscFree(a->rowners);
12110de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
12120de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
12130452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
12140452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
12150de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1216a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
12170452661fSBarry Smith     PetscFree(a);
1218416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
12190452661fSBarry Smith     PetscHeaderDestroy(B);
12200de55854SLois Curfman McInnes   }
1221b7c46309SBarry Smith   return 0;
1222b7c46309SBarry Smith }
1223b7c46309SBarry Smith 
1224e0f21b7aSSatish Balay #undef __FUNCTION__
1225e0f21b7aSSatish Balay #define __FUNCTION__ "MatDiagonalScale_MPIAIJ"
12264b967eb1SSatish Balay int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1227a008b906SSatish Balay {
12284b967eb1SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
12294b967eb1SSatish Balay   Mat a = aij->A, b = aij->B;
1230a008b906SSatish Balay   int ierr,s1,s2,s3;
1231a008b906SSatish Balay 
12324b967eb1SSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
12334b967eb1SSatish Balay   if (rr) {
12344b967eb1SSatish Balay     s3 = aij->n;
12354b967eb1SSatish Balay     VecGetLocalSize_Fast(rr,s1);
1236e3372554SBarry Smith     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
12374b967eb1SSatish Balay     /* Overlap communication with computation. */
123843a90d84SBarry Smith     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1239a008b906SSatish Balay   }
12404b967eb1SSatish Balay   if (ll) {
12414b967eb1SSatish Balay     VecGetLocalSize_Fast(ll,s1);
1242e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1243c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
12444b967eb1SSatish Balay   }
12454b967eb1SSatish Balay   /* scale  the diagonal block */
1246c351683dSSatish Balay   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
12474b967eb1SSatish Balay 
12484b967eb1SSatish Balay   if (rr) {
12494b967eb1SSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
125043a90d84SBarry Smith     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1251c351683dSSatish Balay     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
12524b967eb1SSatish Balay   }
12534b967eb1SSatish Balay 
1254a008b906SSatish Balay   return 0;
1255a008b906SSatish Balay }
1256a008b906SSatish Balay 
1257a008b906SSatish Balay 
1258682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1259e0f21b7aSSatish Balay #undef __FUNCTION__
1260e0f21b7aSSatish Balay #define __FUNCTION__ "MatPrintHelp_MPIAIJ"
1261682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1262682d7d0cSBarry Smith {
1263682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1264682d7d0cSBarry Smith 
1265682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1266682d7d0cSBarry Smith   else return 0;
1267682d7d0cSBarry Smith }
1268682d7d0cSBarry Smith 
1269e0f21b7aSSatish Balay #undef __FUNCTION__
1270e0f21b7aSSatish Balay #define __FUNCTION__ "MatGetBlockSize_MPIAIJ"
12715a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
12725a838052SSatish Balay {
12735a838052SSatish Balay   *bs = 1;
12745a838052SSatish Balay   return 0;
12755a838052SSatish Balay }
1276*bb5a7306SBarry Smith #undef __FUNCTION__
1277*bb5a7306SBarry Smith #define __FUNCTION__ "MatSetUnfactored_MPIAIJ"
1278*bb5a7306SBarry Smith static int MatSetUnfactored_MPIAIJ(Mat A)
1279*bb5a7306SBarry Smith {
1280*bb5a7306SBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1281*bb5a7306SBarry Smith   int        ierr;
1282*bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1283*bb5a7306SBarry Smith   return 0;
1284*bb5a7306SBarry Smith }
1285*bb5a7306SBarry Smith 
12865a838052SSatish Balay 
1287fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
12883d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
12892f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
12900a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
12910a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
12928a729477SBarry Smith /* -------------------------------------------------------------------*/
12932ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
129439e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
129544a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
129644a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
129736ce4990SBarry Smith        0,0,
129836ce4990SBarry Smith        0,0,
129936ce4990SBarry Smith        0,0,
130044a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1301b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1302a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1303a008b906SSatish Balay        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1304ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
13051eb62cbbSBarry Smith        0,
1306299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
130736ce4990SBarry Smith        0,0,0,0,
1308d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
130936ce4990SBarry Smith        0,0,
13103e85bedfSBarry Smith        0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
1311b49de8d1SLois Curfman McInnes        0,0,0,
1312598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1313052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
13143b2fbd54SBarry Smith        MatScale_MPIAIJ,0,0,0,
13150a198c4cSBarry Smith        MatGetBlockSize_MPIAIJ,0,0,0,0,
1316*bb5a7306SBarry Smith        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
131736ce4990SBarry Smith 
13188a729477SBarry Smith 
1319e0f21b7aSSatish Balay #undef __FUNCTION__
1320e0f21b7aSSatish Balay #define __FUNCTION__ "MatCreateMPIAIJ"
13211987afe7SBarry Smith /*@C
1322ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
13233a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
13243a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
13253a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
13263a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
13278a729477SBarry Smith 
13288a729477SBarry Smith    Input Parameters:
13291eb62cbbSBarry Smith .  comm - MPI communicator
13307d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
133192e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
133292e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
133392e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
133492e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
133592e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
13367d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
133792e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1338ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1339ff756334SLois Curfman McInnes            (same for all local rows)
13402bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
134192e8d321SLois Curfman McInnes            diagonal portion of the local submatrix (possibly different for each row)
13422bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
13432bd5e0b2SLois Curfman McInnes            it is zero.
13442bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1345ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
13462bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
13472bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
13482bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
13498a729477SBarry Smith 
1350ff756334SLois Curfman McInnes    Output Parameter:
135144cd7ae7SLois Curfman McInnes .  A - the matrix
13528a729477SBarry Smith 
1353ff756334SLois Curfman McInnes    Notes:
1354ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1355ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
13560002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
13570002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1358ff756334SLois Curfman McInnes 
1359ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1360ff756334SLois Curfman McInnes    (possibly both).
1361ff756334SLois Curfman McInnes 
13625511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
13635511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
13645511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
13655511cfe3SLois Curfman McInnes 
13665511cfe3SLois Curfman McInnes    Options Database Keys:
13675511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
13686e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
13696e62573dSLois Curfman McInnes $        (max limit=5)
13706e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
13716e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
13726e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
13735511cfe3SLois Curfman McInnes 
1374e0245417SLois Curfman McInnes    Storage Information:
1375e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1376e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1377e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1378e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1379e0245417SLois Curfman McInnes 
1380e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
13815ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
13825ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
13835ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
13845ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1385ff756334SLois Curfman McInnes 
13865511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
13875511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
13882191d07cSBarry Smith 
1389b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1390b810aeb4SBarry Smith $         -------------------
13915511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
13925511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
13935511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
13945511cfe3SLois Curfman McInnes $         -------------------
1395b810aeb4SBarry Smith $
1396b810aeb4SBarry Smith 
13975511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
13985511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
13995511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
14005511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
14015511cfe3SLois Curfman McInnes 
14025511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
14035511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
14045511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
14053d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
140692e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
14076da5968aSLois Curfman McInnes    matrices.
14083a511b96SLois Curfman McInnes 
1409dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1410ff756334SLois Curfman McInnes 
1411fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
14128a729477SBarry Smith @*/
14131eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
141444cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
14158a729477SBarry Smith {
141644cd7ae7SLois Curfman McInnes   Mat          B;
141744cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
141836ce4990SBarry Smith   int          ierr, i,sum[2],work[2],size;
1419416022c9SBarry Smith 
142044cd7ae7SLois Curfman McInnes   *A = 0;
142144cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
142244cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
142344cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
142444cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
142544cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
142636ce4990SBarry Smith   MPI_Comm_size(comm,&size);
142736ce4990SBarry Smith   if (size == 1) {
142836ce4990SBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
142936ce4990SBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
143036ce4990SBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
143136ce4990SBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
143236ce4990SBarry Smith     B->ops.lufactor          = MatLUFactor_MPIAIJ;
143336ce4990SBarry Smith     B->ops.solve             = MatSolve_MPIAIJ;
143436ce4990SBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
143536ce4990SBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
143636ce4990SBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
143736ce4990SBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
143836ce4990SBarry Smith   }
143944cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
144044cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
144144cd7ae7SLois Curfman McInnes   B->factor     = 0;
144244cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
144390f02eecSBarry Smith   B->mapping    = 0;
1444d6dfbf8fSBarry Smith 
144544cd7ae7SLois Curfman McInnes   b->insertmode = NOT_SET_VALUES;
144644cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
144744cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
14481eb62cbbSBarry Smith 
1449b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1450e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
14511987afe7SBarry Smith 
1452dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
14531eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1454d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1455dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1456dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
14571eb62cbbSBarry Smith   }
145844cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
145944cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
146044cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
146144cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
146244cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
146344cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
14641eb62cbbSBarry Smith 
14651eb62cbbSBarry Smith   /* build local table of row and column ownerships */
146644cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
146744cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1468603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
146944cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
147044cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
147144cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
147244cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
14738a729477SBarry Smith   }
147444cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
147544cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
147644cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
147744cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
147844cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
147944cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
14808a729477SBarry Smith   }
148144cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
148244cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
14838a729477SBarry Smith 
14845ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
148544cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
148644cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
14877b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
148844cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
148944cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
14908a729477SBarry Smith 
14911eb62cbbSBarry Smith   /* build cache for off array entries formed */
149244cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
149390f02eecSBarry Smith   b->donotstash  = 0;
149444cd7ae7SLois Curfman McInnes   b->colmap      = 0;
149544cd7ae7SLois Curfman McInnes   b->garray      = 0;
149644cd7ae7SLois Curfman McInnes   b->roworiented = 1;
14978a729477SBarry Smith 
14981eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
149944cd7ae7SLois Curfman McInnes   b->lvec      = 0;
150044cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
15018a729477SBarry Smith 
15027a0afa10SBarry Smith   /* stuff for MatGetRow() */
150344cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
150444cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
150544cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
15067a0afa10SBarry Smith 
150744cd7ae7SLois Curfman McInnes   *A = B;
1508d6dfbf8fSBarry Smith   return 0;
1509d6dfbf8fSBarry Smith }
1510c74985f6SBarry Smith 
1511e0f21b7aSSatish Balay #undef __FUNCTION__
1512e0f21b7aSSatish Balay #define __FUNCTION__ "MatConvertSameType_MPIAIJ"
15133d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1514d6dfbf8fSBarry Smith {
1515d6dfbf8fSBarry Smith   Mat        mat;
1516416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1517a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1518d6dfbf8fSBarry Smith 
1519416022c9SBarry Smith   *newmat       = 0;
15200452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1521a5a9c739SBarry Smith   PLogObjectCreate(mat);
15220452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1523416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
152444a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
152544a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1526d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1527c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1528d6dfbf8fSBarry Smith 
152944cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
153044cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
153144cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
153244cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1533d6dfbf8fSBarry Smith 
1534416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1535416022c9SBarry Smith   a->rend         = oldmat->rend;
1536416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1537416022c9SBarry Smith   a->cend         = oldmat->cend;
153817699dbbSLois Curfman McInnes   a->size         = oldmat->size;
153917699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
154064119d58SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
1541bcd2baecSBarry Smith   a->rowvalues    = 0;
1542bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1543d6dfbf8fSBarry Smith 
1544603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1545603f58a4SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1546603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1547603f58a4SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1548416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
15492ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
15500452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1551416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1552416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1553416022c9SBarry Smith   } else a->colmap = 0;
15543f41c07dSBarry Smith   if (oldmat->garray) {
15553f41c07dSBarry Smith     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
15563f41c07dSBarry Smith     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1557464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
15583f41c07dSBarry Smith     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1559416022c9SBarry Smith   } else a->garray = 0;
1560d6dfbf8fSBarry Smith 
1561416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1562416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1563a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1564416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1565416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1566416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1567416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1568416022c9SBarry Smith   PLogObjectParent(mat,a->B);
15695dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
157025cdf11fSBarry Smith   if (flg) {
1571682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1572682d7d0cSBarry Smith   }
15738a729477SBarry Smith   *newmat = mat;
15748a729477SBarry Smith   return 0;
15758a729477SBarry Smith }
1576416022c9SBarry Smith 
157777c4ece6SBarry Smith #include "sys.h"
1578416022c9SBarry Smith 
1579e0f21b7aSSatish Balay #undef __FUNCTION__
1580e0f21b7aSSatish Balay #define __FUNCTION__ "MatLoad_MPIAIJ"
158119bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1582416022c9SBarry Smith {
1583d65a2f8fSBarry Smith   Mat          A;
1584416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1585d65a2f8fSBarry Smith   Scalar       *vals,*svals;
158619bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1587416022c9SBarry Smith   MPI_Status   status;
158817699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1589d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
159019bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1591416022c9SBarry Smith 
159217699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
159317699dbbSLois Curfman McInnes   if (!rank) {
159490ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
159577c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1596e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1597416022c9SBarry Smith   }
1598416022c9SBarry Smith 
1599416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1600416022c9SBarry Smith   M = header[1]; N = header[2];
1601416022c9SBarry Smith   /* determine ownership of all rows */
160217699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
16030452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1604416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1605416022c9SBarry Smith   rowners[0] = 0;
160617699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1607416022c9SBarry Smith     rowners[i] += rowners[i-1];
1608416022c9SBarry Smith   }
160917699dbbSLois Curfman McInnes   rstart = rowners[rank];
161017699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1611416022c9SBarry Smith 
1612416022c9SBarry Smith   /* distribute row lengths to all processors */
16130452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1614416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
161517699dbbSLois Curfman McInnes   if (!rank) {
16160452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
161777c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
16180452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
161917699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1620d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
16210452661fSBarry Smith     PetscFree(sndcounts);
1622416022c9SBarry Smith   }
1623416022c9SBarry Smith   else {
1624416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1625416022c9SBarry Smith   }
1626416022c9SBarry Smith 
162717699dbbSLois Curfman McInnes   if (!rank) {
1628416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
16290452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1630cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
163117699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1632416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1633416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1634416022c9SBarry Smith       }
1635416022c9SBarry Smith     }
16360452661fSBarry Smith     PetscFree(rowlengths);
1637416022c9SBarry Smith 
1638416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1639416022c9SBarry Smith     maxnz = 0;
164017699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
16410452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1642416022c9SBarry Smith     }
16430452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1644416022c9SBarry Smith 
1645416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1646416022c9SBarry Smith     nz = procsnz[0];
16470452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
164877c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1649d65a2f8fSBarry Smith 
1650d65a2f8fSBarry Smith     /* read in every one elses and ship off */
165117699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1652d65a2f8fSBarry Smith       nz = procsnz[i];
165377c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1654d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1655d65a2f8fSBarry Smith     }
16560452661fSBarry Smith     PetscFree(cols);
1657416022c9SBarry Smith   }
1658416022c9SBarry Smith   else {
1659416022c9SBarry Smith     /* determine buffer space needed for message */
1660416022c9SBarry Smith     nz = 0;
1661416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1662416022c9SBarry Smith       nz += ourlens[i];
1663416022c9SBarry Smith     }
16640452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1665416022c9SBarry Smith 
1666416022c9SBarry Smith     /* receive message of column indices*/
1667d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1668416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1669e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1670416022c9SBarry Smith   }
1671416022c9SBarry Smith 
1672416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1673cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1674416022c9SBarry Smith   jj = 0;
1675416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1676416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1677d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1678416022c9SBarry Smith       jj++;
1679416022c9SBarry Smith     }
1680416022c9SBarry Smith   }
1681d65a2f8fSBarry Smith 
1682d65a2f8fSBarry Smith   /* create our matrix */
1683416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1684416022c9SBarry Smith     ourlens[i] -= offlens[i];
1685416022c9SBarry Smith   }
1686d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1687d65a2f8fSBarry Smith   A = *newmat;
16886d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
1689d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1690d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1691d65a2f8fSBarry Smith   }
1692416022c9SBarry Smith 
169317699dbbSLois Curfman McInnes   if (!rank) {
16940452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1695416022c9SBarry Smith 
1696416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1697416022c9SBarry Smith     nz = procsnz[0];
169877c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1699d65a2f8fSBarry Smith 
1700d65a2f8fSBarry Smith     /* insert into matrix */
1701d65a2f8fSBarry Smith     jj      = rstart;
1702d65a2f8fSBarry Smith     smycols = mycols;
1703d65a2f8fSBarry Smith     svals   = vals;
1704d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1705d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1706d65a2f8fSBarry Smith       smycols += ourlens[i];
1707d65a2f8fSBarry Smith       svals   += ourlens[i];
1708d65a2f8fSBarry Smith       jj++;
1709416022c9SBarry Smith     }
1710416022c9SBarry Smith 
1711d65a2f8fSBarry Smith     /* read in other processors and ship out */
171217699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1713416022c9SBarry Smith       nz = procsnz[i];
171477c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1715d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1716416022c9SBarry Smith     }
17170452661fSBarry Smith     PetscFree(procsnz);
1718416022c9SBarry Smith   }
1719d65a2f8fSBarry Smith   else {
1720d65a2f8fSBarry Smith     /* receive numeric values */
17210452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1722416022c9SBarry Smith 
1723d65a2f8fSBarry Smith     /* receive message of values*/
1724d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1725d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1726e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1727d65a2f8fSBarry Smith 
1728d65a2f8fSBarry Smith     /* insert into matrix */
1729d65a2f8fSBarry Smith     jj      = rstart;
1730d65a2f8fSBarry Smith     smycols = mycols;
1731d65a2f8fSBarry Smith     svals   = vals;
1732d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1733d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1734d65a2f8fSBarry Smith       smycols += ourlens[i];
1735d65a2f8fSBarry Smith       svals   += ourlens[i];
1736d65a2f8fSBarry Smith       jj++;
1737d65a2f8fSBarry Smith     }
1738d65a2f8fSBarry Smith   }
17390452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1740d65a2f8fSBarry Smith 
17416d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
17426d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1743416022c9SBarry Smith   return 0;
1744416022c9SBarry Smith }
1745