xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0a198c4c79a1f783fe701dd7a3eec31dea3d09ee)
1905e6a2fSBarry Smith 
2cb512458SBarry Smith #ifndef lint
3*0a198c4cSBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.170 1996/09/24 20:14:40 bsmith Exp bsmith $";
4cb512458SBarry Smith #endif
58a729477SBarry Smith 
670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
8d9942c19SSatish Balay #include "src/inline/spops.h"
98a729477SBarry Smith 
109e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column
119e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
129e25ed09SBarry Smith storage of the matrix.  This is done in a non scable way since the
139e25ed09SBarry Smith length of colmap equals the global matrix length.
149e25ed09SBarry Smith */
15*0a198c4cSBarry Smith int CreateColmap_MPIAIJ_Private(Mat mat)
169e25ed09SBarry Smith {
1744a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
18ec8511deSBarry Smith   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
19905e6a2fSBarry Smith   int        n = B->n,i;
20dbb450caSBarry Smith 
210452661fSBarry Smith   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
22464493b3SBarry Smith   PLogObjectMemory(mat,aij->N*sizeof(int));
23cddf8d76SBarry Smith   PetscMemzero(aij->colmap,aij->N*sizeof(int));
24905e6a2fSBarry Smith   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
259e25ed09SBarry Smith   return 0;
269e25ed09SBarry Smith }
279e25ed09SBarry Smith 
282493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat);
292493cbb0SBarry Smith 
303b2fbd54SBarry Smith static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
313b2fbd54SBarry Smith                            PetscTruth *done)
32299609e3SLois Curfman McInnes {
33299609e3SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
34299609e3SLois Curfman McInnes   int        ierr;
3517699dbbSLois Curfman McInnes   if (aij->size == 1) {
363b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
373b2fbd54SBarry Smith   } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel");
383b2fbd54SBarry Smith   return 0;
393b2fbd54SBarry Smith }
403b2fbd54SBarry Smith 
413b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
423b2fbd54SBarry Smith                                PetscTruth *done)
433b2fbd54SBarry Smith {
443b2fbd54SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
453b2fbd54SBarry Smith   int        ierr;
463b2fbd54SBarry Smith   if (aij->size == 1) {
473b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
483b2fbd54SBarry Smith   } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel");
49299609e3SLois Curfman McInnes   return 0;
50299609e3SLois Curfman McInnes }
51299609e3SLois Curfman McInnes 
52*0a198c4cSBarry Smith extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
53*0a198c4cSBarry Smith 
544b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
558a729477SBarry Smith {
5644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
574b0e389bSBarry Smith   Scalar     value;
581eb62cbbSBarry Smith   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
591eb62cbbSBarry Smith   int        cstart = aij->cstart, cend = aij->cend,row,col;
60905e6a2fSBarry Smith   int        roworiented = aij->roworiented;
618a729477SBarry Smith 
62*0a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
6364119d58SLois Curfman McInnes   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
6448d91487SBarry Smith     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
658a729477SBarry Smith   }
66*0a198c4cSBarry Smith #endif
671eb62cbbSBarry Smith   aij->insertmode = addv;
688a729477SBarry Smith   for ( i=0; i<m; i++ ) {
69*0a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
704b0e389bSBarry Smith     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
714b0e389bSBarry Smith     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
72*0a198c4cSBarry Smith #endif
734b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
744b0e389bSBarry Smith       row = im[i] - rstart;
751eb62cbbSBarry Smith       for ( j=0; j<n; j++ ) {
764b0e389bSBarry Smith         if (in[j] >= cstart && in[j] < cend){
774b0e389bSBarry Smith           col = in[j] - cstart;
784b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
79*0a198c4cSBarry Smith           ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
801eb62cbbSBarry Smith         }
81*0a198c4cSBarry Smith #if defined(PETSC_BOPT_g)
82*0a198c4cSBarry Smith         else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");}
83*0a198c4cSBarry Smith         else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");}
84*0a198c4cSBarry Smith #endif
851eb62cbbSBarry Smith         else {
86227d817aSBarry Smith           if (mat->was_assembled) {
87905e6a2fSBarry Smith             if (!aij->colmap) {
88905e6a2fSBarry Smith               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
89905e6a2fSBarry Smith             }
90905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
91ec8511deSBarry Smith             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
922493cbb0SBarry Smith               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
934b0e389bSBarry Smith               col =  in[j];
94d6dfbf8fSBarry Smith             }
959e25ed09SBarry Smith           }
964b0e389bSBarry Smith           else col = in[j];
974b0e389bSBarry Smith           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
98*0a198c4cSBarry Smith           ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
991eb62cbbSBarry Smith         }
1001eb62cbbSBarry Smith       }
1011eb62cbbSBarry Smith     }
1021eb62cbbSBarry Smith     else {
1034b0e389bSBarry Smith       if (roworiented) {
1044b0e389bSBarry Smith         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
1054b0e389bSBarry Smith       }
1064b0e389bSBarry Smith       else {
1074b0e389bSBarry Smith         row = im[i];
1084b0e389bSBarry Smith         for ( j=0; j<n; j++ ) {
1094b0e389bSBarry Smith           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
1104b0e389bSBarry Smith         }
1114b0e389bSBarry Smith       }
1121eb62cbbSBarry Smith     }
1138a729477SBarry Smith   }
1148a729477SBarry Smith   return 0;
1158a729477SBarry Smith }
1168a729477SBarry Smith 
117b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
118b49de8d1SLois Curfman McInnes {
119b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
120b49de8d1SLois Curfman McInnes   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
121b49de8d1SLois Curfman McInnes   int        cstart = aij->cstart, cend = aij->cend,row,col;
122b49de8d1SLois Curfman McInnes 
123b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
124b49de8d1SLois Curfman McInnes     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
125b49de8d1SLois Curfman McInnes     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
126b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
127b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
128b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
129b49de8d1SLois Curfman McInnes         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
130b49de8d1SLois Curfman McInnes         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
131b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend){
132b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
133b49de8d1SLois Curfman McInnes           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
134b49de8d1SLois Curfman McInnes         }
135b49de8d1SLois Curfman McInnes         else {
136905e6a2fSBarry Smith           if (!aij->colmap) {
137905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
138905e6a2fSBarry Smith           }
139905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
140e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
141d9d09a02SSatish Balay           else {
142b49de8d1SLois Curfman McInnes             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
143b49de8d1SLois Curfman McInnes           }
144b49de8d1SLois Curfman McInnes         }
145b49de8d1SLois Curfman McInnes       }
146d9d09a02SSatish Balay     }
147b49de8d1SLois Curfman McInnes     else {
148b49de8d1SLois Curfman McInnes       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
149b49de8d1SLois Curfman McInnes     }
150b49de8d1SLois Curfman McInnes   }
151b49de8d1SLois Curfman McInnes   return 0;
152b49de8d1SLois Curfman McInnes }
153b49de8d1SLois Curfman McInnes 
154fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
1558a729477SBarry Smith {
15644a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
157d6dfbf8fSBarry Smith   MPI_Comm    comm = mat->comm;
15817699dbbSLois Curfman McInnes   int         size = aij->size, *owners = aij->rowners;
15917699dbbSLois Curfman McInnes   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
1601eb62cbbSBarry Smith   MPI_Request *send_waits,*recv_waits;
1616abc6512SBarry Smith   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
1621eb62cbbSBarry Smith   InsertMode  addv;
1631eb62cbbSBarry Smith   Scalar      *rvalues,*svalues;
1641eb62cbbSBarry Smith 
1651eb62cbbSBarry Smith   /* make sure all processors are either in INSERTMODE or ADDMODE */
166d65a2f8fSBarry Smith   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
167dbb450caSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
168bbb6d6a8SBarry Smith     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
1691eb62cbbSBarry Smith   }
1701eb62cbbSBarry Smith   aij->insertmode = addv; /* in case this processor had no cache */
1711eb62cbbSBarry Smith 
1721eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
1730452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
174cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1750452661fSBarry Smith   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
1761eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
1771eb62cbbSBarry Smith     idx = aij->stash.idx[i];
17817699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
1791eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1801eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
1818a729477SBarry Smith       }
1828a729477SBarry Smith     }
1838a729477SBarry Smith   }
18417699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1851eb62cbbSBarry Smith 
1861eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
1870452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
18817699dbbSLois Curfman McInnes   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
18917699dbbSLois Curfman McInnes   nreceives = work[rank];
19017699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
19117699dbbSLois Curfman McInnes   nmax = work[rank];
1920452661fSBarry Smith   PetscFree(work);
1931eb62cbbSBarry Smith 
1941eb62cbbSBarry Smith   /* post receives:
1951eb62cbbSBarry Smith        1) each message will consist of ordered pairs
1961eb62cbbSBarry Smith      (global index,value) we store the global index as a double
197d6dfbf8fSBarry Smith      to simplify the message passing.
1981eb62cbbSBarry Smith        2) since we don't know how long each individual message is we
1991eb62cbbSBarry Smith      allocate the largest needed buffer for each receive. Potentially
2001eb62cbbSBarry Smith      this is a lot of wasted space.
2011eb62cbbSBarry Smith 
2021eb62cbbSBarry Smith 
2031eb62cbbSBarry Smith        This could be done better.
2041eb62cbbSBarry Smith   */
2050452661fSBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
20678b31e54SBarry Smith   CHKPTRQ(rvalues);
2070452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
20878b31e54SBarry Smith   CHKPTRQ(recv_waits);
2091eb62cbbSBarry Smith   for ( i=0; i<nreceives; i++ ) {
210416022c9SBarry Smith     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
2111eb62cbbSBarry Smith               comm,recv_waits+i);
2121eb62cbbSBarry Smith   }
2131eb62cbbSBarry Smith 
2141eb62cbbSBarry Smith   /* do sends:
2151eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
2161eb62cbbSBarry Smith          the ith processor
2171eb62cbbSBarry Smith   */
2180452661fSBarry Smith   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
2190452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
22078b31e54SBarry Smith   CHKPTRQ(send_waits);
2210452661fSBarry Smith   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
2221eb62cbbSBarry Smith   starts[0] = 0;
22317699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2241eb62cbbSBarry Smith   for ( i=0; i<aij->stash.n; i++ ) {
2251eb62cbbSBarry Smith     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
2261eb62cbbSBarry Smith     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
2271eb62cbbSBarry Smith     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
2281eb62cbbSBarry Smith   }
2290452661fSBarry Smith   PetscFree(owner);
2301eb62cbbSBarry Smith   starts[0] = 0;
23117699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
2321eb62cbbSBarry Smith   count = 0;
23317699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
2341eb62cbbSBarry Smith     if (procs[i]) {
235416022c9SBarry Smith       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
2361eb62cbbSBarry Smith                 comm,send_waits+count++);
2371eb62cbbSBarry Smith     }
2381eb62cbbSBarry Smith   }
2390452661fSBarry Smith   PetscFree(starts); PetscFree(nprocs);
2401eb62cbbSBarry Smith 
2411eb62cbbSBarry Smith   /* Free cache space */
2422191d07cSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
24378b31e54SBarry Smith   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
2441eb62cbbSBarry Smith 
2451eb62cbbSBarry Smith   aij->svalues    = svalues;    aij->rvalues    = rvalues;
2461eb62cbbSBarry Smith   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
2471eb62cbbSBarry Smith   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
2481eb62cbbSBarry Smith   aij->rmax       = nmax;
2491eb62cbbSBarry Smith 
2501eb62cbbSBarry Smith   return 0;
2511eb62cbbSBarry Smith }
25244a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat);
2531eb62cbbSBarry Smith 
254fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
2551eb62cbbSBarry Smith {
25644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
2571eb62cbbSBarry Smith   MPI_Status  *send_status,recv_status;
258416022c9SBarry Smith   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
259905e6a2fSBarry Smith   int         row,col,other_disassembled;
2601eb62cbbSBarry Smith   Scalar      *values,val;
2611eb62cbbSBarry Smith   InsertMode  addv = aij->insertmode;
2621eb62cbbSBarry Smith 
2631eb62cbbSBarry Smith   /*  wait on receives */
2641eb62cbbSBarry Smith   while (count) {
265d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
2661eb62cbbSBarry Smith     /* unpack receives into our local space */
267d6dfbf8fSBarry Smith     values = aij->rvalues + 3*imdex*aij->rmax;
268c60448a5SBarry Smith     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
2691eb62cbbSBarry Smith     n = n/3;
2701eb62cbbSBarry Smith     for ( i=0; i<n; i++ ) {
271227d817aSBarry Smith       row = (int) PetscReal(values[3*i]) - aij->rstart;
272227d817aSBarry Smith       col = (int) PetscReal(values[3*i+1]);
2731eb62cbbSBarry Smith       val = values[3*i+2];
2741eb62cbbSBarry Smith       if (col >= aij->cstart && col < aij->cend) {
2751eb62cbbSBarry Smith         col -= aij->cstart;
2761eb62cbbSBarry Smith         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
2771eb62cbbSBarry Smith       }
2781eb62cbbSBarry Smith       else {
27955a1b374SBarry Smith         if (mat->was_assembled || mat->assembled) {
280905e6a2fSBarry Smith           if (!aij->colmap) {
281905e6a2fSBarry Smith             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
282905e6a2fSBarry Smith           }
283905e6a2fSBarry Smith           col = aij->colmap[col] - 1;
284ec8511deSBarry Smith           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
2852493cbb0SBarry Smith             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
286227d817aSBarry Smith             col = (int) PetscReal(values[3*i+1]);
287d6dfbf8fSBarry Smith           }
2889e25ed09SBarry Smith         }
2891eb62cbbSBarry Smith         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
2901eb62cbbSBarry Smith       }
2911eb62cbbSBarry Smith     }
2921eb62cbbSBarry Smith     count--;
2931eb62cbbSBarry Smith   }
2940452661fSBarry Smith   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
2951eb62cbbSBarry Smith 
2961eb62cbbSBarry Smith   /* wait on sends */
2971eb62cbbSBarry Smith   if (aij->nsends) {
298*0a198c4cSBarry Smith     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
2991eb62cbbSBarry Smith     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
3000452661fSBarry Smith     PetscFree(send_status);
3011eb62cbbSBarry Smith   }
3020452661fSBarry Smith   PetscFree(aij->send_waits); PetscFree(aij->svalues);
3031eb62cbbSBarry Smith 
30464119d58SLois Curfman McInnes   aij->insertmode = NOT_SET_VALUES;
30578b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
30678b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
3071eb62cbbSBarry Smith 
3082493cbb0SBarry Smith   /* determine if any processor has disassembled, if so we must
3092493cbb0SBarry Smith      also disassemble ourselfs, in order that we may reassemble. */
310227d817aSBarry Smith   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
311227d817aSBarry Smith   if (mat->was_assembled && !other_disassembled) {
3122493cbb0SBarry Smith     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
3132493cbb0SBarry Smith   }
3142493cbb0SBarry Smith 
3156d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
31678b31e54SBarry Smith     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
3175e42470aSBarry Smith   }
31878b31e54SBarry Smith   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
31978b31e54SBarry Smith   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
3205e42470aSBarry Smith 
3217a0afa10SBarry Smith   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
3228a729477SBarry Smith   return 0;
3238a729477SBarry Smith }
3248a729477SBarry Smith 
3252ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A)
3261eb62cbbSBarry Smith {
32744a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
328dbd7a890SLois Curfman McInnes   int        ierr;
32978b31e54SBarry Smith   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
33078b31e54SBarry Smith   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
3311eb62cbbSBarry Smith   return 0;
3321eb62cbbSBarry Smith }
3331eb62cbbSBarry Smith 
3341eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the
3351eb62cbbSBarry Smith    matrix is square and the column and row owerships are identical.
3361eb62cbbSBarry Smith    This is a BUG. The only way to fix it seems to be to access
3371eb62cbbSBarry Smith    aij->A and aij->B directly and not through the MatZeroRows()
3381eb62cbbSBarry Smith    routine.
3391eb62cbbSBarry Smith */
34044a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
3411eb62cbbSBarry Smith {
34244a69424SLois Curfman McInnes   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
34317699dbbSLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
3446abc6512SBarry Smith   int            *procs,*nprocs,j,found,idx,nsends,*work;
34517699dbbSLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
3465392566eSBarry Smith   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
347d6dfbf8fSBarry Smith   int            *lens,imdex,*lrows,*values;
348d6dfbf8fSBarry Smith   MPI_Comm       comm = A->comm;
3491eb62cbbSBarry Smith   MPI_Request    *send_waits,*recv_waits;
3501eb62cbbSBarry Smith   MPI_Status     recv_status,*send_status;
3511eb62cbbSBarry Smith   IS             istmp;
3521eb62cbbSBarry Smith 
35377c4ece6SBarry Smith   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
35478b31e54SBarry Smith   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
3551eb62cbbSBarry Smith 
3561eb62cbbSBarry Smith   /*  first count number of contributors to each processor */
3570452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
358cddf8d76SBarry Smith   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
3590452661fSBarry Smith   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
3601eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
3611eb62cbbSBarry Smith     idx = rows[i];
3621eb62cbbSBarry Smith     found = 0;
36317699dbbSLois Curfman McInnes     for ( j=0; j<size; j++ ) {
3641eb62cbbSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3651eb62cbbSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
3661eb62cbbSBarry Smith       }
3671eb62cbbSBarry Smith     }
368bbb6d6a8SBarry Smith     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
3691eb62cbbSBarry Smith   }
37017699dbbSLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
3711eb62cbbSBarry Smith 
3721eb62cbbSBarry Smith   /* inform other processors of number of messages and max length*/
3730452661fSBarry Smith   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
37417699dbbSLois Curfman McInnes   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
37517699dbbSLois Curfman McInnes   nrecvs = work[rank];
37617699dbbSLois Curfman McInnes   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
37717699dbbSLois Curfman McInnes   nmax = work[rank];
3780452661fSBarry Smith   PetscFree(work);
3791eb62cbbSBarry Smith 
3801eb62cbbSBarry Smith   /* post receives:   */
3810452661fSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
38278b31e54SBarry Smith   CHKPTRQ(rvalues);
3830452661fSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
38478b31e54SBarry Smith   CHKPTRQ(recv_waits);
3851eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
386416022c9SBarry Smith     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
3871eb62cbbSBarry Smith   }
3881eb62cbbSBarry Smith 
3891eb62cbbSBarry Smith   /* do sends:
3901eb62cbbSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3911eb62cbbSBarry Smith          the ith processor
3921eb62cbbSBarry Smith   */
3930452661fSBarry Smith   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
3940452661fSBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
39578b31e54SBarry Smith   CHKPTRQ(send_waits);
3960452661fSBarry Smith   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
3971eb62cbbSBarry Smith   starts[0] = 0;
39817699dbbSLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3991eb62cbbSBarry Smith   for ( i=0; i<N; i++ ) {
4001eb62cbbSBarry Smith     svalues[starts[owner[i]]++] = rows[i];
4011eb62cbbSBarry Smith   }
4021eb62cbbSBarry Smith   ISRestoreIndices(is,&rows);
4031eb62cbbSBarry Smith 
4041eb62cbbSBarry Smith   starts[0] = 0;
40517699dbbSLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
4061eb62cbbSBarry Smith   count = 0;
40717699dbbSLois Curfman McInnes   for ( i=0; i<size; i++ ) {
4081eb62cbbSBarry Smith     if (procs[i]) {
409416022c9SBarry Smith       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
4101eb62cbbSBarry Smith     }
4111eb62cbbSBarry Smith   }
4120452661fSBarry Smith   PetscFree(starts);
4131eb62cbbSBarry Smith 
41417699dbbSLois Curfman McInnes   base = owners[rank];
4151eb62cbbSBarry Smith 
4161eb62cbbSBarry Smith   /*  wait on receives */
4170452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
4181eb62cbbSBarry Smith   source = lens + nrecvs;
4191eb62cbbSBarry Smith   count  = nrecvs; slen = 0;
4201eb62cbbSBarry Smith   while (count) {
421d6dfbf8fSBarry Smith     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
4221eb62cbbSBarry Smith     /* unpack receives into our local space */
4231eb62cbbSBarry Smith     MPI_Get_count(&recv_status,MPI_INT,&n);
424d6dfbf8fSBarry Smith     source[imdex]  = recv_status.MPI_SOURCE;
425d6dfbf8fSBarry Smith     lens[imdex]  = n;
4261eb62cbbSBarry Smith     slen += n;
4271eb62cbbSBarry Smith     count--;
4281eb62cbbSBarry Smith   }
4290452661fSBarry Smith   PetscFree(recv_waits);
4301eb62cbbSBarry Smith 
4311eb62cbbSBarry Smith   /* move the data into the send scatter */
4320452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
4331eb62cbbSBarry Smith   count = 0;
4341eb62cbbSBarry Smith   for ( i=0; i<nrecvs; i++ ) {
4351eb62cbbSBarry Smith     values = rvalues + i*nmax;
4361eb62cbbSBarry Smith     for ( j=0; j<lens[i]; j++ ) {
4371eb62cbbSBarry Smith       lrows[count++] = values[j] - base;
4381eb62cbbSBarry Smith     }
4391eb62cbbSBarry Smith   }
4400452661fSBarry Smith   PetscFree(rvalues); PetscFree(lens);
4410452661fSBarry Smith   PetscFree(owner); PetscFree(nprocs);
4421eb62cbbSBarry Smith 
4431eb62cbbSBarry Smith   /* actually zap the local rows */
444537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
445464493b3SBarry Smith   PLogObjectParent(A,istmp);
4460452661fSBarry Smith   PetscFree(lrows);
44778b31e54SBarry Smith   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
44878b31e54SBarry Smith   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
44978b31e54SBarry Smith   ierr = ISDestroy(istmp); CHKERRQ(ierr);
4501eb62cbbSBarry Smith 
4511eb62cbbSBarry Smith   /* wait on sends */
4521eb62cbbSBarry Smith   if (nsends) {
4530452661fSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
45478b31e54SBarry Smith     CHKPTRQ(send_status);
4551eb62cbbSBarry Smith     MPI_Waitall(nsends,send_waits,send_status);
4560452661fSBarry Smith     PetscFree(send_status);
4571eb62cbbSBarry Smith   }
4580452661fSBarry Smith   PetscFree(send_waits); PetscFree(svalues);
4591eb62cbbSBarry Smith 
4601eb62cbbSBarry Smith   return 0;
4611eb62cbbSBarry Smith }
4621eb62cbbSBarry Smith 
463416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
4641eb62cbbSBarry Smith {
465416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
466fbd6ef76SBarry Smith   int        ierr,nt;
467416022c9SBarry Smith 
468a2ce50c7SBarry Smith   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
469fbd6ef76SBarry Smith   if (nt != a->n) {
47047b4a8eaSLois Curfman McInnes     SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx");
471fbd6ef76SBarry Smith   }
47264119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
47338597bd4SSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
47464119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
47538597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
4761eb62cbbSBarry Smith   return 0;
4771eb62cbbSBarry Smith }
4781eb62cbbSBarry Smith 
479416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
480da3a660dSBarry Smith {
481416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
482da3a660dSBarry Smith   int        ierr;
48364119d58SLois Curfman McInnes   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
48438597bd4SSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
48564119d58SLois Curfman McInnes   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
48638597bd4SSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
487da3a660dSBarry Smith   return 0;
488da3a660dSBarry Smith }
489da3a660dSBarry Smith 
490416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
491da3a660dSBarry Smith {
492416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
493da3a660dSBarry Smith   int        ierr;
494da3a660dSBarry Smith 
495da3a660dSBarry Smith   /* do nondiagonal part */
49638597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
497da3a660dSBarry Smith   /* send it on its way */
498537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
499da3a660dSBarry Smith   /* do local part */
50038597bd4SSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
501da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
502da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
503da3a660dSBarry Smith   /* but is not perhaps always true. */
504537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
505da3a660dSBarry Smith   return 0;
506da3a660dSBarry Smith }
507da3a660dSBarry Smith 
508416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
509da3a660dSBarry Smith {
510416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
511da3a660dSBarry Smith   int        ierr;
512da3a660dSBarry Smith 
513da3a660dSBarry Smith   /* do nondiagonal part */
51438597bd4SSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
515da3a660dSBarry Smith   /* send it on its way */
516537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
517da3a660dSBarry Smith   /* do local part */
51838597bd4SSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
519da3a660dSBarry Smith   /* receive remote parts: note this assumes the values are not actually */
520da3a660dSBarry Smith   /* inserted in yy until the next line, which is true for my implementation*/
521da3a660dSBarry Smith   /* but is not perhaps always true. */
522*0a198c4cSBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
523da3a660dSBarry Smith   return 0;
524da3a660dSBarry Smith }
525da3a660dSBarry Smith 
5261eb62cbbSBarry Smith /*
5271eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
5281eb62cbbSBarry Smith    diagonal block
5291eb62cbbSBarry Smith */
530416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
5311eb62cbbSBarry Smith {
532416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
53344cd7ae7SLois Curfman McInnes   if (a->M != a->N)
53444cd7ae7SLois Curfman McInnes     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
5355baf8537SBarry Smith   if (a->rstart != a->cstart || a->rend != a->cend) {
5365baf8537SBarry Smith     SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition");  }
537416022c9SBarry Smith   return MatGetDiagonal(a->A,v);
5381eb62cbbSBarry Smith }
5391eb62cbbSBarry Smith 
540052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A)
541052efed2SBarry Smith {
542052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
543052efed2SBarry Smith   int        ierr;
544052efed2SBarry Smith   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
545052efed2SBarry Smith   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
546052efed2SBarry Smith   return 0;
547052efed2SBarry Smith }
548052efed2SBarry Smith 
54944a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj)
5501eb62cbbSBarry Smith {
5511eb62cbbSBarry Smith   Mat        mat = (Mat) obj;
55244a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
5531eb62cbbSBarry Smith   int        ierr;
554a5a9c739SBarry Smith #if defined(PETSC_LOG)
5556e35fa57SLois Curfman McInnes   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
556a5a9c739SBarry Smith #endif
5570452661fSBarry Smith   PetscFree(aij->rowners);
55878b31e54SBarry Smith   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
55978b31e54SBarry Smith   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
5600452661fSBarry Smith   if (aij->colmap) PetscFree(aij->colmap);
5610452661fSBarry Smith   if (aij->garray) PetscFree(aij->garray);
5621eb62cbbSBarry Smith   if (aij->lvec)   VecDestroy(aij->lvec);
563a56f8943SBarry Smith   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
5647a0afa10SBarry Smith   if (aij->rowvalues) PetscFree(aij->rowvalues);
5650452661fSBarry Smith   PetscFree(aij);
566a5a9c739SBarry Smith   PLogObjectDestroy(mat);
5670452661fSBarry Smith   PetscHeaderDestroy(mat);
5681eb62cbbSBarry Smith   return 0;
5691eb62cbbSBarry Smith }
5707857610eSBarry Smith #include "draw.h"
571b16a3bb1SBarry Smith #include "pinclude/pviewer.h"
572ee50ffe9SBarry Smith 
573416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
5741eb62cbbSBarry Smith {
575416022c9SBarry Smith   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
576416022c9SBarry Smith   int         ierr;
577416022c9SBarry Smith 
57817699dbbSLois Curfman McInnes   if (aij->size == 1) {
579416022c9SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
580416022c9SBarry Smith   }
581a523beb4SLois Curfman McInnes   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
582416022c9SBarry Smith   return 0;
583416022c9SBarry Smith }
584416022c9SBarry Smith 
585d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
586416022c9SBarry Smith {
58744a69424SLois Curfman McInnes   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
588dbb450caSBarry Smith   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
589a56f8943SBarry Smith   int         ierr, format,shift = C->indexshift,rank;
590d636dbe3SBarry Smith   FILE        *fd;
59119bcc07fSBarry Smith   ViewerType  vtype;
592416022c9SBarry Smith 
59319bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
59419bcc07fSBarry Smith   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
59590ace30eSBarry Smith     ierr = ViewerGetFormat(viewer,&format);
596*0a198c4cSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
5974e220ebcSLois Curfman McInnes       MatInfo info;
5984e220ebcSLois Curfman McInnes       int     flg;
599a56f8943SBarry Smith       MPI_Comm_rank(mat->comm,&rank);
60090ace30eSBarry Smith       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
6014e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
60295e01e2fSLois Curfman McInnes       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
60377c4ece6SBarry Smith       PetscSequentialPhaseBegin(mat->comm,1);
60495e01e2fSLois Curfman McInnes       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
6054e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
60695e01e2fSLois Curfman McInnes       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
6074e220ebcSLois Curfman McInnes          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
6084e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
6094e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
6104e220ebcSLois Curfman McInnes       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
6114e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
612a56f8943SBarry Smith       fflush(fd);
61377c4ece6SBarry Smith       PetscSequentialPhaseEnd(mat->comm,1);
614a40aa06bSLois Curfman McInnes       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
615416022c9SBarry Smith       return 0;
616416022c9SBarry Smith     }
617*0a198c4cSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
61808480c60SBarry Smith       return 0;
61908480c60SBarry Smith     }
620416022c9SBarry Smith   }
621416022c9SBarry Smith 
62219bcc07fSBarry Smith   if (vtype == DRAW_VIEWER) {
62319bcc07fSBarry Smith     Draw       draw;
62419bcc07fSBarry Smith     PetscTruth isnull;
62519bcc07fSBarry Smith     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
62619bcc07fSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
62719bcc07fSBarry Smith   }
62819bcc07fSBarry Smith 
62919bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER) {
63090ace30eSBarry Smith     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
63177c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
632d13ab20cSBarry Smith     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
63317699dbbSLois Curfman McInnes            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
6341eb62cbbSBarry Smith            aij->cend);
63578b31e54SBarry Smith     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
63678b31e54SBarry Smith     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
637d13ab20cSBarry Smith     fflush(fd);
63877c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
639d13ab20cSBarry Smith   }
640416022c9SBarry Smith   else {
641a56f8943SBarry Smith     int size = aij->size;
642a56f8943SBarry Smith     rank = aij->rank;
64317699dbbSLois Curfman McInnes     if (size == 1) {
64478b31e54SBarry Smith       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
64595373324SBarry Smith     }
64695373324SBarry Smith     else {
64795373324SBarry Smith       /* assemble the entire matrix onto first processor. */
64895373324SBarry Smith       Mat         A;
649ec8511deSBarry Smith       Mat_SeqAIJ *Aloc;
6502eb8c8abSBarry Smith       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
65195373324SBarry Smith       Scalar      *a;
6522ee70a88SLois Curfman McInnes 
65317699dbbSLois Curfman McInnes       if (!rank) {
654b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
655c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
65695373324SBarry Smith       }
65795373324SBarry Smith       else {
658b4fd4287SBarry Smith         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
659c451ab25SLois Curfman McInnes                CHKERRQ(ierr);
66095373324SBarry Smith       }
661464493b3SBarry Smith       PLogObjectParent(mat,A);
662416022c9SBarry Smith 
66395373324SBarry Smith       /* copy over the A part */
664ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->A->data;
6652ee70a88SLois Curfman McInnes       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
66695373324SBarry Smith       row = aij->rstart;
667dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
66895373324SBarry Smith       for ( i=0; i<m; i++ ) {
669416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
67095373324SBarry Smith         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
67195373324SBarry Smith       }
6722ee70a88SLois Curfman McInnes       aj = Aloc->j;
673dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
67495373324SBarry Smith 
67595373324SBarry Smith       /* copy over the B part */
676ec8511deSBarry Smith       Aloc = (Mat_SeqAIJ*) aij->B->data;
6772ee70a88SLois Curfman McInnes       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
67895373324SBarry Smith       row = aij->rstart;
6790452661fSBarry Smith       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
680dbb450caSBarry Smith       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
68195373324SBarry Smith       for ( i=0; i<m; i++ ) {
682416022c9SBarry Smith         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
68395373324SBarry Smith         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
68495373324SBarry Smith       }
6850452661fSBarry Smith       PetscFree(ct);
6866d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
6876d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
68817699dbbSLois Curfman McInnes       if (!rank) {
68978b31e54SBarry Smith         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
69095373324SBarry Smith       }
69178b31e54SBarry Smith       ierr = MatDestroy(A); CHKERRQ(ierr);
69295373324SBarry Smith     }
69395373324SBarry Smith   }
6941eb62cbbSBarry Smith   return 0;
6951eb62cbbSBarry Smith }
6961eb62cbbSBarry Smith 
697416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
698416022c9SBarry Smith {
699416022c9SBarry Smith   Mat         mat = (Mat) obj;
700416022c9SBarry Smith   int         ierr;
70119bcc07fSBarry Smith   ViewerType  vtype;
702416022c9SBarry Smith 
70319bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
70419bcc07fSBarry Smith   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
70519bcc07fSBarry Smith       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
706d7e8b826SBarry Smith     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
707416022c9SBarry Smith   }
70819bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
709416022c9SBarry Smith     return MatView_MPIAIJ_Binary(mat,viewer);
710416022c9SBarry Smith   }
711416022c9SBarry Smith   return 0;
712416022c9SBarry Smith }
713416022c9SBarry Smith 
7141eb62cbbSBarry Smith /*
7151eb62cbbSBarry Smith     This has to provide several versions.
7161eb62cbbSBarry Smith 
7171eb62cbbSBarry Smith      2) a) use only local smoothing updating outer values only once.
7181eb62cbbSBarry Smith         b) local smoothing updating outer values each inner iteration
719d6dfbf8fSBarry Smith      3) color updating out values betwen colors.
7201eb62cbbSBarry Smith */
721fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
722dbb450caSBarry Smith                            double fshift,int its,Vec xx)
7238a729477SBarry Smith {
72444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
725d6dfbf8fSBarry Smith   Mat        AA = mat->A, BB = mat->B;
726ec8511deSBarry Smith   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
727c16cb8f2SBarry Smith   Scalar     *b,*x,*xs,*ls,d,*v,sum;
7286abc6512SBarry Smith   int        ierr,*idx, *diag;
729416022c9SBarry Smith   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
7308a729477SBarry Smith 
731d6dfbf8fSBarry Smith   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
732dbb450caSBarry Smith   xs = x + shift; /* shift by one for index start of 1 */
733dbb450caSBarry Smith   ls = ls + shift;
734ec8511deSBarry Smith   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
735d6dfbf8fSBarry Smith   diag = A->diag;
736c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
737da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
73838597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
739da3a660dSBarry Smith     }
74064119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
74178b31e54SBarry Smith     CHKERRQ(ierr);
74264119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
74378b31e54SBarry Smith     CHKERRQ(ierr);
744d6dfbf8fSBarry Smith     while (its--) {
745d6dfbf8fSBarry Smith       /* go down through the rows */
746d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
747d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
748dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
749dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
750d6dfbf8fSBarry Smith         sum  = b[i];
751d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
752dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
753d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
754dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
755dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
756d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
75755a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
758d6dfbf8fSBarry Smith       }
759d6dfbf8fSBarry Smith       /* come up through the rows */
760d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
761d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
762dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
763dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
764d6dfbf8fSBarry Smith         sum  = b[i];
765d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
766dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
767d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
768dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
769dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
770d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
77155a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
772d6dfbf8fSBarry Smith       }
773d6dfbf8fSBarry Smith     }
774d6dfbf8fSBarry Smith   }
775d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
776da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
77738597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
778da3a660dSBarry Smith     }
77964119d58SLois Curfman McInnes     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
78078b31e54SBarry Smith     CHKERRQ(ierr);
78164119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
78278b31e54SBarry Smith     CHKERRQ(ierr);
783d6dfbf8fSBarry Smith     while (its--) {
784d6dfbf8fSBarry Smith       for ( i=0; i<m; i++ ) {
785d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
786dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
787dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
788d6dfbf8fSBarry Smith         sum  = b[i];
789d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
790dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
791d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
792dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
793dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
794d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
79555a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
796d6dfbf8fSBarry Smith       }
797d6dfbf8fSBarry Smith     }
798d6dfbf8fSBarry Smith   }
799d6dfbf8fSBarry Smith   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
800da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
80138597bd4SSatish Balay       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
802da3a660dSBarry Smith     }
80364119d58SLois Curfman McInnes     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
80478b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
80564119d58SLois Curfman McInnes     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
80678b31e54SBarry Smith                             mat->Mvctx); CHKERRQ(ierr);
807d6dfbf8fSBarry Smith     while (its--) {
808d6dfbf8fSBarry Smith       for ( i=m-1; i>-1; i-- ) {
809d6dfbf8fSBarry Smith         n    = A->i[i+1] - A->i[i];
810dbb450caSBarry Smith         idx  = A->j + A->i[i] + shift;
811dbb450caSBarry Smith         v    = A->a + A->i[i] + shift;
812d6dfbf8fSBarry Smith         sum  = b[i];
813d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,xs,v,idx,n);
814dbb450caSBarry Smith         d    = fshift + A->a[diag[i]+shift];
815d6dfbf8fSBarry Smith         n    = B->i[i+1] - B->i[i];
816dbb450caSBarry Smith         idx  = B->j + B->i[i] + shift;
817dbb450caSBarry Smith         v    = B->a + B->i[i] + shift;
818d6dfbf8fSBarry Smith         SPARSEDENSEMDOT(sum,ls,v,idx,n);
81955a1b374SBarry Smith         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
820d6dfbf8fSBarry Smith       }
821d6dfbf8fSBarry Smith     }
822d6dfbf8fSBarry Smith   }
823c16cb8f2SBarry Smith   else {
824c16cb8f2SBarry Smith     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
825c16cb8f2SBarry Smith   }
8268a729477SBarry Smith   return 0;
8278a729477SBarry Smith }
828a66be287SLois Curfman McInnes 
8294e220ebcSLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
830a66be287SLois Curfman McInnes {
831a66be287SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
832a66be287SLois Curfman McInnes   Mat        A = mat->A, B = mat->B;
8334e220ebcSLois Curfman McInnes   int        ierr;
8344e220ebcSLois Curfman McInnes   double     isend[5], irecv[5];
835a66be287SLois Curfman McInnes 
8364e220ebcSLois Curfman McInnes   info->rows_global    = (double)mat->M;
8374e220ebcSLois Curfman McInnes   info->columns_global = (double)mat->N;
8384e220ebcSLois Curfman McInnes   info->rows_local     = (double)mat->m;
8394e220ebcSLois Curfman McInnes   info->columns_local  = (double)mat->N;
8404e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
8414e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
8424e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
8434e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
8444e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
8454e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
8464e220ebcSLois Curfman McInnes   isend[3] += info->memory;  isend[4] += info->mallocs;
847a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
8484e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
8494e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
8504e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
8514e220ebcSLois Curfman McInnes     info->memory       = isend[3];
8524e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
853a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
8544e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
8554e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8564e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8574e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8584e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8594e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
860a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
8614e220ebcSLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
8624e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8634e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8644e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8654e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8664e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
867a66be287SLois Curfman McInnes   }
8684e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
8694e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
8704e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
8714e220ebcSLois Curfman McInnes 
872a66be287SLois Curfman McInnes   return 0;
873a66be287SLois Curfman McInnes }
874a66be287SLois Curfman McInnes 
875299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
876299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
877299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
878299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
879299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
880299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
881299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
882299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
883299609e3SLois Curfman McInnes 
884416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op)
885c74985f6SBarry Smith {
886c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
887c74985f6SBarry Smith 
8886d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
8896d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
8906d4a8577SBarry Smith       op == MAT_COLUMNS_SORTED ||
8916d4a8577SBarry Smith       op == MAT_ROW_ORIENTED) {
892c0bbcb79SLois Curfman McInnes         MatSetOption(a->A,op);
893c0bbcb79SLois Curfman McInnes         MatSetOption(a->B,op);
894c74985f6SBarry Smith   }
8956d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
8966d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8976d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
8986d4a8577SBarry Smith            op == MAT_YES_NEW_DIAGONALS)
89994a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
9006d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED) {
9014b0e389bSBarry Smith     a->roworiented = 0;
9024b0e389bSBarry Smith     MatSetOption(a->A,op);
9034b0e389bSBarry Smith     MatSetOption(a->B,op);
9044b0e389bSBarry Smith   }
9056d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
9066d4a8577SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
907c0bbcb79SLois Curfman McInnes   else
9084d39ef84SLois Curfman McInnes     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
909c74985f6SBarry Smith   return 0;
910c74985f6SBarry Smith }
911c74985f6SBarry Smith 
912d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
913c74985f6SBarry Smith {
91444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
915c74985f6SBarry Smith   *m = mat->M; *n = mat->N;
916c74985f6SBarry Smith   return 0;
917c74985f6SBarry Smith }
918c74985f6SBarry Smith 
919d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
920c74985f6SBarry Smith {
92144a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
922b7c46309SBarry Smith   *m = mat->m; *n = mat->N;
923c74985f6SBarry Smith   return 0;
924c74985f6SBarry Smith }
925c74985f6SBarry Smith 
926d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
927c74985f6SBarry Smith {
92844a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
929c74985f6SBarry Smith   *m = mat->rstart; *n = mat->rend;
930c74985f6SBarry Smith   return 0;
931c74985f6SBarry Smith }
932c74985f6SBarry Smith 
9336d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
9346d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
9356d84be18SBarry Smith 
9366d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
93739e00950SLois Curfman McInnes {
938154123eaSLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
93970f0671dSBarry Smith   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
940154123eaSLois Curfman McInnes   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
941154123eaSLois Curfman McInnes   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
94270f0671dSBarry Smith   int        *cmap, *idx_p;
94339e00950SLois Curfman McInnes 
9447a0afa10SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
9457a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
9467a0afa10SBarry Smith 
94770f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
9487a0afa10SBarry Smith     /*
9497a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
9507a0afa10SBarry Smith     */
9517a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
952c16cb8f2SBarry Smith     int     max = 1,m = mat->m,tmp;
953c16cb8f2SBarry Smith     for ( i=0; i<m; i++ ) {
9547a0afa10SBarry Smith       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
9557a0afa10SBarry Smith       if (max < tmp) { max = tmp; }
9567a0afa10SBarry Smith     }
9577a0afa10SBarry Smith     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
9587a0afa10SBarry Smith     CHKPTRQ(mat->rowvalues);
9597a0afa10SBarry Smith     mat->rowindices = (int *) (mat->rowvalues + max);
9607a0afa10SBarry Smith   }
9617a0afa10SBarry Smith 
962b49de8d1SLois Curfman McInnes   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
963abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
96439e00950SLois Curfman McInnes 
965154123eaSLois Curfman McInnes   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
966154123eaSLois Curfman McInnes   if (!v)   {pvA = 0; pvB = 0;}
967154123eaSLois Curfman McInnes   if (!idx) {pcA = 0; if (!v) pcB = 0;}
96838597bd4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
96938597bd4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
970154123eaSLois Curfman McInnes   nztot = nzA + nzB;
971154123eaSLois Curfman McInnes 
97270f0671dSBarry Smith   cmap  = mat->garray;
973154123eaSLois Curfman McInnes   if (v  || idx) {
974154123eaSLois Curfman McInnes     if (nztot) {
975154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
97670f0671dSBarry Smith       int imark = -1;
977154123eaSLois Curfman McInnes       if (v) {
97870f0671dSBarry Smith         *v = v_p = mat->rowvalues;
97939e00950SLois Curfman McInnes         for ( i=0; i<nzB; i++ ) {
98070f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
981154123eaSLois Curfman McInnes           else break;
982154123eaSLois Curfman McInnes         }
983154123eaSLois Curfman McInnes         imark = i;
98470f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
98570f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
986154123eaSLois Curfman McInnes       }
987154123eaSLois Curfman McInnes       if (idx) {
98870f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
98970f0671dSBarry Smith         if (imark > -1) {
99070f0671dSBarry Smith           for ( i=0; i<imark; i++ ) {
99170f0671dSBarry Smith             idx_p[i] = cmap[cworkB[i]];
99270f0671dSBarry Smith           }
99370f0671dSBarry Smith         } else {
994154123eaSLois Curfman McInnes           for ( i=0; i<nzB; i++ ) {
99570f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
996154123eaSLois Curfman McInnes             else break;
997154123eaSLois Curfman McInnes           }
998154123eaSLois Curfman McInnes           imark = i;
99970f0671dSBarry Smith         }
100070f0671dSBarry Smith         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
100170f0671dSBarry Smith         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
100239e00950SLois Curfman McInnes       }
100339e00950SLois Curfman McInnes     }
10041ca473b0SSatish Balay     else {
10051ca473b0SSatish Balay       if (idx) *idx = 0;
10061ca473b0SSatish Balay       if (v)   *v   = 0;
10071ca473b0SSatish Balay     }
1008154123eaSLois Curfman McInnes   }
100939e00950SLois Curfman McInnes   *nz = nztot;
101038597bd4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
101138597bd4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
101239e00950SLois Curfman McInnes   return 0;
101339e00950SLois Curfman McInnes }
101439e00950SLois Curfman McInnes 
10156d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
101639e00950SLois Curfman McInnes {
10177a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
10187a0afa10SBarry Smith   if (aij->getrowactive == PETSC_FALSE) {
10197a0afa10SBarry Smith     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
10207a0afa10SBarry Smith   }
10217a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
102239e00950SLois Curfman McInnes   return 0;
102339e00950SLois Curfman McInnes }
102439e00950SLois Curfman McInnes 
1025cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1026855ac2c5SLois Curfman McInnes {
1027855ac2c5SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1028ec8511deSBarry Smith   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1029416022c9SBarry Smith   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1030416022c9SBarry Smith   double     sum = 0.0;
103104ca555eSLois Curfman McInnes   Scalar     *v;
103204ca555eSLois Curfman McInnes 
103317699dbbSLois Curfman McInnes   if (aij->size == 1) {
103414183eadSLois Curfman McInnes     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
103537fa93a5SLois Curfman McInnes   } else {
103604ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
103704ca555eSLois Curfman McInnes       v = amat->a;
103804ca555eSLois Curfman McInnes       for (i=0; i<amat->nz; i++ ) {
103904ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
104004ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
104104ca555eSLois Curfman McInnes #else
104204ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
104304ca555eSLois Curfman McInnes #endif
104404ca555eSLois Curfman McInnes       }
104504ca555eSLois Curfman McInnes       v = bmat->a;
104604ca555eSLois Curfman McInnes       for (i=0; i<bmat->nz; i++ ) {
104704ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX)
104804ca555eSLois Curfman McInnes         sum += real(conj(*v)*(*v)); v++;
104904ca555eSLois Curfman McInnes #else
105004ca555eSLois Curfman McInnes         sum += (*v)*(*v); v++;
105104ca555eSLois Curfman McInnes #endif
105204ca555eSLois Curfman McInnes       }
10536d84be18SBarry Smith       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
105404ca555eSLois Curfman McInnes       *norm = sqrt(*norm);
105504ca555eSLois Curfman McInnes     }
105604ca555eSLois Curfman McInnes     else if (type == NORM_1) { /* max column norm */
105704ca555eSLois Curfman McInnes       double *tmp, *tmp2;
105804ca555eSLois Curfman McInnes       int    *jj, *garray = aij->garray;
10590452661fSBarry Smith       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
10600452661fSBarry Smith       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1061cddf8d76SBarry Smith       PetscMemzero(tmp,aij->N*sizeof(double));
106204ca555eSLois Curfman McInnes       *norm = 0.0;
106304ca555eSLois Curfman McInnes       v = amat->a; jj = amat->j;
106404ca555eSLois Curfman McInnes       for ( j=0; j<amat->nz; j++ ) {
1065579c6b6fSBarry Smith         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
106604ca555eSLois Curfman McInnes       }
106704ca555eSLois Curfman McInnes       v = bmat->a; jj = bmat->j;
106804ca555eSLois Curfman McInnes       for ( j=0; j<bmat->nz; j++ ) {
1069579c6b6fSBarry Smith         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
107004ca555eSLois Curfman McInnes       }
10716d84be18SBarry Smith       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
107204ca555eSLois Curfman McInnes       for ( j=0; j<aij->N; j++ ) {
107304ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
107404ca555eSLois Curfman McInnes       }
10750452661fSBarry Smith       PetscFree(tmp); PetscFree(tmp2);
107604ca555eSLois Curfman McInnes     }
107704ca555eSLois Curfman McInnes     else if (type == NORM_INFINITY) { /* max row norm */
1078515d9167SLois Curfman McInnes       double ntemp = 0.0;
107904ca555eSLois Curfman McInnes       for ( j=0; j<amat->m; j++ ) {
1080dbb450caSBarry Smith         v = amat->a + amat->i[j] + shift;
108104ca555eSLois Curfman McInnes         sum = 0.0;
108204ca555eSLois Curfman McInnes         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1083cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
108404ca555eSLois Curfman McInnes         }
1085dbb450caSBarry Smith         v = bmat->a + bmat->i[j] + shift;
108604ca555eSLois Curfman McInnes         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1087cddf8d76SBarry Smith           sum += PetscAbsScalar(*v); v++;
108804ca555eSLois Curfman McInnes         }
1089515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
109004ca555eSLois Curfman McInnes       }
10916d84be18SBarry Smith       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
109204ca555eSLois Curfman McInnes     }
109304ca555eSLois Curfman McInnes     else {
1094515d9167SLois Curfman McInnes       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
109504ca555eSLois Curfman McInnes     }
109637fa93a5SLois Curfman McInnes   }
1097855ac2c5SLois Curfman McInnes   return 0;
1098855ac2c5SLois Curfman McInnes }
1099855ac2c5SLois Curfman McInnes 
11000de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1101b7c46309SBarry Smith {
1102b7c46309SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1103dbb450caSBarry Smith   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1104416022c9SBarry Smith   int        ierr,shift = Aloc->indexshift;
1105416022c9SBarry Smith   Mat        B;
1106b7c46309SBarry Smith   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1107b7c46309SBarry Smith   Scalar     *array;
1108b7c46309SBarry Smith 
11093638b69dSLois Curfman McInnes   if (matout == PETSC_NULL && M != N)
11103638b69dSLois Curfman McInnes     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1111b4fd4287SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1112b4fd4287SBarry Smith          PETSC_NULL,&B); CHKERRQ(ierr);
1113b7c46309SBarry Smith 
1114b7c46309SBarry Smith   /* copy over the A part */
1115ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->A->data;
1116b7c46309SBarry Smith   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1117b7c46309SBarry Smith   row = a->rstart;
1118dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1119b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1120416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1121b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1122b7c46309SBarry Smith   }
1123b7c46309SBarry Smith   aj = Aloc->j;
11244af08d9eSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1125b7c46309SBarry Smith 
1126b7c46309SBarry Smith   /* copy over the B part */
1127ec8511deSBarry Smith   Aloc = (Mat_SeqAIJ*) a->B->data;
1128b7c46309SBarry Smith   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1129b7c46309SBarry Smith   row = a->rstart;
11300452661fSBarry Smith   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1131dbb450caSBarry Smith   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1132b7c46309SBarry Smith   for ( i=0; i<m; i++ ) {
1133416022c9SBarry Smith     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1134b7c46309SBarry Smith     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1135b7c46309SBarry Smith   }
11360452661fSBarry Smith   PetscFree(ct);
11376d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11386d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11393638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
11400de55854SLois Curfman McInnes     *matout = B;
11410de55854SLois Curfman McInnes   } else {
11420de55854SLois Curfman McInnes     /* This isn't really an in-place transpose .... but free data structures from a */
11430452661fSBarry Smith     PetscFree(a->rowners);
11440de55854SLois Curfman McInnes     ierr = MatDestroy(a->A); CHKERRQ(ierr);
11450de55854SLois Curfman McInnes     ierr = MatDestroy(a->B); CHKERRQ(ierr);
11460452661fSBarry Smith     if (a->colmap) PetscFree(a->colmap);
11470452661fSBarry Smith     if (a->garray) PetscFree(a->garray);
11480de55854SLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
1149a56f8943SBarry Smith     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
11500452661fSBarry Smith     PetscFree(a);
1151416022c9SBarry Smith     PetscMemcpy(A,B,sizeof(struct _Mat));
11520452661fSBarry Smith     PetscHeaderDestroy(B);
11530de55854SLois Curfman McInnes   }
1154b7c46309SBarry Smith   return 0;
1155b7c46309SBarry Smith }
1156b7c46309SBarry Smith 
1157a008b906SSatish Balay int MatDiagonalScale_MPIAIJ(Mat A,Vec ll,Vec rr)
1158a008b906SSatish Balay {
1159a008b906SSatish Balay   Mat a = ((Mat_MPIAIJ *) A->data)->A;
1160a008b906SSatish Balay   Mat b = ((Mat_MPIAIJ *) A->data)->B;
1161a008b906SSatish Balay   int ierr,s1,s2,s3;
1162a008b906SSatish Balay 
1163a008b906SSatish Balay   if (ll)  {
1164a008b906SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1165a008b906SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
11660e95ebc0SSatish Balay     if (s1!=s2) SETERRQ(1,"MatDiagonalScale_MPIAIJ:non-conforming local sizes");
1167a008b906SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1168a008b906SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1169a008b906SSatish Balay   }
11700e95ebc0SSatish Balay   if (rr) SETERRQ(1,"MatDiagonalScale_MPIAIJ:not supported for right vector");
1171a008b906SSatish Balay   return 0;
1172a008b906SSatish Balay }
1173a008b906SSatish Balay 
1174a008b906SSatish Balay 
1175682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat);
1176682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A)
1177682d7d0cSBarry Smith {
1178682d7d0cSBarry Smith   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1179682d7d0cSBarry Smith 
1180682d7d0cSBarry Smith   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1181682d7d0cSBarry Smith   else return 0;
1182682d7d0cSBarry Smith }
1183682d7d0cSBarry Smith 
11845a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
11855a838052SSatish Balay {
11865a838052SSatish Balay   *bs = 1;
11875a838052SSatish Balay   return 0;
11885a838052SSatish Balay }
11895a838052SSatish Balay 
1190fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
11913d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
11922f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1193*0a198c4cSBarry Smith extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1194*0a198c4cSBarry Smith extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
11958a729477SBarry Smith /* -------------------------------------------------------------------*/
11962ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
119739e00950SLois Curfman McInnes        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
119844a69424SLois Curfman McInnes        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
119944a69424SLois Curfman McInnes        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
120036ce4990SBarry Smith        0,0,
120136ce4990SBarry Smith        0,0,
120236ce4990SBarry Smith        0,0,
120344a69424SLois Curfman McInnes        MatRelax_MPIAIJ,
1204b7c46309SBarry Smith        MatTranspose_MPIAIJ,
1205a66be287SLois Curfman McInnes        MatGetInfo_MPIAIJ,0,
1206a008b906SSatish Balay        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1207ee50ffe9SBarry Smith        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
12081eb62cbbSBarry Smith        0,
1209299609e3SLois Curfman McInnes        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
121036ce4990SBarry Smith        0,0,0,0,
1211d1710a03SLois Curfman McInnes        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
121236ce4990SBarry Smith        0,0,
12133e85bedfSBarry Smith        0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
1214b49de8d1SLois Curfman McInnes        0,0,0,
1215598137ffSSatish Balay        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1216052efed2SBarry Smith        MatPrintHelp_MPIAIJ,
12173b2fbd54SBarry Smith        MatScale_MPIAIJ,0,0,0,
1218*0a198c4cSBarry Smith        MatGetBlockSize_MPIAIJ,0,0,0,0,
1219*0a198c4cSBarry Smith        MatFDColoringCreate_MPIAIJ};
122036ce4990SBarry Smith 
12218a729477SBarry Smith 
12221987afe7SBarry Smith /*@C
1223ff756334SLois Curfman McInnes    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
12243a511b96SLois Curfman McInnes    (the default parallel PETSc format).  For good matrix assembly performance
12253a511b96SLois Curfman McInnes    the user should preallocate the matrix storage by setting the parameters
12263a511b96SLois Curfman McInnes    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
12273a511b96SLois Curfman McInnes    performance can be increased by more than a factor of 50.
12288a729477SBarry Smith 
12298a729477SBarry Smith    Input Parameters:
12301eb62cbbSBarry Smith .  comm - MPI communicator
12317d3e4905SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
123292e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
123392e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
123492e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
123592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
123692e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
12377d3e4905SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
123892e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1239ab693e5aSLois Curfman McInnes .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1240ff756334SLois Curfman McInnes            (same for all local rows)
12412bd5e0b2SLois Curfman McInnes .  d_nzz - array containing the number of nonzeros in the various rows of the
124292e8d321SLois Curfman McInnes            diagonal portion of the local submatrix (possibly different for each row)
12432bd5e0b2SLois Curfman McInnes            or PETSC_NULL. You must leave room for the diagonal entry even if
12442bd5e0b2SLois Curfman McInnes            it is zero.
12452bd5e0b2SLois Curfman McInnes .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1246ab693e5aSLois Curfman McInnes            submatrix (same for all local rows).
12472bd5e0b2SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various rows of the
12482bd5e0b2SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
12492bd5e0b2SLois Curfman McInnes            each row) or PETSC_NULL.
12508a729477SBarry Smith 
1251ff756334SLois Curfman McInnes    Output Parameter:
125244cd7ae7SLois Curfman McInnes .  A - the matrix
12538a729477SBarry Smith 
1254ff756334SLois Curfman McInnes    Notes:
1255ff756334SLois Curfman McInnes    The AIJ format (also called the Yale sparse matrix format or
1256ff756334SLois Curfman McInnes    compressed row storage), is fully compatible with standard Fortran 77
12570002213bSLois Curfman McInnes    storage.  That is, the stored row and column indices can begin at
12580002213bSLois Curfman McInnes    either one (as in Fortran) or zero.  See the users manual for details.
1259ff756334SLois Curfman McInnes 
1260ff756334SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
1261ff756334SLois Curfman McInnes    (possibly both).
1262ff756334SLois Curfman McInnes 
12635511cfe3SLois Curfman McInnes    By default, this format uses inodes (identical nodes) when possible.
12645511cfe3SLois Curfman McInnes    We search for consecutive rows with the same nonzero structure, thereby
12655511cfe3SLois Curfman McInnes    reusing matrix information to achieve increased efficiency.
12665511cfe3SLois Curfman McInnes 
12675511cfe3SLois Curfman McInnes    Options Database Keys:
12685511cfe3SLois Curfman McInnes $    -mat_aij_no_inode  - Do not use inodes
12696e62573dSLois Curfman McInnes $    -mat_aij_inode_limit <limit> - Set inode limit.
12706e62573dSLois Curfman McInnes $        (max limit=5)
12716e62573dSLois Curfman McInnes $    -mat_aij_oneindex - Internally use indexing starting at 1
12726e62573dSLois Curfman McInnes $        rather than 0.  Note: When calling MatSetValues(),
12736e62573dSLois Curfman McInnes $        the user still MUST index entries starting at 0!
12745511cfe3SLois Curfman McInnes 
1275e0245417SLois Curfman McInnes    Storage Information:
1276e0245417SLois Curfman McInnes    For a square global matrix we define each processor's diagonal portion
1277e0245417SLois Curfman McInnes    to be its local rows and the corresponding columns (a square submatrix);
1278e0245417SLois Curfman McInnes    each processor's off-diagonal portion encompasses the remainder of the
1279e0245417SLois Curfman McInnes    local matrix (a rectangular submatrix).
1280e0245417SLois Curfman McInnes 
1281e0245417SLois Curfman McInnes    The user can specify preallocated storage for the diagonal part of
12825ace5be8SLois Curfman McInnes    the local submatrix with either d_nz or d_nnz (not both).  Set
12835ace5be8SLois Curfman McInnes    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
12845ace5be8SLois Curfman McInnes    memory allocation.  Likewise, specify preallocated storage for the
12855ace5be8SLois Curfman McInnes    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1286ff756334SLois Curfman McInnes 
12875511cfe3SLois Curfman McInnes    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
12885511cfe3SLois Curfman McInnes    the figure below we depict these three local rows and all columns (0-11).
12892191d07cSBarry Smith 
1290b810aeb4SBarry Smith $          0 1 2 3 4 5 6 7 8 9 10 11
1291b810aeb4SBarry Smith $         -------------------
12925511cfe3SLois Curfman McInnes $  row 3  |  o o o d d d o o o o o o
12935511cfe3SLois Curfman McInnes $  row 4  |  o o o d d d o o o o o o
12945511cfe3SLois Curfman McInnes $  row 5  |  o o o d d d o o o o o o
12955511cfe3SLois Curfman McInnes $         -------------------
1296b810aeb4SBarry Smith $
1297b810aeb4SBarry Smith 
12985511cfe3SLois Curfman McInnes    Thus, any entries in the d locations are stored in the d (diagonal)
12995511cfe3SLois Curfman McInnes    submatrix, and any entries in the o locations are stored in the
13005511cfe3SLois Curfman McInnes    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
13015511cfe3SLois Curfman McInnes    stored simply in the MATSEQAIJ format for compressed row storage.
13025511cfe3SLois Curfman McInnes 
13035511cfe3SLois Curfman McInnes    Now d_nz should indicate the number of nonzeros per row in the d matrix,
13045511cfe3SLois Curfman McInnes    and o_nz should indicate the number of nonzeros per row in the o matrix.
13055511cfe3SLois Curfman McInnes    In general, for PDE problems in which most nonzeros are near the diagonal,
13063d323bbdSBarry Smith    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
130792e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
13083d323bbdSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
13093a511b96SLois Curfman McInnes 
1310dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel
1311ff756334SLois Curfman McInnes 
1312fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
13138a729477SBarry Smith @*/
13141eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
131544cd7ae7SLois Curfman McInnes                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
13168a729477SBarry Smith {
131744cd7ae7SLois Curfman McInnes   Mat          B;
131844cd7ae7SLois Curfman McInnes   Mat_MPIAIJ   *b;
131936ce4990SBarry Smith   int          ierr, i,sum[2],work[2],size;
1320416022c9SBarry Smith 
132144cd7ae7SLois Curfman McInnes   *A = 0;
132244cd7ae7SLois Curfman McInnes   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
132344cd7ae7SLois Curfman McInnes   PLogObjectCreate(B);
132444cd7ae7SLois Curfman McInnes   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
132544cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_MPIAIJ));
132644cd7ae7SLois Curfman McInnes   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
132736ce4990SBarry Smith   MPI_Comm_size(comm,&size);
132836ce4990SBarry Smith   if (size == 1) {
132936ce4990SBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
133036ce4990SBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
133136ce4990SBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
133236ce4990SBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
133336ce4990SBarry Smith     B->ops.lufactor          = MatLUFactor_MPIAIJ;
133436ce4990SBarry Smith     B->ops.solve             = MatSolve_MPIAIJ;
133536ce4990SBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
133636ce4990SBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
133736ce4990SBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
133836ce4990SBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
133936ce4990SBarry Smith   }
134044cd7ae7SLois Curfman McInnes   B->destroy    = MatDestroy_MPIAIJ;
134144cd7ae7SLois Curfman McInnes   B->view       = MatView_MPIAIJ;
134244cd7ae7SLois Curfman McInnes   B->factor     = 0;
134344cd7ae7SLois Curfman McInnes   B->assembled  = PETSC_FALSE;
1344d6dfbf8fSBarry Smith 
134544cd7ae7SLois Curfman McInnes   b->insertmode = NOT_SET_VALUES;
134644cd7ae7SLois Curfman McInnes   MPI_Comm_rank(comm,&b->rank);
134744cd7ae7SLois Curfman McInnes   MPI_Comm_size(comm,&b->size);
13481eb62cbbSBarry Smith 
1349b4fd4287SBarry Smith   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
13502e0155e0SLois Curfman McInnes     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
13511987afe7SBarry Smith 
1352dbd7a890SLois Curfman McInnes   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
13531eb62cbbSBarry Smith     work[0] = m; work[1] = n;
1354d65a2f8fSBarry Smith     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1355dbd7a890SLois Curfman McInnes     if (M == PETSC_DECIDE) M = sum[0];
1356dbd7a890SLois Curfman McInnes     if (N == PETSC_DECIDE) N = sum[1];
13571eb62cbbSBarry Smith   }
135844cd7ae7SLois Curfman McInnes   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
135944cd7ae7SLois Curfman McInnes   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
136044cd7ae7SLois Curfman McInnes   b->m = m; B->m = m;
136144cd7ae7SLois Curfman McInnes   b->n = n; B->n = n;
136244cd7ae7SLois Curfman McInnes   b->N = N; B->N = N;
136344cd7ae7SLois Curfman McInnes   b->M = M; B->M = M;
13641eb62cbbSBarry Smith 
13651eb62cbbSBarry Smith   /* build local table of row and column ownerships */
136644cd7ae7SLois Curfman McInnes   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
136744cd7ae7SLois Curfman McInnes   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1368603f58a4SSatish Balay   b->cowners = b->rowners + b->size + 2;
136944cd7ae7SLois Curfman McInnes   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
137044cd7ae7SLois Curfman McInnes   b->rowners[0] = 0;
137144cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
137244cd7ae7SLois Curfman McInnes     b->rowners[i] += b->rowners[i-1];
13738a729477SBarry Smith   }
137444cd7ae7SLois Curfman McInnes   b->rstart = b->rowners[b->rank];
137544cd7ae7SLois Curfman McInnes   b->rend   = b->rowners[b->rank+1];
137644cd7ae7SLois Curfman McInnes   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
137744cd7ae7SLois Curfman McInnes   b->cowners[0] = 0;
137844cd7ae7SLois Curfman McInnes   for ( i=2; i<=b->size; i++ ) {
137944cd7ae7SLois Curfman McInnes     b->cowners[i] += b->cowners[i-1];
13808a729477SBarry Smith   }
138144cd7ae7SLois Curfman McInnes   b->cstart = b->cowners[b->rank];
138244cd7ae7SLois Curfman McInnes   b->cend   = b->cowners[b->rank+1];
13838a729477SBarry Smith 
13845ace5be8SLois Curfman McInnes   if (d_nz == PETSC_DEFAULT) d_nz = 5;
138544cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
138644cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->A);
13877b8455f0SLois Curfman McInnes   if (o_nz == PETSC_DEFAULT) o_nz = 0;
138844cd7ae7SLois Curfman McInnes   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
138944cd7ae7SLois Curfman McInnes   PLogObjectParent(B,b->B);
13908a729477SBarry Smith 
13911eb62cbbSBarry Smith   /* build cache for off array entries formed */
139244cd7ae7SLois Curfman McInnes   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
139344cd7ae7SLois Curfman McInnes   b->colmap      = 0;
139444cd7ae7SLois Curfman McInnes   b->garray      = 0;
139544cd7ae7SLois Curfman McInnes   b->roworiented = 1;
13968a729477SBarry Smith 
13971eb62cbbSBarry Smith   /* stuff used for matrix vector multiply */
139844cd7ae7SLois Curfman McInnes   b->lvec      = 0;
139944cd7ae7SLois Curfman McInnes   b->Mvctx     = 0;
14008a729477SBarry Smith 
14017a0afa10SBarry Smith   /* stuff for MatGetRow() */
140244cd7ae7SLois Curfman McInnes   b->rowindices   = 0;
140344cd7ae7SLois Curfman McInnes   b->rowvalues    = 0;
140444cd7ae7SLois Curfman McInnes   b->getrowactive = PETSC_FALSE;
14057a0afa10SBarry Smith 
140644cd7ae7SLois Curfman McInnes   *A = B;
1407d6dfbf8fSBarry Smith   return 0;
1408d6dfbf8fSBarry Smith }
1409c74985f6SBarry Smith 
14103d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1411d6dfbf8fSBarry Smith {
1412d6dfbf8fSBarry Smith   Mat        mat;
1413416022c9SBarry Smith   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1414a1b97e82SLois Curfman McInnes   int        ierr, len=0, flg;
1415d6dfbf8fSBarry Smith 
1416416022c9SBarry Smith   *newmat       = 0;
14170452661fSBarry Smith   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1418a5a9c739SBarry Smith   PLogObjectCreate(mat);
14190452661fSBarry Smith   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1420416022c9SBarry Smith   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
142144a69424SLois Curfman McInnes   mat->destroy    = MatDestroy_MPIAIJ;
142244a69424SLois Curfman McInnes   mat->view       = MatView_MPIAIJ;
1423d6dfbf8fSBarry Smith   mat->factor     = matin->factor;
1424c456f294SBarry Smith   mat->assembled  = PETSC_TRUE;
1425d6dfbf8fSBarry Smith 
142644cd7ae7SLois Curfman McInnes   a->m = mat->m   = oldmat->m;
142744cd7ae7SLois Curfman McInnes   a->n = mat->n   = oldmat->n;
142844cd7ae7SLois Curfman McInnes   a->M = mat->M   = oldmat->M;
142944cd7ae7SLois Curfman McInnes   a->N = mat->N   = oldmat->N;
1430d6dfbf8fSBarry Smith 
1431416022c9SBarry Smith   a->rstart       = oldmat->rstart;
1432416022c9SBarry Smith   a->rend         = oldmat->rend;
1433416022c9SBarry Smith   a->cstart       = oldmat->cstart;
1434416022c9SBarry Smith   a->cend         = oldmat->cend;
143517699dbbSLois Curfman McInnes   a->size         = oldmat->size;
143617699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
143764119d58SLois Curfman McInnes   a->insertmode   = NOT_SET_VALUES;
1438bcd2baecSBarry Smith   a->rowvalues    = 0;
1439bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
1440d6dfbf8fSBarry Smith 
1441603f58a4SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1442603f58a4SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1443603f58a4SSatish Balay   a->cowners = a->rowners + a->size + 2;
1444603f58a4SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1445416022c9SBarry Smith   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14462ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
14470452661fSBarry Smith     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1448416022c9SBarry Smith     PLogObjectMemory(mat,(a->N)*sizeof(int));
1449416022c9SBarry Smith     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1450416022c9SBarry Smith   } else a->colmap = 0;
14513f41c07dSBarry Smith   if (oldmat->garray) {
14523f41c07dSBarry Smith     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
14533f41c07dSBarry Smith     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1454464493b3SBarry Smith     PLogObjectMemory(mat,len*sizeof(int));
14553f41c07dSBarry Smith     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1456416022c9SBarry Smith   } else a->garray = 0;
1457d6dfbf8fSBarry Smith 
1458416022c9SBarry Smith   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1459416022c9SBarry Smith   PLogObjectParent(mat,a->lvec);
1460a56f8943SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1461416022c9SBarry Smith   PLogObjectParent(mat,a->Mvctx);
1462416022c9SBarry Smith   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1463416022c9SBarry Smith   PLogObjectParent(mat,a->A);
1464416022c9SBarry Smith   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1465416022c9SBarry Smith   PLogObjectParent(mat,a->B);
14665dd7a6c7SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
146725cdf11fSBarry Smith   if (flg) {
1468682d7d0cSBarry Smith     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1469682d7d0cSBarry Smith   }
14708a729477SBarry Smith   *newmat = mat;
14718a729477SBarry Smith   return 0;
14728a729477SBarry Smith }
1473416022c9SBarry Smith 
147477c4ece6SBarry Smith #include "sys.h"
1475416022c9SBarry Smith 
147619bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1477416022c9SBarry Smith {
1478d65a2f8fSBarry Smith   Mat          A;
1479416022c9SBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
1480d65a2f8fSBarry Smith   Scalar       *vals,*svals;
148119bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1482416022c9SBarry Smith   MPI_Status   status;
148317699dbbSLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1484d65a2f8fSBarry Smith   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
148519bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
1486416022c9SBarry Smith 
148717699dbbSLois Curfman McInnes   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
148817699dbbSLois Curfman McInnes   if (!rank) {
148990ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
149077c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1491c456f294SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1492416022c9SBarry Smith   }
1493416022c9SBarry Smith 
1494416022c9SBarry Smith   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1495416022c9SBarry Smith   M = header[1]; N = header[2];
1496416022c9SBarry Smith   /* determine ownership of all rows */
149717699dbbSLois Curfman McInnes   m = M/size + ((M % size) > rank);
14980452661fSBarry Smith   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1499416022c9SBarry Smith   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1500416022c9SBarry Smith   rowners[0] = 0;
150117699dbbSLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
1502416022c9SBarry Smith     rowners[i] += rowners[i-1];
1503416022c9SBarry Smith   }
150417699dbbSLois Curfman McInnes   rstart = rowners[rank];
150517699dbbSLois Curfman McInnes   rend   = rowners[rank+1];
1506416022c9SBarry Smith 
1507416022c9SBarry Smith   /* distribute row lengths to all processors */
15080452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1509416022c9SBarry Smith   offlens = ourlens + (rend-rstart);
151017699dbbSLois Curfman McInnes   if (!rank) {
15110452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
151277c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
15130452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
151417699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1515d65a2f8fSBarry Smith     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
15160452661fSBarry Smith     PetscFree(sndcounts);
1517416022c9SBarry Smith   }
1518416022c9SBarry Smith   else {
1519416022c9SBarry Smith     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1520416022c9SBarry Smith   }
1521416022c9SBarry Smith 
152217699dbbSLois Curfman McInnes   if (!rank) {
1523416022c9SBarry Smith     /* calculate the number of nonzeros on each processor */
15240452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1525cddf8d76SBarry Smith     PetscMemzero(procsnz,size*sizeof(int));
152617699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
1527416022c9SBarry Smith       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1528416022c9SBarry Smith         procsnz[i] += rowlengths[j];
1529416022c9SBarry Smith       }
1530416022c9SBarry Smith     }
15310452661fSBarry Smith     PetscFree(rowlengths);
1532416022c9SBarry Smith 
1533416022c9SBarry Smith     /* determine max buffer needed and allocate it */
1534416022c9SBarry Smith     maxnz = 0;
153517699dbbSLois Curfman McInnes     for ( i=0; i<size; i++ ) {
15360452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
1537416022c9SBarry Smith     }
15380452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1539416022c9SBarry Smith 
1540416022c9SBarry Smith     /* read in my part of the matrix column indices  */
1541416022c9SBarry Smith     nz = procsnz[0];
15420452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
154377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1544d65a2f8fSBarry Smith 
1545d65a2f8fSBarry Smith     /* read in every one elses and ship off */
154617699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1547d65a2f8fSBarry Smith       nz = procsnz[i];
154877c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1549d65a2f8fSBarry Smith       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1550d65a2f8fSBarry Smith     }
15510452661fSBarry Smith     PetscFree(cols);
1552416022c9SBarry Smith   }
1553416022c9SBarry Smith   else {
1554416022c9SBarry Smith     /* determine buffer space needed for message */
1555416022c9SBarry Smith     nz = 0;
1556416022c9SBarry Smith     for ( i=0; i<m; i++ ) {
1557416022c9SBarry Smith       nz += ourlens[i];
1558416022c9SBarry Smith     }
15590452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1560416022c9SBarry Smith 
1561416022c9SBarry Smith     /* receive message of column indices*/
1562d65a2f8fSBarry Smith     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1563416022c9SBarry Smith     MPI_Get_count(&status,MPI_INT,&maxnz);
1564c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1565416022c9SBarry Smith   }
1566416022c9SBarry Smith 
1567416022c9SBarry Smith   /* loop over local rows, determining number of off diagonal entries */
1568cddf8d76SBarry Smith   PetscMemzero(offlens,m*sizeof(int));
1569416022c9SBarry Smith   jj = 0;
1570416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1571416022c9SBarry Smith     for ( j=0; j<ourlens[i]; j++ ) {
1572d65a2f8fSBarry Smith       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1573416022c9SBarry Smith       jj++;
1574416022c9SBarry Smith     }
1575416022c9SBarry Smith   }
1576d65a2f8fSBarry Smith 
1577d65a2f8fSBarry Smith   /* create our matrix */
1578416022c9SBarry Smith   for ( i=0; i<m; i++ ) {
1579416022c9SBarry Smith     ourlens[i] -= offlens[i];
1580416022c9SBarry Smith   }
1581d65a2f8fSBarry Smith   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1582d65a2f8fSBarry Smith   A = *newmat;
15836d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
1584d65a2f8fSBarry Smith   for ( i=0; i<m; i++ ) {
1585d65a2f8fSBarry Smith     ourlens[i] += offlens[i];
1586d65a2f8fSBarry Smith   }
1587416022c9SBarry Smith 
158817699dbbSLois Curfman McInnes   if (!rank) {
15890452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1590416022c9SBarry Smith 
1591416022c9SBarry Smith     /* read in my part of the matrix numerical values  */
1592416022c9SBarry Smith     nz = procsnz[0];
159377c4ece6SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1594d65a2f8fSBarry Smith 
1595d65a2f8fSBarry Smith     /* insert into matrix */
1596d65a2f8fSBarry Smith     jj      = rstart;
1597d65a2f8fSBarry Smith     smycols = mycols;
1598d65a2f8fSBarry Smith     svals   = vals;
1599d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1600d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1601d65a2f8fSBarry Smith       smycols += ourlens[i];
1602d65a2f8fSBarry Smith       svals   += ourlens[i];
1603d65a2f8fSBarry Smith       jj++;
1604416022c9SBarry Smith     }
1605416022c9SBarry Smith 
1606d65a2f8fSBarry Smith     /* read in other processors and ship out */
160717699dbbSLois Curfman McInnes     for ( i=1; i<size; i++ ) {
1608416022c9SBarry Smith       nz = procsnz[i];
160977c4ece6SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1610d65a2f8fSBarry Smith       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1611416022c9SBarry Smith     }
16120452661fSBarry Smith     PetscFree(procsnz);
1613416022c9SBarry Smith   }
1614d65a2f8fSBarry Smith   else {
1615d65a2f8fSBarry Smith     /* receive numeric values */
16160452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1617416022c9SBarry Smith 
1618d65a2f8fSBarry Smith     /* receive message of values*/
1619d65a2f8fSBarry Smith     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1620d65a2f8fSBarry Smith     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1621c456f294SBarry Smith     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1622d65a2f8fSBarry Smith 
1623d65a2f8fSBarry Smith     /* insert into matrix */
1624d65a2f8fSBarry Smith     jj      = rstart;
1625d65a2f8fSBarry Smith     smycols = mycols;
1626d65a2f8fSBarry Smith     svals   = vals;
1627d65a2f8fSBarry Smith     for ( i=0; i<m; i++ ) {
1628d65a2f8fSBarry Smith       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1629d65a2f8fSBarry Smith       smycols += ourlens[i];
1630d65a2f8fSBarry Smith       svals   += ourlens[i];
1631d65a2f8fSBarry Smith       jj++;
1632d65a2f8fSBarry Smith     }
1633d65a2f8fSBarry Smith   }
16340452661fSBarry Smith   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1635d65a2f8fSBarry Smith 
16366d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
16376d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1638416022c9SBarry Smith   return 0;
1639416022c9SBarry Smith }
1640