xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision bb5a7306ba2b79be99f6d9dff5d00348384195e8)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.183 1997/01/01 03:37:44 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
/* local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  This is done in a non-scalable way since the
length of colmap equals the global matrix length.
*/
14 #undef __FUNCTION__
15 #define __FUNCTION__ "CreateColmap_MPIAIJ_Private"
16 int CreateColmap_MPIAIJ_Private(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
19   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
20   int        n = B->n,i;
21 
22   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
23   PLogObjectMemory(mat,aij->N*sizeof(int));
24   PetscMemzero(aij->colmap,aij->N*sizeof(int));
25   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
26   return 0;
27 }
28 
29 extern int DisAssemble_MPIAIJ(Mat);
30 
31 #undef __FUNCTION__
32 #define __FUNCTION__ "MatGetRowIJ_MPIAIJ"
33 static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
34                            PetscTruth *done)
35 {
36   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
37   int        ierr;
38   if (aij->size == 1) {
39     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
40   } else SETERRQ(1,0,"not supported in parallel");
41   return 0;
42 }
43 
44 #undef __FUNCTION__
45 #define __FUNCTION__ "MatRestoreRowIJ_MPIAIJ"
46 static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
47                                PetscTruth *done)
48 {
49   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
50   int        ierr;
51   if (aij->size == 1) {
52     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
53   } else SETERRQ(1,0,"not supported in parallel");
54   return 0;
55 }
56 
57 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
58 
#undef __FUNCTION__
#define __FUNCTION__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - Inserts or adds a logically dense m-by-n block of
   values into the parallel matrix.  Entries in locally owned rows go into
   either the diagonal block (aij->A) or the off-diagonal block (aij->B);
   entries in rows owned by other processors are stashed for exchange
   during assembly.

   Input:
     mat   - the MPIAIJ matrix
     m,im  - number of rows and their global row indices
     n,in  - number of columns and their global column indices
     v     - the m*n values; layout depends on aij->roworiented
     addv  - INSERT_VALUES or ADD_VALUES
*/
static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

#if defined(PETSC_BOPT_g)
  /* debug builds only: all calls between assemblies must use one mode */
  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,0,"Cannot mix inserts and adds");
  }
#endif
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is locally owned */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          /* column falls inside the diagonal block */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
#endif
        else {
          /* column belongs in the off-diagonal block */
          if (mat->was_assembled) {
            /* after a previous assembly, B uses compressed local column
               indices; translate via colmap (built lazily) */
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            /* col < 0 means a brand-new nonzero location: if new nonzeros
               are allowed, revert B to global indexing so it can be set */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      /* off-processor row: stash for the assembly-time exchange (unless
         stashing is disabled, in which case the values are dropped) */
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          /* column-oriented input: stash one value at a time */
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  return 0;
}
125 
#undef __FUNCTION__
#define __FUNCTION__ "MatGetValues_MPIAIJ"
/*
   MatGetValues_MPIAIJ - Retrieves an m-by-n block of values, stored
   row-oriented into v.  Only rows owned by this processor may be
   requested; columns not stored locally come back as 0.0.
*/
static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          /* off-diagonal block: translate global column via colmap */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          /* zero if the column is not represented locally (col < 0, or
             colmap entry is stale because it maps a different column) */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}
164 
#undef __FUNCTION__
#define __FUNCTION__ "MatAssemblyBegin_MPIAIJ"
/*
   MatAssemblyBegin_MPIAIJ - Begins shipping stashed off-processor values
   to their owners.  Posts nonblocking receives and sends of
   (row, col, value) triples encoded as Scalars; MatAssemblyEnd_MPIAIJ()
   completes the exchange and frees the buffers recorded in aij.
*/
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    /* find the owning processor by scanning the row-ownership ranges */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack (row, col, value) triples; note starts[] is advanced here */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PetscFree(owner);
  /* starts[] was consumed by the packing loop above; rebuild it */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* record in-flight communication state for MatAssemblyEnd_MPIAIJ() */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
265 extern int MatSetUpMultiply_MPIAIJ(Mat);
266 
#undef __FUNCTION__
#define __FUNCTION__ "MatAssemblyEnd_MPIAIJ"
/*
   MatAssemblyEnd_MPIAIJ - Completes the communication started by
   MatAssemblyBegin_MPIAIJ(): drains the receives, inserts the received
   (row, col, value) triples locally, waits on the sends, then assembles
   the two sequential sub-blocks.
*/
static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;   /* messages are (row, col, value) triples */
    for ( i=0; i<n; i++ ) {
      /* indices were shipped encoded as Scalars; decode them */
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          /* B uses compressed local column indices once assembled */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            /* new nonzero location: revert to global column indexing */
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  /* first final assembly: build the scatter used by MatMult et al. */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  /* values cached by MatGetRow are invalid after assembly */
  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}
339 
340 #undef __FUNCTION__
341 #define __FUNCTION__ "MatZeroEntries_MPIAIJ"
342 static int MatZeroEntries_MPIAIJ(Mat A)
343 {
344   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
345   int        ierr;
346   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
347   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
348   return 0;
349 }
350 
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCTION__
#define __FUNCTION__ "MatZeroRows_MPIAIJ"
/*
   MatZeroRows_MPIAIJ - Zeros the rows listed in the (possibly
   non-local) index set.  Row indices are exchanged so each processor
   ends up zeroing only the rows it owns; if diag is nonnull the
   diagonal entry of each zeroed row is set to *diag (see the BUG note
   above about non-square matrices / mismatched partitions).
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* locate the owner by scanning the row-ownership ranges */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack the row indices; note starts[] is advanced here */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* starts[] was consumed by the packing loop above; rebuild it */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    /* convert received global row indices to local indices */
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  /* the diagonal value goes into the diagonal block only */
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
481 
482 #undef __FUNCTION__
483 #define __FUNCTION__ "MatMult_MPIAIJ"
484 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
485 {
486   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
487   int        ierr,nt;
488 
489   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
490   if (nt != a->n) {
491     SETERRQ(1,0,"Incompatible parition of A and xx");
492   }
493   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
494   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
495   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
496   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
497   return 0;
498 }
499 
500 #undef __FUNCTION__
501 #define __FUNCTION__ "MatMultAdd_MPIAIJ"
502 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
503 {
504   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
505   int        ierr;
506   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
507   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
508   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
509   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
510   return 0;
511 }
512 
513 #undef __FUNCTION__
514 #define __FUNCTION__ "MatMultTrans_MPIAIJ"
515 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
516 {
517   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
518   int        ierr;
519 
520   /* do nondiagonal part */
521   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
522   /* send it on its way */
523   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
524   /* do local part */
525   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
526   /* receive remote parts: note this assumes the values are not actually */
527   /* inserted in yy until the next line, which is true for my implementation*/
528   /* but is not perhaps always true. */
529   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
530   return 0;
531 }
532 
533 #undef __FUNCTION__
534 #define __FUNCTION__ "MatMultTransAdd_MPIAIJ"
535 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
536 {
537   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
538   int        ierr;
539 
540   /* do nondiagonal part */
541   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
542   /* send it on its way */
543   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
544   /* do local part */
545   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
546   /* receive remote parts: note this assumes the values are not actually */
547   /* inserted in yy until the next line, which is true for my implementation*/
548   /* but is not perhaps always true. */
549   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
550   return 0;
551 }
552 
553 /*
554   This only works correctly for square matrices where the subblock A->A is the
555    diagonal block
556 */
557 #undef __FUNCTION__
558 #define __FUNCTION__ "MatGetDiagonal_MPIAIJ"
559 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
560 {
561   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
562   if (a->M != a->N)
563     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
564   if (a->rstart != a->cstart || a->rend != a->cend) {
565     SETERRQ(1,0,"row partition must equal col partition");  }
566   return MatGetDiagonal(a->A,v);
567 }
568 
569 #undef __FUNCTION__
570 #define __FUNCTION__ "MatScale_MPIAIJ"
571 static int MatScale_MPIAIJ(Scalar *aa,Mat A)
572 {
573   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
574   int        ierr;
575   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
576   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
577   return 0;
578 }
579 
#undef __FUNCTION__
#define __FUNCTION__ "MatDestroy_MPIAIJ"
/*
   MatDestroy_MPIAIJ - Frees all storage owned by the parallel matrix:
   the ownership array, both sequential sub-blocks, and the optional
   structures (colmap, garray, ghost vector, scatter, row cache) that
   exist only after assembly or use.
*/
static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  /* optional structures: only present if they were ever created */
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
606 #include "draw.h"
607 #include "pinclude/pviewer.h"
608 
609 #undef __FUNCTION__
610 #define __FUNCTION__ "MatView_MPIAIJ_Binary"
611 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
612 {
613   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
614   int         ierr;
615 
616   if (aij->size == 1) {
617     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
618   }
619   else SETERRQ(1,0,"Only uniprocessor output supported");
620   return 0;
621 }
622 
#undef __FUNCTION__
#define __FUNCTION__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
/*
   MatView_MPIAIJ_ASCIIorDraworMatlab - Viewing for ASCII, draw and
   Matlab viewers.  Info formats print per-processor statistics; the
   per-file ASCII viewer prints each processor's blocks in sequence;
   otherwise the whole matrix is gathered onto processor 0 and viewed
   there.
*/
static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* per-processor statistics, printed one processor at a time */
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    /* null draw device: nothing to do */
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    /* each processor writes its own blocks, serialized by rank */
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
           aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
           aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqAIJ *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      /* processor 0 holds all rows; everyone else contributes none */
      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
               CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
               CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      /* temporarily shift local column indices to global numbering */
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      /* restore the original local numbering */
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      /* translate compressed off-diagonal column indices to global via garray */
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /* only processor 0 actually views the gathered matrix */
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}
736 
737 #undef __FUNCTION__
738 #define __FUNCTION__ "MatView_MPIAIJ"
739 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
740 {
741   Mat         mat = (Mat) obj;
742   int         ierr;
743   ViewerType  vtype;
744 
745   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
746   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
747       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
748     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
749   }
750   else if (vtype == BINARY_FILE_VIEWER) {
751     return MatView_MPIAIJ_Binary(mat,viewer);
752   }
753   return 0;
754 }
755 
/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating outer values between colors.
*/
763 #undef __FUNCTION__
764 #define __FUNCTION__ "MatRelax_MPIAIJ"
765 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
766                            double fshift,int its,Vec xx)
767 {
768   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
769   Mat        AA = mat->A, BB = mat->B;
770   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
771   Scalar     *b,*x,*xs,*ls,d,*v,sum;
772   int        ierr,*idx, *diag;
773   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
774 
775   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
776   xs = x + shift; /* shift by one for index start of 1 */
777   ls = ls + shift;
778   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
779   diag = A->diag;
780   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
781     if (flag & SOR_ZERO_INITIAL_GUESS) {
782       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
783     }
784     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
785     CHKERRQ(ierr);
786     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
787     CHKERRQ(ierr);
788     while (its--) {
789       /* go down through the rows */
790       for ( i=0; i<m; i++ ) {
791         n    = A->i[i+1] - A->i[i];
792         idx  = A->j + A->i[i] + shift;
793         v    = A->a + A->i[i] + shift;
794         sum  = b[i];
795         SPARSEDENSEMDOT(sum,xs,v,idx,n);
796         d    = fshift + A->a[diag[i]+shift];
797         n    = B->i[i+1] - B->i[i];
798         idx  = B->j + B->i[i] + shift;
799         v    = B->a + B->i[i] + shift;
800         SPARSEDENSEMDOT(sum,ls,v,idx,n);
801         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
802       }
803       /* come up through the rows */
804       for ( i=m-1; i>-1; i-- ) {
805         n    = A->i[i+1] - A->i[i];
806         idx  = A->j + A->i[i] + shift;
807         v    = A->a + A->i[i] + shift;
808         sum  = b[i];
809         SPARSEDENSEMDOT(sum,xs,v,idx,n);
810         d    = fshift + A->a[diag[i]+shift];
811         n    = B->i[i+1] - B->i[i];
812         idx  = B->j + B->i[i] + shift;
813         v    = B->a + B->i[i] + shift;
814         SPARSEDENSEMDOT(sum,ls,v,idx,n);
815         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
816       }
817     }
818   }
819   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
820     if (flag & SOR_ZERO_INITIAL_GUESS) {
821       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
822     }
823     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
824     CHKERRQ(ierr);
825     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
826     CHKERRQ(ierr);
827     while (its--) {
828       for ( i=0; i<m; i++ ) {
829         n    = A->i[i+1] - A->i[i];
830         idx  = A->j + A->i[i] + shift;
831         v    = A->a + A->i[i] + shift;
832         sum  = b[i];
833         SPARSEDENSEMDOT(sum,xs,v,idx,n);
834         d    = fshift + A->a[diag[i]+shift];
835         n    = B->i[i+1] - B->i[i];
836         idx  = B->j + B->i[i] + shift;
837         v    = B->a + B->i[i] + shift;
838         SPARSEDENSEMDOT(sum,ls,v,idx,n);
839         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
840       }
841     }
842   }
843   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
844     if (flag & SOR_ZERO_INITIAL_GUESS) {
845       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
846     }
847     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
848                             mat->Mvctx); CHKERRQ(ierr);
849     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
850                             mat->Mvctx); CHKERRQ(ierr);
851     while (its--) {
852       for ( i=m-1; i>-1; i-- ) {
853         n    = A->i[i+1] - A->i[i];
854         idx  = A->j + A->i[i] + shift;
855         v    = A->a + A->i[i] + shift;
856         sum  = b[i];
857         SPARSEDENSEMDOT(sum,xs,v,idx,n);
858         d    = fshift + A->a[diag[i]+shift];
859         n    = B->i[i+1] - B->i[i];
860         idx  = B->j + B->i[i] + shift;
861         v    = B->a + B->i[i] + shift;
862         SPARSEDENSEMDOT(sum,ls,v,idx,n);
863         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
864       }
865     }
866   }
867   else {
868     SETERRQ(1,0,"Option not supported");
869   }
870   return 0;
871 }
872 
873 #undef __FUNCTION__
874 #define __FUNCTION__ "MatGetInfo_MPIAIJ"
875 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
876 {
877   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
878   Mat        A = mat->A, B = mat->B;
879   int        ierr;
880   double     isend[5], irecv[5];
881 
882   info->rows_global    = (double)mat->M;
883   info->columns_global = (double)mat->N;
884   info->rows_local     = (double)mat->m;
885   info->columns_local  = (double)mat->N;
886   info->block_size     = 1.0;
887   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
888   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
889   isend[3] = info->memory;  isend[4] = info->mallocs;
890   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
891   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
892   isend[3] += info->memory;  isend[4] += info->mallocs;
893   if (flag == MAT_LOCAL) {
894     info->nz_used      = isend[0];
895     info->nz_allocated = isend[1];
896     info->nz_unneeded  = isend[2];
897     info->memory       = isend[3];
898     info->mallocs      = isend[4];
899   } else if (flag == MAT_GLOBAL_MAX) {
900     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
901     info->nz_used      = irecv[0];
902     info->nz_allocated = irecv[1];
903     info->nz_unneeded  = irecv[2];
904     info->memory       = irecv[3];
905     info->mallocs      = irecv[4];
906   } else if (flag == MAT_GLOBAL_SUM) {
907     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
908     info->nz_used      = irecv[0];
909     info->nz_allocated = irecv[1];
910     info->nz_unneeded  = irecv[2];
911     info->memory       = irecv[3];
912     info->mallocs      = irecv[4];
913   }
914   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
915   info->fill_ratio_needed = 0;
916   info->factor_mallocs    = 0;
917 
918   return 0;
919 }
920 
921 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
922 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
923 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
924 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
925 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
926 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
927 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
928 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
929 
930 #undef __FUNCTION__
931 #define __FUNCTION__ "MatSetOption_MPIAIJ"
932 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
933 {
934   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
935 
936   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
937       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
938       op == MAT_COLUMNS_UNSORTED ||
939       op == MAT_COLUMNS_SORTED) {
940         MatSetOption(a->A,op);
941         MatSetOption(a->B,op);
942   } else if (op == MAT_ROW_ORIENTED) {
943     a->roworiented = 1;
944     MatSetOption(a->A,op);
945     MatSetOption(a->B,op);
946   } else if (op == MAT_ROWS_SORTED ||
947              op == MAT_ROWS_UNSORTED ||
948              op == MAT_SYMMETRIC ||
949              op == MAT_STRUCTURALLY_SYMMETRIC ||
950              op == MAT_YES_NEW_DIAGONALS)
951     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
952   else if (op == MAT_COLUMN_ORIENTED) {
953     a->roworiented = 0;
954     MatSetOption(a->A,op);
955     MatSetOption(a->B,op);
956   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
957     a->donotstash = 1;
958   } else if (op == MAT_NO_NEW_DIAGONALS)
959     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
960   else
961     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
962   return 0;
963 }
964 
965 #undef __FUNCTION__
966 #define __FUNCTION__ "MatGetSize_MPIAIJ"
967 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
968 {
969   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
970   *m = mat->M; *n = mat->N;
971   return 0;
972 }
973 
974 #undef __FUNCTION__
975 #define __FUNCTION__ "MatGetLocalSize_MPIAIJ"
976 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
977 {
978   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
979   *m = mat->m; *n = mat->N;
980   return 0;
981 }
982 
983 #undef __FUNCTION__
984 #define __FUNCTION__ "MatGetOwnershipRange_MPIAIJ"
985 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
986 {
987   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
988   *m = mat->rstart; *n = mat->rend;
989   return 0;
990 }
991 
992 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
993 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
994 
#undef __FUNCTION__
#define __FUNCTION__ "MatGetRow_MPIAIJ"
/*
    MatGetRow_MPIAIJ - Returns the column indices and/or values of one
    locally owned row of the parallel matrix.

    The output row is assembled by merging the corresponding rows of the
    diagonal block A (local column numbers, offset by cstart to become
    global) and the off-diagonal block B (local columns mapped to global
    numbers through garray).  Assuming the rows of A and B are themselves
    sorted, the merge produces indices sorted by increasing global column:
    B entries with global column < cstart, then all A entries, then the
    remaining B entries.

    Results are copied into the per-matrix work buffers rowvalues and
    rowindices, which are allocated on first use and sized for the longest
    local row; they stay valid until the next MatGetRow call.  Only rows
    in [rstart, rend) may be requested, and calls must be bracketed by
    MatRestoreRow (enforced via the getrowactive flag).
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
        (a single allocation holds both the Scalar and the int buffer)
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int     max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
  lrow = row - rstart;

  /* request from A and B only the pieces (values/indices) the caller wants;
     column indices of B are still needed to merge values in sorted order */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;    /* local B column -> global column */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;     /* number of B entries that precede the A block */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* leading B entries with global column < cstart come first */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}
1075 
1076 #undef __FUNCTION__
1077 #define __FUNCTION__ "MatRestoreRow_MPIAIJ"
1078 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1079 {
1080   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1081   if (aij->getrowactive == PETSC_FALSE) {
1082     SETERRQ(1,0,"MatGetRow not called");
1083   }
1084   aij->getrowactive = PETSC_FALSE;
1085   return 0;
1086 }
1087 
#undef __FUNCTION__
#define __FUNCTION__ "MatNorm_MPIAIJ"
/*
    MatNorm_MPIAIJ - Computes a norm of a parallel AIJ matrix.

    Supports NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY
    (max row sum).  On a single process the computation is delegated to
    the sequential MatNorm; otherwise partial results over the diagonal
    (A) and off-diagonal (B) blocks are combined with MPI reductions.
    `shift' accounts for the optional 1-based internal indexing of the
    sequential blocks; garray maps local B columns to global columns.
    NOTE: NORM_1 allocates two work arrays of GLOBAL length N, which
    does not scale with the number of processes.
*/
static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, reduced, then sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      /* accumulate per-global-column absolute sums, then reduce and take max */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        /* A stores local columns: offset by cstart to get the global column */
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        /* B columns go through garray to get the global column */
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so a local max then MPI_MAX suffices */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}
1164 
#undef __FUNCTION__
#define __FUNCTION__ "MatTranspose_MPIAIJ"
/*
    MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.

    Builds a new N x M matrix B and inserts each local row of this matrix
    as one COLUMN of B (MatSetValues with a single column index), letting
    the usual stash/assembly machinery route the entries to the owning
    processes.  Column indices of the diagonal block are temporarily
    shifted to global numbering and restored afterwards.  With
    matout == PETSC_NULL (square matrices only) the result replaces the
    input by destroying the old data structures and copying B's header
    over A's.
*/
static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;   /* -1 when 1-based indexing is in use */
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,0,"Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily convert A's local column numbers to global (in place) */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  /* undo the shift so A is left unmodified */
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* B's columns are translated to global numbers via garray into a scratch array */
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}
1223 
#undef __FUNCTION__
#define __FUNCTION__ "MatDiagonalScale_MPIAIJ"
/*
    MatDiagonalScale_MPIAIJ - Computes diag(ll) * mat * diag(rr).

    The right-scaling vector rr is scattered into the local work vector
    lvec; the scatter is started first so the communication overlaps with
    the left scaling of the off-diagonal block and the full scaling of
    the diagonal block, and only then is the scatter completed and the
    off-diagonal block right-scaled.  Either ll or rr may be PETSC_NULL
    to skip that side.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat a = aij->A, b = aij->B;
  int ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    /* MatGetLocalSize_MPIAIJ returns the global column count, so use
       the true local column count for the conformity check */
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    /* left-scale the off-diagonal block while the scatter is in flight */
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}
1256 
1257 
1258 extern int MatPrintHelp_SeqAIJ(Mat);
1259 #undef __FUNCTION__
1260 #define __FUNCTION__ "MatPrintHelp_MPIAIJ"
1261 static int MatPrintHelp_MPIAIJ(Mat A)
1262 {
1263   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1264 
1265   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1266   else return 0;
1267 }
1268 
1269 #undef __FUNCTION__
1270 #define __FUNCTION__ "MatGetBlockSize_MPIAIJ"
1271 static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1272 {
1273   *bs = 1;
1274   return 0;
1275 }
1276 #undef __FUNCTION__
1277 #define __FUNCTION__ "MatSetUnfactored_MPIAIJ"
1278 static int MatSetUnfactored_MPIAIJ(Mat A)
1279 {
1280   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1281   int        ierr;
1282   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1283   return 0;
1284 }
1285 
1286 
1287 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1288 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1289 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1290 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1291 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1292 /* -------------------------------------------------------------------*/
/*
   Function dispatch table for the MATMPIAIJ matrix type.  Entries are
   positional (the order is fixed by struct _MatOps) and a 0 marks an
   operation with no parallel implementation.  The factorization/solve
   and getrowij entries are filled in later by MatCreateMPIAIJ() when
   the communicator has a single process.
*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
1317 
1318 
#undef __FUNCTION__
#define __FUNCTION__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
           (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
           submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$          0 1 2 3 4 5 6 7 8 9 10 11
$         -------------------
$  row 3  |  o o o d d d o o o o o o
$  row 4  |  o o o d d d o o o o o o
$  row 5  |  o o o d d d o o o o o o
$         -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    /* on one process the sequential factorization/solve kernels can be used */
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  /* derive any missing global size as the sum of the local sizes */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  /* split rows (columns) as evenly as possible; the first M % size
     processes receive one extra row */
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  /* rowners[i]..rowners[i+1]-1 are the rows owned by process i (prefix
     sum over the gathered local sizes); cowners likewise for columns */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* the off-diagonal block B is created with the full global column
     width N; its columns are compacted later during assembly */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}
1510 
1511 #undef __FUNCTION__
1512 #define __FUNCTION__ "MatConvertSameType_MPIAIJ"
1513 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1514 {
1515   Mat        mat;
1516   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1517   int        ierr, len=0, flg;
1518 
1519   *newmat       = 0;
1520   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1521   PLogObjectCreate(mat);
1522   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1523   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1524   mat->destroy    = MatDestroy_MPIAIJ;
1525   mat->view       = MatView_MPIAIJ;
1526   mat->factor     = matin->factor;
1527   mat->assembled  = PETSC_TRUE;
1528 
1529   a->m = mat->m   = oldmat->m;
1530   a->n = mat->n   = oldmat->n;
1531   a->M = mat->M   = oldmat->M;
1532   a->N = mat->N   = oldmat->N;
1533 
1534   a->rstart       = oldmat->rstart;
1535   a->rend         = oldmat->rend;
1536   a->cstart       = oldmat->cstart;
1537   a->cend         = oldmat->cend;
1538   a->size         = oldmat->size;
1539   a->rank         = oldmat->rank;
1540   a->insertmode   = NOT_SET_VALUES;
1541   a->rowvalues    = 0;
1542   a->getrowactive = PETSC_FALSE;
1543 
1544   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1545   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1546   a->cowners = a->rowners + a->size + 2;
1547   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1548   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1549   if (oldmat->colmap) {
1550     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1551     PLogObjectMemory(mat,(a->N)*sizeof(int));
1552     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1553   } else a->colmap = 0;
1554   if (oldmat->garray) {
1555     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1556     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1557     PLogObjectMemory(mat,len*sizeof(int));
1558     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1559   } else a->garray = 0;
1560 
1561   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1562   PLogObjectParent(mat,a->lvec);
1563   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1564   PLogObjectParent(mat,a->Mvctx);
1565   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1566   PLogObjectParent(mat,a->A);
1567   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1568   PLogObjectParent(mat,a->B);
1569   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1570   if (flg) {
1571     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1572   }
1573   *newmat = mat;
1574   return 0;
1575 }
1576 
1577 #include "sys.h"
1578 
#undef __FUNCTION__
#define __FUNCTION__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a parallel MPIAIJ matrix from a PETSc binary viewer.

   Only process 0 reads the file; it parcels out row lengths, column indices,
   and numerical values to the other processes with MPI point-to-point and
   scatter operations.  The binary layout read here is:
       header[0] = MAT_COOKIE (sanity check)
       header[1] = M (global rows)
       header[2] = N (global columns)
       header[3] = broadcast but never used below
                   (presumably the global nonzero count - confirm against the
                    matching MatView binary writer)
   followed by the M row lengths, then all column indices, then all values.

   Input Parameters:
     viewer - binary viewer; its communicator determines the layout
     type   - ignored here (the created matrix is always MPIAIJ)

   Output Parameter:
     newmat - the assembled parallel matrix

   Collective on the viewer's communicator; every rank must call this.
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;   /* tag stream for the column-index messages */

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  /* only rank 0 touches the file; it validates the header before anything is shipped */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  /* everyone needs the global dimensions (header[1..3]); the cookie stays on rank 0 */
  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: rows are dealt out as evenly as possible,
     with the first (M mod size) ranks receiving one extra row each */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  /* gather every rank's local row count into rowners[1..size], then turn it
     into a prefix sum so rowners[i] is the first global row owned by rank i */
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  /* single allocation holds both arrays: ourlens = diagonal-block row lengths
     (for now, TOTAL lengths), offlens = off-diagonal counts filled in later */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array: rowners[i] is both the first
       row of rank i and its offset into rowlengths */
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    /* non-root ranks: send buffer arguments are ignored by MPI_Scatterv */
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it; one staging buffer is
       reused for every destination rank */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off;
       the file stores ranks' data consecutively, so rank order = file order */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message: sum of my row lengths */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    /* cross-check: the received count must match what the row lengths promised */
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries
     (columns outside [rstart,rend), i.e. outside this rank's diagonal block) */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: MatCreateMPIAIJ wants DIAGONAL-block lengths, so
     temporarily subtract the off-diagonal counts from the totals... */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  /* columns within each row arrive in ascending order from the file reader,
     so tell the matrix to skip sorting - TODO confirm writer guarantees this */
  MatSetOption(A,MAT_COLUMNS_SORTED);
  /* ...then restore ourlens to TOTAL row lengths for the value-insertion loops below */
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix one global row at a time, walking the flat
       mycols/vals arrays with cursors smycols/svals */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship out; note the values use the
       matrix's own tag (A->tag), keeping them distinct from the column-index
       messages sent earlier on the viewer's tag */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values; nz still holds my total nonzero count */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix - same cursor walk as the rank-0 branch above */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  /* note: vals/mycols free also covers the rank-0 staging allocations */
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  /* collective assembly migrates any stashed off-process entries (there should
     be none here, since each rank inserted only its own rows) */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
1745