#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.14 1995/03/23 22:59:30 curfman Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

#define CHUNCKSIZE   100
/*
   This is a simple-minded stash: do a linear search to determine whether
   an entry is already present; if it is not, add it to the end.
*/
static int StashValues(Stash *stash,int row,int n, int *idxn,
                       Scalar *values,InsertMode addv)
{
  int    i,j,N = stash->n,found,*n_idx, *n_idy;
  Scalar val,*n_array;

  for ( i=0; i<n; i++ ) {
    found = 0;
    val = *values++;
    for ( j=0; j<N; j++ ) {
      if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) {
        /* found a match */
        if (addv == AddValues) stash->array[j] += val;
        else stash->array[j] = val;
        found = 1;
        break;
      }
    }
    if (!found) { /* not found so add to end */
      if ( stash->n == stash->nmax ) {
        /* allocate a larger stash */
        n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*(
                                     2*sizeof(int) + sizeof(Scalar)));
        CHKPTR(n_array);
        n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE);
        n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE);
        MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar));
        MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int));
        MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int));
        if (stash->array) FREE(stash->array);
        stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy;
        stash->nmax += CHUNCKSIZE;
      }
      stash->array[stash->n]   = val;
      stash->idx[stash->n]     = row;
      stash->idy[stash->n++]   = idxn[i];
    }
  }
  return 0;
}
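
/*
   Illustrative note: stash entry k is the triple
       (stash->idx[k], stash->idy[k], stash->array[k])
         = (global row, global column, value),
   with the two integer arrays carved out of the same allocation as the
   Scalar array.  A hypothetical caller that owns rows [rstart,rend) would
   stash a single off-processor entry roughly as

       if (row < rstart || row >= rend) {
         StashValues(&aij->stash,row,1,&col,&value,AddValues);
       }

   which is what MatInsertValues_MPIAIJ() below does for whole rows.
*/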

/* local utility routine that creates a mapping from the global column
   number to the local column number in the off-diagonal part of the
   local storage of the matrix.  This is done in a non-scalable way,
   since the length of colmap equals the global number of columns.
*/
static int CreateColmap(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
  int        n = B->n,i;
  aij->colmap = (int *) MALLOC( aij->N*sizeof(int) ); CHKPTR(aij->colmap);
  MEMSET(aij->colmap,0,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) {
    aij->colmap[aij->garray[i]] = i+1;
  }
  return 0;
}
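
/*
   Worked example (illustrative numbers): if the off-diagonal block of this
   processor has garray = {3,7,9}, i.e. its three local columns correspond
   to global columns 3, 7 and 9, then CreateColmap() sets
       colmap[3] = 1, colmap[7] = 2, colmap[9] = 3
   and leaves 0 everywhere else, so "colmap[col] - 1" recovers the local
   column number, or -1 when the global column is not present locally.
*/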

static int MatInsertValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                            int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  if (aij->insertmode != NotSetValues && aij->insertmode != addv) {
    SETERR(1,"You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERR(1,"Negative row index");
    if (idxm[i] >= aij->M) SETERR(1,"Row index too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERR(1,"Negative column index");
        if (idxn[j] >= aij->N) SETERR(1,"Column index too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);}
            col = aij->colmap[idxn[j]] - 1;
            if (col < 0) {
              SETERR(1,"Cannot insert new off-diagonal block nonzero in an\
 already assembled matrix. Contact petsc-maint@mcs.anl.gov if you need this\
 feature");
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
        }
      }
    }
    else {
      ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr);
    }
  }
  return 0;
}

/*
    The assembly code is a lot like the code for vectors; we should
    someday derive a single assembly routine that can be used for
    either case.
*/

static int MatBeginAssemble_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         numtids = aij->numtids, *owners = aij->rowners;
  int         mytid = aij->mytid;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int         tag = 50, *owner,*starts,count;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are in the same mode (InsertValues or AddValues) */
  MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (AddValues|InsertValues)) {
    SETERR(1,"Some processors have inserted while others have added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
  MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nreceives = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  FREE(work);

  /* post receives:
       1) each message consists of ordered triples
     (global row, global column, value); the indices are stored as Scalars
     to simplify the message passing.
       2) since we don't know how long each individual message is, we
     allocate the largest needed buffer for each receive.  Potentially
     this is a lot of wasted space.

       This could be done better.
  */
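  /*
     Buffer layout sketch (as used below and unpacked in
     MatEndAssemble_MPIAIJ()): receive i owns the slice
         rvalues[3*nmax*i .. 3*nmax*(i+1)-1]
     and entry k of a message is the triple
         rvalues[3*k]   = (Scalar) global row
         rvalues[3*k+1] = (Scalar) global column
         rvalues[3*k+2] = value
  */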
  rvalues = (Scalar *) MALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTR(rvalues);
  recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTR(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
  CHKPTR(svalues);
  send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTR(send_waits);
  starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  FREE(owner);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  FREE(starts); FREE(nprocs);

  /* Free cache space */
  aij->stash.nmax = aij->stash.n = 0;
  if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;}

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}

extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatEndAssemble_MPIAIJ(Mat mat)
{
  int        ierr;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
  int         row,col;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPI_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);}
          col = aij->colmap[col] - 1;
          if (col < 0) {
            SETERR(1,"Cannot insert new off-diagonal block nonzero in an\
 already assembled matrix. Contact petsc-maint@mcs.anl.gov if you need this\
 feature");
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  FREE(aij->recv_waits); FREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) );
    CHKPTR(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    FREE(send_status);
  }
  FREE(aij->send_waits); FREE(aij->svalues);

  aij->insertmode = NotSetValues;
  ierr = MatBeginAssembly(aij->A); CHKERR(ierr);
  ierr = MatEndAssembly(aij->A); CHKERR(ierr);

  if (!aij->assembled) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERR(ierr);
  }
  ierr = MatBeginAssembly(aij->B); CHKERR(ierr);
  ierr = MatEndAssembly(aij->B); CHKERR(ierr);

  aij->assembled = 1;
  return 0;
}

static int MatZero_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;

  MatZeroEntries(l->A); MatZeroEntries(l->B);
  return 0;
}

/* again this uses the same basic strategy as in the assembly and
   scatter-create routines; we should try to do it systematically
   if we can figure out the proper level of generality. */

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
  int            *rvalues,tag = 67,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled) SETERR(1,"MatZeroRows_MPIAIJ: must assemble matrix first");
  ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
  ierr = ISGetIndices(is,&rows); CHKERR(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
  MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* +1 avoids a zero-length MALLOC */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERR(1,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nrecvs = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  FREE(work);

  /* post receives: */
  rvalues = (int *) MALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /* +1 avoids a zero-length MALLOC */
  CHKPTR(rvalues);
  recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTR(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
  send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTR(send_waits);
  starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
                comm,send_waits+count++);
    }
  }
  FREE(starts);

  base = owners[mytid];

  /*  wait on receives */
  lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  FREE(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  FREE(rvalues); FREE(lens);
  FREE(owner); FREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr);  FREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
  ierr = ISDestroy(istmp); CHKERR(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
    CHKPTR(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    FREE(send_status);
  }
  FREE(send_waits); FREE(svalues);

  return 0;
}
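
/*
   The parallel multiply below is organized to overlap communication with
   computation: the scatter of the needed off-processor (ghost) values of
   xx into aij->lvec is started, the diagonal block aij->A is applied while
   the messages are in flight, the scatter is completed, and finally the
   off-diagonal block aij->B applied to the ghost values is added in.
*/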
static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;
  if (!aij->assembled) SETERR(1,"MatMult_MPIAIJ: must assemble matrix first");
  ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
  ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;
  if (!aij->assembled) SETERR(1,"MatMultAdd_MPIAIJ: must assemble matrix first");
  ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr);
  ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;

  if (!aij->assembled)
    SETERR(1,"MatMultTrans_MPIAIJ: must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues,
                         ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
  /* do local part */
  ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but may not always be true. */
  ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse,
                         aij->Mvctx); CHKERR(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;

  if (!aij->assembled)
    SETERR(1,"MatMultTransAdd_MPIAIJ: must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues,
                         ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
  /* do local part */
  ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but may not always be true. */
  ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse,
                         aij->Mvctx); CHKERR(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is
  the diagonal block
*/
static int MatGetDiag_MPIAIJ(Mat Ain,Vec v)
{
  Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
  if (!A->assembled) SETERR(1,"MatGetDiag_MPIAIJ: must assemble matrix first");
  return MatGetDiagonal(A->A,v);
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
#endif
  FREE(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERR(ierr);
  ierr = MatDestroy(aij->B); CHKERR(ierr);
  if (aij->colmap) FREE(aij->colmap);
  if (aij->garray) FREE(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
  FREE(aij);
  PLogObjectDestroy(mat);
  PETSCHEADERDESTROY(mat);
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (!aij->assembled) SETERR(1,"MatView_MPIAIJ: must assemble matrix first");
  MPE_Seq_begin(mat->comm,1);
    ViewerPrintf(viewer,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
          aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
          aij->cend);
    ierr = MatView(aij->A,viewer); CHKERR(ierr);
    ierr = MatView(aij->B,viewer); CHKERR(ierr);
    ViewerFlush(viewer);
  MPE_Seq_end(mat->comm,1);
  return 0;
}

extern int MatMarkDiag_AIJ(Mat_AIJ  *);
/*
    This has to provide several versions.

     1) as in the sequential case
     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) coloring, updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,int flag,double shift,
                        int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx,*diag;
  int        n = mat->n, m = mat->m, i;
  Vec        tt;

  if (!mat->assembled) SETERR(1,"MatRelax_MPIAIJ: must assemble matrix first");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x - 1; /* shift by one for index start of 1 */
  ls--;
  if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERR(1,"That option is not yet supported for parallel AIJ matrices");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + D + U, where L is strictly lower triangular, U is
    strictly upper triangular, and D is diagonal. This routine applies

            (L + E)^{-1} A (U + E)^{-1}

    to a vector efficiently using Eisenstat's trick. This is for
    the case of the SSOR preconditioner, so E is D/omega where omega
    is the relaxation factor.
    */
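    /*
       Why the three sweeps below implement Eisenstat's trick: writing
       A = (L + E) + (D - 2E) + (U + E) and expanding gives

         (L+E)^{-1} A (U+E)^{-1} b
             = (U+E)^{-1} b + (L+E)^{-1} ( b + (D - 2E) (U+E)^{-1} b ).

       So with x = (E+U)^{-1} b and t = b - (2E - D) x, the result is
       x + (E+L)^{-1} t, which is the sequence computed below; since
       E = D/omega, the factor 2E - D equals scale*D with
       scale = 2/omega - 1.
    */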
    ierr = VecCreate(xx,&tt); CHKERR(ierr);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /*  x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i];
      v    = A->a + diag[i];
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);

    /*  t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /*  t = (E + L)^{-1}t */
    ts = t - 1; /* shifted by one because A->a and A->j use 1-based indexing */
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                                                 mat->Mvctx); CHKERR(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] - 1;
      v    = A->a + A->i[i] - 1;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                                                    mat->Mvctx); CHKERR(ierr);
    /*  x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i];
        v    = A->a + diag[i];
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                            mat->Mvctx); CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                            mat->Mvctx); CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                            mat->Mvctx); CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                            mat->Mvctx); CHKERR(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatInsOpt_MPIAIJ(Mat aijin,int op)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;

  if (op == NO_NEW_NONZERO_LOCATIONS || op == YES_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
  return 0;
}

static int MatSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatCopy_MPIAIJ(Mat,Mat *);
extern int MatConvert_MPIAIJ(Mat,MATTYPE,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatInsertValues_MPIAIJ,
       0, 0,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,0,0,
       0,0,
       MatRelax_MPIAIJ,
       0,
       0,0,0,
       MatCopy_MPIAIJ,
       MatGetDiag_MPIAIJ,0,0,
       MatBeginAssemble_MPIAIJ,MatEndAssemble_MPIAIJ,
       0,
       MatInsOpt_MPIAIJ,MatZero_MPIAIJ,MatZeroRows_MPIAIJ,0,
       0,0,0,0,
       MatSize_MPIAIJ,MatLocalSize_MPIAIJ,MatRange_MPIAIJ,
       0,0,
       0,MatConvert_MPIAIJ };

/*@
      MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format.

  Input Parameters:
.   comm - MPI communicator
.   m,n - number of local rows and columns (or -1 to have them calculated)
.   M,N - number of global rows and columns (or -1 to have them calculated)
.   d_nz - total number of nonzeros in the diagonal portion of the matrix
.   d_nnz - number of nonzeros per row in the diagonal portion of the matrix,
.           or null. You must leave room for the diagonal entry even if it is zero.
.   o_nz - total number of nonzeros in the off-diagonal portion of the matrix
.   o_nnz - number of nonzeros per row in the off-diagonal portion of the matrix,
.           or null. You must have at least one nonzero per row.

  Output Parameter:
.  newmat - the matrix

  Keywords: matrix, aij, compressed row, sparse, parallel
@*/
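/*
   Example usage (an illustrative sketch only; the sizes and nonzero
   estimates are made-up numbers):

       Mat mat;
       int ierr;
       ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,-1,-1,100,100,500,0,200,0,&mat);
       CHKERR(ierr);

   Here the local sizes are calculated from the global size 100, roughly
   500 nonzeros are preallocated in the diagonal portion and 200 in the
   off-diagonal portion, and the optional per-row counts d_nnz and o_nnz
   are not supplied (null).
*/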
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat          mat;
  Mat_MPIAIJ   *aij;
  int          ierr, i,sum[2],work[2];
  *newmat         = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij);
  mat->ops        = &MatOps;
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = 0;
  mat->row        = 0;
  mat->col        = 0;

  mat->comm       = comm;
  aij->insertmode = NotSetValues;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  if (M == -1 || N == -1) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
    if (M == -1) M = sum[0];
    if (N == -1) N = sum[1];
  }
  if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m       = m;
  aij->n       = n;
  aij->N       = N;
  aij->M       = M;

  /* build local table of row and column ownerships */
  aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTR(aij->rowners);
  aij->cowners = aij->rowners + aij->numtids + 1;
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = aij->cowners[aij->mytid+1];
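
  /*
     Worked example (illustrative numbers): with three processors owning
     m = 3, 4 and 3 rows respectively, the allgather plus prefix sum above
     yields rowners = {0, 3, 7, 10}, so processor 1 gets rstart = 3 and
     rend = 7; cowners, cstart and cend are built the same way from the
     local column counts.
  */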

  ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);
  PLogObjectParent(mat,aij->B);

  /* build cache for off-processor entries generated during assembly */
  aij->stash.nmax = CHUNCKSIZE; /* completely arbitrary number */
  aij->stash.n    = 0;
  aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
                            sizeof(Scalar))); CHKPTR(aij->stash.array);
  aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
  aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);
  aij->colmap    = 0;
  aij->garray    = 0;

  /* stuff used for matrix-vector multiply */
  aij->lvec      = 0;
  aij->Mvctx     = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}

static int MatCopy_MPIAIJ(Mat matin,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr;
  *newmat      = 0;

  if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij);
  mat->ops        = &MatOps;
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = matin->factor;
  mat->row        = 0;
  mat->col        = 0;

  aij->m          = oldmat->m;
  aij->n          = oldmat->n;
  aij->M          = oldmat->M;
  aij->N          = oldmat->N;

  aij->assembled  = 1;
  aij->rstart     = oldmat->rstart;
  aij->rend       = oldmat->rend;
  aij->cstart     = oldmat->cstart;
  aij->cend       = oldmat->cend;
  aij->numtids    = oldmat->numtids;
  aij->mytid      = oldmat->mytid;
  aij->insertmode = NotSetValues;

  aij->rowners    = (int *) MALLOC( (aij->numtids+1)*sizeof(int) );
  CHKPTR(aij->rowners);
  MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
  aij->stash.nmax = 0;
  aij->stash.n    = 0;
  aij->stash.array= 0;
  aij->colmap     = 0;
  aij->garray     = 0;
  mat->comm       = matin->comm;

  ierr = VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr);
  PLogObjectParent(mat,aij->lvec);
  ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr);
  PLogObjectParent(mat,aij->Mvctx);
  ierr = MatCopy(oldmat->A,&aij->A); CHKERR(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatCopy(oldmat->B,&aij->B); CHKERR(ierr);
  PLogObjectParent(mat,aij->B);
  *newmat = mat;
  return 0;
}