xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d13ab20cb7394c5ccee56b4115f771bcdc64b76a)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.16 1995/03/25 01:10:01 curfman Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 #define CHUNCKSIZE   100
/*
   This is a simple-minded stash: a linear search determines whether an
   entry is already in the stash; if not, it is added at the end.
*/
14 static int StashValues(Stash *stash,int row,int n, int *idxn,
15                        Scalar *values,InsertMode addv)
16 {
17   int    i,j,N = stash->n,found,*n_idx, *n_idy;
18   Scalar val,*n_array;
19 
20   for ( i=0; i<n; i++ ) {
21     found = 0;
22     val = *values++;
23     for ( j=0; j<N; j++ ) {
24       if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) {
25         /* found a match */
26         if (addv == AddValues) stash->array[j] += val;
27         else stash->array[j] = val;
28         found = 1;
29         break;
30       }
31     }
32     if (!found) { /* not found so add to end */
33       if ( stash->n == stash->nmax ) {
34         /* allocate a larger stash */
35         n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*(
36                                      2*sizeof(int) + sizeof(Scalar)));
37         CHKPTR(n_array);
38         n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE);
39         n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE);
40         MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar));
41         MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int));
42         MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int));
43         if (stash->array) FREE(stash->array);
44         stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy;
45         stash->nmax += CHUNCKSIZE;
46       }
47       stash->array[stash->n]   = val;
48       stash->idx[stash->n]     = row;
49       stash->idy[stash->n++]   = idxn[i];
50     }
51   }
52   return 0;
53 }
54 
/* local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  This is done in a non-scalable way, since the
length of colmap equals the global matrix length.
*/
60 static int CreateColmap(Mat mat)
61 {
62   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
63   Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
64   int        n = B->n,i;
65   aij->colmap = (int *) MALLOC( aij->N*sizeof(int) ); CHKPTR(aij->colmap);
66   aij->invcolmap = (int *) MALLOC( aij->N*sizeof(int) ); CHKPTR(aij->invcolmap);
67   MEMSET(aij->colmap,0,aij->N*sizeof(int));
68   MEMSET(aij->invcolmap,0,aij->N*sizeof(int));
69   for ( i=0; i<n; i++ ) {
70     aij->colmap[aij->garray[i]] = i+1;
71     aij->invcolmap[i+1] = aij->garray[i];
72   }
73   return 0;
74 }
75 
76 static int MatInsertValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
77                             int *idxn,Scalar *v,InsertMode addv)
78 {
79   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
80   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
81   int        cstart = aij->cstart, cend = aij->cend,row,col;
82 
83   if (aij->insertmode != NotSetValues && aij->insertmode != addv) {
84     SETERR(1,"You cannot mix inserts and adds");
85   }
86   aij->insertmode = addv;
87   for ( i=0; i<m; i++ ) {
88     if (idxm[i] < 0) SETERR(1,"Negative row index");
89     if (idxm[i] >= aij->M) SETERR(1,"Row index too large");
90     if (idxm[i] >= rstart && idxm[i] < rend) {
91       row = idxm[i] - rstart;
92       for ( j=0; j<n; j++ ) {
93         if (idxn[j] < 0) SETERR(1,"Negative column index");
94         if (idxn[j] >= aij->N) SETERR(1,"Column index too large");
95         if (idxn[j] >= cstart && idxn[j] < cend){
96           col = idxn[j] - cstart;
97           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
98         }
99         else {
100           if (aij->assembled) {
101             if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);}
102             col = aij->colmap[idxn[j]] - 1;
103             if (col < 0) {
104               SETERR(1,"Cannot insert new off diagonal block nonzero in\
105                      already\
106                      assembled matrix. Contact petsc-maint@mcs.anl.gov\
107                      if your need this feature");
108             }
109           }
110           else col = idxn[j];
111           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
112         }
113       }
114     }
115     else {
116       ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr);
117     }
118   }
119   return 0;
120 }
121 
/*
    The assembly code is a lot like the code for vectors; we should
    someday derive a single assembly code that can be used for
    either case.
*/
127 
/*
   MatBeginAssemble_MPIAIJ - Starts shipping all stashed (off-processor)
   entries to their owning processors with non-blocking MPI sends and
   receives.  Each message is a sequence of (row,col,value) triples; the
   indices travel as Scalars so a single MPI datatype suffices.  The
   matching waits are performed in MatEndAssemble_MPIAIJ().
*/
static int MatBeginAssemble_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         numtids = aij->numtids, *owners = aij->rowners;
  int         mytid = aij->mytid;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int         tag = 50, *owner,*starts,count;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (AddValues|InsertValues)) {
    SETERR(1,"Some processors have inserted while others have added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  /*  nprocs[j] = #stashed entries destined for proc j;
      procs[j]  = 1 iff there are any; owner[i] = destination of entry i */
  nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
  MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    /* rowners[j]..rowners[j+1] is the contiguous row range of proc j */
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nreceives = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  FREE(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues = (Scalar *) MALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTR(rvalues);
  recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTR(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
  CHKPTR(svalues);
  send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTR(send_waits);
  starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack (row,col,value) triples grouped by destination processor */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  FREE(owner);
  /* starts[] was bumped while packing; rebuild it for the sends */
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  FREE(starts); FREE(nprocs);

  /* Free cache space */
  aij->stash.nmax = aij->stash.n = 0;
  if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;}

  /* record the in-flight communication state for MatEndAssemble_MPIAIJ() */
  aij->svalues    = svalues;       aij->rvalues = rvalues;
  aij->nsends     = nsends;         aij->nrecvs = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
229 extern int MatSetUpMultiply_MPIAIJ(Mat);
230 
/*
   MatEndAssemble_MPIAIJ - Completes the receives started in
   MatBeginAssemble_MPIAIJ(), inserts the received (row,col,value)
   triples into the local A (diagonal) and B (off-diagonal) blocks, then
   assembles both sequential blocks.
*/
static int MatEndAssemble_MPIAIJ(Mat mat)
{
  int        ierr;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
  int         row,col;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPI_SCALAR,&n);
    n = n/3;                  /* n = number of (row,col,value) triples */
    for ( i=0; i<n; i++ ) {
      /* indices were shipped as Scalars; convert back to ints */
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
          col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          /* translate global column to local off-diagonal index */
          if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);}
          col = aij->colmap[col] - 1;
          if (col < 0) {
            SETERR(1,"Cannot insert new off diagonal block nonzero in\
                     already\
                     assembled matrix. Contact petsc-maint@mcs.anl.gov\
                     if your need this feature");
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  FREE(aij->recv_waits); FREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) );
    CHKPTR(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    FREE(send_status);
  }
  FREE(aij->send_waits); FREE(aij->svalues);

  aij->insertmode = NotSetValues;
  ierr = MatBeginAssembly(aij->A); CHKERR(ierr);
  ierr = MatEndAssembly(aij->A); CHKERR(ierr);

  /* first assembly only: set up the scatter/multiply support
     (NOTE(review): presumably must precede assembling B - confirm) */
  if (!aij->assembled) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERR(ierr);
  }
  ierr = MatBeginAssembly(aij->B); CHKERR(ierr);
  ierr = MatEndAssembly(aij->B); CHKERR(ierr);

  aij->assembled = 1;
  return 0;
}
297 
298 static int MatZero_MPIAIJ(Mat A)
299 {
300   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
301 
302   MatZeroEntries(l->A); MatZeroEntries(l->B);
303   return 0;
304 }
305 
/* again this uses the same basic strategy as in the assembly and
   scatter create routines; we should try to do it systematically
   if we can figure out the proper level of generality. */
309 
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
/*
   MatZeroRows_MPIAIJ - Zeros the (possibly off-processor) rows listed in
   is, passing diag through to the sequential MatZeroRows() on the
   diagonal block.  Row indices are first routed to their owning
   processors via non-blocking MPI, then each processor zeros its own
   rows locally.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
  int            *rvalues,tag = 67,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled) SETERR(1,"MatZeroRows_MPIAIJ: must assemble matrix first");
  ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
  ierr = ISGetIndices(is,&rows); CHKERR(ierr);

  /*  first count number of contributors to each processor */
  /*  nprocs[j] = #rows destined for proc j; procs[j] = 1 iff any;
      owner[i]  = owning processor of rows[i] */
  nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
  MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<numtids; j++ ) {
      /* rowners[j]..rowners[j+1] is the row range owned by proc j */
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERR(1,"Imdex out of range");
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nrecvs = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  FREE(work);

  /* post receives:   */
  /* one worst-case-sized (nmax) buffer per expected message */
  rvalues = (int *) MALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTR(rvalues);
  recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTR(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
  send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTR(send_waits);
  starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack the row indices grouped by destination processor */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* starts[] was bumped while packing; rebuild it for the sends */
  starts[0] = 0;
  for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
                comm,send_waits+count++);
    }
  }
  FREE(starts);

  base = owners[mytid];   /* first global row owned by this processor */

  /*  wait on receives */
  lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  FREE(recv_waits);

  /* move the data into the send scatter */
  /* lrows = every row this processor must zero, in local numbering */
  lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  FREE(rvalues); FREE(lens);
  FREE(owner); FREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr);  FREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
  ierr = ISDestroy(istmp); CHKERR(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
    CHKPTR(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    FREE(send_status);
  }
  FREE(send_waits); FREE(svalues);


  return 0;
}
440 
441 static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
442 {
443   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
444   int        ierr;
445   if (!aij->assembled) SETERR(1,"MatMult_MPIAIJ: must assemble matrix first");
446   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
447   CHKERR(ierr);
448   ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
449   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
450   CHKERR(ierr);
451   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
452   return 0;
453 }
454 
455 static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
456 {
457   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
458   int        ierr;
459   if (!aij->assembled) SETERR(1,"MatMult_MPIAIJ: must assemble matrix first");
460   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
461   CHKERR(ierr);
462   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr);
463   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
464   CHKERR(ierr);
465   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr);
466   return 0;
467 }
468 
469 static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
470 {
471   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
472   int        ierr;
473 
474   if (!aij->assembled)
475     SETERR(1,"MatMulTrans_MPIAIJ: must assemble matrix first");
476   /* do nondiagonal part */
477   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
478   /* send it on its way */
479   ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues,
480                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
481   /* do local part */
482   ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr);
483   /* receive remote parts: note this assumes the values are not actually */
484   /* inserted in yy until the next line, which is true for my implementation*/
485   /* but is not perhaps always true. */
486   ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse,
487                          aij->Mvctx); CHKERR(ierr);
488   return 0;
489 }
490 
491 static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
492 {
493   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
494   int        ierr;
495 
496   if (!aij->assembled)
497     SETERR(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first");
498   /* do nondiagonal part */
499   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
500   /* send it on its way */
501   ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues,
502                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
503   /* do local part */
504   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr);
505   /* receive remote parts: note this assumes the values are not actually */
506   /* inserted in yy until the next line, which is true for my implementation*/
507   /* but is not perhaps always true. */
508   ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse,
509                          aij->Mvctx); CHKERR(ierr);
510   return 0;
511 }
512 
513 /*
514   This only works correctly for square matrices where the subblock A->A is the
515    diagonal block
516 */
517 static int MatGetDiag_MPIAIJ(Mat Ain,Vec v)
518 {
519   Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
520   if (!A->assembled) SETERR(1,"MatGetDiag_MPIAIJ: must assemble matrix first");
521   return MatGetDiagonal(A->A,v);
522 }
523 
524 static int MatDestroy_MPIAIJ(PetscObject obj)
525 {
526   Mat        mat = (Mat) obj;
527   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
528   int        ierr;
529 #if defined(PETSC_LOG)
530   PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
531 #endif
532   FREE(aij->rowners);
533   ierr = MatDestroy(aij->A); CHKERR(ierr);
534   ierr = MatDestroy(aij->B); CHKERR(ierr);
535   if (aij->colmap) FREE(aij->colmap);
536   if (aij->garray) FREE(aij->garray);
537   if (aij->lvec) VecDestroy(aij->lvec);
538   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
539   FREE(aij);
540   PLogObjectDestroy(mat);
541   PETSCHEADERDESTROY(mat);
542   return 0;
543 }
544 
545 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
546 {
547   Mat        mat = (Mat) obj;
548   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
549   int        ierr;
550   PetscObject vobj = (PetscObject) viewer;
551 
552   if (!aij->assembled) SETERR(1,"MatView_MPIAIJ: must assemble matrix first");
553   if (vobj->cookie == VIEWER_COOKIE) {
554     FILE *fd = ViewerFileGetPointer(viewer);
555     if (vobj->type == FILE_VIEWER) {
556       MPE_Seq_begin(mat->comm,1);
557       fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
558              aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
559              aij->cend);
560       ierr = MatView(aij->A,viewer); CHKERR(ierr);
561       ierr = MatView(aij->B,viewer); CHKERR(ierr);
562       fflush(fd);
563       MPE_Seq_end(mat->comm,1);
564     }
565   }
566   return 0;
567 }
568 
569 extern int MatMarkDiag_AIJ(Mat_AIJ  *);
/*
    This has to provide several versions:

     1) as in the sequential case
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) color updating of outer values between colors.
*/
578 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,int flag,double shift,
579                         int its,Vec xx)
580 {
581   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
582   Mat        AA = mat->A, BB = mat->B;
583   Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
584   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
585   int        ierr,*idx, *diag;
586   int        n = mat->n, m = mat->m, i;
587   Vec        tt;
588 
589   if (!mat->assembled) SETERR(1,"MatRelax_MPIAIJ: must assemble matrix first");
590 
591   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
592   xs = x -1; /* shift by one for index start of 1 */
593   ls--;
594   if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
595   diag = A->diag;
596   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
597     SETERR(1,"That option not yet support for parallel AIJ matrices");
598   }
599   if (flag & SOR_EISENSTAT) {
600     /* Let  A = L + U + D; where L is lower trianglar,
601     U is upper triangular, E is diagonal; This routine applies
602 
603             (L + E)^{-1} A (U + E)^{-1}
604 
605     to a vector efficiently using Eisenstat's trick. This is for
606     the case of SSOR preconditioner, so E is D/omega where omega
607     is the relaxation factor.
608     */
609     ierr = VecCreate(xx,&tt); CHKERR(ierr);
610     VecGetArray(tt,&t);
611     scale = (2.0/omega) - 1.0;
612     /*  x = (E + U)^{-1} b */
613     VecSet(&zero,mat->lvec);
614     ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
615                               mat->Mvctx); CHKERR(ierr);
616     for ( i=m-1; i>-1; i-- ) {
617       n    = A->i[i+1] - diag[i] - 1;
618       idx  = A->j + diag[i];
619       v    = A->a + diag[i];
620       sum  = b[i];
621       SPARSEDENSEMDOT(sum,xs,v,idx,n);
622       d    = shift + A->a[diag[i]-1];
623       n    = B->i[i+1] - B->i[i];
624       idx  = B->j + B->i[i] - 1;
625       v    = B->a + B->i[i] - 1;
626       SPARSEDENSEMDOT(sum,ls,v,idx,n);
627       x[i] = omega*(sum/d);
628     }
629     ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
630                             mat->Mvctx); CHKERR(ierr);
631 
632     /*  t = b - (2*E - D)x */
633     v = A->a;
634     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }
635 
636     /*  t = (E + L)^{-1}t */
637     ts = t - 1; /* shifted by one for index start of a or mat->j*/
638     diag = A->diag;
639     VecSet(&zero,mat->lvec);
640     ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown,
641                                                  mat->Mvctx); CHKERR(ierr);
642     for ( i=0; i<m; i++ ) {
643       n    = diag[i] - A->i[i];
644       idx  = A->j + A->i[i] - 1;
645       v    = A->a + A->i[i] - 1;
646       sum  = t[i];
647       SPARSEDENSEMDOT(sum,ts,v,idx,n);
648       d    = shift + A->a[diag[i]-1];
649       n    = B->i[i+1] - B->i[i];
650       idx  = B->j + B->i[i] - 1;
651       v    = B->a + B->i[i] - 1;
652       SPARSEDENSEMDOT(sum,ls,v,idx,n);
653       t[i] = omega*(sum/d);
654     }
655     ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown,
656                                                     mat->Mvctx); CHKERR(ierr);
657     /*  x = x + t */
658     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
659     VecDestroy(tt);
660     return 0;
661   }
662 
663 
664   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
665     if (flag & SOR_ZERO_INITIAL_GUESS) {
666       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
667     }
668     else {
669       ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
670       CHKERR(ierr);
671       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
672       CHKERR(ierr);
673     }
674     while (its--) {
675       /* go down through the rows */
676       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
677                               mat->Mvctx); CHKERR(ierr);
678       for ( i=0; i<m; i++ ) {
679         n    = A->i[i+1] - A->i[i];
680         idx  = A->j + A->i[i] - 1;
681         v    = A->a + A->i[i] - 1;
682         sum  = b[i];
683         SPARSEDENSEMDOT(sum,xs,v,idx,n);
684         d    = shift + A->a[diag[i]-1];
685         n    = B->i[i+1] - B->i[i];
686         idx  = B->j + B->i[i] - 1;
687         v    = B->a + B->i[i] - 1;
688         SPARSEDENSEMDOT(sum,ls,v,idx,n);
689         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
690       }
691       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
692                             mat->Mvctx); CHKERR(ierr);
693       /* come up through the rows */
694       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
695                               mat->Mvctx); CHKERR(ierr);
696       for ( i=m-1; i>-1; i-- ) {
697         n    = A->i[i+1] - A->i[i];
698         idx  = A->j + A->i[i] - 1;
699         v    = A->a + A->i[i] - 1;
700         sum  = b[i];
701         SPARSEDENSEMDOT(sum,xs,v,idx,n);
702         d    = shift + A->a[diag[i]-1];
703         n    = B->i[i+1] - B->i[i];
704         idx  = B->j + B->i[i] - 1;
705         v    = B->a + B->i[i] - 1;
706         SPARSEDENSEMDOT(sum,ls,v,idx,n);
707         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
708       }
709       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
710                             mat->Mvctx); CHKERR(ierr);
711     }
712   }
713   else if (flag & SOR_FORWARD_SWEEP){
714     if (flag & SOR_ZERO_INITIAL_GUESS) {
715       VecSet(&zero,mat->lvec);
716       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
717                               mat->Mvctx); CHKERR(ierr);
718       for ( i=0; i<m; i++ ) {
719         n    = diag[i] - A->i[i];
720         idx  = A->j + A->i[i] - 1;
721         v    = A->a + A->i[i] - 1;
722         sum  = b[i];
723         SPARSEDENSEMDOT(sum,xs,v,idx,n);
724         d    = shift + A->a[diag[i]-1];
725         n    = B->i[i+1] - B->i[i];
726         idx  = B->j + B->i[i] - 1;
727         v    = B->a + B->i[i] - 1;
728         SPARSEDENSEMDOT(sum,ls,v,idx,n);
729         x[i] = omega*(sum/d);
730       }
731       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
732                             mat->Mvctx); CHKERR(ierr);
733       its--;
734     }
735     while (its--) {
736       ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
737       CHKERR(ierr);
738       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
739       CHKERR(ierr);
740       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
741                               mat->Mvctx); CHKERR(ierr);
742       for ( i=0; i<m; i++ ) {
743         n    = A->i[i+1] - A->i[i];
744         idx  = A->j + A->i[i] - 1;
745         v    = A->a + A->i[i] - 1;
746         sum  = b[i];
747         SPARSEDENSEMDOT(sum,xs,v,idx,n);
748         d    = shift + A->a[diag[i]-1];
749         n    = B->i[i+1] - B->i[i];
750         idx  = B->j + B->i[i] - 1;
751         v    = B->a + B->i[i] - 1;
752         SPARSEDENSEMDOT(sum,ls,v,idx,n);
753         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
754       }
755       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
756                             mat->Mvctx); CHKERR(ierr);
757     }
758   }
759   else if (flag & SOR_BACKWARD_SWEEP){
760     if (flag & SOR_ZERO_INITIAL_GUESS) {
761       VecSet(&zero,mat->lvec);
762       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
763                               mat->Mvctx); CHKERR(ierr);
764       for ( i=m-1; i>-1; i-- ) {
765         n    = A->i[i+1] - diag[i] - 1;
766         idx  = A->j + diag[i];
767         v    = A->a + diag[i];
768         sum  = b[i];
769         SPARSEDENSEMDOT(sum,xs,v,idx,n);
770         d    = shift + A->a[diag[i]-1];
771         n    = B->i[i+1] - B->i[i];
772         idx  = B->j + B->i[i] - 1;
773         v    = B->a + B->i[i] - 1;
774         SPARSEDENSEMDOT(sum,ls,v,idx,n);
775         x[i] = omega*(sum/d);
776       }
777       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
778                             mat->Mvctx); CHKERR(ierr);
779       its--;
780     }
781     while (its--) {
782       ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown,
783                             mat->Mvctx); CHKERR(ierr);
784       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown,
785                             mat->Mvctx); CHKERR(ierr);
786       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
787                               mat->Mvctx); CHKERR(ierr);
788       for ( i=m-1; i>-1; i-- ) {
789         n    = A->i[i+1] - A->i[i];
790         idx  = A->j + A->i[i] - 1;
791         v    = A->a + A->i[i] - 1;
792         sum  = b[i];
793         SPARSEDENSEMDOT(sum,xs,v,idx,n);
794         d    = shift + A->a[diag[i]-1];
795         n    = B->i[i+1] - B->i[i];
796         idx  = B->j + B->i[i] - 1;
797         v    = B->a + B->i[i] - 1;
798         SPARSEDENSEMDOT(sum,ls,v,idx,n);
799         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
800       }
801       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
802                             mat->Mvctx); CHKERR(ierr);
803     }
804   }
805   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
806     if (flag & SOR_ZERO_INITIAL_GUESS) {
807       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
808     }
809     ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
810     CHKERR(ierr);
811     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
812     CHKERR(ierr);
813     while (its--) {
814       /* go down through the rows */
815       for ( i=0; i<m; i++ ) {
816         n    = A->i[i+1] - A->i[i];
817         idx  = A->j + A->i[i] - 1;
818         v    = A->a + A->i[i] - 1;
819         sum  = b[i];
820         SPARSEDENSEMDOT(sum,xs,v,idx,n);
821         d    = shift + A->a[diag[i]-1];
822         n    = B->i[i+1] - B->i[i];
823         idx  = B->j + B->i[i] - 1;
824         v    = B->a + B->i[i] - 1;
825         SPARSEDENSEMDOT(sum,ls,v,idx,n);
826         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
827       }
828       /* come up through the rows */
829       for ( i=m-1; i>-1; i-- ) {
830         n    = A->i[i+1] - A->i[i];
831         idx  = A->j + A->i[i] - 1;
832         v    = A->a + A->i[i] - 1;
833         sum  = b[i];
834         SPARSEDENSEMDOT(sum,xs,v,idx,n);
835         d    = shift + A->a[diag[i]-1];
836         n    = B->i[i+1] - B->i[i];
837         idx  = B->j + B->i[i] - 1;
838         v    = B->a + B->i[i] - 1;
839         SPARSEDENSEMDOT(sum,ls,v,idx,n);
840         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
841       }
842     }
843   }
844   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
845     if (flag & SOR_ZERO_INITIAL_GUESS) {
846       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
847     }
848     ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
849     CHKERR(ierr);
850     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
851     CHKERR(ierr);
852     while (its--) {
853       for ( i=0; i<m; i++ ) {
854         n    = A->i[i+1] - A->i[i];
855         idx  = A->j + A->i[i] - 1;
856         v    = A->a + A->i[i] - 1;
857         sum  = b[i];
858         SPARSEDENSEMDOT(sum,xs,v,idx,n);
859         d    = shift + A->a[diag[i]-1];
860         n    = B->i[i+1] - B->i[i];
861         idx  = B->j + B->i[i] - 1;
862         v    = B->a + B->i[i] - 1;
863         SPARSEDENSEMDOT(sum,ls,v,idx,n);
864         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
865       }
866     }
867   }
868   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
869     if (flag & SOR_ZERO_INITIAL_GUESS) {
870       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
871     }
872     ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,
873                             mat->Mvctx); CHKERR(ierr);
874     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,
875                             mat->Mvctx); CHKERR(ierr);
876     while (its--) {
877       for ( i=m-1; i>-1; i-- ) {
878         n    = A->i[i+1] - A->i[i];
879         idx  = A->j + A->i[i] - 1;
880         v    = A->a + A->i[i] - 1;
881         sum  = b[i];
882         SPARSEDENSEMDOT(sum,xs,v,idx,n);
883         d    = shift + A->a[diag[i]-1];
884         n    = B->i[i+1] - B->i[i];
885         idx  = B->j + B->i[i] - 1;
886         v    = B->a + B->i[i] - 1;
887         SPARSEDENSEMDOT(sum,ls,v,idx,n);
888         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
889       }
890     }
891   }
892   return 0;
893 }
894 static int MatInsOpt_MPIAIJ(Mat aijin,int op)
895 {
896   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
897 
898   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
899     MatSetOption(aij->A,op);
900     MatSetOption(aij->B,op);
901   }
902   else if (op == YES_NEW_NONZERO_LOCATIONS) {
903     MatSetOption(aij->A,op);
904     MatSetOption(aij->B,op);
905   }
906   else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
907   return 0;
908 }
909 
910 static int MatSize_MPIAIJ(Mat matin,int *m,int *n)
911 {
912   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
913   *m = mat->M; *n = mat->N;
914   return 0;
915 }
916 
917 static int MatLocalSize_MPIAIJ(Mat matin,int *m,int *n)
918 {
919   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
920   *m = mat->m; *n = mat->n;
921   return 0;
922 }
923 
924 static int MatRange_MPIAIJ(Mat matin,int *m,int *n)
925 {
926   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
927   *m = mat->rstart; *n = mat->rend;
928   return 0;
929 }
930 
931 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
932 {
933   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) matin->data;
934   Mat        A = aij->A, B = aij->B;
935   Mat_AIJ    *Ad = (Mat_AIJ *)(A->data), *Bd = (Mat_AIJ *)(B->data);
936   Scalar     *vworkA, *vworkB;
937   int        ierr, *cworkA, *cworkB;
938   int        nztot, nzA, nzB, i, rstart = aij->rstart, rend = aij->rend;
939   int        cstart = aij->cstart;
940 
941   if (!aij->assembled)
942     SETERR(1,"MatGetRow_MPIAIJ: Must assemble matrix first.");
943   if (row < rstart || row >= rend)
944     SETERR(1,"MatGetRow_MPIAIJ: Currently you can get only the local rows.")
945 
946   ierr = MatGetRow(A,row,&nzA,&cworkA,&vworkA);	CHKERR(ierr);
947   for (i=0; i<nzA; i++) cworkA[i] += cstart;
948   ierr = MatGetRow(B,row,&nzB,&cworkB,&vworkB);	CHKERR(ierr);
949   for (i=0; i<nzB; i++) {
950       printf("i=%d, invcolmap[i]=%d, cwork=%d, vwork=%g\n",
951                i, aij->invcolmap[i], cworkB[i], vworkB[i] );
952 /*    cworkB[i] = aij->invcolmap[cworkB[i]]; */
953    }
954 
955   if (nztot = nzA + nzB) {
956     *idx = (int *) MALLOC( (nztot)*sizeof(int) ); CHKPTR(*idx);
957     *v   = (Scalar *) MALLOC( (nztot)*sizeof(Scalar) ); CHKPTR(*v);
958     for ( i=0; i<nzA; i++ ) {
959       (*idx)[i] = cworkA[i];
960       (*v)[i] = vworkA[i];
961     }
962     for ( i=0; i<nzB; i++ ) {
963       (*idx)[i+nzA] = cworkB[i];
964       (*v)[i] = vworkB[i];
965     }
966   }
967   else {*idx = 0; *v=0;}
968   *nz = nztot;
969   ierr = MatRestoreRow(A,row,&nzA,&cworkA,&vworkA);	CHKERR(ierr);
970   ierr = MatRestoreRow(B,row,&nzB,&cworkB,&vworkB);	CHKERR(ierr);
971   return 0;
972 }
973 
974 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
975 {
976   if (*idx) FREE(*idx);
977   if (*v) FREE(*v);
978   return 0;
979 }
980 
static int MatCopy_MPIAIJ(Mat,Mat *);
extern int MatConvert_MPIAIJ(Mat,MATTYPE,Mat *);

/* -------------------------------------------------------------------*/
/* Operation table for the MPIAIJ matrix type.  Entries are positional:
   the order must match the slot order declared in struct _MatOps.
   A 0 entry means the operation is not provided by this type. */
static struct _MatOps MatOps = {MatInsertValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,0,0,
       0,0,
       MatRelax_MPIAIJ,
       0,
       0,0,0,
       MatCopy_MPIAIJ,
       MatGetDiag_MPIAIJ,0,0,
       MatBeginAssemble_MPIAIJ,MatEndAssemble_MPIAIJ,
       0,
       MatInsOpt_MPIAIJ,MatZero_MPIAIJ,MatZeroRows_MPIAIJ,0,
       0,0,0,0,
       MatSize_MPIAIJ,MatLocalSize_MPIAIJ,MatRange_MPIAIJ,
       0,0,
       0,MatConvert_MPIAIJ };
1003 
1004 /*@
1005 
1006       MatCreateMPIAIJ - Creates a sparse parallel matrix
1007                                  in AIJ format.
1008 
1009   Input Parameters:
1010 .   comm - MPI communicator
1011 .   m,n - number of local rows and columns (or -1 to have calculated)
1012 .   M,N - global rows and columns (or -1 to have calculated)
1013 .   d_nz - total number nonzeros in diagonal portion of matrix
.   d_nnz - number of nonzeros per row in diagonal portion of matrix or null
.           You must leave room for the diagonal entry even if it is zero.
1016 .   o_nz - total number nonzeros in off-diagonal portion of matrix
.   o_nnz - number of nonzeros per row in off-diagonal portion of matrix
.           or null. You must have at least one nonzero per row.
1019 
1020   Output parameters:
1021 .  newmat - the matrix
1022 
1023   Keywords: matrix, aij, compressed row, sparse, parallel
1024 @*/
/*
   Implementation notes: builds the parallel AIJ matrix by (1) resolving
   any -1 sizes, (2) constructing ownership tables via MPI_Allgather and
   a prefix sum, (3) creating the diagonal (A) and off-diagonal (B)
   sequential pieces, and (4) initializing the stash that holds values
   destined for rows owned by other processors.
*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat          mat;
  Mat_MPIAIJ   *aij;
  int          ierr, i,sum[2],work[2];
  *newmat         = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij);
  mat->ops        = &MatOps;
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = 0;
  mat->row        = 0;
  mat->col        = 0;

  mat->comm       = comm;
  aij->insertmode = NotSetValues;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  /* if global sizes were not given, sum the local sizes over all procs */
  if (M == -1 || N == -1) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
    if (M == -1) M = sum[0];
    if (N == -1) N = sum[1];
  }
  /* if local sizes were not given, split the global size as evenly as
     possible: the first (M % numtids) processors each get one extra */
  if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m       = m;
  aij->n       = n;
  aij->N       = N;
  aij->M       = M;

  /* build local table of row and column ownerships */
  /* rowners and cowners share one allocation; cowners starts
     numtids+1 ints past rowners */
  aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTR(aij->rowners);
  aij->cowners = aij->rowners + aij->numtids + 1;
  /* gather each proc's row count into rowners[1..numtids], then
     prefix-sum so rowners[p] is the first global row owned by proc p */
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  /* same construction for the column ownership table */
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = aij->cowners[aij->mytid+1];


  /* A is created m x n (the local diagonal block); B is created m x N
     (off-diagonal part stored with full global column width) */
  ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);
  PLogObjectParent(mat,aij->B);

  /* build cache for off array entries formed */
  /* one allocation holds array (Scalar) plus idx and idy (int) regions */
  aij->stash.nmax = CHUNCKSIZE; /* completely arbitrary number */
  aij->stash.n    = 0;
  aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
                            sizeof(Scalar))); CHKPTR(aij->stash.array);
  aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
  aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);
  aij->colmap    = 0;
  aij->garray    = 0;

  /* stuff used for matrix vector multiply; created lazily elsewhere */
  aij->lvec      = 0;
  aij->Mvctx     = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}
1103 
1104 static int MatCopy_MPIAIJ(Mat matin,Mat *newmat)
1105 {
1106   Mat        mat;
1107   Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
1108   int        ierr;
1109   *newmat      = 0;
1110 
1111   if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
1112   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1113   PLogObjectCreate(mat);
1114   mat->data       = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij);
1115   mat->ops        = &MatOps;
1116   mat->destroy    = MatDestroy_MPIAIJ;
1117   mat->view       = MatView_MPIAIJ;
1118   mat->factor     = matin->factor;
1119   mat->row        = 0;
1120   mat->col        = 0;
1121 
1122   aij->m          = oldmat->m;
1123   aij->n          = oldmat->n;
1124   aij->M          = oldmat->M;
1125   aij->N          = oldmat->N;
1126 
1127   aij->assembled  = 1;
1128   aij->rstart     = oldmat->rstart;
1129   aij->rend       = oldmat->rend;
1130   aij->cstart     = oldmat->cstart;
1131   aij->cend       = oldmat->cend;
1132   aij->numtids    = oldmat->numtids;
1133   aij->mytid      = oldmat->mytid;
1134   aij->insertmode = NotSetValues;
1135 
1136   aij->rowners    = (int *) MALLOC( (aij->numtids+1)*sizeof(int) );
1137   CHKPTR(aij->rowners);
1138   MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1139   aij->stash.nmax = 0;
1140   aij->stash.n    = 0;
1141   aij->stash.array= 0;
1142   aij->colmap     = 0;
1143   aij->garray     = 0;
1144   mat->comm       = matin->comm;
1145 
1146   ierr =  VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr);
1147   PLogObjectParent(mat,aij->lvec);
1148   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr);
1149   PLogObjectParent(mat,aij->Mvctx);
1150   ierr =  MatCopy(oldmat->A,&aij->A); CHKERR(ierr);
1151   PLogObjectParent(mat,aij->A);
1152   ierr =  MatCopy(oldmat->B,&aij->B); CHKERR(ierr);
1153   PLogObjectParent(mat,aij->B);
1154   *newmat = mat;
1155   return 0;
1156 }
1157