#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.55 1995/07/06 17:47:13 curfman Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global number of columns.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
  int        n = B->n,i;
  aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}
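
/*
   A note on the convention (added for clarity): colmap[] holds local
   index + 1, so that a value of zero marks a global column with no
   counterpart in the off-diagonal part.  A lookup therefore reads

      local = aij->colmap[gcol] - 1;
      if (local < 0)  ... gcol is not present locally ...

   which is exactly how MatSetValues_MPIAIJ() below uses it.
*/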

extern int DisAssemble_MPIAIJ(Mat);

static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                            int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
    SETERRQ(1,"You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"Negative row index");
    if (idxm[i] >= aij->M) SETERRQ(1,"Row index too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"Negative column index");
        if (idxn[j] >= aij->N) SETERRQ(1,"Column index too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] - 1;
            if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
      CHKERRQ(ierr);
    }
  }
  return 0;
}
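
/*
   Usage note (a sketch added for clarity, not part of the original
   source): callers always pass global indices; rows owned by another
   processor are stashed above and communicated during assembly, so any
   processor may legally do

      int    row = 0, col = 0; Scalar v = 1.0;
      ierr = MatSetValues(mat,1,&row,1,&col,&v,ADDVALUES); CHKERRQ(ierr);

   even if it does not own row 0; the value reaches its owner during
   MatAssemblyBegin/MatAssemblyEnd.
*/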

/*
    The assembly code is a lot like the code for vectors; we should
    someday derive a single assembly routine that can be used for
    both cases.
*/

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         numtids = aij->numtids, *owners = aij->rowners;
  int         mytid = aij->mytid;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int         tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are in either INSERTVALUES or ADDVALUES mode */
  MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADDVALUES|INSERTVALUES)) {
    SETERRQ(1,"Some processors have inserted while others have added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nreceives = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives:
       1) each message will consist of ordered triples
     (global row index,global column index,value); we store the indices
     as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is, we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PETSCFREE(owner);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues = rvalues;
  aij->nsends     = nsends;     aij->nrecvs = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
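
/*
   Wire format used above (restated for clarity): each stashed entry is
   packed as three consecutive Scalars,

      svalues[3*k]   = (Scalar) global row index
      svalues[3*k+1] = (Scalar) global column index
      svalues[3*k+2] = numerical value

   so a received message of length len holds len/3 entries;
   MatAssemblyEnd_MPIAIJ() below converts the indices back to ints
   with PETSCREAL().
*/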
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  int         ierr;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] - 1;
          if (col < 0  && !((Mat_AIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);

  aij->insertmode = NOTSETVALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, so that we may reassemble. */
  MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
                 MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* Again this uses the same basic strategy as in the assembly and
   scatter-create routines; we should try to do it systematically
   if we can figure out the proper level of generality. */

/* The code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled)
    SETERRQ(1,"MatZeroRows_MPIAIJ: Must assemble matrix first");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"Index out of range.");
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nrecvs = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives:   */
  rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts);

  base = owners[mytid];

  /*  wait on receives */
  lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PETSCFREE(recv_waits);

  /* move the received row numbers into a single local list */
  lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PETSCFREE(rvalues); PETSCFREE(lens);
  PETSCFREE(owner); PETSCFREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp);
  CHKERRQ(ierr);  PETSCFREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(send_waits); PETSCFREE(svalues);

  return 0;
}
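
/*
   A usage sketch (illustrative only, not from the original source):
   zero out a list of global rows, putting 1.0 on each diagonal.  Each
   processor may list any global rows; as with MatSetValues(), the rows
   are routed to their owners.

      IS     is;
      Scalar one = 1.0;
      ierr = ISCreateSequential(MPI_COMM_SELF,nrows,rows,&is); CHKERRQ(ierr);
      ierr = MatZeroRows(mat,is,&one); CHKERRQ(ierr);
      ierr = ISDestroy(is); CHKERRQ(ierr);

   Here nrows and rows[] are assumed to be supplied by the caller.
*/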

static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;
  if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first");
  ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}
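
/*
   What MatMult_MPIAIJ() computes (restated for clarity): with x_d the
   locally owned part of xx and x_o the ghost values gathered into
   aij->lvec by the scatter,

      yy = aij->A * x_d + aij->B * x_o

   The scatter is split into Begin/End so that the multiply by the
   diagonal block aij->A overlaps the communication of the ghost
   values needed by the off-diagonal block aij->B.
*/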

static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;
  if (!aij->assembled)
    SETERRQ(1,"MatMultAdd_MPIAIJ: must assemble matrix first");
  ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;

  if (!aij->assembled)
    SETERRQ(1,"MatMultTrans_MPIAIJ: must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES,
           (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for this */
  /* implementation but is perhaps not always true. */
  ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES,
         (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;

  if (!aij->assembled)
    SETERRQ(1,"MatMultTransAdd_MPIAIJ: must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES,
         (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this */
  /* implementation but is perhaps not always true. */
  ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES,
       (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is
  the diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
{
  Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
  if (!A->assembled)
    SETERRQ(1,"MatGetDiagonal_MPIAIJ: must assemble matrix first");
  return MatGetDiagonal(A->A,v);
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
#endif
  PETSCFREE(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PETSCFREE(aij->colmap);
  if (aij->garray) PETSCFREE(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
  PETSCFREE(aij);
  PLogObjectDestroy(mat);
  PETSCHEADERDESTROY(mat);
  return 0;
}
#include "draw.h"
#include "pviewer.h"

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ: must assemble matrix first");
  if (!viewer) { /* so that viewers may be used from debuggers */
    viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) {
    FILE *fd = ViewerFileGetPointer_Private(viewer);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
             aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
             aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) ||
            vobj->cookie == DRAW_COOKIE) {
    int numtids = aij->numtids, mytid = aij->mytid;
    if (numtids == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor */
      Mat     A;
      Mat_AIJ *Aloc;
      int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar  *a;

      if (!mytid) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
      }
      CHKERRQ(ierr);

      /* copy over the A part */
      Aloc = (Mat_AIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;}

      /* copy over the B part */
      Aloc = (Mat_AIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PETSCFREE(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!mytid) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

extern int MatMarkDiag_AIJ(Mat_AIJ  *);
/*
    This has to provide several versions:

     1) as in the sequential case
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) coloring, updating outer values between colors
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double shift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ: must assemble matrix first");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x - 1; /* shift by one for index start of 1 */
  ls--;
  if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"That option is not yet supported for parallel AIJ matrices");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let  A = L + D + U, where L is strictly lower triangular and
    U is strictly upper triangular. This routine applies

            (L + E)^{-1} A (U + E)^{-1}

    to a vector efficiently using Eisenstat's trick. This is for
    the SSOR preconditioner, so E = D/omega, where omega is the
    relaxation factor.
    */
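
    /* A sketch of the algebra behind the steps below (added for
       clarity).  With M1 = L + E and M2 = U + E we have
       A = M1 + M2 + (D - 2E), hence

          M1^{-1} A M2^{-1} = M2^{-1} + M1^{-1} + M1^{-1} (D - 2E) M2^{-1}

       which is what the code computes:
          x = M2^{-1} b
          t = b - (2E - D) x
          t = M1^{-1} t
          x = x + t
       Only two triangular solves and a diagonal scaling are needed,
       instead of two triangular solves plus a full multiply by A. */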
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /*  x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i];
      v    = A->a + diag[i];
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);

    /*  t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /*  t = (E + L)^{-1}t */
    ts = t - 1; /* shift by one for index start of 1 */
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                                                 mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] - 1;
      v    = A->a + A->i[i] - 1;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                                                    mat->Mvctx); CHKERRQ(ierr);
    /*  x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i];
        v    = A->a + diag[i];
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                            mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                            mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;

  if (op == NO_NEW_NONZERO_LOCATIONS || op == YES_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == COLUMN_ORIENTED) SETERRQ(1,"Column oriented not supported");
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}
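
/*
   A small sketch of the intended use (illustrative only): iterate over
   the rows owned by this processor.

      int rstart, rend, i;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
        ... work with global row i ...
      }
*/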

static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend)
    SETERRQ(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark;
      if (mat->assembled) {
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) (*v)[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
        for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i]   = cworkB[i];
      }
    }
    else {*idx = 0; *v = 0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx && *idx) PETSCFREE(*idx);
  if (v && *v) PETSCFREE(*v);
  return 0;
}
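
/*
   A usage sketch (illustrative only): examine a locally owned row and
   then return the work space.  MatGetRow_MPIAIJ() above mallocs *idx
   and *v, so the matching MatRestoreRow() call is required to avoid
   leaking them.

      int    nz, *cols;
      Scalar *vals;
      ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
      ... use cols[0..nz-1] and vals[0..nz-1] ...
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
*/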

static int MatTranspose_MPIAIJ(Mat A,Mat *Bin)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  Mat        B;
  Mat_AIJ    *Aloc;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_AIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES);
    CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;}

  /* copy over the B part */
  Aloc = (Mat_AIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES);
    CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PETSCFREE(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  *Bin = B;
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatCopyPrivate_MPIAIJ(Mat,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,0,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,0,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};

/*@
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M
           is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated
           if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m
           is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated
           if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
           (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
           or null (possibly different for each row).  You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
           submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
           submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices begin at
   one, not zero.  See the users manual for further details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set both
   d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
   Likewise, specify preallocated storage for the off-diagonal part of
   the local submatrix with o_nz or o_nnz (not both).

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat          mat;
  Mat_MPIAIJ   *aij;
  int          ierr, i,sum[2],work[2];
  *newmat         = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
  mat->ops        = &MatOps;
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = 0;

  aij->insertmode = NOTSETVALUES;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE)
    {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == PETSC_DECIDE)
    {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m = m;
  aij->n = n;
  aij->N = N;
  aij->M = M;

  /* build local table of row and column ownerships */
  aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTRQ(aij->rowners);
  aij->cowners = aij->rowners + aij->numtids + 1;
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = aij->cowners[aij->mytid+1];

  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->B);

  /* build cache for off-processor entries as they are set */
  ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
  aij->colmap    = 0;
  aij->garray    = 0;

  /* stuff used for matrix-vector multiply */
  aij->lvec      = 0;
  aij->Mvctx     = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}
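
/*
   A minimal usage sketch (illustrative only; the preallocation figures
   5 and 2 are made up): create the matrix, set values using global
   indices, then assemble it before use.

      Mat    mat;
      int    ierr, i, rstart, rend, nc, col[3];
      Scalar v[3];
      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                             100,100,5,0,2,0,&mat); CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
        ... fill nc, col[], and v[] for global row i ...
        ierr = MatSetValues(mat,1,&i,nc,col,v,INSERTVALUES); CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
*/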

static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len;
  *newmat      = 0;

  if (!oldmat->assembled) SETERRQ(1,"Cannot copy unassembled matrix");
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
  mat->ops        = &MatOps;
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = matin->factor;

  aij->m          = oldmat->m;
  aij->n          = oldmat->n;
  aij->M          = oldmat->M;
  aij->N          = oldmat->N;

  aij->assembled  = 1;
  aij->rstart     = oldmat->rstart;
  aij->rend       = oldmat->rend;
  aij->cstart     = oldmat->cstart;
  aij->cend       = oldmat->cend;
  aij->numtids    = oldmat->numtids;
  aij->mytid      = oldmat->mytid;
  aij->insertmode = NOTSETVALUES;

  aij->rowners = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
  CHKPTRQ(aij->rowners);
  PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
  ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    aij->colmap = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
    CHKPTRQ(aij->colmap);
    PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
  } else aij->colmap = 0;
  if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) {
    aij->garray = (int *) PETSCMALLOC( len*sizeof(int) ); CHKPTRQ(aij->garray);
    PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
  } else aij->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->lvec);
  ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->B);
  *newmat = mat;
  return 0;
}