xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7c17b2cf1529c5c7fd3cb384b9e20a602d8843c8)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.57 1995/07/12 03:04:20 curfman Exp curfman $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way since the
12 length of colmap equals the global matrix length.
13 */
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
18   int        n = B->n,i;
19   aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
20   PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
21   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
22   return 0;
23 }
24 
25 extern int DisAssemble_MPIAIJ(Mat);
26 
27 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
28 {
29   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
30   int ierr;
31   if (aij->numtids == 1) {
32     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
33   } else
34     SETERRQ(1,"MatGetReordering_MPIAIJ:  not yet supported in parallel.");
35   return 0;
36 }
37 
/*
   Inserts (INSERTVALUES) or adds (ADDVALUES) an m-by-n dense block of
   values v at global rows idxm[] and columns idxn[].  Locally owned rows
   are routed to the diagonal block A or the off-diagonal block B; rows
   owned by other processors are stashed until assembly time.
*/
static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                            int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  /* inserts and adds may not be mixed between calls to assembly */
  if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
    SETERRQ(1,"You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"Negative row index");
    if (idxm[i] >= aij->M) SETERRQ(1,"Row index too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* this row is owned locally; convert to a local row index */
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"Negative column index");
        if (idxn[j] >= aij->N) SETERRQ(1,"Column index too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column lies in the diagonal block A; use the local index */
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            /* colmap holds local+1, so a missing column maps to -1 */
            col = aij->colmap[idxn[j]] - 1;
            if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
              /* a new off-diagonal nonzero: disassemble so B can grow,
                 then fall back to the global column index */
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      /* row owned by another processor: stash it for the assembly-time
         communication in MatAssemblyBegin/End */
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
      CHKERRQ(ierr);
    }
  }
  return 0;
}
82 
83 /*
84     the assembly code is a lot like the code for vectors, we should
85     sometime derive a single assembly code that can be used for
86     either case.
87 */
88 
/*
   Begins matrix assembly.  Values stashed for rows owned by other
   processors are counted, buffers are allocated, and nonblocking sends
   and receives of (row,column,value) triples are posted.  The matching
   waits and the insertion of received values happen in
   MatAssemblyEnd_MPIAIJ, which reads the state saved at the bottom.
*/
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         numtids = aij->numtids, *owners = aij->rowners;
  int         mytid = aij->mytid;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int         tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADDVALUES|INSERTVALUES)) {
    SETERRQ(1,"Some processors have inserted while others have added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  /* nprocs[j] = number of stashed entries bound for processor j;
     procs[j]  = 1 iff a message will be sent to processor j;
     owner[i]  = owning processor of the ith stashed entry */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nreceives = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack each stashed entry as a (row,column,value) triple; note that
     starts[] is consumed (incremented) as the entries are packed */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PETSCFREE(owner);
  /* rebuild starts[], which was consumed by the packing loop above */
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* save the in-flight communication state for MatAssemblyEnd_MPIAIJ */
  aij->svalues    = svalues;    aij->rvalues = rvalues;
  aij->nsends     = nsends;     aij->nrecvs = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
189 extern int MatSetUpMultiply_MPIAIJ(Mat);
190 
191 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
192 {
193   int        ierr;
194   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
195   MPI_Status  *send_status,recv_status;
196   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
197   int         row,col,other_disassembled;
198   Scalar      *values,val;
199   InsertMode  addv = aij->insertmode;
200 
201   /*  wait on receives */
202   while (count) {
203     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
204     /* unpack receives into our local space */
205     values = aij->rvalues + 3*imdex*aij->rmax;
206     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
207     n = n/3;
208     for ( i=0; i<n; i++ ) {
209       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
210       col = (int) PETSCREAL(values[3*i+1]);
211       val = values[3*i+2];
212       if (col >= aij->cstart && col < aij->cend) {
213           col -= aij->cstart;
214         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
215       }
216       else {
217         if (aij->assembled) {
218           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
219           col = aij->colmap[col] - 1;
220           if (col < 0  && !((Mat_AIJ*)(aij->A->data))->nonew) {
221             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
222             col = (int) PETSCREAL(values[3*i+1]);
223           }
224         }
225         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
226       }
227     }
228     count--;
229   }
230   PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);
231 
232   /* wait on sends */
233   if (aij->nsends) {
234     send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
235     CHKPTRQ(send_status);
236     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
237     PETSCFREE(send_status);
238   }
239   PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);
240 
241   aij->insertmode = NOTSETVALUES;
242   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
243   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
244 
245   /* determine if any processor has disassembled, if so we must
246      also disassemble ourselfs, in order that we may reassemble. */
247   MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
248                  MPI_INT,MPI_PROD,mat->comm);
249   if (aij->assembled && !other_disassembled) {
250     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
251   }
252 
253   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
254     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
255     aij->assembled = 1;
256   }
257   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
258   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
259 
260   return 0;
261 }
262 
263 static int MatZeroEntries_MPIAIJ(Mat A)
264 {
265   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
266   int ierr;
267   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
268   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
269   return 0;
270 }
271 
272 /* again this uses the same basic strategy as in the assembly and
273    scatter create routines, we should try to do it systematically
274    if we can figure out the proper level of generality. */
275 
276 /* the code does not do the diagonal entries correctly unless the
277    matrix is square and the column and row ownerships are identical.
278    This is a BUG. The only way to fix it seems to be to access
279    aij->A and aij->B directly and not through the MatZeroRows()
280    routine.
281 */
282 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
283 {
284   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
285   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
286   int            *procs,*nprocs,j,found,idx,nsends,*work;
287   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
288   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
289   int            *lens,imdex,*lrows,*values;
290   MPI_Comm       comm = A->comm;
291   MPI_Request    *send_waits,*recv_waits;
292   MPI_Status     recv_status,*send_status;
293   IS             istmp;
294 
295   if (!l->assembled)
296     SETERRQ(1,"MatZeroRows_MPIAIJ: Must assemble matrix first");
297   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
298   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
299 
300   /*  first count number of contributors to each processor */
301   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
302   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
303   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
304   for ( i=0; i<N; i++ ) {
305     idx = rows[i];
306     found = 0;
307     for ( j=0; j<numtids; j++ ) {
308       if (idx >= owners[j] && idx < owners[j+1]) {
309         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
310       }
311     }
312     if (!found) SETERRQ(1,"Index out of range.");
313   }
314   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
315 
316   /* inform other processors of number of messages and max length*/
317   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
318   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
319   nrecvs = work[mytid];
320   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
321   nmax = work[mytid];
322   PETSCFREE(work);
323 
324   /* post receives:   */
325   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
326   CHKPTRQ(rvalues);
327   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
328   CHKPTRQ(recv_waits);
329   for ( i=0; i<nrecvs; i++ ) {
330     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
331               comm,recv_waits+i);
332   }
333 
334   /* do sends:
335       1) starts[i] gives the starting index in svalues for stuff going to
336          the ith processor
337   */
338   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
339   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
340   CHKPTRQ(send_waits);
341   starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
342   starts[0] = 0;
343   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
344   for ( i=0; i<N; i++ ) {
345     svalues[starts[owner[i]]++] = rows[i];
346   }
347   ISRestoreIndices(is,&rows);
348 
349   starts[0] = 0;
350   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
351   count = 0;
352   for ( i=0; i<numtids; i++ ) {
353     if (procs[i]) {
354       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
355                 comm,send_waits+count++);
356     }
357   }
358   PETSCFREE(starts);
359 
360   base = owners[mytid];
361 
362   /*  wait on receives */
363   lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
364   source = lens + nrecvs;
365   count = nrecvs; slen = 0;
366   while (count) {
367     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
368     /* unpack receives into our local space */
369     MPI_Get_count(&recv_status,MPI_INT,&n);
370     source[imdex]  = recv_status.MPI_SOURCE;
371     lens[imdex]  = n;
372     slen += n;
373     count--;
374   }
375   PETSCFREE(recv_waits);
376 
377   /* move the data into the send scatter */
378   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
379   count = 0;
380   for ( i=0; i<nrecvs; i++ ) {
381     values = rvalues + i*nmax;
382     for ( j=0; j<lens[i]; j++ ) {
383       lrows[count++] = values[j] - base;
384     }
385   }
386   PETSCFREE(rvalues); PETSCFREE(lens);
387   PETSCFREE(owner); PETSCFREE(nprocs);
388 
389   /* actually zap the local rows */
390   ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp);
391   CHKERRQ(ierr);  PETSCFREE(lrows);
392   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
393   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
394   ierr = ISDestroy(istmp); CHKERRQ(ierr);
395 
396   /* wait on sends */
397   if (nsends) {
398     send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
399     CHKPTRQ(send_status);
400     MPI_Waitall(nsends,send_waits,send_status);
401     PETSCFREE(send_status);
402   }
403   PETSCFREE(send_waits); PETSCFREE(svalues);
404 
405 
406   return 0;
407 }
408 
409 static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
410 {
411   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
412   int        ierr;
413   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first");
414   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
415   CHKERRQ(ierr);
416   ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
417   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
418   CHKERRQ(ierr);
419   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
420   return 0;
421 }
422 
423 static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
424 {
425   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
426   int        ierr;
427   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first");
428   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
429   CHKERRQ(ierr);
430   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
431   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
432   CHKERRQ(ierr);
433   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
434   return 0;
435 }
436 
437 static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
438 {
439   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
440   int        ierr;
441 
442   if (!aij->assembled)
443     SETERRQ(1,"MatMulTrans_MPIAIJ: must assemble matrix first");
444   /* do nondiagonal part */
445   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
446   /* send it on its way */
447   ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES,
448            (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
449   /* do local part */
450   ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
451   /* receive remote parts: note this assumes the values are not actually */
452   /* inserted in yy until the next line, which is true for my implementation*/
453   /* but is not perhaps always true. */
454   ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES,
455          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
456   return 0;
457 }
458 
459 static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
460 {
461   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
462   int        ierr;
463 
464   if (!aij->assembled)
465     SETERRQ(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first");
466   /* do nondiagonal part */
467   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
468   /* send it on its way */
469   ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES,
470          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
471   /* do local part */
472   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
473   /* receive remote parts: note this assumes the values are not actually */
474   /* inserted in yy until the next line, which is true for my implementation*/
475   /* but is not perhaps always true. */
476   ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES,
477        (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
478   return 0;
479 }
480 
481 /*
482   This only works correctly for square matrices where the subblock A->A is the
483    diagonal block
484 */
485 static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
486 {
487   Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
488   if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ: must assemble matrix first");
489   return MatGetDiagonal(A->A,v);
490 }
491 
492 static int MatDestroy_MPIAIJ(PetscObject obj)
493 {
494   Mat        mat = (Mat) obj;
495   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
496   int        ierr;
497 #if defined(PETSC_LOG)
498   PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
499 #endif
500   PETSCFREE(aij->rowners);
501   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
502   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
503   if (aij->colmap) PETSCFREE(aij->colmap);
504   if (aij->garray) PETSCFREE(aij->garray);
505   if (aij->lvec) VecDestroy(aij->lvec);
506   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
507   PETSCFREE(aij);
508   PLogObjectDestroy(mat);
509   PETSCHEADERDESTROY(mat);
510   return 0;
511 }
512 #include "draw.h"
513 #include "pviewer.h"
514 
/*
   Views an MPI AIJ matrix.  For a FILE viewer each processor prints its
   own portion in rank order; for other viewer types the entire matrix is
   gathered onto processor 0 and viewed there.
*/
static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ: must assemble matrix first");
  if (!viewer) { /* so that viewers may be used from debuggers */
    viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) {
    /* each processor prints its own rows, serialized by MPIU_Seq_* */
    FILE *fd = ViewerFileGetPointer_Private(viewer);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
             aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
             aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) ||
            vobj->cookie == DRAW_COOKIE) {
    int numtids = aij->numtids, mytid = aij->mytid;
    if (numtids == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat     A;
      Mat_AIJ *Aloc;
      int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar  *a;

      /* processor 0 owns every row of the gathered matrix; the others
         contribute zero-row pieces */
      if (!mytid) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
      }
      CHKERRQ(ierr);

      /* copy over the A part */
      Aloc = (Mat_AIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      /* temporarily shift A's local column indices to global values in
         place (undone below); the -1 suggests the stored indices are
         1-based -- TODO(review): confirm against Mat_AIJ */
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      /* restore A's column indices to their original values */
      aj = Aloc->j;
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;}

      /* copy over the B part */
      Aloc = (Mat_AIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      /* translate B's compressed column indices to global columns via
         garray (in a scratch array; B itself is left untouched) */
      for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PETSCFREE(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      /* only processor 0 owns rows of A, so only it views the result */
      if (!mytid) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}
594 
595 extern int MatMarkDiag_AIJ(Mat_AIJ  *);
596 /*
597     This has to provide several versions.
598 
599      1) per sequential
600      2) a) use only local smoothing updating outer values only once.
601         b) local smoothing updating outer values each inner iteration
602     3) color updating outer values between colors.
603 */
604 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
605                            double shift,int its,Vec xx)
606 {
607   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
608   Mat        AA = mat->A, BB = mat->B;
609   Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
610   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
611   int        ierr,*idx, *diag;
612   int        n = mat->n, m = mat->m, i;
613   Vec        tt;
614 
615   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ: must assemble matrix first");
616 
617   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
618   xs = x -1; /* shift by one for index start of 1 */
619   ls--;
620   if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
621   diag = A->diag;
622   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
623     SETERRQ(1,"That option not yet support for parallel AIJ matrices");
624   }
625   if (flag & SOR_EISENSTAT) {
626     /* Let  A = L + U + D; where L is lower trianglar,
627     U is upper triangular, E is diagonal; This routine applies
628 
629             (L + E)^{-1} A (U + E)^{-1}
630 
631     to a vector efficiently using Eisenstat's trick. This is for
632     the case of SSOR preconditioner, so E is D/omega where omega
633     is the relaxation factor.
634     */
635     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
636     VecGetArray(tt,&t);
637     scale = (2.0/omega) - 1.0;
638     /*  x = (E + U)^{-1} b */
639     VecSet(&zero,mat->lvec);
640     ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
641                               mat->Mvctx); CHKERRQ(ierr);
642     for ( i=m-1; i>-1; i-- ) {
643       n    = A->i[i+1] - diag[i] - 1;
644       idx  = A->j + diag[i];
645       v    = A->a + diag[i];
646       sum  = b[i];
647       SPARSEDENSEMDOT(sum,xs,v,idx,n);
648       d    = shift + A->a[diag[i]-1];
649       n    = B->i[i+1] - B->i[i];
650       idx  = B->j + B->i[i] - 1;
651       v    = B->a + B->i[i] - 1;
652       SPARSEDENSEMDOT(sum,ls,v,idx,n);
653       x[i] = omega*(sum/d);
654     }
655     ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
656                             mat->Mvctx); CHKERRQ(ierr);
657 
658     /*  t = b - (2*E - D)x */
659     v = A->a;
660     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }
661 
662     /*  t = (E + L)^{-1}t */
663     ts = t - 1; /* shifted by one for index start of a or mat->j*/
664     diag = A->diag;
665     VecSet(&zero,mat->lvec);
666     ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
667                                                  mat->Mvctx); CHKERRQ(ierr);
668     for ( i=0; i<m; i++ ) {
669       n    = diag[i] - A->i[i];
670       idx  = A->j + A->i[i] - 1;
671       v    = A->a + A->i[i] - 1;
672       sum  = t[i];
673       SPARSEDENSEMDOT(sum,ts,v,idx,n);
674       d    = shift + A->a[diag[i]-1];
675       n    = B->i[i+1] - B->i[i];
676       idx  = B->j + B->i[i] - 1;
677       v    = B->a + B->i[i] - 1;
678       SPARSEDENSEMDOT(sum,ls,v,idx,n);
679       t[i] = omega*(sum/d);
680     }
681     ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
682                                                     mat->Mvctx); CHKERRQ(ierr);
683     /*  x = x + t */
684     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
685     VecDestroy(tt);
686     return 0;
687   }
688 
689 
690   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
691     if (flag & SOR_ZERO_INITIAL_GUESS) {
692       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
693     }
694     else {
695       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
696       CHKERRQ(ierr);
697       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
698       CHKERRQ(ierr);
699     }
700     while (its--) {
701       /* go down through the rows */
702       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
703                               mat->Mvctx); CHKERRQ(ierr);
704       for ( i=0; i<m; i++ ) {
705         n    = A->i[i+1] - A->i[i];
706         idx  = A->j + A->i[i] - 1;
707         v    = A->a + A->i[i] - 1;
708         sum  = b[i];
709         SPARSEDENSEMDOT(sum,xs,v,idx,n);
710         d    = shift + A->a[diag[i]-1];
711         n    = B->i[i+1] - B->i[i];
712         idx  = B->j + B->i[i] - 1;
713         v    = B->a + B->i[i] - 1;
714         SPARSEDENSEMDOT(sum,ls,v,idx,n);
715         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
716       }
717       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
718                             mat->Mvctx); CHKERRQ(ierr);
719       /* come up through the rows */
720       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
721                               mat->Mvctx); CHKERRQ(ierr);
722       for ( i=m-1; i>-1; i-- ) {
723         n    = A->i[i+1] - A->i[i];
724         idx  = A->j + A->i[i] - 1;
725         v    = A->a + A->i[i] - 1;
726         sum  = b[i];
727         SPARSEDENSEMDOT(sum,xs,v,idx,n);
728         d    = shift + A->a[diag[i]-1];
729         n    = B->i[i+1] - B->i[i];
730         idx  = B->j + B->i[i] - 1;
731         v    = B->a + B->i[i] - 1;
732         SPARSEDENSEMDOT(sum,ls,v,idx,n);
733         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
734       }
735       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
736                             mat->Mvctx); CHKERRQ(ierr);
737     }
738   }
739   else if (flag & SOR_FORWARD_SWEEP){
740     if (flag & SOR_ZERO_INITIAL_GUESS) {
741       VecSet(&zero,mat->lvec);
742       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
743                               mat->Mvctx); CHKERRQ(ierr);
744       for ( i=0; i<m; i++ ) {
745         n    = diag[i] - A->i[i];
746         idx  = A->j + A->i[i] - 1;
747         v    = A->a + A->i[i] - 1;
748         sum  = b[i];
749         SPARSEDENSEMDOT(sum,xs,v,idx,n);
750         d    = shift + A->a[diag[i]-1];
751         n    = B->i[i+1] - B->i[i];
752         idx  = B->j + B->i[i] - 1;
753         v    = B->a + B->i[i] - 1;
754         SPARSEDENSEMDOT(sum,ls,v,idx,n);
755         x[i] = omega*(sum/d);
756       }
757       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
758                             mat->Mvctx); CHKERRQ(ierr);
759       its--;
760     }
761     while (its--) {
762       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
763       CHKERRQ(ierr);
764       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
765       CHKERRQ(ierr);
766       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
767                               mat->Mvctx); CHKERRQ(ierr);
768       for ( i=0; i<m; i++ ) {
769         n    = A->i[i+1] - A->i[i];
770         idx  = A->j + A->i[i] - 1;
771         v    = A->a + A->i[i] - 1;
772         sum  = b[i];
773         SPARSEDENSEMDOT(sum,xs,v,idx,n);
774         d    = shift + A->a[diag[i]-1];
775         n    = B->i[i+1] - B->i[i];
776         idx  = B->j + B->i[i] - 1;
777         v    = B->a + B->i[i] - 1;
778         SPARSEDENSEMDOT(sum,ls,v,idx,n);
779         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
780       }
781       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
782                             mat->Mvctx); CHKERRQ(ierr);
783     }
784   }
785   else if (flag & SOR_BACKWARD_SWEEP){
786     if (flag & SOR_ZERO_INITIAL_GUESS) {
787       VecSet(&zero,mat->lvec);
788       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
789                               mat->Mvctx); CHKERRQ(ierr);
790       for ( i=m-1; i>-1; i-- ) {
791         n    = A->i[i+1] - diag[i] - 1;
792         idx  = A->j + diag[i];
793         v    = A->a + diag[i];
794         sum  = b[i];
795         SPARSEDENSEMDOT(sum,xs,v,idx,n);
796         d    = shift + A->a[diag[i]-1];
797         n    = B->i[i+1] - B->i[i];
798         idx  = B->j + B->i[i] - 1;
799         v    = B->a + B->i[i] - 1;
800         SPARSEDENSEMDOT(sum,ls,v,idx,n);
801         x[i] = omega*(sum/d);
802       }
803       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
804                             mat->Mvctx); CHKERRQ(ierr);
805       its--;
806     }
807     while (its--) {
808       ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
809                             mat->Mvctx); CHKERRQ(ierr);
810       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
811                             mat->Mvctx); CHKERRQ(ierr);
812       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
813                               mat->Mvctx); CHKERRQ(ierr);
814       for ( i=m-1; i>-1; i-- ) {
815         n    = A->i[i+1] - A->i[i];
816         idx  = A->j + A->i[i] - 1;
817         v    = A->a + A->i[i] - 1;
818         sum  = b[i];
819         SPARSEDENSEMDOT(sum,xs,v,idx,n);
820         d    = shift + A->a[diag[i]-1];
821         n    = B->i[i+1] - B->i[i];
822         idx  = B->j + B->i[i] - 1;
823         v    = B->a + B->i[i] - 1;
824         SPARSEDENSEMDOT(sum,ls,v,idx,n);
825         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
826       }
827       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
828                             mat->Mvctx); CHKERRQ(ierr);
829     }
830   }
831   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
832     if (flag & SOR_ZERO_INITIAL_GUESS) {
833       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
834     }
835     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
836     CHKERRQ(ierr);
837     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
838     CHKERRQ(ierr);
839     while (its--) {
840       /* go down through the rows */
841       for ( i=0; i<m; i++ ) {
842         n    = A->i[i+1] - A->i[i];
843         idx  = A->j + A->i[i] - 1;
844         v    = A->a + A->i[i] - 1;
845         sum  = b[i];
846         SPARSEDENSEMDOT(sum,xs,v,idx,n);
847         d    = shift + A->a[diag[i]-1];
848         n    = B->i[i+1] - B->i[i];
849         idx  = B->j + B->i[i] - 1;
850         v    = B->a + B->i[i] - 1;
851         SPARSEDENSEMDOT(sum,ls,v,idx,n);
852         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
853       }
854       /* come up through the rows */
855       for ( i=m-1; i>-1; i-- ) {
856         n    = A->i[i+1] - A->i[i];
857         idx  = A->j + A->i[i] - 1;
858         v    = A->a + A->i[i] - 1;
859         sum  = b[i];
860         SPARSEDENSEMDOT(sum,xs,v,idx,n);
861         d    = shift + A->a[diag[i]-1];
862         n    = B->i[i+1] - B->i[i];
863         idx  = B->j + B->i[i] - 1;
864         v    = B->a + B->i[i] - 1;
865         SPARSEDENSEMDOT(sum,ls,v,idx,n);
866         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
867       }
868     }
869   }
870   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
871     if (flag & SOR_ZERO_INITIAL_GUESS) {
872       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
873     }
874     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
875     CHKERRQ(ierr);
876     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
877     CHKERRQ(ierr);
878     while (its--) {
879       for ( i=0; i<m; i++ ) {
880         n    = A->i[i+1] - A->i[i];
881         idx  = A->j + A->i[i] - 1;
882         v    = A->a + A->i[i] - 1;
883         sum  = b[i];
884         SPARSEDENSEMDOT(sum,xs,v,idx,n);
885         d    = shift + A->a[diag[i]-1];
886         n    = B->i[i+1] - B->i[i];
887         idx  = B->j + B->i[i] - 1;
888         v    = B->a + B->i[i] - 1;
889         SPARSEDENSEMDOT(sum,ls,v,idx,n);
890         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
891       }
892     }
893   }
894   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
895     if (flag & SOR_ZERO_INITIAL_GUESS) {
896       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
897     }
898     ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
899                             mat->Mvctx); CHKERRQ(ierr);
900     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
901                             mat->Mvctx); CHKERRQ(ierr);
902     while (its--) {
903       for ( i=m-1; i>-1; i-- ) {
904         n    = A->i[i+1] - A->i[i];
905         idx  = A->j + A->i[i] - 1;
906         v    = A->a + A->i[i] - 1;
907         sum  = b[i];
908         SPARSEDENSEMDOT(sum,xs,v,idx,n);
909         d    = shift + A->a[diag[i]-1];
910         n    = B->i[i+1] - B->i[i];
911         idx  = B->j + B->i[i] - 1;
912         v    = B->a + B->i[i] - 1;
913         SPARSEDENSEMDOT(sum,ls,v,idx,n);
914         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
915       }
916     }
917   }
918   return 0;
919 }
920 
921 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
922                              int *nzalloc,int *mem)
923 {
924   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
925   Mat        A = mat->A, B = mat->B;
926   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
927 
928   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
929   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
930   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
931   if (flag == MAT_LOCAL) {
932     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
933   } else if (flag == MAT_GLOBAL_MAX) {
934     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
935     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
936   } else if (flag == MAT_GLOBAL_SUM) {
937     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
938     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
939   }
940   return 0;
941 }
942 
943 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
944 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
945 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
946 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
947 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
948 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
949 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
950 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
951 
952 static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
953 {
954   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
955 
956   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
957     MatSetOption(aij->A,op);
958     MatSetOption(aij->B,op);
959   }
960   else if (op == YES_NEW_NONZERO_LOCATIONS) {
961     MatSetOption(aij->A,op);
962     MatSetOption(aij->B,op);
963   }
964   else if (op == COLUMN_ORIENTED) SETERRQ(1,"Column oriented not supported");
965   return 0;
966 }
967 
968 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
969 {
970   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
971   *m = mat->M; *n = mat->N;
972   return 0;
973 }
974 
975 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
976 {
977   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
978   *m = mat->m; *n = mat->N;
979   return 0;
980 }
981 
982 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
983 {
984   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
985   *m = mat->rstart; *n = mat->rend;
986   return 0;
987 }
988 
/*
   MatGetRow_MPIAIJ - Returns one (locally owned) row of the parallel
   matrix by merging the corresponding rows of the diagonal block A and
   the off-diagonal block B into a single array sorted by global column
   number.

   Input:  matin - the matrix; row - GLOBAL row number (must satisfy
           rstart <= row < rend, i.e. be owned by this processor)
   Output: nz  - number of nonzeros in the row
           idx - global column indices (malloc'd here; freed by
                 MatRestoreRow_MPIAIJ), or pass 0 if not wanted
           v   - the values (malloc'd here), or pass 0 if not wanted
*/
static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend)
    SETERRQ(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.")
  lrow = row - rstart;

  /* Request from A and B only what the caller asked for.  Note: even if
     idx is not wanted, the B column indices (pcB) are still needed
     whenever values are requested, because they determine where the B
     values interleave around the A values during the merge below. */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark;
      if (mat->assembled) {
        /* map B's compacted local column numbers to global via garray */
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        /* B entries with global column < cstart come before all A entries */
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        /* shift A's local column numbers to global numbering */
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
        for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
      }
    }
    else {*idx = 0; *v=0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}
1043 
1044 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1045 {
1046   if (idx) PETSCFREE(*idx);
1047   if (v) PETSCFREE(*v);
1048   return 0;
1049 }
1050 
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix in
   a new matrix B.  Each local row of A is inserted as a column of B via
   MatSetValues (one column insertion per row), then B is assembled.

   Note: the stored row/column indices of the sequential blocks are
   one-based (Fortran-style), hence the "- 1" adjustments below.
*/
static int MatTranspose_MPIAIJ(Mat A,Mat *Bin)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  Mat        B;
  Mat_AIJ    *Aloc;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  /* the transpose has N rows and M columns; let PETSc pick local sizes */
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_AIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily shift A's 1-based local column numbers to 0-based global
     numbering, in place; undone after the insertion loop below */
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;}
  for ( i=0; i<m; i++ ) {
      /* insert row 'row' of A as a column of B */
      ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES);
      CHKERRQ(ierr);
      row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* restore the original 1-based local column numbers */
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;}

  /* copy over the B part */
  Aloc = (Mat_AIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* build global column numbers in a scratch array (via garray) instead
     of mutating B's index array; ct keeps the base pointer for freeing */
  ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES);
    CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PETSCFREE(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  *Bin = B;
  return 0;
}
1093 
1094 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1095 static int MatCopyPrivate_MPIAIJ(Mat,Mat *);
1096 
1097 /* -------------------------------------------------------------------*/
/* Function dispatch table for the MPIAIJ matrix type.  Entries are
   positional and must stay in the slot order declared by struct _MatOps;
   a 0 entry marks an operation that this format does not support. */
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,0,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
1117 
1118 /*@
1119    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1120    (the default parallel PETSc format).
1121 
1122    Input Parameters:
1123 .  comm - MPI communicator
1124 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1125 .  n - number of local columns (or PETSC_DECIDE to have calculated
1126            if N is given)
1127 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1128 .  N - number of global columns (or PETSC_DECIDE to have calculated
1129            if n is given)
1130 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1131            (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
           or null (possibly different for each row).  You must leave room
           for the diagonal entry even if it is zero.
1135 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1136            submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
           submatrix or null (possibly different for each row).
1139 
1140    Output Parameter:
1141 .  newmat - the matrix
1142 
1143    Notes:
1144    The AIJ format (also called the Yale sparse matrix format or
1145    compressed row storage), is fully compatible with standard Fortran 77
1146    storage.  That is, the stored row and column indices begin at
1147    one, not zero.  See the users manual for further details.
1148 
1149    The user MUST specify either the local or global matrix dimensions
1150    (possibly both).
1151 
1152    Storage Information:
1153    For a square global matrix we define each processor's diagonal portion
1154    to be its local rows and the corresponding columns (a square submatrix);
1155    each processor's off-diagonal portion encompasses the remainder of the
1156    local matrix (a rectangular submatrix).
1157 
1158    The user can specify preallocated storage for the diagonal part of
1159    the local submatrix with either d_nz or d_nnz (not both). Set both
1160    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1161    Likewise, specify preallocated storage for the off-diagonal part of
1162    the local submatrix with o_nz or o_nnz (not both).
1163 
1164 .keywords: matrix, aij, compressed row, sparse, parallel
1165 
1166 .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues()
1167 @*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat          mat;
  Mat_MPIAIJ   *aij;
  int          ierr, i,sum[2],work[2];
  *newmat         = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
  mat->ops        = &MatOps;
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = 0;

  aij->insertmode = NOTSETVALUES;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  /* determine any PETSC_DECIDE dimensions: global sizes are the sum of
     the local sizes over the communicator (collective operation) */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  /* local sizes: spread the remainder over the first (M % numtids) ranks */
  if (m == PETSC_DECIDE)
    {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == PETSC_DECIDE)
    {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m = m;
  aij->n = n;
  aij->N = N;
  aij->M = M;

  /* build local table of row and column ownerships */
  /* rowners and cowners share one allocation; cowners begins at offset
     numtids+1.  After the all-gather into slots [1..numtids] a running
     prefix sum makes rowners[p] the first global row of processor p. */
  aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTRQ(aij->rowners);
  aij->cowners = aij->rowners + aij->numtids + 1;
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  /* same construction for the column ownership table */
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = aij->cowners[aij->mytid+1];


  /* A holds the m x n diagonal block; B holds the off-diagonal part and
     is created with the full global column width N */
  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
  aij->colmap    = 0;
  aij->garray    = 0;

  /* stuff used for matrix vector multiply */
  aij->lvec      = 0;
  aij->Mvctx     = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}
1242 
1243 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
1244 {
1245   Mat        mat;
1246   Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
1247   int        ierr, len;
1248   *newmat      = 0;
1249 
1250   if (!oldmat->assembled) SETERRQ(1,"Cannot copy unassembled matrix");
1251   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1252   PLogObjectCreate(mat);
1253   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
1254   mat->ops        = &MatOps;
1255   mat->destroy    = MatDestroy_MPIAIJ;
1256   mat->view       = MatView_MPIAIJ;
1257   mat->factor     = matin->factor;
1258 
1259   aij->m          = oldmat->m;
1260   aij->n          = oldmat->n;
1261   aij->M          = oldmat->M;
1262   aij->N          = oldmat->N;
1263 
1264   aij->assembled  = 1;
1265   aij->rstart     = oldmat->rstart;
1266   aij->rend       = oldmat->rend;
1267   aij->cstart     = oldmat->cstart;
1268   aij->cend       = oldmat->cend;
1269   aij->numtids    = oldmat->numtids;
1270   aij->mytid      = oldmat->mytid;
1271   aij->insertmode = NOTSETVALUES;
1272 
1273   aij->rowners        = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
1274   CHKPTRQ(aij->rowners);
1275   PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1276   ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
1277   if (oldmat->colmap) {
1278     aij->colmap      = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
1279     CHKPTRQ(aij->colmap);
1280     PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
1281   } else aij->colmap = 0;
1282   if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) {
1283     aij->garray      = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray);
1284     PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
1285   } else aij->garray = 0;
1286 
1287   ierr =  VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
1288   PLogObjectParent(mat,aij->lvec);
1289   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
1290   PLogObjectParent(mat,aij->Mvctx);
1291   ierr =  MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
1292   PLogObjectParent(mat,aij->A);
1293   ierr =  MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
1294   PLogObjectParent(mat,aij->B);
1295   *newmat = mat;
1296   return 0;
1297 }
1298