1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.59 1995/07/13 15:15:39 curfman Exp curfman $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way, since the
12 length of colmap equals the number of global columns in the matrix.
13 */
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
18   int        n = B->n,i;
19   aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
20   PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
21   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
22   return 0;
23 }
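/*
   Added sketch (not part of the original source; globalcol is a hypothetical
   name).  Once built, colmap maps a global column number to the 1-based local
   column of the off-diagonal block B, with 0 meaning "not present", so a
   typical lookup, as in MatSetValues_MPIAIJ below, is

       if (!aij->colmap) {ierr = CreateColmap_Private(mat); CHKERRQ(ierr);}
       col = aij->colmap[globalcol] - 1;
       if (col < 0) {
         ... the column is not yet present in B ...
       }
*/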
24 
25 extern int DisAssemble_MPIAIJ(Mat);
26 
27 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
28 {
29   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
30   int ierr;
31   if (aij->numtids == 1) {
32     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
33   } else
34     SETERRQ(1,"MatGetReordering_MPIAIJ:  not yet supported in parallel.");
35   return 0;
36 }
37 
38 static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
39                             int *idxn,Scalar *v,InsertMode addv)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
43   int        cstart = aij->cstart, cend = aij->cend,row,col;
44 
45   if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
46     SETERRQ(1,"You cannot mix inserts and adds");
47   }
48   aij->insertmode = addv;
49   for ( i=0; i<m; i++ ) {
50     if (idxm[i] < 0) SETERRQ(1,"Negative row index");
51     if (idxm[i] >= aij->M) SETERRQ(1,"Row index too large");
52     if (idxm[i] >= rstart && idxm[i] < rend) {
53       row = idxm[i] - rstart;
54       for ( j=0; j<n; j++ ) {
55         if (idxn[j] < 0) SETERRQ(1,"Negative column index");
56         if (idxn[j] >= aij->N) SETERRQ(1,"Column index too large");
57         if (idxn[j] >= cstart && idxn[j] < cend){
58           col = idxn[j] - cstart;
59           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
60         }
61         else {
62           if (aij->assembled) {
63             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
64             col = aij->colmap[idxn[j]] - 1;
65             if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
66               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
67               col =  idxn[j];
68             }
69           }
70           else col = idxn[j];
71           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
72         }
73       }
74     }
75     else {
76       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
77       CHKERRQ(ierr);
78     }
79   }
80   return 0;
81 }
82 
83 /*
84     The assembly code is a lot like the code for vectors; we should
85     someday derive a single assembly routine that can be used for
86     either case.
87 */
88 
89 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
90 {
91   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
92   MPI_Comm    comm = mat->comm;
93   int         numtids = aij->numtids, *owners = aij->rowners;
94   int         mytid = aij->mytid;
95   MPI_Request *send_waits,*recv_waits;
96   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
97   int         tag = mat->tag, *owner,*starts,count,ierr;
98   InsertMode  addv;
99   Scalar      *rvalues,*svalues;
100 
101   /* make sure all processors are in either INSERTVALUES or ADDVALUES mode */
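  /* Added note (an assumption, not stated in the original source): the MPI_BOR
     reduction below works because a processor that has set no values still has
     insertmode == NOTSETVALUES (presumably zero) and so does not perturb the
     result, while a genuine mix of ADDVALUES and INSERTVALUES across the
     processors yields ADDVALUES|INSERTVALUES, which is tested for below. */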
102   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
103                 MPI_BOR,comm);
104   if (addv == (ADDVALUES|INSERTVALUES)) {
105     SETERRQ(1,"Some processors have inserted while others have added");
106   }
107   aij->insertmode = addv; /* in case this processor had no cache */
108 
109   /*  first count number of contributors to each processor */
110   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
111   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
112   owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
113   for ( i=0; i<aij->stash.n; i++ ) {
114     idx = aij->stash.idx[i];
115     for ( j=0; j<numtids; j++ ) {
116       if (idx >= owners[j] && idx < owners[j+1]) {
117         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
118       }
119     }
120   }
121   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
122 
123   /* inform other processors of number of messages and max length*/
124   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
125   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
126   nreceives = work[mytid];
127   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
128   nmax = work[mytid];
129   PETSCFREE(work);
130 
131   /* post receives:
132        1) each message will consist of triples
133      (global row index, global column index, value); we store the indices
134      as Scalars to simplify the message passing.
135        2) since we don't know how long each individual message is, we
136      allocate the largest needed buffer for each receive.  Potentially
137      this is a lot of wasted space.
138 
139 
140        This could be done better.
141   */
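  /* Added illustration (not in the original source): each stashed entry is
     packed as three consecutive Scalars, so svalues, and each block of rvalues
     received here, has the layout

         row0, col0, val0, row1, col1, val1, ...

     with the row and column indices stored as Scalars and converted back to
     integers with PETSCREAL() in MatAssemblyEnd_MPIAIJ(). */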
142   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
143   CHKPTRQ(rvalues);
144   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
145   CHKPTRQ(recv_waits);
146   for ( i=0; i<nreceives; i++ ) {
147     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
148               comm,recv_waits+i);
149   }
150 
151   /* do sends:
152       1) starts[i] gives the starting index in svalues for stuff going to
153          the ith processor
154   */
155   svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
156   CHKPTRQ(svalues);
157   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
158   CHKPTRQ(send_waits);
159   starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
160   starts[0] = 0;
161   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
162   for ( i=0; i<aij->stash.n; i++ ) {
163     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
164     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
165     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
166   }
167   PETSCFREE(owner);
168   starts[0] = 0;
169   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
170   count = 0;
171   for ( i=0; i<numtids; i++ ) {
172     if (procs[i]) {
173       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
174                 comm,send_waits+count++);
175     }
176   }
177   PETSCFREE(starts); PETSCFREE(nprocs);
178 
179   /* Free cache space */
180   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
181 
182   aij->svalues    = svalues;    aij->rvalues = rvalues;
183   aij->nsends     = nsends;     aij->nrecvs = nreceives;
184   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
185   aij->rmax       = nmax;
186 
187   return 0;
188 }
189 extern int MatSetUpMultiply_MPIAIJ(Mat);
190 
191 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
192 {
193   int        ierr;
194   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
195   MPI_Status  *send_status,recv_status;
196   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
197   int         row,col,other_disassembled;
198   Scalar      *values,val;
199   InsertMode  addv = aij->insertmode;
200 
201   /*  wait on receives */
202   while (count) {
203     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
204     /* unpack receives into our local space */
205     values = aij->rvalues + 3*imdex*aij->rmax;
206     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
207     n = n/3;
208     for ( i=0; i<n; i++ ) {
209       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
210       col = (int) PETSCREAL(values[3*i+1]);
211       val = values[3*i+2];
212       if (col >= aij->cstart && col < aij->cend) {
213           col -= aij->cstart;
214         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
215       }
216       else {
217         if (aij->assembled) {
218           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
219           col = aij->colmap[col] - 1;
220           if (col < 0  && !((Mat_AIJ*)(aij->A->data))->nonew) {
221             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
222             col = (int) PETSCREAL(values[3*i+1]);
223           }
224         }
225         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
226       }
227     }
228     count--;
229   }
230   PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);
231 
232   /* wait on sends */
233   if (aij->nsends) {
234     send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
235     CHKPTRQ(send_status);
236     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
237     PETSCFREE(send_status);
238   }
239   PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);
240 
241   aij->insertmode = NOTSETVALUES;
242   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
243   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
244 
245   /* determine if any processor has disassembled; if so, we must
246      also disassemble ourselves so that we may reassemble. */
247   MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
248                  MPI_INT,MPI_PROD,mat->comm);
249   if (aij->assembled && !other_disassembled) {
250     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
251   }
252 
253   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
254     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
255     aij->assembled = 1;
256   }
257   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
258   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
259 
260   return 0;
261 }
262 
263 static int MatZeroEntries_MPIAIJ(Mat A)
264 {
265   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
266   int ierr;
267   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
268   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
269   return 0;
270 }
271 
272 /* again this uses the same basic strategy as in the assembly and
273    scatter create routines; we should try to do it systematically
274    if we can figure out the proper level of generality. */
275 
276 /* the code does not do the diagonal entries correctly unless the
277    matrix is square and the column and row ownerships are identical.
278    This is a BUG. The only way to fix it seems to be to access
279    aij->A and aij->B directly and not through the MatZeroRows()
280    routine.
281 */
282 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
283 {
284   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
285   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
286   int            *procs,*nprocs,j,found,idx,nsends,*work;
287   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
288   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
289   int            *lens,imdex,*lrows,*values;
290   MPI_Comm       comm = A->comm;
291   MPI_Request    *send_waits,*recv_waits;
292   MPI_Status     recv_status,*send_status;
293   IS             istmp;
294 
295   if (!l->assembled)
296     SETERRQ(1,"MatZeroRows_MPIAIJ: Must assemble matrix first");
297   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
298   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
299 
300   /*  first count number of contributors to each processor */
301   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
302   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
303   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
304   for ( i=0; i<N; i++ ) {
305     idx = rows[i];
306     found = 0;
307     for ( j=0; j<numtids; j++ ) {
308       if (idx >= owners[j] && idx < owners[j+1]) {
309         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
310       }
311     }
312     if (!found) SETERRQ(1,"Index out of range.");
313   }
314   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
315 
316   /* inform other processors of number of messages and max length*/
317   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
318   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
319   nrecvs = work[mytid];
320   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
321   nmax = work[mytid];
322   PETSCFREE(work);
323 
324   /* post receives:   */
325   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
326   CHKPTRQ(rvalues);
327   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
328   CHKPTRQ(recv_waits);
329   for ( i=0; i<nrecvs; i++ ) {
330     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
331               comm,recv_waits+i);
332   }
333 
334   /* do sends:
335       1) starts[i] gives the starting index in svalues for stuff going to
336          the ith processor
337   */
338   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
339   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
340   CHKPTRQ(send_waits);
341   starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
342   starts[0] = 0;
343   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
344   for ( i=0; i<N; i++ ) {
345     svalues[starts[owner[i]]++] = rows[i];
346   }
347   ISRestoreIndices(is,&rows);
348 
349   starts[0] = 0;
350   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
351   count = 0;
352   for ( i=0; i<numtids; i++ ) {
353     if (procs[i]) {
354       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
355                 comm,send_waits+count++);
356     }
357   }
358   PETSCFREE(starts);
359 
360   base = owners[mytid];
361 
362   /*  wait on receives */
363   lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
364   source = lens + nrecvs;
365   count = nrecvs; slen = 0;
366   while (count) {
367     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
368     /* unpack receives into our local space */
369     MPI_Get_count(&recv_status,MPI_INT,&n);
370     source[imdex]  = recv_status.MPI_SOURCE;
371     lens[imdex]  = n;
372     slen += n;
373     count--;
374   }
375   PETSCFREE(recv_waits);
376 
377   /* move the data into the send scatter */
378   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
379   count = 0;
380   for ( i=0; i<nrecvs; i++ ) {
381     values = rvalues + i*nmax;
382     for ( j=0; j<lens[i]; j++ ) {
383       lrows[count++] = values[j] - base;
384     }
385   }
386   PETSCFREE(rvalues); PETSCFREE(lens);
387   PETSCFREE(owner); PETSCFREE(nprocs);
388 
389   /* actually zap the local rows */
390   ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp);
391   CHKERRQ(ierr);  PETSCFREE(lrows);
392   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
393   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
394   ierr = ISDestroy(istmp); CHKERRQ(ierr);
395 
396   /* wait on sends */
397   if (nsends) {
398     send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
399     CHKPTRQ(send_status);
400     MPI_Waitall(nsends,send_waits,send_status);
401     PETSCFREE(send_status);
402   }
403   PETSCFREE(send_waits); PETSCFREE(svalues);
404 
405 
406   return 0;
407 }
408 
409 static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
410 {
411   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
412   int        ierr;
413   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first");
414   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
415   CHKERRQ(ierr);
416   ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
417   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
418   CHKERRQ(ierr);
419   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
420   return 0;
421 }
422 
423 static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
424 {
425   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
426   int        ierr;
427   if (!aij->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ: must assemble matrix first");
428   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
429   CHKERRQ(ierr);
430   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
431   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
432   CHKERRQ(ierr);
433   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
434   return 0;
435 }
436 
437 static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
438 {
439   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
440   int        ierr;
441 
442   if (!aij->assembled)
443     SETERRQ(1,"MatMultTrans_MPIAIJ: must assemble matrix first");
444   /* do nondiagonal part */
445   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
446   /* send it on its way */
447   ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES,
448            (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
449   /* do local part */
450   ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
451   /* receive remote parts: note this assumes the values are not actually */
452   /* inserted in yy until the next line, which is true for my implementation*/
453   /* but may not always be true. */
454   ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES,
455          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
456   return 0;
457 }
458 
459 static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
460 {
461   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
462   int        ierr;
463 
464   if (!aij->assembled)
465     SETERRQ(1,"MatMultTransAdd_MPIAIJ: must assemble matrix first");
466   /* do nondiagonal part */
467   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
468   /* send it on its way */
469   ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES,
470          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
471   /* do local part */
472   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
473   /* receive remote parts: note this assumes the values are not actually */
474   /* inserted in zz until the next line, which is true for my implementation*/
475   /* but may not always be true. */
476   ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES,
477        (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
478   return 0;
479 }
480 
481 /*
482   This only works correctly for square matrices where the subblock A->A is the
483    diagonal block
484 */
485 static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
486 {
487   Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
488   if (!A->assembled) SETERRQ(1,"MatGetDiagonal_MPIAIJ: must assemble matrix first");
489   return MatGetDiagonal(A->A,v);
490 }
491 
492 static int MatDestroy_MPIAIJ(PetscObject obj)
493 {
494   Mat        mat = (Mat) obj;
495   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
496   int        ierr;
497 #if defined(PETSC_LOG)
498   PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
499 #endif
500   PETSCFREE(aij->rowners);
501   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
502   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
503   if (aij->colmap) PETSCFREE(aij->colmap);
504   if (aij->garray) PETSCFREE(aij->garray);
505   if (aij->lvec) VecDestroy(aij->lvec);
506   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
507   PETSCFREE(aij);
508   PLogObjectDestroy(mat);
509   PETSCHEADERDESTROY(mat);
510   return 0;
511 }
512 #include "draw.h"
513 #include "pviewer.h"
514 
515 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
516 {
517   Mat        mat = (Mat) obj;
518   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
519   int        ierr;
520   PetscObject vobj = (PetscObject) viewer;
521 
522   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ: must assemble matrix first");
523   if (!viewer) { /* so that viewers may be used from debuggers */
524     viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer;
525   }
526   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
527   if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) {
528     FILE *fd = ViewerFileGetPointer_Private(viewer);
529     MPIU_Seq_begin(mat->comm,1);
530     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
531              aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
532              aij->cend);
533     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
534     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
535     fflush(fd);
536     MPIU_Seq_end(mat->comm,1);
537   }
538   else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) ||
539             vobj->cookie == DRAW_COOKIE) {
540     int numtids = aij->numtids, mytid = aij->mytid;
541     if (numtids == 1) {
542       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
543     }
544     else {
545       /* assemble the entire matrix onto first processor. */
546       Mat     A;
547       Mat_AIJ *Aloc;
548       int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
549       Scalar  *a;
550 
551       if (!mytid) {
552         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
553       }
554       else {
555         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
556       }
557       CHKERRQ(ierr);
558 
559       /* copy over the A part */
560       Aloc = (Mat_AIJ*) aij->A->data;
561       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
562       row = aij->rstart;
563       for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;}
564       for ( i=0; i<m; i++ ) {
565         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES);
566         CHKERRQ(ierr);
567         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
568       }
569       aj = Aloc->j;
570       for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;}
571 
572       /* copy over the B part */
573       Aloc = (Mat_AIJ*) aij->B->data;
574       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
575       row = aij->rstart;
576       ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
577       for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];}
578       for ( i=0; i<m; i++ ) {
579         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES);
580         CHKERRQ(ierr);
581         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
582       }
583       PETSCFREE(ct);
584       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
585       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
586       if (!mytid) {
587         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
588       }
589       ierr = MatDestroy(A); CHKERRQ(ierr);
590     }
591   }
592   return 0;
593 }
594 
595 extern int MatMarkDiag_AIJ(Mat_AIJ  *);
596 /*
597     This has to provide several versions.
598 
599      1) as in the sequential case
600      2) a) use only local smoothing, updating outer values only once.
601         b) local smoothing, updating outer values each inner iteration
602      3) coloring, updating outer values between colors.
603 */
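/* Added note (not in the original source): the cases handled below are the
   Eisenstat trick (SOR_EISENSTAT), parallel sweeps that pipeline the outer
   values between processors every iteration (SOR_FORWARD_SWEEP,
   SOR_BACKWARD_SWEEP, SOR_SYMMETRIC_SWEEP), and purely local sweeps that
   scatter the outer values only once per call (SOR_LOCAL_FORWARD_SWEEP,
   SOR_LOCAL_BACKWARD_SWEEP, SOR_LOCAL_SYMMETRIC_SWEEP). */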
604 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
605                            double shift,int its,Vec xx)
606 {
607   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
608   Mat        AA = mat->A, BB = mat->B;
609   Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
610   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
611   int        ierr,*idx, *diag;
612   int        n = mat->n, m = mat->m, i;
613   Vec        tt;
614 
615   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ: must assemble matrix first");
616 
617   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
618   xs = x -1; /* shift by one for index start of 1 */
619   ls--;
620   if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
621   diag = A->diag;
622   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
623     SETERRQ(1,"That option is not yet supported for parallel AIJ matrices");
624   }
625   if (flag & SOR_EISENSTAT) {
626     /* Let  A = L + U + D, where L is lower triangular,
627     U is upper triangular, and D is diagonal.  This routine applies
628 
629             (L + E)^{-1} A (U + E)^{-1}
630 
631     to a vector efficiently using Eisenstat's trick. This is for
632     the case of SSOR preconditioner, so E is D/omega where omega
633     is the relaxation factor.
634     */
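    /* Added derivation (a short justification, not in the original source):
       since A = (L + E) + (U + E) - (2E - D), applying the operator above to a
       vector b gives

           (L+E)^{-1} A (U+E)^{-1} b
               = (U+E)^{-1} b + (L+E)^{-1} ( b - (2E - D) (U+E)^{-1} b ),

       which is exactly the sequence computed below:
       x = (E+U)^{-1} b, then t = (E+L)^{-1} (b - (2E-D) x), then x = x + t. */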
635     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
636     VecGetArray(tt,&t);
637     scale = (2.0/omega) - 1.0;
638     /*  x = (E + U)^{-1} b */
639     VecSet(&zero,mat->lvec);
640     ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
641                               mat->Mvctx); CHKERRQ(ierr);
642     for ( i=m-1; i>-1; i-- ) {
643       n    = A->i[i+1] - diag[i] - 1;
644       idx  = A->j + diag[i];
645       v    = A->a + diag[i];
646       sum  = b[i];
647       SPARSEDENSEMDOT(sum,xs,v,idx,n);
648       d    = shift + A->a[diag[i]-1];
649       n    = B->i[i+1] - B->i[i];
650       idx  = B->j + B->i[i] - 1;
651       v    = B->a + B->i[i] - 1;
652       SPARSEDENSEMDOT(sum,ls,v,idx,n);
653       x[i] = omega*(sum/d);
654     }
655     ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
656                             mat->Mvctx); CHKERRQ(ierr);
657 
658     /*  t = b - (2*E - D)x */
659     v = A->a;
660     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }
661 
662     /*  t = (E + L)^{-1}t */
663     ts = t - 1; /* shift by one for the 1-based indexing of A->a and A->j */
664     diag = A->diag;
665     VecSet(&zero,mat->lvec);
666     ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
667                                                  mat->Mvctx); CHKERRQ(ierr);
668     for ( i=0; i<m; i++ ) {
669       n    = diag[i] - A->i[i];
670       idx  = A->j + A->i[i] - 1;
671       v    = A->a + A->i[i] - 1;
672       sum  = t[i];
673       SPARSEDENSEMDOT(sum,ts,v,idx,n);
674       d    = shift + A->a[diag[i]-1];
675       n    = B->i[i+1] - B->i[i];
676       idx  = B->j + B->i[i] - 1;
677       v    = B->a + B->i[i] - 1;
678       SPARSEDENSEMDOT(sum,ls,v,idx,n);
679       t[i] = omega*(sum/d);
680     }
681     ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
682                                                     mat->Mvctx); CHKERRQ(ierr);
683     /*  x = x + t */
684     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
685     VecDestroy(tt);
686     return 0;
687   }
688 
689 
690   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
691     if (flag & SOR_ZERO_INITIAL_GUESS) {
692       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
693     }
694     else {
695       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
696       CHKERRQ(ierr);
697       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
698       CHKERRQ(ierr);
699     }
700     while (its--) {
701       /* go down through the rows */
702       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
703                               mat->Mvctx); CHKERRQ(ierr);
704       for ( i=0; i<m; i++ ) {
705         n    = A->i[i+1] - A->i[i];
706         idx  = A->j + A->i[i] - 1;
707         v    = A->a + A->i[i] - 1;
708         sum  = b[i];
709         SPARSEDENSEMDOT(sum,xs,v,idx,n);
710         d    = shift + A->a[diag[i]-1];
711         n    = B->i[i+1] - B->i[i];
712         idx  = B->j + B->i[i] - 1;
713         v    = B->a + B->i[i] - 1;
714         SPARSEDENSEMDOT(sum,ls,v,idx,n);
715         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
716       }
717       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
718                             mat->Mvctx); CHKERRQ(ierr);
719       /* come up through the rows */
720       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
721                               mat->Mvctx); CHKERRQ(ierr);
722       for ( i=m-1; i>-1; i-- ) {
723         n    = A->i[i+1] - A->i[i];
724         idx  = A->j + A->i[i] - 1;
725         v    = A->a + A->i[i] - 1;
726         sum  = b[i];
727         SPARSEDENSEMDOT(sum,xs,v,idx,n);
728         d    = shift + A->a[diag[i]-1];
729         n    = B->i[i+1] - B->i[i];
730         idx  = B->j + B->i[i] - 1;
731         v    = B->a + B->i[i] - 1;
732         SPARSEDENSEMDOT(sum,ls,v,idx,n);
733         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
734       }
735       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
736                             mat->Mvctx); CHKERRQ(ierr);
737     }
738   }
739   else if (flag & SOR_FORWARD_SWEEP){
740     if (flag & SOR_ZERO_INITIAL_GUESS) {
741       VecSet(&zero,mat->lvec);
742       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
743                               mat->Mvctx); CHKERRQ(ierr);
744       for ( i=0; i<m; i++ ) {
745         n    = diag[i] - A->i[i];
746         idx  = A->j + A->i[i] - 1;
747         v    = A->a + A->i[i] - 1;
748         sum  = b[i];
749         SPARSEDENSEMDOT(sum,xs,v,idx,n);
750         d    = shift + A->a[diag[i]-1];
751         n    = B->i[i+1] - B->i[i];
752         idx  = B->j + B->i[i] - 1;
753         v    = B->a + B->i[i] - 1;
754         SPARSEDENSEMDOT(sum,ls,v,idx,n);
755         x[i] = omega*(sum/d);
756       }
757       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
758                             mat->Mvctx); CHKERRQ(ierr);
759       its--;
760     }
761     while (its--) {
762       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
763       CHKERRQ(ierr);
764       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
765       CHKERRQ(ierr);
766       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
767                               mat->Mvctx); CHKERRQ(ierr);
768       for ( i=0; i<m; i++ ) {
769         n    = A->i[i+1] - A->i[i];
770         idx  = A->j + A->i[i] - 1;
771         v    = A->a + A->i[i] - 1;
772         sum  = b[i];
773         SPARSEDENSEMDOT(sum,xs,v,idx,n);
774         d    = shift + A->a[diag[i]-1];
775         n    = B->i[i+1] - B->i[i];
776         idx  = B->j + B->i[i] - 1;
777         v    = B->a + B->i[i] - 1;
778         SPARSEDENSEMDOT(sum,ls,v,idx,n);
779         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
780       }
781       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
782                             mat->Mvctx); CHKERRQ(ierr);
783     }
784   }
785   else if (flag & SOR_BACKWARD_SWEEP){
786     if (flag & SOR_ZERO_INITIAL_GUESS) {
787       VecSet(&zero,mat->lvec);
788       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
789                               mat->Mvctx); CHKERRQ(ierr);
790       for ( i=m-1; i>-1; i-- ) {
791         n    = A->i[i+1] - diag[i] - 1;
792         idx  = A->j + diag[i];
793         v    = A->a + diag[i];
794         sum  = b[i];
795         SPARSEDENSEMDOT(sum,xs,v,idx,n);
796         d    = shift + A->a[diag[i]-1];
797         n    = B->i[i+1] - B->i[i];
798         idx  = B->j + B->i[i] - 1;
799         v    = B->a + B->i[i] - 1;
800         SPARSEDENSEMDOT(sum,ls,v,idx,n);
801         x[i] = omega*(sum/d);
802       }
803       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
804                             mat->Mvctx); CHKERRQ(ierr);
805       its--;
806     }
807     while (its--) {
808       ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
809                             mat->Mvctx); CHKERRQ(ierr);
810       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
811                             mat->Mvctx); CHKERRQ(ierr);
812       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
813                               mat->Mvctx); CHKERRQ(ierr);
814       for ( i=m-1; i>-1; i-- ) {
815         n    = A->i[i+1] - A->i[i];
816         idx  = A->j + A->i[i] - 1;
817         v    = A->a + A->i[i] - 1;
818         sum  = b[i];
819         SPARSEDENSEMDOT(sum,xs,v,idx,n);
820         d    = shift + A->a[diag[i]-1];
821         n    = B->i[i+1] - B->i[i];
822         idx  = B->j + B->i[i] - 1;
823         v    = B->a + B->i[i] - 1;
824         SPARSEDENSEMDOT(sum,ls,v,idx,n);
825         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
826       }
827       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
828                             mat->Mvctx); CHKERRQ(ierr);
829     }
830   }
831   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
832     if (flag & SOR_ZERO_INITIAL_GUESS) {
833       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
834     }
835     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
836     CHKERRQ(ierr);
837     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
838     CHKERRQ(ierr);
839     while (its--) {
840       /* go down through the rows */
841       for ( i=0; i<m; i++ ) {
842         n    = A->i[i+1] - A->i[i];
843         idx  = A->j + A->i[i] - 1;
844         v    = A->a + A->i[i] - 1;
845         sum  = b[i];
846         SPARSEDENSEMDOT(sum,xs,v,idx,n);
847         d    = shift + A->a[diag[i]-1];
848         n    = B->i[i+1] - B->i[i];
849         idx  = B->j + B->i[i] - 1;
850         v    = B->a + B->i[i] - 1;
851         SPARSEDENSEMDOT(sum,ls,v,idx,n);
852         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
853       }
854       /* come up through the rows */
855       for ( i=m-1; i>-1; i-- ) {
856         n    = A->i[i+1] - A->i[i];
857         idx  = A->j + A->i[i] - 1;
858         v    = A->a + A->i[i] - 1;
859         sum  = b[i];
860         SPARSEDENSEMDOT(sum,xs,v,idx,n);
861         d    = shift + A->a[diag[i]-1];
862         n    = B->i[i+1] - B->i[i];
863         idx  = B->j + B->i[i] - 1;
864         v    = B->a + B->i[i] - 1;
865         SPARSEDENSEMDOT(sum,ls,v,idx,n);
866         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
867       }
868     }
869   }
870   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
871     if (flag & SOR_ZERO_INITIAL_GUESS) {
872       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
873     }
874     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
875     CHKERRQ(ierr);
876     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
877     CHKERRQ(ierr);
878     while (its--) {
879       for ( i=0; i<m; i++ ) {
880         n    = A->i[i+1] - A->i[i];
881         idx  = A->j + A->i[i] - 1;
882         v    = A->a + A->i[i] - 1;
883         sum  = b[i];
884         SPARSEDENSEMDOT(sum,xs,v,idx,n);
885         d    = shift + A->a[diag[i]-1];
886         n    = B->i[i+1] - B->i[i];
887         idx  = B->j + B->i[i] - 1;
888         v    = B->a + B->i[i] - 1;
889         SPARSEDENSEMDOT(sum,ls,v,idx,n);
890         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
891       }
892     }
893   }
894   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
895     if (flag & SOR_ZERO_INITIAL_GUESS) {
896       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
897     }
898     ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
899                             mat->Mvctx); CHKERRQ(ierr);
900     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
901                             mat->Mvctx); CHKERRQ(ierr);
902     while (its--) {
903       for ( i=m-1; i>-1; i-- ) {
904         n    = A->i[i+1] - A->i[i];
905         idx  = A->j + A->i[i] - 1;
906         v    = A->a + A->i[i] - 1;
907         sum  = b[i];
908         SPARSEDENSEMDOT(sum,xs,v,idx,n);
909         d    = shift + A->a[diag[i]-1];
910         n    = B->i[i+1] - B->i[i];
911         idx  = B->j + B->i[i] - 1;
912         v    = B->a + B->i[i] - 1;
913         SPARSEDENSEMDOT(sum,ls,v,idx,n);
914         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
915       }
916     }
917   }
918   return 0;
919 }
920 
921 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
922                              int *nzalloc,int *mem)
923 {
924   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
925   Mat        A = mat->A, B = mat->B;
926   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
927 
928   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
929   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
930   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
931   if (flag == MAT_LOCAL) {
932     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
933   } else if (flag == MAT_GLOBAL_MAX) {
934     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
935     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
936   } else if (flag == MAT_GLOBAL_SUM) {
937     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
938     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
939   }
940   return 0;
941 }
942 
943 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
944 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
945 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
946 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
947 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
948 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
949 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
950 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
951 
952 static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
953 {
954   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
955 
956   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
957     MatSetOption(aij->A,op);
958     MatSetOption(aij->B,op);
959   }
960   else if (op == YES_NEW_NONZERO_LOCATIONS) {
961     MatSetOption(aij->A,op);
962     MatSetOption(aij->B,op);
963   }
964   else if (op == COLUMN_ORIENTED) SETERRQ(1,"Column oriented not supported");
965   return 0;
966 }
967 
968 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
969 {
970   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
971   *m = mat->M; *n = mat->N;
972   return 0;
973 }
974 
975 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
976 {
977   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
978   *m = mat->m; *n = mat->n;
979   return 0;
980 }
981 
982 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
983 {
984   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
985   *m = mat->rstart; *n = mat->rend;
986   return 0;
987 }
988 
989 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
990 {
991   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
992   Scalar     *vworkA, *vworkB, **pvA, **pvB;
993   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
994   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
995 
996   if (row < rstart || row >= rend)
997     SETERRQ(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.")
998   lrow = row - rstart;
999 
1000   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1001   if (!v)   {pvA = 0; pvB = 0;}
1002   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1003   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1004   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1005   nztot = nzA + nzB;
1006 
1007   if (v  || idx) {
1008     if (nztot) {
1009       /* Sort by increasing column numbers, assuming A and B already sorted */
1010       int imark;
1011       if (mat->assembled) {
1012         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1013       }
1014       if (v) {
1015         *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1016         for ( i=0; i<nzB; i++ ) {
1017           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1018           else break;
1019         }
1020         imark = i;
1021         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1022         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1023       }
1024       if (idx) {
1025         *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1026         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1027         for ( i=0; i<nzB; i++ ) {
1028           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1029           else break;
1030         }
1031         imark = i;
1032         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1033         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1034       }
1035     }
1036     else {*idx = 0; *v=0;}
1037   }
1038   *nz = nztot;
1039   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1040   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1041   return 0;
1042 }
1043 
1044 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1045 {
1046   if (idx) PETSCFREE(*idx);
1047   if (v) PETSCFREE(*v);
1048   return 0;
1049 }
1050 
1051 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
1052 {
1053   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1054   int ierr;
1055   if (aij->numtids == 1) {
1056     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1057   } else
1058     SETERRQ(1,"MatNorm_MPIAIJ:  not yet supported in parallel.");
1059   return 0;
1060 }
1061 
1062 static int MatTranspose_MPIAIJ(Mat A,Mat *Bin)
1063 {
1064   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1065   int        ierr;
1066   Mat        B;
1067   Mat_AIJ    *Aloc;
1068   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1069   Scalar     *array;
1070 
1071   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1072   CHKERRQ(ierr);
1073 
1074   /* copy over the A part */
1075   Aloc = (Mat_AIJ*) a->A->data;
1076   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1077   row = a->rstart;
1078   for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;}
1079   for ( i=0; i<m; i++ ) {
1080       ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES);
1081       CHKERRQ(ierr);
1082       row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1083   }
1084   aj = Aloc->j;
1085   for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;}
1086 
1087   /* copy over the B part */
1088   Aloc = (Mat_AIJ*) a->B->data;
1089   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1090   row = a->rstart;
1091   ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
1092   for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];}
1093   for ( i=0; i<m; i++ ) {
1094     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES);
1095     CHKERRQ(ierr);
1096     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1097   }
1098   PETSCFREE(ct);
1099   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1100   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1101   *Bin = B;
1102   return 0;
1103 }
1104 
1105 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1106 static int MatCopyPrivate_MPIAIJ(Mat,Mat *);
1107 
1108 /* -------------------------------------------------------------------*/
1109 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1110        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1111        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1112        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1113        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1114        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1115        MatLUFactor_MPIAIJ,0,
1116        MatRelax_MPIAIJ,
1117        MatTranspose_MPIAIJ,
1118        MatGetInfo_MPIAIJ,0,
1119        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1120        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1121        0,
1122        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1123        MatGetReordering_MPIAIJ,
1124        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1125        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1126        MatILUFactorSymbolic_MPIAIJ,0,
1127        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
1128 
1129 /*@
1130    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1131    (the default parallel PETSc format).
1132 
1133    Input Parameters:
1134 .  comm - MPI communicator
1135 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1136 .  n - number of local columns (or PETSC_DECIDE to have it calculated
1137            if N is given)
1138 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1139 .  N - number of global columns (or PETSC_DECIDE to have it calculated
1140            if n is given)
1141 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1142            (same for all local rows)
1143 .  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1144            or null (possibly different for each row).  You must leave room
1145            for the diagonal entry even if it is zero.
1146 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1147            submatrix (same for all local rows).
1148 .  o_nnz - number of nonzeros per row in off-diagonal portion of local
1149            submatrix or null (possibly different for each row).
1150 
1151    Output Parameter:
1152 .  newmat - the matrix
1153 
1154    Notes:
1155    The AIJ format (also called the Yale sparse matrix format or
1156    compressed row storage) is fully compatible with standard Fortran 77
1157    storage.  That is, the stored row and column indices begin at
1158    one, not zero.  See the users manual for further details.
1159 
1160    The user MUST specify either the local or global matrix dimensions
1161    (possibly both).
1162 
1163    Storage Information:
1164    For a square global matrix we define each processor's diagonal portion
1165    to be its local rows and the corresponding columns (a square submatrix);
1166    each processor's off-diagonal portion encompasses the remainder of the
1167    local matrix (a rectangular submatrix).
1168 
1169    The user can specify preallocated storage for the diagonal part of
1170    the local submatrix with either d_nz or d_nnz (not both). Set both
1171    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1172    Likewise, specify preallocated storage for the off-diagonal part of
1173    the local submatrix with o_nz or o_nnz (not both).
1174 
1175 .keywords: matrix, aij, compressed row, sparse, parallel
1176 
1177 .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues()
1178 @*/
1179 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1180                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
1181 {
1182   Mat          mat;
1183   Mat_MPIAIJ   *aij;
1184   int          ierr, i,sum[2],work[2];
1185   *newmat         = 0;
1186   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1187   PLogObjectCreate(mat);
1188   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
1189   mat->ops        = &MatOps;
1190   mat->destroy    = MatDestroy_MPIAIJ;
1191   mat->view       = MatView_MPIAIJ;
1192   mat->factor     = 0;
1193 
1194   aij->insertmode = NOTSETVALUES;
1195   MPI_Comm_rank(comm,&aij->mytid);
1196   MPI_Comm_size(comm,&aij->numtids);
1197 
1198   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1199     work[0] = m; work[1] = n;
1200     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
1201     if (M == PETSC_DECIDE) M = sum[0];
1202     if (N == PETSC_DECIDE) N = sum[1];
1203   }
1204   if (m == PETSC_DECIDE)
1205     {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
1206   if (n == PETSC_DECIDE)
1207     {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
1208   aij->m = m;
1209   aij->n = n;
1210   aij->N = N;
1211   aij->M = M;
1212 
1213   /* build local table of row and column ownerships */
1214   aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
1215   CHKPTRQ(aij->rowners);
1216   aij->cowners = aij->rowners + aij->numtids + 1;
1217   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
1218   aij->rowners[0] = 0;
1219   for ( i=2; i<=aij->numtids; i++ ) {
1220     aij->rowners[i] += aij->rowners[i-1];
1221   }
1222   aij->rstart = aij->rowners[aij->mytid];
1223   aij->rend   = aij->rowners[aij->mytid+1];
1224   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
1225   aij->cowners[0] = 0;
1226   for ( i=2; i<=aij->numtids; i++ ) {
1227     aij->cowners[i] += aij->cowners[i-1];
1228   }
1229   aij->cstart = aij->cowners[aij->mytid];
1230   aij->cend   = aij->cowners[aij->mytid+1];
1231 
1232 
1233   ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
1234   CHKERRQ(ierr);
1235   PLogObjectParent(mat,aij->A);
1236   ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
1237   CHKERRQ(ierr);
1238   PLogObjectParent(mat,aij->B);
1239 
1240   /* build cache for off array entries formed */
1241   ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
1242   aij->colmap    = 0;
1243   aij->garray    = 0;
1244 
1245   /* stuff used for matrix vector multiply */
1246   aij->lvec      = 0;
1247   aij->Mvctx     = 0;
1248   aij->assembled = 0;
1249 
1250   *newmat = mat;
1251   return 0;
1252 }
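/*
   Added usage sketch (an illustration, not part of the library source; the
   sizes, nonzero estimates, and indices below are made-up values):

       Mat    mat;
       int    ierr, i = 0, j = 0;
       Scalar v = 1.0;

       ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                              100,100,5,0,2,0,&mat); CHKERRQ(ierr);
       ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERTVALUES); CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatDestroy(mat); CHKERRQ(ierr);

   Off-processor entries passed to MatSetValues() are stashed and communicated
   during assembly, as implemented in MatAssemblyBegin/End_MPIAIJ() above.
*/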
1253 
1254 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
1255 {
1256   Mat        mat;
1257   Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
1258   int        ierr, len;
1259   *newmat      = 0;
1260 
1261   if (!oldmat->assembled) SETERRQ(1,"Cannot copy unassembled matrix");
1262   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1263   PLogObjectCreate(mat);
1264   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
1265   mat->ops        = &MatOps;
1266   mat->destroy    = MatDestroy_MPIAIJ;
1267   mat->view       = MatView_MPIAIJ;
1268   mat->factor     = matin->factor;
1269 
1270   aij->m          = oldmat->m;
1271   aij->n          = oldmat->n;
1272   aij->M          = oldmat->M;
1273   aij->N          = oldmat->N;
1274 
1275   aij->assembled  = 1;
1276   aij->rstart     = oldmat->rstart;
1277   aij->rend       = oldmat->rend;
1278   aij->cstart     = oldmat->cstart;
1279   aij->cend       = oldmat->cend;
1280   aij->numtids    = oldmat->numtids;
1281   aij->mytid      = oldmat->mytid;
1282   aij->insertmode = NOTSETVALUES;
1283 
1284   aij->rowners        = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
1285   CHKPTRQ(aij->rowners);
1286   PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1287   ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
1288   if (oldmat->colmap) {
1289     aij->colmap      = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
1290     CHKPTRQ(aij->colmap);
1291     PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
1292   } else aij->colmap = 0;
1293   if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) {
1294     aij->garray      = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray);
1295     PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
1296   } else aij->garray = 0;
1297 
1298   ierr =  VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
1299   PLogObjectParent(mat,aij->lvec);
1300   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
1301   PLogObjectParent(mat,aij->Mvctx);
1302   ierr =  MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
1303   PLogObjectParent(mat,aij->A);
1304   ierr =  MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
1305   PLogObjectParent(mat,aij->B);
1306   *newmat = mat;
1307   return 0;
1308 }
1309