xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4c04a9de2e624de0fa2e8b46feb619f94d1e86ab)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.60 1995/07/13 16:15:46 curfman Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
/* local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  This is done in a non-scalable way since the
length of colmap equals the global matrix length.
*/
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
18   int        n = B->n,i;
19   aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
20   PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
21   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
22   return 0;
23 }
24 
25 extern int DisAssemble_MPIAIJ(Mat);
26 
27 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
28 {
29   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
30   int ierr;
31   if (aij->numtids == 1) {
32     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
33   } else
34     SETERRQ(1,"MatGetReordering_MPIAIJ:  not yet supported in parallel");
35   return 0;
36 }
37 
/*
   Inserts or adds a logically dense m-by-n block of values into the
   parallel matrix.

   Rows owned by this processor are routed either to the diagonal
   block (aij->A) or the off-diagonal block (aij->B); rows owned by
   other processors are cached in the stash and communicated during
   MatAssemblyBegin/End().

   Mixing INSERTVALUES and ADDVALUES between assemblies is refused,
   since the stash carries a single InsertMode.
*/
static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                            int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* this row is locally owned */
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column lies in the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            /* translate global column to local off-diagonal column
               (colmap stores local index + 1; 0 means absent) */
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] - 1;
            /* column absent from the current nonzero pattern: must
               disassemble so a new nonzero can be introduced, and
               revert to the global column index */
            if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      /* off-processor row: cache for assembly-time communication */
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
      CHKERRQ(ierr);
    }
  }
  return 0;
}
82 
83 /*
84     the assembly code is a lot like the code for vectors, we should
85     sometime derive a single assembly code that can be used for
86     either case.
87 */
88 
/*
   Begins parallel assembly: every value stashed by MatSetValues for a
   row owned by another processor is shipped to its owner here.

   Each stashed entry travels as a triple (row, col, value), with the
   indices stored as Scalars so the whole message is a single array of
   MPIU_SCALAR.  Receives are posted with a worst-case length (3*nmax)
   since individual message lengths are unknown in advance.
*/
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         numtids = aij->numtids, *owners = aij->rowners;
  int         mytid = aij->mytid;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int         tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* (a bitwise OR of the modes detects any mixture across processors) */
  MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADDVALUES|INSERTVALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  /*  nprocs[j] = #entries destined for processor j; procs[j] = 0/1 flag */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nreceives = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack the triples; starts[] is advanced as each entry is packed */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PETSCFREE(owner);
  /* rebuild starts[], which was consumed by the packing loop above */
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* save the in-flight communication state for MatAssemblyEnd */
  aij->svalues    = svalues;    aij->rvalues = rvalues;
  aij->nsends     = nsends;     aij->nrecvs = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
189 extern int MatSetUpMultiply_MPIAIJ(Mat);
190 
/*
   Completes parallel assembly: drains the receives posted by
   MatAssemblyBegin, inserts the received (row,col,value) triples into
   the local blocks, waits on the outstanding sends, then assembles
   the two sequential blocks.  On FINAL_ASSEMBLY the matrix-vector
   multiply scatter context is set up.
*/
static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  int        ierr;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;  /* each logical entry is a (row,col,value) triple */
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
          col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          /* translate global column to local off-diagonal column */
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] - 1;
          /* column absent from the current pattern: disassemble so a
             new nonzero can be introduced; revert to the global index */
          if (col < 0  && !((Mat_AIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);

  aij->insertmode = NOTSETVALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.
     (MPI_PROD of the assembled flags is 0 iff someone disassembled) */
  MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
                 MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}
262 
263 static int MatZeroEntries_MPIAIJ(Mat A)
264 {
265   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
266   int ierr;
267   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
268   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
269   return 0;
270 }
271 
/* again this uses the same basic strategy as in the assembly and
   scatter create routines; we should try to do it systematically
   if we can figure out the proper level of generality. */

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
/*
   Zeros the (globally numbered) rows listed in is, optionally placing
   *diag on the diagonal of zeroed locally-owned rows.

   Since callers may list rows owned by other processors, the row
   indices are first exchanged (same send/receive pattern as the
   assembly code); each processor then zaps the rows it owns via
   MatZeroRows on its two sequential blocks.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled)
    SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix first");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /*  nprocs[j] = #rows destined for processor j; procs[j] = 0/1 flag */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nrecvs = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives:   */
  rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack the row indices; starts[] is advanced as entries are packed */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[], consumed by the packing loop above */
  starts[0] = 0;
  for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts);

  base = owners[mytid];  /* first globally numbered row owned here */

  /*  wait on receives */
  lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PETSCFREE(recv_waits);

  /* move the data into the send scatter */
  /* gather all received row indices, converted to local numbering */
  lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PETSCFREE(rvalues); PETSCFREE(lens);
  PETSCFREE(owner); PETSCFREE(nprocs);

  /* actually zap the local rows */
  /* diag only applied to the diagonal block; see the BUG note above */
  ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp);
  CHKERRQ(ierr);  PETSCFREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(send_waits); PETSCFREE(svalues);


  return 0;
}
408 
409 static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
410 {
411   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
412   int        ierr;
413   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first");
414   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
415   CHKERRQ(ierr);
416   ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
417   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
418   CHKERRQ(ierr);
419   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
420   return 0;
421 }
422 
423 static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
424 {
425   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
426   int        ierr;
427   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first");
428   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
429   CHKERRQ(ierr);
430   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
431   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
432   CHKERRQ(ierr);
433   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
434   return 0;
435 }
436 
437 static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
438 {
439   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
440   int        ierr;
441 
442   if (!aij->assembled)
443     SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix first");
444   /* do nondiagonal part */
445   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
446   /* send it on its way */
447   ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES,
448            (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
449   /* do local part */
450   ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
451   /* receive remote parts: note this assumes the values are not actually */
452   /* inserted in yy until the next line, which is true for my implementation*/
453   /* but is not perhaps always true. */
454   ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES,
455          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
456   return 0;
457 }
458 
459 static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
460 {
461   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
462   int        ierr;
463 
464   if (!aij->assembled)
465     SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix first");
466   /* do nondiagonal part */
467   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
468   /* send it on its way */
469   ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES,
470          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
471   /* do local part */
472   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
473   /* receive remote parts: note this assumes the values are not actually */
474   /* inserted in yy until the next line, which is true for my implementation*/
475   /* but is not perhaps always true. */
476   ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES,
477        (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
478   return 0;
479 }
480 
481 /*
482   This only works correctly for square matrices where the subblock A->A is the
483    diagonal block
484 */
485 static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
486 {
487   Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
488   if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix first");
489   return MatGetDiagonal(A->A,v);
490 }
491 
492 static int MatDestroy_MPIAIJ(PetscObject obj)
493 {
494   Mat        mat = (Mat) obj;
495   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
496   int        ierr;
497 #if defined(PETSC_LOG)
498   PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
499 #endif
500   PETSCFREE(aij->rowners);
501   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
502   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
503   if (aij->colmap) PETSCFREE(aij->colmap);
504   if (aij->garray) PETSCFREE(aij->garray);
505   if (aij->lvec) VecDestroy(aij->lvec);
506   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
507   PETSCFREE(aij);
508   PLogObjectDestroy(mat);
509   PETSCHEADERDESTROY(mat);
510   return 0;
511 }
512 #include "draw.h"
513 #include "pviewer.h"
514 
/*
   Views the parallel matrix.

   For a plain FILE viewer each processor prints its local row/column
   ranges and its two sequential blocks, sequentialized with
   MPIU_Seq_begin/end.  For FILES or DRAW viewers the whole matrix is
   gathered onto processor 0 (via a temporary parallel matrix whose
   rows all live there) and viewed from that processor.
*/
static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix first");
  if (!viewer) { /* so that viewers may be used from debuggers */
    viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) {
    FILE *fd = ViewerFileGetPointer_Private(viewer);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
             aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
             aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) ||
            vobj->cookie == DRAW_COOKIE) {
    int numtids = aij->numtids, mytid = aij->mytid;
    if (numtids == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat     A;
      Mat_AIJ *Aloc;
      int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar  *a;

      /* processor 0 gets all M rows; everyone else gets none */
      if (!mytid) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
      }
      CHKERRQ(ierr);

      /* copy over the A part */
      /* NOTE(review): the +/-1 adjustments below indicate aj holds
         1-based column indices; shifted in place to global numbering
         for the MatSetValues calls and restored afterwards */
      Aloc = (Mat_AIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      /* undo the in-place shift of the column indices */
      aj = Aloc->j;
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;}

      /* copy over the B part */
      Aloc = (Mat_AIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      /* map local off-diagonal columns to global columns via garray */
      for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PETSCFREE(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!mytid) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}
594 
595 extern int MatMarkDiag_AIJ(Mat_AIJ  *);
/*
    This has to provide several versions:

     1) per sequential
     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) color, updating outer values between colors.
*/
604 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
605                            double shift,int its,Vec xx)
606 {
607   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
608   Mat        AA = mat->A, BB = mat->B;
609   Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
610   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
611   int        ierr,*idx, *diag;
612   int        n = mat->n, m = mat->m, i;
613   Vec        tt;
614 
615   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix first");
616 
617   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
618   xs = x -1; /* shift by one for index start of 1 */
619   ls--;
620   if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
621   diag = A->diag;
622   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
623     SETERRQ(1,"MatRelax_MPIAIJ:option not yet support");
624   }
625   if (flag & SOR_EISENSTAT) {
626     /* Let  A = L + U + D; where L is lower trianglar,
627     U is upper triangular, E is diagonal; This routine applies
628 
629             (L + E)^{-1} A (U + E)^{-1}
630 
631     to a vector efficiently using Eisenstat's trick. This is for
632     the case of SSOR preconditioner, so E is D/omega where omega
633     is the relaxation factor.
634     */
635     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
636     VecGetArray(tt,&t);
637     scale = (2.0/omega) - 1.0;
638     /*  x = (E + U)^{-1} b */
639     VecSet(&zero,mat->lvec);
640     ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
641                               mat->Mvctx); CHKERRQ(ierr);
642     for ( i=m-1; i>-1; i-- ) {
643       n    = A->i[i+1] - diag[i] - 1;
644       idx  = A->j + diag[i];
645       v    = A->a + diag[i];
646       sum  = b[i];
647       SPARSEDENSEMDOT(sum,xs,v,idx,n);
648       d    = shift + A->a[diag[i]-1];
649       n    = B->i[i+1] - B->i[i];
650       idx  = B->j + B->i[i] - 1;
651       v    = B->a + B->i[i] - 1;
652       SPARSEDENSEMDOT(sum,ls,v,idx,n);
653       x[i] = omega*(sum/d);
654     }
655     ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
656                             mat->Mvctx); CHKERRQ(ierr);
657 
658     /*  t = b - (2*E - D)x */
659     v = A->a;
660     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }
661 
662     /*  t = (E + L)^{-1}t */
663     ts = t - 1; /* shifted by one for index start of a or mat->j*/
664     diag = A->diag;
665     VecSet(&zero,mat->lvec);
666     ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
667                                                  mat->Mvctx); CHKERRQ(ierr);
668     for ( i=0; i<m; i++ ) {
669       n    = diag[i] - A->i[i];
670       idx  = A->j + A->i[i] - 1;
671       v    = A->a + A->i[i] - 1;
672       sum  = t[i];
673       SPARSEDENSEMDOT(sum,ts,v,idx,n);
674       d    = shift + A->a[diag[i]-1];
675       n    = B->i[i+1] - B->i[i];
676       idx  = B->j + B->i[i] - 1;
677       v    = B->a + B->i[i] - 1;
678       SPARSEDENSEMDOT(sum,ls,v,idx,n);
679       t[i] = omega*(sum/d);
680     }
681     ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
682                                                     mat->Mvctx); CHKERRQ(ierr);
683     /*  x = x + t */
684     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
685     VecDestroy(tt);
686     return 0;
687   }
688 
689 
690   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
691     if (flag & SOR_ZERO_INITIAL_GUESS) {
692       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
693     }
694     else {
695       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
696       CHKERRQ(ierr);
697       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
698       CHKERRQ(ierr);
699     }
700     while (its--) {
701       /* go down through the rows */
702       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
703                               mat->Mvctx); CHKERRQ(ierr);
704       for ( i=0; i<m; i++ ) {
705         n    = A->i[i+1] - A->i[i];
706         idx  = A->j + A->i[i] - 1;
707         v    = A->a + A->i[i] - 1;
708         sum  = b[i];
709         SPARSEDENSEMDOT(sum,xs,v,idx,n);
710         d    = shift + A->a[diag[i]-1];
711         n    = B->i[i+1] - B->i[i];
712         idx  = B->j + B->i[i] - 1;
713         v    = B->a + B->i[i] - 1;
714         SPARSEDENSEMDOT(sum,ls,v,idx,n);
715         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
716       }
717       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
718                             mat->Mvctx); CHKERRQ(ierr);
719       /* come up through the rows */
720       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
721                               mat->Mvctx); CHKERRQ(ierr);
722       for ( i=m-1; i>-1; i-- ) {
723         n    = A->i[i+1] - A->i[i];
724         idx  = A->j + A->i[i] - 1;
725         v    = A->a + A->i[i] - 1;
726         sum  = b[i];
727         SPARSEDENSEMDOT(sum,xs,v,idx,n);
728         d    = shift + A->a[diag[i]-1];
729         n    = B->i[i+1] - B->i[i];
730         idx  = B->j + B->i[i] - 1;
731         v    = B->a + B->i[i] - 1;
732         SPARSEDENSEMDOT(sum,ls,v,idx,n);
733         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
734       }
735       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
736                             mat->Mvctx); CHKERRQ(ierr);
737     }
738   }
739   else if (flag & SOR_FORWARD_SWEEP){
740     if (flag & SOR_ZERO_INITIAL_GUESS) {
741       VecSet(&zero,mat->lvec);
742       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
743                               mat->Mvctx); CHKERRQ(ierr);
744       for ( i=0; i<m; i++ ) {
745         n    = diag[i] - A->i[i];
746         idx  = A->j + A->i[i] - 1;
747         v    = A->a + A->i[i] - 1;
748         sum  = b[i];
749         SPARSEDENSEMDOT(sum,xs,v,idx,n);
750         d    = shift + A->a[diag[i]-1];
751         n    = B->i[i+1] - B->i[i];
752         idx  = B->j + B->i[i] - 1;
753         v    = B->a + B->i[i] - 1;
754         SPARSEDENSEMDOT(sum,ls,v,idx,n);
755         x[i] = omega*(sum/d);
756       }
757       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
758                             mat->Mvctx); CHKERRQ(ierr);
759       its--;
760     }
761     while (its--) {
762       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
763       CHKERRQ(ierr);
764       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
765       CHKERRQ(ierr);
766       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
767                               mat->Mvctx); CHKERRQ(ierr);
768       for ( i=0; i<m; i++ ) {
769         n    = A->i[i+1] - A->i[i];
770         idx  = A->j + A->i[i] - 1;
771         v    = A->a + A->i[i] - 1;
772         sum  = b[i];
773         SPARSEDENSEMDOT(sum,xs,v,idx,n);
774         d    = shift + A->a[diag[i]-1];
775         n    = B->i[i+1] - B->i[i];
776         idx  = B->j + B->i[i] - 1;
777         v    = B->a + B->i[i] - 1;
778         SPARSEDENSEMDOT(sum,ls,v,idx,n);
779         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
780       }
781       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
782                             mat->Mvctx); CHKERRQ(ierr);
783     }
784   }
785   else if (flag & SOR_BACKWARD_SWEEP){
786     if (flag & SOR_ZERO_INITIAL_GUESS) {
787       VecSet(&zero,mat->lvec);
788       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
789                               mat->Mvctx); CHKERRQ(ierr);
790       for ( i=m-1; i>-1; i-- ) {
791         n    = A->i[i+1] - diag[i] - 1;
792         idx  = A->j + diag[i];
793         v    = A->a + diag[i];
794         sum  = b[i];
795         SPARSEDENSEMDOT(sum,xs,v,idx,n);
796         d    = shift + A->a[diag[i]-1];
797         n    = B->i[i+1] - B->i[i];
798         idx  = B->j + B->i[i] - 1;
799         v    = B->a + B->i[i] - 1;
800         SPARSEDENSEMDOT(sum,ls,v,idx,n);
801         x[i] = omega*(sum/d);
802       }
803       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
804                             mat->Mvctx); CHKERRQ(ierr);
805       its--;
806     }
807     while (its--) {
808       ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
809                             mat->Mvctx); CHKERRQ(ierr);
810       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
811                             mat->Mvctx); CHKERRQ(ierr);
812       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
813                               mat->Mvctx); CHKERRQ(ierr);
814       for ( i=m-1; i>-1; i-- ) {
815         n    = A->i[i+1] - A->i[i];
816         idx  = A->j + A->i[i] - 1;
817         v    = A->a + A->i[i] - 1;
818         sum  = b[i];
819         SPARSEDENSEMDOT(sum,xs,v,idx,n);
820         d    = shift + A->a[diag[i]-1];
821         n    = B->i[i+1] - B->i[i];
822         idx  = B->j + B->i[i] - 1;
823         v    = B->a + B->i[i] - 1;
824         SPARSEDENSEMDOT(sum,ls,v,idx,n);
825         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
826       }
827       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
828                             mat->Mvctx); CHKERRQ(ierr);
829     }
830   }
831   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
832     if (flag & SOR_ZERO_INITIAL_GUESS) {
833       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
834     }
835     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
836     CHKERRQ(ierr);
837     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
838     CHKERRQ(ierr);
839     while (its--) {
840       /* go down through the rows */
841       for ( i=0; i<m; i++ ) {
842         n    = A->i[i+1] - A->i[i];
843         idx  = A->j + A->i[i] - 1;
844         v    = A->a + A->i[i] - 1;
845         sum  = b[i];
846         SPARSEDENSEMDOT(sum,xs,v,idx,n);
847         d    = shift + A->a[diag[i]-1];
848         n    = B->i[i+1] - B->i[i];
849         idx  = B->j + B->i[i] - 1;
850         v    = B->a + B->i[i] - 1;
851         SPARSEDENSEMDOT(sum,ls,v,idx,n);
852         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
853       }
854       /* come up through the rows */
855       for ( i=m-1; i>-1; i-- ) {
856         n    = A->i[i+1] - A->i[i];
857         idx  = A->j + A->i[i] - 1;
858         v    = A->a + A->i[i] - 1;
859         sum  = b[i];
860         SPARSEDENSEMDOT(sum,xs,v,idx,n);
861         d    = shift + A->a[diag[i]-1];
862         n    = B->i[i+1] - B->i[i];
863         idx  = B->j + B->i[i] - 1;
864         v    = B->a + B->i[i] - 1;
865         SPARSEDENSEMDOT(sum,ls,v,idx,n);
866         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
867       }
868     }
869   }
870   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
871     if (flag & SOR_ZERO_INITIAL_GUESS) {
872       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
873     }
874     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
875     CHKERRQ(ierr);
876     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
877     CHKERRQ(ierr);
878     while (its--) {
879       for ( i=0; i<m; i++ ) {
880         n    = A->i[i+1] - A->i[i];
881         idx  = A->j + A->i[i] - 1;
882         v    = A->a + A->i[i] - 1;
883         sum  = b[i];
884         SPARSEDENSEMDOT(sum,xs,v,idx,n);
885         d    = shift + A->a[diag[i]-1];
886         n    = B->i[i+1] - B->i[i];
887         idx  = B->j + B->i[i] - 1;
888         v    = B->a + B->i[i] - 1;
889         SPARSEDENSEMDOT(sum,ls,v,idx,n);
890         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
891       }
892     }
893   }
894   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
895     if (flag & SOR_ZERO_INITIAL_GUESS) {
896       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
897     }
898     ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
899                             mat->Mvctx); CHKERRQ(ierr);
900     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
901                             mat->Mvctx); CHKERRQ(ierr);
902     while (its--) {
903       for ( i=m-1; i>-1; i-- ) {
904         n    = A->i[i+1] - A->i[i];
905         idx  = A->j + A->i[i] - 1;
906         v    = A->a + A->i[i] - 1;
907         sum  = b[i];
908         SPARSEDENSEMDOT(sum,xs,v,idx,n);
909         d    = shift + A->a[diag[i]-1];
910         n    = B->i[i+1] - B->i[i];
911         idx  = B->j + B->i[i] - 1;
912         v    = B->a + B->i[i] - 1;
913         SPARSEDENSEMDOT(sum,ls,v,idx,n);
914         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
915       }
916     }
917   }
918   return 0;
919 }
920 
921 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
922                              int *nzalloc,int *mem)
923 {
924   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
925   Mat        A = mat->A, B = mat->B;
926   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
927 
928   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
929   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
930   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
931   if (flag == MAT_LOCAL) {
932     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
933   } else if (flag == MAT_GLOBAL_MAX) {
934     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
935     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
936   } else if (flag == MAT_GLOBAL_SUM) {
937     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
938     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
939   }
940   return 0;
941 }
942 
943 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
944 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
945 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
946 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
947 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
948 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
949 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
950 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
951 
952 static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
953 {
954   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
955 
956   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
957     MatSetOption(aij->A,op);
958     MatSetOption(aij->B,op);
959   }
960   else if (op == YES_NEW_NONZERO_LOCATIONS) {
961     MatSetOption(aij->A,op);
962     MatSetOption(aij->B,op);
963   }
964   else if (op == COLUMN_ORIENTED)
965     SETERRQ(1,"MatSetOption_MPIAIJ:Column oriented not supported");
966   return 0;
967 }
968 
969 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
970 {
971   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
972   *m = mat->M; *n = mat->N;
973   return 0;
974 }
975 
976 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
977 {
978   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
979   *m = mat->m; *n = mat->N;
980   return 0;
981 }
982 
983 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
984 {
985   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
986   *m = mat->rstart; *n = mat->rend;
987   return 0;
988 }
989 
990 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
991 {
992   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
993   Scalar     *vworkA, *vworkB, **pvA, **pvB;
994   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
995   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
996 
997   if (row < rstart || row >= rend)
998     SETERRQ(1,"MatGetRow_MPIAIJ:Currently you can get only local rows")
999   lrow = row - rstart;
1000 
1001   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1002   if (!v)   {pvA = 0; pvB = 0;}
1003   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1004   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1005   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1006   nztot = nzA + nzB;
1007 
1008   if (v  || idx) {
1009     if (nztot) {
1010       /* Sort by increasing column numbers, assuming A and B already sorted */
1011       int imark;
1012       if (mat->assembled) {
1013         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1014       }
1015       if (v) {
1016         *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1017         for ( i=0; i<nzB; i++ ) {
1018           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1019           else break;
1020         }
1021         imark = i;
1022         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1023         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1024       }
1025       if (idx) {
1026         *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1027         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1028         for ( i=0; i<nzB; i++ ) {
1029           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1030           else break;
1031         }
1032         imark = i;
1033         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1034         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1035       }
1036     }
1037     else {*idx = 0; *v=0;}
1038   }
1039   *nz = nztot;
1040   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1041   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1042   return 0;
1043 }
1044 
1045 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1046 {
1047   if (idx) PETSCFREE(*idx);
1048   if (v) PETSCFREE(*v);
1049   return 0;
1050 }
1051 
1052 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
1053 {
1054   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1055   int ierr;
1056   if (aij->numtids == 1) {
1057     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1058   } else
1059     SETERRQ(1,"MatNorm_MPIAIJ:not yet supported in parallel");
1060   return 0;
1061 }
1062 
static int MatTranspose_MPIAIJ(Mat A,Mat *Bin)
{
  /* Forms the transpose of A by inserting each locally owned row of A as
     a column of a new N x M parallel matrix B, then assembling B.  The
     sequential AIJ blocks store 1-based (Fortran-style) indices -- see
     the manpage note on compressed-row storage -- hence the "- 1"
     adjustments below. */
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  Mat        B;
  Mat_AIJ    *Aloc;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  /* dimensions are swapped; let PETSc choose the local decomposition */
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_AIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily shift the diagonal block's column indices IN PLACE so
     they carry the global column offset expected by MatSetValues */
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;}
  for ( i=0; i<m; i++ ) {
      /* row i of A becomes column (a->rstart + i) of B; aj/array are
         advanced to the start of the next row each iteration */
      ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES);
      CHKERRQ(ierr);
      row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  /* undo the in-place shift so A is left unmodified */
  aj = Aloc->j;
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;}

  /* copy over the B part */
  Aloc = (Mat_AIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* off-diagonal column numbers are mapped to global numbering through
     garray into a scratch array, so the block itself is not modified */
  ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES);
    CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PETSCFREE(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  *Bin = B;
  return 0;
}
1105 
1106 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1107 static int MatCopyPrivate_MPIAIJ(Mat,Mat *);
1108 
1109 /* -------------------------------------------------------------------*/
/* Function dispatch table for the MATMPIAIJ format.  Entries are
   POSITIONAL (the slot order is fixed by struct _MatOps in the matrix
   implementation header); a 0 marks an operation that is not
   implemented for this format. */
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
1129 
1130 /*@
1131    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1132    (the default parallel PETSc format).
1133 
1134    Input Parameters:
1135 .  comm - MPI communicator
1136 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1137 .  n - number of local columns (or PETSC_DECIDE to have calculated
1138            if N is given)
1139 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1140 .  N - number of global columns (or PETSC_DECIDE to have calculated
1141            if n is given)
1142 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1143            (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1145            or null (possibly different for each row).  You must leave room
1146            for the diagonal entry even if it is zero.
1147 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1148            submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
1150            submatrix or null (possibly different for each row).
1151 
1152    Output Parameter:
1153 .  newmat - the matrix
1154 
1155    Notes:
1156    The AIJ format (also called the Yale sparse matrix format or
1157    compressed row storage), is fully compatible with standard Fortran 77
1158    storage.  That is, the stored row and column indices begin at
1159    one, not zero.  See the users manual for further details.
1160 
1161    The user MUST specify either the local or global matrix dimensions
1162    (possibly both).
1163 
1164    Storage Information:
1165    For a square global matrix we define each processor's diagonal portion
1166    to be its local rows and the corresponding columns (a square submatrix);
1167    each processor's off-diagonal portion encompasses the remainder of the
1168    local matrix (a rectangular submatrix).
1169 
1170    The user can specify preallocated storage for the diagonal part of
1171    the local submatrix with either d_nz or d_nnz (not both). Set both
1172    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1173    Likewise, specify preallocated storage for the off-diagonal part of
1174    the local submatrix with o_nz or o_nnz (not both).
1175 
1176 .keywords: matrix, aij, compressed row, sparse, parallel
1177 
1178 .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues()
1179 @*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat          mat;
  Mat_MPIAIJ   *aij;
  int          ierr, i,sum[2],work[2];
  *newmat         = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
  mat->ops        = &MatOps;
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = 0;

  aij->insertmode = NOTSETVALUES;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  /* missing global sizes are obtained by summing the local ones ... */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  /* ... and missing local sizes by splitting the global ones as evenly
     as possible, the first (M % numtids) processors getting one extra */
  if (m == PETSC_DECIDE)
    {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == PETSC_DECIDE)
    {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m = m;
  aij->n = n;
  aij->N = N;
  aij->M = M;

  /* build local table of row and column ownerships */
  /* rowners and cowners share a single allocation; each needs
     numtids+1 entries, cowners starting at offset numtids+1 */
  aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTRQ(aij->rowners);
  aij->cowners = aij->rowners + aij->numtids + 1;
  /* gather every processor's m, then prefix-sum so rowners[p] is the
     first global row owned by processor p */
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  /* same construction for the column ownership table */
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = aij->cowners[aij->mytid+1];


  /* A holds the m x n diagonal portion; B holds the off-diagonal part
     (created with N columns here; it is compacted at assembly time) */
  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
  aij->colmap    = 0;
  aij->garray    = 0;

  /* stuff used for matrix vector multiply */
  aij->lvec      = 0;
  aij->Mvctx     = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}
1254 
1255 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
1256 {
1257   Mat        mat;
1258   Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
1259   int        ierr, len;
1260   *newmat      = 0;
1261 
1262   if (!oldmat->assembled)
1263     SETERRQ(1,"MatCopyPrivate_MPIAIJ:Cannot copy unassembled matrix");
1264   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1265   PLogObjectCreate(mat);
1266   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
1267   mat->ops        = &MatOps;
1268   mat->destroy    = MatDestroy_MPIAIJ;
1269   mat->view       = MatView_MPIAIJ;
1270   mat->factor     = matin->factor;
1271 
1272   aij->m          = oldmat->m;
1273   aij->n          = oldmat->n;
1274   aij->M          = oldmat->M;
1275   aij->N          = oldmat->N;
1276 
1277   aij->assembled  = 1;
1278   aij->rstart     = oldmat->rstart;
1279   aij->rend       = oldmat->rend;
1280   aij->cstart     = oldmat->cstart;
1281   aij->cend       = oldmat->cend;
1282   aij->numtids    = oldmat->numtids;
1283   aij->mytid      = oldmat->mytid;
1284   aij->insertmode = NOTSETVALUES;
1285 
1286   aij->rowners        = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
1287   CHKPTRQ(aij->rowners);
1288   PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1289   ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
1290   if (oldmat->colmap) {
1291     aij->colmap      = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
1292     CHKPTRQ(aij->colmap);
1293     PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
1294   } else aij->colmap = 0;
1295   if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) {
1296     aij->garray      = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray);
1297     PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
1298   } else aij->garray = 0;
1299 
1300   ierr =  VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
1301   PLogObjectParent(mat,aij->lvec);
1302   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
1303   PLogObjectParent(mat,aij->Mvctx);
1304   ierr =  MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
1305   PLogObjectParent(mat,aij->A);
1306   ierr =  MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
1307   PLogObjectParent(mat,aij->B);
1308   *newmat = mat;
1309   return 0;
1310 }
1311