xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 80b4ade8ca6f14c7a8695353deb39be397bcc1ce)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.63 1995/08/15 20:28:20 bsmith Exp curfman $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way since the
12 length of colmap equals the global number of columns in the matrix.
13 */
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
18   int        n = B->n,i;
19   aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
20   PLogObjectMemory(mat,aij->N*sizeof(int));
21   PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
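  /* colmap[global column] = local column + 1, so that an entry of 0 marks
     a global column that is not present in the off-diagonal part */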
22   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
23   return 0;
24 }
25 
26 extern int DisAssemble_MPIAIJ(Mat);
27 
28 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
29 {
30   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
31   int ierr;
32   if (aij->numtids == 1) {
33     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
34   } else
35     SETERRQ(1,"MatGetReordering_MPIAIJ:  not yet supported in parallel");
36   return 0;
37 }
38 
39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
40                             int *idxn,Scalar *v,InsertMode addv)
41 {
42   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
43   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
44   int        cstart = aij->cstart, cend = aij->cend,row,col;
45 
46   if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
47     SETERRQ(1,"MatSetValues_MPIAIJ:You cannot mix inserts and adds");
48   }
49   aij->insertmode = addv;
50   for ( i=0; i<m; i++ ) {
51     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
52     if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
53     if (idxm[i] >= rstart && idxm[i] < rend) {
54       row = idxm[i] - rstart;
55       for ( j=0; j<n; j++ ) {
56         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
57         if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
58         if (idxn[j] >= cstart && idxn[j] < cend){
59           col = idxn[j] - cstart;
60           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
61         }
62         else {
63           if (aij->assembled) {
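            /* once assembled, the off-diagonal part uses a compressed local
               column numbering; translate the global column through colmap.
               A result of -1 means the column is not yet present; if new
               nonzero locations are allowed, disassemble (revert to global
               column numbering) so the new entry can be inserted. */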
64             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
65             col = aij->colmap[idxn[j]] - 1;
66             if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
67               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
68               col =  idxn[j];
69             }
70           }
71           else col = idxn[j];
72           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
73         }
74       }
75     }
76     else {
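      /* this row is owned by another processor: stash the values locally;
         they are shipped to the owner during MatAssemblyBegin/End() */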
77       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
78       CHKERRQ(ierr);
79     }
80   }
81   return 0;
82 }
83 
84 /*
85     the assembly code is a lot like the code for vectors; we should
86     eventually derive a single assembly routine that can be used for
87     either case.
88 */
89 
90 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
91 {
92   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
93   MPI_Comm    comm = mat->comm;
94   int         numtids = aij->numtids, *owners = aij->rowners;
95   int         mytid = aij->mytid;
96   MPI_Request *send_waits,*recv_waits;
97   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
98   int         tag = mat->tag, *owner,*starts,count,ierr;
99   InsertMode  addv;
100   Scalar      *rvalues,*svalues;
101 
102   /* make sure all processors are using either INSERTVALUES or ADDVALUES */
103   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
104                 MPI_BOR,comm);
105   if (addv == (ADDVALUES|INSERTVALUES)) {
106     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
107   }
108   aij->insertmode = addv; /* in case this processor had no cache */
109 
110   /*  first count number of contributors to each processor */
111   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
112   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
113   owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
114   for ( i=0; i<aij->stash.n; i++ ) {
115     idx = aij->stash.idx[i];
116     for ( j=0; j<numtids; j++ ) {
117       if (idx >= owners[j] && idx < owners[j+1]) {
118         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
119       }
120     }
121   }
122   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
123 
124   /* inform other processors of number of messages and max length*/
125   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
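  /* after the first (sum) reduction, work[mytid] = number of messages this
     processor will receive; after the second (max) reduction, work[mytid] =
     an upper bound on the number of entries in any single incoming message */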
126   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
127   nreceives = work[mytid];
128   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
129   nmax = work[mytid];
130   PETSCFREE(work);
131 
132   /* post receives:
133        1) each message will consist of ordered triples
134      (global row, global column, value); we store the global indices
135      as Scalars to simplify the message passing.
136        2) since we don't know how long each individual message is we
137      allocate the largest needed buffer for each receive. Potentially
138      this is a lot of wasted space.
139 
140 
141        This could be done better.
142   */
143   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
144   CHKPTRQ(rvalues);
145   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
146   CHKPTRQ(recv_waits);
147   for ( i=0; i<nreceives; i++ ) {
148     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
149               comm,recv_waits+i);
150   }
151 
152   /* do sends:
153       1) starts[i] gives the starting index in svalues for stuff going to
154          the ith processor
155   */
156   svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
157   CHKPTRQ(svalues);
158   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
159   CHKPTRQ(send_waits);
160   starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
161   starts[0] = 0;
162   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
163   for ( i=0; i<aij->stash.n; i++ ) {
164     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
165     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
166     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
167   }
168   PETSCFREE(owner);
169   starts[0] = 0;
170   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
171   count = 0;
172   for ( i=0; i<numtids; i++ ) {
173     if (procs[i]) {
174       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
175                 comm,send_waits+count++);
176     }
177   }
178   PETSCFREE(starts); PETSCFREE(nprocs);
179 
180   /* Free cache space */
181   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
182 
183   aij->svalues    = svalues;    aij->rvalues = rvalues;
184   aij->nsends     = nsends;     aij->nrecvs = nreceives;
185   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
186   aij->rmax       = nmax;
187 
188   return 0;
189 }
190 extern int MatSetUpMultiply_MPIAIJ(Mat);
191 
192 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
193 {
194   int        ierr;
195   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
196   MPI_Status  *send_status,recv_status;
197   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
198   int         row,col,other_disassembled;
199   Scalar      *values,val;
200   InsertMode  addv = aij->insertmode;
201 
202   /*  wait on receives */
203   while (count) {
204     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
205     /* unpack receives into our local space */
206     values = aij->rvalues + 3*imdex*aij->rmax;
207     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
208     n = n/3;
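    /* each message contains n (global row, global column, value) triples */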
209     for ( i=0; i<n; i++ ) {
210       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
211       col = (int) PETSCREAL(values[3*i+1]);
212       val = values[3*i+2];
213       if (col >= aij->cstart && col < aij->cend) {
214           col -= aij->cstart;
215         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
216       }
217       else {
218         if (aij->assembled) {
219           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
220           col = aij->colmap[col] - 1;
221           if (col < 0  && !((Mat_AIJ*)(aij->A->data))->nonew) {
222             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
223             col = (int) PETSCREAL(values[3*i+1]);
224           }
225         }
226         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
227       }
228     }
229     count--;
230   }
231   PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);
232 
233   /* wait on sends */
234   if (aij->nsends) {
235     send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
236     CHKPTRQ(send_status);
237     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
238     PETSCFREE(send_status);
239   }
240   PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);
241 
242   aij->insertmode = NOTSETVALUES;
243   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
244   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
245 
246   /* determine if any processor has disassembled; if so we must
247      also disassemble ourselves, so that we may reassemble. */
248   MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
249                  MPI_INT,MPI_PROD,mat->comm);
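  /* the product is nonzero only if every processor is still assembled;
     if any processor has disassembled we must disassemble as well */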
250   if (aij->assembled && !other_disassembled) {
251     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
252   }
253 
254   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
255     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
256     aij->assembled = 1;
257   }
258   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
259   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
260 
261   return 0;
262 }
263 
264 static int MatZeroEntries_MPIAIJ(Mat A)
265 {
266   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
267   int ierr;
268   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
269   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
270   return 0;
271 }
272 
273 /* again this uses the same basic strategy as in the assembly and
274    scatter create routines; we should try to do it systematically
275    if we can figure out the proper level of generality. */
276 
277 /* the code does not do the diagonal entries correctly unless the
278    matrix is square and the column and row ownerships are identical.
279    This is a BUG. The only way to fix it seems to be to access
280    aij->A and aij->B directly and not through the MatZeroRows()
281    routine.
282 */
283 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
284 {
285   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
286   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
287   int            *procs,*nprocs,j,found,idx,nsends,*work;
288   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
289   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
290   int            *lens,imdex,*lrows,*values;
291   MPI_Comm       comm = A->comm;
292   MPI_Request    *send_waits,*recv_waits;
293   MPI_Status     recv_status,*send_status;
294   IS             istmp;
295 
296   if (!l->assembled)
297     SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix first");
298   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
299   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
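  /* Each processor ships the global row numbers it was given to the owning
     processors; each owner then zeros its own rows locally below. */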
300 
301   /*  first count number of contributors to each processor */
302   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
303   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
304   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
305   for ( i=0; i<N; i++ ) {
306     idx = rows[i];
307     found = 0;
308     for ( j=0; j<numtids; j++ ) {
309       if (idx >= owners[j] && idx < owners[j+1]) {
310         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
311       }
312     }
313     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
314   }
315   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
316 
317   /* inform other processors of number of messages and max length*/
318   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
319   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
320   nrecvs = work[mytid];
321   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
322   nmax = work[mytid];
323   PETSCFREE(work);
324 
325   /* post receives:   */
326   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
327   CHKPTRQ(rvalues);
328   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
329   CHKPTRQ(recv_waits);
330   for ( i=0; i<nrecvs; i++ ) {
331     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
332               comm,recv_waits+i);
333   }
334 
335   /* do sends:
336       1) starts[i] gives the starting index in svalues for stuff going to
337          the ith processor
338   */
339   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
340   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
341   CHKPTRQ(send_waits);
342   starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
343   starts[0] = 0;
344   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
345   for ( i=0; i<N; i++ ) {
346     svalues[starts[owner[i]]++] = rows[i];
347   }
348   ISRestoreIndices(is,&rows);
349 
350   starts[0] = 0;
351   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
352   count = 0;
353   for ( i=0; i<numtids; i++ ) {
354     if (procs[i]) {
355       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
356                 comm,send_waits+count++);
357     }
358   }
359   PETSCFREE(starts);
360 
361   base = owners[mytid];
362 
363   /*  wait on receives */
364   lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
365   source = lens + nrecvs;
366   count = nrecvs; slen = 0;
367   while (count) {
368     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
369     /* unpack receives into our local space */
370     MPI_Get_count(&recv_status,MPI_INT,&n);
371     source[imdex]  = recv_status.MPI_SOURCE;
372     lens[imdex]  = n;
373     slen += n;
374     count--;
375   }
376   PETSCFREE(recv_waits);
377 
378   /* move the data into the send scatter */
379   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
380   count = 0;
381   for ( i=0; i<nrecvs; i++ ) {
382     values = rvalues + i*nmax;
383     for ( j=0; j<lens[i]; j++ ) {
384       lrows[count++] = values[j] - base;
385     }
386   }
387   PETSCFREE(rvalues); PETSCFREE(lens);
388   PETSCFREE(owner); PETSCFREE(nprocs);
389 
390   /* actually zap the local rows */
391   ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp);
392   PLogObjectParent(A,istmp);
393   CHKERRQ(ierr);  PETSCFREE(lrows);
394   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
395   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
396   ierr = ISDestroy(istmp); CHKERRQ(ierr);
397 
398   /* wait on sends */
399   if (nsends) {
400     send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
401     CHKPTRQ(send_status);
402     MPI_Waitall(nsends,send_waits,send_status);
403     PETSCFREE(send_status);
404   }
405   PETSCFREE(send_waits); PETSCFREE(svalues);
406 
407 
408   return 0;
409 }
410 
411 static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
412 {
413   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
414   int        ierr;
415   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first");
416   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
417   CHKERRQ(ierr);
418   ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
419   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
420   CHKERRQ(ierr);
421   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
422   return 0;
423 }
424 
425 static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
426 {
427   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
428   int        ierr;
429   if (!aij->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix first");
430   ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
431   CHKERRQ(ierr);
432   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
433   ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
434   CHKERRQ(ierr);
435   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
436   return 0;
437 }
438 
439 static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
440 {
441   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
442   int        ierr;
443 
444   if (!aij->assembled)
445     SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix first");
446   /* do nondiagonal part */
447   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
448   /* send it on its way */
449   ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES,
450            (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
451   /* do local part */
452   ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
453   /* receive remote parts: note this assumes the values are not actually */
454   /* inserted in yy until the next line, which is true for this            */
455   /* implementation but may not always be true. */
456   ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES,
457          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
458   return 0;
459 }
460 
461 static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
462 {
463   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
464   int        ierr;
465 
466   if (!aij->assembled)
467     SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix first");
468   /* do nondiagonal part */
469   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
470   /* send it on its way */
471   ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES,
472          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
473   /* do local part */
474   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
475   /* receive remote parts: note this assumes the values are not actually */
476   /* inserted in zz until the next line, which is true for this            */
477   /* implementation but may not always be true. */
478   ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES,
479        (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
480   return 0;
481 }
482 
483 /*
484   This only works correctly for square matrices where the subblock A->A is the
485    diagonal block
486 */
487 static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
488 {
489   Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
490   if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix first");
491   return MatGetDiagonal(A->A,v);
492 }
493 
494 static int MatDestroy_MPIAIJ(PetscObject obj)
495 {
496   Mat        mat = (Mat) obj;
497   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
498   int        ierr;
499 #if defined(PETSC_LOG)
500   PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
501 #endif
502   PETSCFREE(aij->rowners);
503   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
504   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
505   if (aij->colmap) PETSCFREE(aij->colmap);
506   if (aij->garray) PETSCFREE(aij->garray);
507   if (aij->lvec) VecDestroy(aij->lvec);
508   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
509   PETSCFREE(aij);
510   PLogObjectDestroy(mat);
511   PETSCHEADERDESTROY(mat);
512   return 0;
513 }
514 #include "draw.h"
515 #include "pviewer.h"
516 
517 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
518 {
519   Mat        mat = (Mat) obj;
520   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
521   int        ierr;
522   PetscObject vobj = (PetscObject) viewer;
523 
524   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix first");
525   if (!viewer) { /* so that viewers may be used from debuggers */
526     viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer;
527   }
528   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
529   if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) {
530     FILE *fd = ViewerFileGetPointer_Private(viewer);
531     MPIU_Seq_begin(mat->comm,1);
532     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
533              aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
534              aij->cend);
535     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
536     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
537     fflush(fd);
538     MPIU_Seq_end(mat->comm,1);
539   }
540   else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) ||
541             vobj->cookie == DRAW_COOKIE) {
542     int numtids = aij->numtids, mytid = aij->mytid;
543     if (numtids == 1) {
544       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
545     }
546     else {
547       /* assemble the entire matrix onto first processor. */
548       Mat     A;
549       Mat_AIJ *Aloc;
550       int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
551       Scalar  *a;
552 
553       if (!mytid) {
554         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
555       }
556       else {
557         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
558       }
559       PLogObjectParent(mat,A);
560       CHKERRQ(ierr);
561 
562       /* copy over the A part */
563       Aloc = (Mat_AIJ*) aij->A->data;
564       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
565       row = aij->rstart;
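      /* temporarily shift the 1-based local column indices of A to 0-based
         global indices for MatSetValues(); they are shifted back below */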
566       for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;}
567       for ( i=0; i<m; i++ ) {
568         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES);
569         CHKERRQ(ierr);
570         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
571       }
572       aj = Aloc->j;
573       for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;}
574 
575       /* copy over the B part */
576       Aloc = (Mat_AIJ*) aij->B->data;
577       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
578       row = aij->rstart;
579       ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
580       for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];}
581       for ( i=0; i<m; i++ ) {
582         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES);
583         CHKERRQ(ierr);
584         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
585       }
586       PETSCFREE(ct);
587       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
588       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
589       if (!mytid) {
590         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
591       }
592       ierr = MatDestroy(A); CHKERRQ(ierr);
593     }
594   }
595   return 0;
596 }
597 
598 extern int MatMarkDiag_AIJ(Mat);
599 /*
600     This has to provide several versions.
601 
602      1) as in the sequential case
603      2) a) use only local smoothing, updating outer values only once.
604         b) local smoothing, updating outer values each inner iteration.
605      3) coloring, updating outer values between colors.
606 */
607 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
608                            double shift,int its,Vec xx)
609 {
610   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
611   Mat        AA = mat->A, BB = mat->B;
612   Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
613   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
614   int        ierr,*idx, *diag;
615   int        n = mat->n, m = mat->m, i;
616   Vec        tt;
617 
618   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix first");
619 
620   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
621   xs = x -1; /* shift by one for index start of 1 */
622   ls--;
623   if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(AA))) return ierr;}
624   diag = A->diag;
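  /* SPARSEDENSEMDOT(sum,xs,v,idx,n) subtracts a sparse dot product from sum;
     the sweeps below use it to form sum = b[i] - (part of row i) . x before
     the SOR update */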
625   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
626     SETERRQ(1,"MatRelax_MPIAIJ:Option not yet supported");
627   }
628   if (flag & SOR_EISENSTAT) {
629     /* Let  A = L + U + D, where L is lower triangular,
630     U is upper triangular, and D is diagonal.  This routine applies
631 
632             (L + E)^{-1} A (U + E)^{-1}
633 
634     to a vector efficiently using Eisenstat's trick. This is for
635     the case of the SSOR preconditioner, so E is D/omega, where omega
636     is the relaxation factor.
637     */
638     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
639     PLogObjectParent(matin,tt);
640     VecGetArray(tt,&t);
641     scale = (2.0/omega) - 1.0;
642     /*  x = (E + U)^{-1} b */
643     VecSet(&zero,mat->lvec);
644     ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
645                               mat->Mvctx); CHKERRQ(ierr);
646     for ( i=m-1; i>-1; i-- ) {
647       n    = A->i[i+1] - diag[i] - 1;
648       idx  = A->j + diag[i];
649       v    = A->a + diag[i];
650       sum  = b[i];
651       SPARSEDENSEMDOT(sum,xs,v,idx,n);
652       d    = shift + A->a[diag[i]-1];
653       n    = B->i[i+1] - B->i[i];
654       idx  = B->j + B->i[i] - 1;
655       v    = B->a + B->i[i] - 1;
656       SPARSEDENSEMDOT(sum,ls,v,idx,n);
657       x[i] = omega*(sum/d);
658     }
659     ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
660                             mat->Mvctx); CHKERRQ(ierr);
661 
662     /*  t = b - (2*E - D)x */
663     v = A->a;
664     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }
665 
666     /*  t = (E + L)^{-1}t */
667     ts = t - 1; /* shifted by one for index start of a or mat->j*/
668     diag = A->diag;
669     VecSet(&zero,mat->lvec);
670     ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
671                                                  mat->Mvctx); CHKERRQ(ierr);
672     for ( i=0; i<m; i++ ) {
673       n    = diag[i] - A->i[i];
674       idx  = A->j + A->i[i] - 1;
675       v    = A->a + A->i[i] - 1;
676       sum  = t[i];
677       SPARSEDENSEMDOT(sum,ts,v,idx,n);
678       d    = shift + A->a[diag[i]-1];
679       n    = B->i[i+1] - B->i[i];
680       idx  = B->j + B->i[i] - 1;
681       v    = B->a + B->i[i] - 1;
682       SPARSEDENSEMDOT(sum,ls,v,idx,n);
683       t[i] = omega*(sum/d);
684     }
685     ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
686                                                     mat->Mvctx); CHKERRQ(ierr);
687     /*  x = x + t */
688     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
689     VecDestroy(tt);
690     return 0;
691   }
692 
693 
694   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
695     if (flag & SOR_ZERO_INITIAL_GUESS) {
696       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
697     }
698     else {
699       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
700       CHKERRQ(ierr);
701       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
702       CHKERRQ(ierr);
703     }
704     while (its--) {
705       /* go down through the rows */
706       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
707                               mat->Mvctx); CHKERRQ(ierr);
708       for ( i=0; i<m; i++ ) {
709         n    = A->i[i+1] - A->i[i];
710         idx  = A->j + A->i[i] - 1;
711         v    = A->a + A->i[i] - 1;
712         sum  = b[i];
713         SPARSEDENSEMDOT(sum,xs,v,idx,n);
714         d    = shift + A->a[diag[i]-1];
715         n    = B->i[i+1] - B->i[i];
716         idx  = B->j + B->i[i] - 1;
717         v    = B->a + B->i[i] - 1;
718         SPARSEDENSEMDOT(sum,ls,v,idx,n);
719         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
720       }
721       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
722                             mat->Mvctx); CHKERRQ(ierr);
723       /* come up through the rows */
724       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
725                               mat->Mvctx); CHKERRQ(ierr);
726       for ( i=m-1; i>-1; i-- ) {
727         n    = A->i[i+1] - A->i[i];
728         idx  = A->j + A->i[i] - 1;
729         v    = A->a + A->i[i] - 1;
730         sum  = b[i];
731         SPARSEDENSEMDOT(sum,xs,v,idx,n);
732         d    = shift + A->a[diag[i]-1];
733         n    = B->i[i+1] - B->i[i];
734         idx  = B->j + B->i[i] - 1;
735         v    = B->a + B->i[i] - 1;
736         SPARSEDENSEMDOT(sum,ls,v,idx,n);
737         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
738       }
739       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
740                             mat->Mvctx); CHKERRQ(ierr);
741     }
742   }
743   else if (flag & SOR_FORWARD_SWEEP){
744     if (flag & SOR_ZERO_INITIAL_GUESS) {
745       VecSet(&zero,mat->lvec);
746       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
747                               mat->Mvctx); CHKERRQ(ierr);
748       for ( i=0; i<m; i++ ) {
749         n    = diag[i] - A->i[i];
750         idx  = A->j + A->i[i] - 1;
751         v    = A->a + A->i[i] - 1;
752         sum  = b[i];
753         SPARSEDENSEMDOT(sum,xs,v,idx,n);
754         d    = shift + A->a[diag[i]-1];
755         n    = B->i[i+1] - B->i[i];
756         idx  = B->j + B->i[i] - 1;
757         v    = B->a + B->i[i] - 1;
758         SPARSEDENSEMDOT(sum,ls,v,idx,n);
759         x[i] = omega*(sum/d);
760       }
761       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
762                             mat->Mvctx); CHKERRQ(ierr);
763       its--;
764     }
765     while (its--) {
766       ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
767       CHKERRQ(ierr);
768       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
769       CHKERRQ(ierr);
770       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
771                               mat->Mvctx); CHKERRQ(ierr);
772       for ( i=0; i<m; i++ ) {
773         n    = A->i[i+1] - A->i[i];
774         idx  = A->j + A->i[i] - 1;
775         v    = A->a + A->i[i] - 1;
776         sum  = b[i];
777         SPARSEDENSEMDOT(sum,xs,v,idx,n);
778         d    = shift + A->a[diag[i]-1];
779         n    = B->i[i+1] - B->i[i];
780         idx  = B->j + B->i[i] - 1;
781         v    = B->a + B->i[i] - 1;
782         SPARSEDENSEMDOT(sum,ls,v,idx,n);
783         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
784       }
785       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
786                             mat->Mvctx); CHKERRQ(ierr);
787     }
788   }
789   else if (flag & SOR_BACKWARD_SWEEP){
790     if (flag & SOR_ZERO_INITIAL_GUESS) {
791       VecSet(&zero,mat->lvec);
792       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
793                               mat->Mvctx); CHKERRQ(ierr);
794       for ( i=m-1; i>-1; i-- ) {
795         n    = A->i[i+1] - diag[i] - 1;
796         idx  = A->j + diag[i];
797         v    = A->a + diag[i];
798         sum  = b[i];
799         SPARSEDENSEMDOT(sum,xs,v,idx,n);
800         d    = shift + A->a[diag[i]-1];
801         n    = B->i[i+1] - B->i[i];
802         idx  = B->j + B->i[i] - 1;
803         v    = B->a + B->i[i] - 1;
804         SPARSEDENSEMDOT(sum,ls,v,idx,n);
805         x[i] = omega*(sum/d);
806       }
807       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
808                             mat->Mvctx); CHKERRQ(ierr);
809       its--;
810     }
811     while (its--) {
812       ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
813                             mat->Mvctx); CHKERRQ(ierr);
814       ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
815                             mat->Mvctx); CHKERRQ(ierr);
816       ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
817                               mat->Mvctx); CHKERRQ(ierr);
818       for ( i=m-1; i>-1; i-- ) {
819         n    = A->i[i+1] - A->i[i];
820         idx  = A->j + A->i[i] - 1;
821         v    = A->a + A->i[i] - 1;
822         sum  = b[i];
823         SPARSEDENSEMDOT(sum,xs,v,idx,n);
824         d    = shift + A->a[diag[i]-1];
825         n    = B->i[i+1] - B->i[i];
826         idx  = B->j + B->i[i] - 1;
827         v    = B->a + B->i[i] - 1;
828         SPARSEDENSEMDOT(sum,ls,v,idx,n);
829         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
830       }
831       ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
832                             mat->Mvctx); CHKERRQ(ierr);
833     }
834   }
835   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
836     if (flag & SOR_ZERO_INITIAL_GUESS) {
837       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
838     }
839     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
840     CHKERRQ(ierr);
841     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
842     CHKERRQ(ierr);
843     while (its--) {
844       /* go down through the rows */
845       for ( i=0; i<m; i++ ) {
846         n    = A->i[i+1] - A->i[i];
847         idx  = A->j + A->i[i] - 1;
848         v    = A->a + A->i[i] - 1;
849         sum  = b[i];
850         SPARSEDENSEMDOT(sum,xs,v,idx,n);
851         d    = shift + A->a[diag[i]-1];
852         n    = B->i[i+1] - B->i[i];
853         idx  = B->j + B->i[i] - 1;
854         v    = B->a + B->i[i] - 1;
855         SPARSEDENSEMDOT(sum,ls,v,idx,n);
856         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
857       }
858       /* come up through the rows */
859       for ( i=m-1; i>-1; i-- ) {
860         n    = A->i[i+1] - A->i[i];
861         idx  = A->j + A->i[i] - 1;
862         v    = A->a + A->i[i] - 1;
863         sum  = b[i];
864         SPARSEDENSEMDOT(sum,xs,v,idx,n);
865         d    = shift + A->a[diag[i]-1];
866         n    = B->i[i+1] - B->i[i];
867         idx  = B->j + B->i[i] - 1;
868         v    = B->a + B->i[i] - 1;
869         SPARSEDENSEMDOT(sum,ls,v,idx,n);
870         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
871       }
872     }
873   }
874   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
875     if (flag & SOR_ZERO_INITIAL_GUESS) {
876       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
877     }
878     ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
879     CHKERRQ(ierr);
880     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
881     CHKERRQ(ierr);
882     while (its--) {
883       for ( i=0; i<m; i++ ) {
884         n    = A->i[i+1] - A->i[i];
885         idx  = A->j + A->i[i] - 1;
886         v    = A->a + A->i[i] - 1;
887         sum  = b[i];
888         SPARSEDENSEMDOT(sum,xs,v,idx,n);
889         d    = shift + A->a[diag[i]-1];
890         n    = B->i[i+1] - B->i[i];
891         idx  = B->j + B->i[i] - 1;
892         v    = B->a + B->i[i] - 1;
893         SPARSEDENSEMDOT(sum,ls,v,idx,n);
894         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
895       }
896     }
897   }
898   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
899     if (flag & SOR_ZERO_INITIAL_GUESS) {
900       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
901     }
902     ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
903                             mat->Mvctx); CHKERRQ(ierr);
904     ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
905                             mat->Mvctx); CHKERRQ(ierr);
906     while (its--) {
907       for ( i=m-1; i>-1; i-- ) {
908         n    = A->i[i+1] - A->i[i];
909         idx  = A->j + A->i[i] - 1;
910         v    = A->a + A->i[i] - 1;
911         sum  = b[i];
912         SPARSEDENSEMDOT(sum,xs,v,idx,n);
913         d    = shift + A->a[diag[i]-1];
914         n    = B->i[i+1] - B->i[i];
915         idx  = B->j + B->i[i] - 1;
916         v    = B->a + B->i[i] - 1;
917         SPARSEDENSEMDOT(sum,ls,v,idx,n);
918         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
919       }
920     }
921   }
922   return 0;
923 }
924 
925 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
926                              int *nzalloc,int *mem)
927 {
928   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
929   Mat        A = mat->A, B = mat->B;
930   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
931 
932   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
933   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
934   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
935   if (flag == MAT_LOCAL) {
936     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
937   } else if (flag == MAT_GLOBAL_MAX) {
938     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
939     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
940   } else if (flag == MAT_GLOBAL_SUM) {
941     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
942     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
943   }
944   return 0;
945 }
946 
947 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
948 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
949 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
950 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
951 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
952 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
953 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
954 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
955 
956 static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
957 {
958   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
959 
960   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
961     MatSetOption(aij->A,op);
962     MatSetOption(aij->B,op);
963   }
964   else if (op == YES_NEW_NONZERO_LOCATIONS) {
965     MatSetOption(aij->A,op);
966     MatSetOption(aij->B,op);
967   }
968   else if (op == COLUMN_ORIENTED)
969     SETERRQ(1,"MatSetOption_MPIAIJ:Column oriented not supported");
970   return 0;
971 }
972 
973 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
974 {
975   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
976   *m = mat->M; *n = mat->N;
977   return 0;
978 }
979 
980 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
981 {
982   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
982   *m = mat->m; *n = mat->n;
984   return 0;
985 }
986 
987 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
988 {
989   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
990   *m = mat->rstart; *n = mat->rend;
991   return 0;
992 }
993 
994 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
995 {
996   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
997   Scalar     *vworkA, *vworkB, **pvA, **pvB;
998   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
999   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1000 
1001   if (row < rstart || row >= rend)
1002     SETERRQ(1,"MatGetRow_MPIAIJ:Currently you can get only local rows");
1003   lrow = row - rstart;
1004 
1005   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1006   if (!v)   {pvA = 0; pvB = 0;}
1007   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1008   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1009   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1010   nztot = nzA + nzB;
1011 
1012   if (v  || idx) {
1013     if (nztot) {
1014       /* Sort by increasing column numbers, assuming A and B already sorted */
1015       int imark;
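      /* merge: columns of B that lie to the left of the diagonal block come
         first, then all of A, then the remaining columns of B */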
1016       if (mat->assembled) {
1017         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1018       }
1019       if (v) {
1020         *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1021         for ( i=0; i<nzB; i++ ) {
1022           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1023           else break;
1024         }
1025         imark = i;
1026         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1027         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1028       }
1029       if (idx) {
1030         *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1031         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1032         for ( i=0; i<nzB; i++ ) {
1033           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1034           else break;
1035         }
1036         imark = i;
1037         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1038         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1039       }
1040     }
1041     else {*idx = 0; *v=0;}
1042   }
1043   *nz = nztot;
1044   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1045   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1046   return 0;
1047 }
1048 
1049 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1050 {
1051   if (idx) PETSCFREE(*idx);
1052   if (v) PETSCFREE(*v);
1053   return 0;
1054 }
1055 
1056 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
1057 {
1058   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1059   int ierr;
1060   if (aij->numtids == 1) {
1061     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1062   } else
1063     SETERRQ(1,"MatNorm_MPIAIJ:not yet supported in parallel");
1064   return 0;
1065 }
1066 
1067 static int MatTranspose_MPIAIJ(Mat A,Mat *Bin)
1068 {
1069   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1070   int        ierr;
1071   Mat        B;
1072   Mat_AIJ    *Aloc;
1073   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1074   Scalar     *array;
1075 
1076   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1077   CHKERRQ(ierr);
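  /* the transpose is built by inserting each local row of A (and of B) as a
     column of the new matrix, i.e. MatSetValues() is called with the row and
     column arguments interchanged */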
1078 
1079   /* copy over the A part */
1080   Aloc = (Mat_AIJ*) a->A->data;
1081   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1082   row = a->rstart;
1083   for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;}
1084   for ( i=0; i<m; i++ ) {
1085       ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES);
1086       CHKERRQ(ierr);
1087       row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1088   }
1089   aj = Aloc->j;
1090   for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;}
1091 
1092   /* copy over the B part */
1093   Aloc = (Mat_AIJ*) a->B->data;
1094   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1095   row = a->rstart;
1096   ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
1097   for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];}
1098   for ( i=0; i<m; i++ ) {
1099     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES);
1100     CHKERRQ(ierr);
1101     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1102   }
1103   PETSCFREE(ct);
1104   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1105   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1106   *Bin = B;
1107   return 0;
1108 }
1109 
1110 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1111 static int MatCopyPrivate_MPIAIJ(Mat,Mat *);
1112 
1113 /* -------------------------------------------------------------------*/
1114 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1115        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1116        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1117        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1118        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1119        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1120        MatLUFactor_MPIAIJ,0,
1121        MatRelax_MPIAIJ,
1122        MatTranspose_MPIAIJ,
1123        MatGetInfo_MPIAIJ,0,
1124        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1125        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1126        0,
1127        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1128        MatGetReordering_MPIAIJ,
1129        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1130        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1131        MatILUFactorSymbolic_MPIAIJ,0,
1132        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
1133 
1134 /*@
1135    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1136    (the default parallel PETSc format).
1137 
1138    Input Parameters:
1139 .  comm - MPI communicator
1140 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1141 .  n - number of local columns (or PETSC_DECIDE to have it calculated
1142            if N is given)
1143 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1144 .  N - number of global columns (or PETSC_DECIDE to have it calculated
1145            if n is given)
1146 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1147            (same for all local rows)
1148 .  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1149            or null (possibly different for each row).  You must leave room
1150            for the diagonal entry even if it is zero.
1151 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1152            submatrix (same for all local rows).
1153 .  o_nnz - number of nonzeros per row in off-diagonal portion of local
1154            submatrix or null (possibly different for each row).
1155 
1156    Output Parameter:
1157 .  newmat - the matrix
1158 
1159    Notes:
1160    The AIJ format (also called the Yale sparse matrix format or
1161    compressed row storage) is fully compatible with standard Fortran 77
1162    storage.  That is, the stored row and column indices begin at
1163    one, not zero.  See the users manual for further details.
1164 
1165    The user MUST specify either the local or global matrix dimensions
1166    (possibly both).
1167 
1168    Storage Information:
1169    For a square global matrix we define each processor's diagonal portion
1170    to be its local rows and the corresponding columns (a square submatrix);
1171    each processor's off-diagonal portion encompasses the remainder of the
1172    local matrix (a rectangular submatrix).
1173 
1174    The user can specify preallocated storage for the diagonal part of
1175    the local submatrix with either d_nz or d_nnz (not both). Set both
1176    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1177    Likewise, specify preallocated storage for the off-diagonal part of
1178    the local submatrix with o_nz or o_nnz (not both).
1179 
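   Example (a minimal sketch of typical use; the sizes and preallocation
   values below are illustrative only):

      Mat    A;
      int    ierr, row = 0, col = 0;
      Scalar one = 1.0;
      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,
                             5,0,5,0,&A); CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&one,INSERTVALUES); CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);

   Any processor may set any entry; values destined for rows owned by other
   processors are stashed and communicated during assembly.
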
1180 .keywords: matrix, aij, compressed row, sparse, parallel
1181 
1182 .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues()
1183 @*/
1184 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1185                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
1186 {
1187   Mat          mat;
1188   Mat_MPIAIJ   *aij;
1189   int          ierr, i,sum[2],work[2];
1190   *newmat         = 0;
1191   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1192   PLogObjectCreate(mat);
1193   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
1194   mat->ops        = &MatOps;
1195   mat->destroy    = MatDestroy_MPIAIJ;
1196   mat->view       = MatView_MPIAIJ;
1197   mat->factor     = 0;
1198 
1199   aij->insertmode = NOTSETVALUES;
1200   MPI_Comm_rank(comm,&aij->mytid);
1201   MPI_Comm_size(comm,&aij->numtids);
1202 
1203   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1204     work[0] = m; work[1] = n;
1205     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
1206     if (M == PETSC_DECIDE) M = sum[0];
1207     if (N == PETSC_DECIDE) N = sum[1];
1208   }
1209   if (m == PETSC_DECIDE)
1210     {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
1211   if (n == PETSC_DECIDE)
1212     {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
1213   aij->m = m;
1214   aij->n = n;
1215   aij->N = N;
1216   aij->M = M;
1217 
1218   /* build local table of row and column ownerships */
1219   aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
1220   CHKPTRQ(aij->rowners);
1221   PLogObjectMemory(mat,2*(aij->numtids+2)*sizeof(int)+sizeof(struct _Mat)+
1222                        sizeof(Mat_MPIAIJ));
1223   aij->cowners = aij->rowners + aij->numtids + 1;
1224   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
1225   aij->rowners[0] = 0;
1226   for ( i=2; i<=aij->numtids; i++ ) {
1227     aij->rowners[i] += aij->rowners[i-1];
1228   }
1229   aij->rstart = aij->rowners[aij->mytid];
1230   aij->rend   = aij->rowners[aij->mytid+1];
1231   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
1232   aij->cowners[0] = 0;
1233   for ( i=2; i<=aij->numtids; i++ ) {
1234     aij->cowners[i] += aij->cowners[i-1];
1235   }
1236   aij->cstart = aij->cowners[aij->mytid];
1237   aij->cend   = aij->cowners[aij->mytid+1];
1238 
1239 
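  /* aij->A holds the local diagonal block (m x n); aij->B holds the
     off-diagonal part and initially uses the full global column space
     (m x N); its columns are compressed to just the nonzero ones by
     MatSetUpMultiply_MPIAIJ() at assembly time */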
1240   ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
1241   CHKERRQ(ierr);
1242   PLogObjectParent(mat,aij->A);
1243   ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
1244   CHKERRQ(ierr);
1245   PLogObjectParent(mat,aij->B);
1246 
1247   /* build cache for off array entries formed */
1248   ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
1249   aij->colmap    = 0;
1250   aij->garray    = 0;
1251 
1252   /* stuff used for matrix vector multiply */
1253   aij->lvec      = 0;
1254   aij->Mvctx     = 0;
1255   aij->assembled = 0;
1256 
1257   *newmat = mat;
1258   return 0;
1259 }
1260 
1261 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
1262 {
1263   Mat        mat;
1264   Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
1265   int        ierr, len;
1266   *newmat      = 0;
1267 
1268   if (!oldmat->assembled)
1269     SETERRQ(1,"MatCopyPrivate_MPIAIJ:Cannot copy unassembled matrix");
1270   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1271   PLogObjectCreate(mat);
1272   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
1273   mat->ops        = &MatOps;
1274   mat->destroy    = MatDestroy_MPIAIJ;
1275   mat->view       = MatView_MPIAIJ;
1276   mat->factor     = matin->factor;
1277 
1278   aij->m          = oldmat->m;
1279   aij->n          = oldmat->n;
1280   aij->M          = oldmat->M;
1281   aij->N          = oldmat->N;
1282 
1283   aij->assembled  = 1;
1284   aij->rstart     = oldmat->rstart;
1285   aij->rend       = oldmat->rend;
1286   aij->cstart     = oldmat->cstart;
1287   aij->cend       = oldmat->cend;
1288   aij->numtids    = oldmat->numtids;
1289   aij->mytid      = oldmat->mytid;
1290   aij->insertmode = NOTSETVALUES;
1291 
1292   aij->rowners        = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
1293   CHKPTRQ(aij->rowners);
1294   PLogObjectMemory(mat,(aij->numtids+1)*sizeof(int)+sizeof(struct _Mat)+
1295                        sizeof(Mat_MPIAIJ));
1296   PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1297   ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
1298   if (oldmat->colmap) {
1299     aij->colmap      = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
1300     CHKPTRQ(aij->colmap);
1301     PLogObjectMemory(mat,(aij->N)*sizeof(int));
1302     PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
1303   } else aij->colmap = 0;
1304   if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) {
1305     aij->garray      = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray);
1306     PLogObjectMemory(mat,len*sizeof(int));
1307     PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
1308   } else aij->garray = 0;
1309 
1310   ierr =  VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
1311   PLogObjectParent(mat,aij->lvec);
1312   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
1313   PLogObjectParent(mat,aij->Mvctx);
1314   ierr =  MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
1315   PLogObjectParent(mat,aij->A);
1316   ierr =  MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
1317   PLogObjectParent(mat,aij->B);
1318   *newmat = mat;
1319   return 0;
1320 }
1321