xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2eb8c8ab7520400270f737732844e74a35f1ebd5)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.19 1995/03/26 04:43:10 bsmith Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 #define CHUNCKSIZE   100
10 /*
11    This is a simple minded stash. Do a linear search to determine if
12  in stash, if not add to end.
13 */
14 static int StashValues(Stash *stash,int row,int n, int *idxn,
15                        Scalar *values,InsertMode addv)
16 {
17   int    i,j,N = stash->n,found,*n_idx, *n_idy;
18   Scalar val,*n_array;
19 
20   for ( i=0; i<n; i++ ) {
21     found = 0;
22     val = *values++;
23     for ( j=0; j<N; j++ ) {
24       if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) {
25         /* found a match */
26         if (addv == AddValues) stash->array[j] += val;
27         else stash->array[j] = val;
28         found = 1;
29         break;
30       }
31     }
32     if (!found) { /* not found so add to end */
33       if ( stash->n == stash->nmax ) {
34         /* allocate a larger stash */
35         n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*(
36                                      2*sizeof(int) + sizeof(Scalar)));
37         CHKPTR(n_array);
38         n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE);
39         n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE);
40         MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar));
41         MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int));
42         MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int));
43         if (stash->array) FREE(stash->array);
44         stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy;
45         stash->nmax += CHUNCKSIZE;
46       }
47       stash->array[stash->n]   = val;
48       stash->idx[stash->n]     = row;
49       stash->idy[stash->n++]   = idxn[i];
50     }
51   }
52   return 0;
53 }
54 
55 /* local utility routine that creates a mapping from the global column
56 number to the local number in the off-diagonal part of the local
57 storage of the matrix.  This is done in a non scable way since the
58 length of colmap equals the global matrix length.
59 */
60 static int CreateColmap(Mat mat)
61 {
62   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
63   Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
64   int        n = B->n,i;
65   aij->colmap = (int *) MALLOC( aij->N*sizeof(int) ); CHKPTR(aij->colmap);
66   MEMSET(aij->colmap,0,aij->N*sizeof(int));
67   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
68   return 0;
69 }
70 
71 static int MatInsertValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
72                             int *idxn,Scalar *v,InsertMode addv)
73 {
74   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
75   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
76   int        cstart = aij->cstart, cend = aij->cend,row,col;
77 
78   if (aij->insertmode != NotSetValues && aij->insertmode != addv) {
79     SETERR(1,"You cannot mix inserts and adds");
80   }
81   aij->insertmode = addv;
82   for ( i=0; i<m; i++ ) {
83     if (idxm[i] < 0) SETERR(1,"Negative row index");
84     if (idxm[i] >= aij->M) SETERR(1,"Row index too large");
85     if (idxm[i] >= rstart && idxm[i] < rend) {
86       row = idxm[i] - rstart;
87       for ( j=0; j<n; j++ ) {
88         if (idxn[j] < 0) SETERR(1,"Negative column index");
89         if (idxn[j] >= aij->N) SETERR(1,"Column index too large");
90         if (idxn[j] >= cstart && idxn[j] < cend){
91           col = idxn[j] - cstart;
92           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
93         }
94         else {
95           if (aij->assembled) {
96             if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);}
97             col = aij->colmap[idxn[j]] - 1;
98             if (col < 0) {
99               SETERR(1,"Cannot insert new off diagonal block nonzero in\
100                      already\
101                      assembled matrix. Contact petsc-maint@mcs.anl.gov\
102                      if your need this feature");
103             }
104           }
105           else col = idxn[j];
106           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
107         }
108       }
109     }
110     else {
111       ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr);
112     }
113   }
114   return 0;
115 }
116 
117 /*
118     the assembly code is alot like the code for vectors, we should
119     sometime derive a single assembly code that can be used for
120     either case.
121 */
122 
123 static int MatBeginAssemble_MPIAIJ(Mat mat)
124 {
125   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
126   MPI_Comm    comm = mat->comm;
127   int         numtids = aij->numtids, *owners = aij->rowners;
128   int         mytid = aij->mytid;
129   MPI_Request *send_waits,*recv_waits;
130   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
131   int         tag = 50, *owner,*starts,count;
132   InsertMode  addv;
133   Scalar      *rvalues,*svalues;
134 
135   /* make sure all processors are either in INSERTMODE or ADDMODE */
136   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
137                 MPI_BOR,comm);
138   if (addv == (AddValues|InsertValues)) {
139     SETERR(1,"Some processors have inserted while others have added");
140   }
141   aij->insertmode = addv; /* in case this processor had no cache */
142 
143   /*  first count number of contributors to each processor */
144   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
145   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
146   owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner);
147   for ( i=0; i<aij->stash.n; i++ ) {
148     idx = aij->stash.idx[i];
149     for ( j=0; j<numtids; j++ ) {
150       if (idx >= owners[j] && idx < owners[j+1]) {
151         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
152       }
153     }
154   }
155   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
156 
157   /* inform other processors of number of messages and max length*/
158   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
159   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
160   nreceives = work[mytid];
161   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
162   nmax = work[mytid];
163   FREE(work);
164 
165   /* post receives:
166        1) each message will consist of ordered pairs
167      (global index,value) we store the global index as a double
168      to simplify the message passing.
169        2) since we don't know how long each individual message is we
170      allocate the largest needed buffer for each receive. Potentially
171      this is a lot of wasted space.
172 
173 
174        This could be done better.
175   */
176   rvalues = (Scalar *) MALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
177   CHKPTR(rvalues);
178   recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request));
179   CHKPTR(recv_waits);
180   for ( i=0; i<nreceives; i++ ) {
181     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag,
182               comm,recv_waits+i);
183   }
184 
185   /* do sends:
186       1) starts[i] gives the starting index in svalues for stuff going to
187          the ith processor
188   */
189   svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
190   CHKPTR(svalues);
191   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
192   CHKPTR(send_waits);
193   starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts);
194   starts[0] = 0;
195   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
196   for ( i=0; i<aij->stash.n; i++ ) {
197     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
198     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
199     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
200   }
201   FREE(owner);
202   starts[0] = 0;
203   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
204   count = 0;
205   for ( i=0; i<numtids; i++ ) {
206     if (procs[i]) {
207       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag,
208                 comm,send_waits+count++);
209     }
210   }
211   FREE(starts); FREE(nprocs);
212 
213   /* Free cache space */
214   aij->stash.nmax = aij->stash.n = 0;
215   if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;}
216 
217   aij->svalues    = svalues;       aij->rvalues = rvalues;
218   aij->nsends     = nsends;         aij->nrecvs = nreceives;
219   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
220   aij->rmax       = nmax;
221 
222   return 0;
223 }
224 extern int MatSetUpMultiply_MPIAIJ(Mat);
225 
226 static int MatEndAssemble_MPIAIJ(Mat mat)
227 {
228   int        ierr;
229   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
230 
231   MPI_Status  *send_status,recv_status;
232   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
233   int         row,col;
234   Scalar      *values,val;
235   InsertMode  addv = aij->insertmode;
236 
237   /*  wait on receives */
238   while (count) {
239     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
240     /* unpack receives into our local space */
241     values = aij->rvalues + 3*imdex*aij->rmax;
242     MPI_Get_count(&recv_status,MPI_SCALAR,&n);
243     n = n/3;
244     for ( i=0; i<n; i++ ) {
245       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
246       col = (int) PETSCREAL(values[3*i+1]);
247       val = values[3*i+2];
248       if (col >= aij->cstart && col < aij->cend) {
249           col -= aij->cstart;
250         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
251       }
252       else {
253         if (aij->assembled) {
254           if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);}
255           col = aij->colmap[col] - 1;
256           if (col < 0) {
257             SETERR(1,"Cannot insert new off diagonal block nonzero in\
258                      already\
259                      assembled matrix. Contact petsc-maint@mcs.anl.gov\
260                      if your need this feature");
261           }
262         }
263         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
264       }
265     }
266     count--;
267   }
268   FREE(aij->recv_waits); FREE(aij->rvalues);
269 
270   /* wait on sends */
271   if (aij->nsends) {
272     send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) );
273     CHKPTR(send_status);
274     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
275     FREE(send_status);
276   }
277   FREE(aij->send_waits); FREE(aij->svalues);
278 
279   aij->insertmode = NotSetValues;
280   ierr = MatBeginAssembly(aij->A); CHKERR(ierr);
281   ierr = MatEndAssembly(aij->A); CHKERR(ierr);
282 
283   if (!aij->assembled) {
284     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERR(ierr);
285   }
286   ierr = MatBeginAssembly(aij->B); CHKERR(ierr);
287   ierr = MatEndAssembly(aij->B); CHKERR(ierr);
288 
289   aij->assembled = 1;
290   return 0;
291 }
292 
293 static int MatZero_MPIAIJ(Mat A)
294 {
295   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
296 
297   MatZeroEntries(l->A); MatZeroEntries(l->B);
298   return 0;
299 }
300 
301 /* again this uses the same basic stratagy as in the assembly and
302    scatter create routines, we should try to do it systemamatically
303    if we can figure out the proper level of generality. */
304 
305 /* the code does not do the diagonal entries correctly unless the
306    matrix is square and the column and row owerships are identical.
307    This is a BUG. The only way to fix it seems to be to access
308    aij->A and aij->B directly and not through the MatZeroRows()
309    routine.
310 */
311 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
312 {
313   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
314   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
315   int            *procs,*nprocs,j,found,idx,nsends,*work;
316   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
317   int            *rvalues,tag = 67,count,base,slen,n,*source;
318   int            *lens,imdex,*lrows,*values;
319   MPI_Comm       comm = A->comm;
320   MPI_Request    *send_waits,*recv_waits;
321   MPI_Status     recv_status,*send_status;
322   IS             istmp;
323 
324   if (!l->assembled) SETERR(1,"MatZeroRows_MPIAIJ: must assemble matrix first");
325   ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
326   ierr = ISGetIndices(is,&rows); CHKERR(ierr);
327 
328   /*  first count number of contributors to each processor */
329   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
330   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
331   owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/
332   for ( i=0; i<N; i++ ) {
333     idx = rows[i];
334     found = 0;
335     for ( j=0; j<numtids; j++ ) {
336       if (idx >= owners[j] && idx < owners[j+1]) {
337         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
338       }
339     }
340     if (!found) SETERR(1,"Imdex out of range");
341   }
342   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
343 
344   /* inform other processors of number of messages and max length*/
345   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
346   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
347   nrecvs = work[mytid];
348   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
349   nmax = work[mytid];
350   FREE(work);
351 
352   /* post receives:   */
353   rvalues = (int *) MALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
354   CHKPTR(rvalues);
355   recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
356   CHKPTR(recv_waits);
357   for ( i=0; i<nrecvs; i++ ) {
358     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
359               comm,recv_waits+i);
360   }
361 
362   /* do sends:
363       1) starts[i] gives the starting index in svalues for stuff going to
364          the ith processor
365   */
366   svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
367   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
368   CHKPTR(send_waits);
369   starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
370   starts[0] = 0;
371   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
372   for ( i=0; i<N; i++ ) {
373     svalues[starts[owner[i]]++] = rows[i];
374   }
375   ISRestoreIndices(is,&rows);
376 
377   starts[0] = 0;
378   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
379   count = 0;
380   for ( i=0; i<numtids; i++ ) {
381     if (procs[i]) {
382       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
383                 comm,send_waits+count++);
384     }
385   }
386   FREE(starts);
387 
388   base = owners[mytid];
389 
390   /*  wait on receives */
391   lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
392   source = lens + nrecvs;
393   count = nrecvs; slen = 0;
394   while (count) {
395     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
396     /* unpack receives into our local space */
397     MPI_Get_count(&recv_status,MPI_INT,&n);
398     source[imdex]  = recv_status.MPI_SOURCE;
399     lens[imdex]  = n;
400     slen += n;
401     count--;
402   }
403   FREE(recv_waits);
404 
405   /* move the data into the send scatter */
406   lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
407   count = 0;
408   for ( i=0; i<nrecvs; i++ ) {
409     values = rvalues + i*nmax;
410     for ( j=0; j<lens[i]; j++ ) {
411       lrows[count++] = values[j] - base;
412     }
413   }
414   FREE(rvalues); FREE(lens);
415   FREE(owner); FREE(nprocs);
416 
417   /* actually zap the local rows */
418   ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr);  FREE(lrows);
419   ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
420   ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
421   ierr = ISDestroy(istmp); CHKERR(ierr);
422 
423   /* wait on sends */
424   if (nsends) {
425     send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
426     CHKPTR(send_status);
427     MPI_Waitall(nsends,send_waits,send_status);
428     FREE(send_status);
429   }
430   FREE(send_waits); FREE(svalues);
431 
432 
433   return 0;
434 }
435 
436 static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
437 {
438   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
439   int        ierr;
440   if (!aij->assembled) SETERR(1,"MatMult_MPIAIJ: must assemble matrix first");
441   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
442   CHKERR(ierr);
443   ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
444   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
445   CHKERR(ierr);
446   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
447   return 0;
448 }
449 
450 static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
451 {
452   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
453   int        ierr;
454   if (!aij->assembled) SETERR(1,"MatMult_MPIAIJ: must assemble matrix first");
455   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
456   CHKERR(ierr);
457   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr);
458   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
459   CHKERR(ierr);
460   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr);
461   return 0;
462 }
463 
464 static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
465 {
466   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
467   int        ierr;
468 
469   if (!aij->assembled)
470     SETERR(1,"MatMulTrans_MPIAIJ: must assemble matrix first");
471   /* do nondiagonal part */
472   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
473   /* send it on its way */
474   ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues,
475                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
476   /* do local part */
477   ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr);
478   /* receive remote parts: note this assumes the values are not actually */
479   /* inserted in yy until the next line, which is true for my implementation*/
480   /* but is not perhaps always true. */
481   ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse,
482                          aij->Mvctx); CHKERR(ierr);
483   return 0;
484 }
485 
486 static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
487 {
488   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
489   int        ierr;
490 
491   if (!aij->assembled)
492     SETERR(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first");
493   /* do nondiagonal part */
494   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
495   /* send it on its way */
496   ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues,
497                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
498   /* do local part */
499   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr);
500   /* receive remote parts: note this assumes the values are not actually */
501   /* inserted in yy until the next line, which is true for my implementation*/
502   /* but is not perhaps always true. */
503   ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse,
504                          aij->Mvctx); CHKERR(ierr);
505   return 0;
506 }
507 
508 /*
509   This only works correctly for square matrices where the subblock A->A is the
510    diagonal block
511 */
512 static int MatGetDiag_MPIAIJ(Mat Ain,Vec v)
513 {
514   Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
515   if (!A->assembled) SETERR(1,"MatGetDiag_MPIAIJ: must assemble matrix first");
516   return MatGetDiagonal(A->A,v);
517 }
518 
519 static int MatDestroy_MPIAIJ(PetscObject obj)
520 {
521   Mat        mat = (Mat) obj;
522   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
523   int        ierr;
524 #if defined(PETSC_LOG)
525   PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
526 #endif
527   FREE(aij->rowners);
528   ierr = MatDestroy(aij->A); CHKERR(ierr);
529   ierr = MatDestroy(aij->B); CHKERR(ierr);
530   if (aij->colmap) FREE(aij->colmap);
531   if (aij->garray) FREE(aij->garray);
532   if (aij->lvec) VecDestroy(aij->lvec);
533   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
534   FREE(aij);
535   PLogObjectDestroy(mat);
536   PETSCHEADERDESTROY(mat);
537   return 0;
538 }
539 
540 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
541 {
542   Mat        mat = (Mat) obj;
543   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
544   int        ierr;
545   PetscObject vobj = (PetscObject) viewer;
546 
547   if (!aij->assembled) SETERR(1,"MatView_MPIAIJ: must assemble matrix first");
548   if (vobj->cookie == VIEWER_COOKIE) {
549     FILE *fd = ViewerFileGetPointer(viewer);
550     if (vobj->type == FILE_VIEWER) {
551       MPE_Seq_begin(mat->comm,1);
552       fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
553              aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
554              aij->cend);
555       ierr = MatView(aij->A,viewer); CHKERR(ierr);
556       ierr = MatView(aij->B,viewer); CHKERR(ierr);
557       fflush(fd);
558       MPE_Seq_end(mat->comm,1);
559     }
560     else if (vobj->type == FILES_VIEWER) {
561       int numtids = aij->numtids, mytid = aij->mytid;
562       if (numtids == 1) {
563         ierr = MatView(aij->A,viewer); CHKERR(ierr);
564       }
565       else {
566         /* assemble the entire matrix onto first processor. */
567         Mat     A;
568         Mat_AIJ *Aaij;
569         int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
570         Scalar  *a;
571         if (!mytid) {
572           ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
573         }
574         else {
575           ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
576         }
577         CHKERR(ierr);
578 
579         /* copy over the A part */
580         Aaij = (Mat_AIJ*) aij->A->data;
581         m = Aaij->m; ai = Aaij->i; aj = Aaij->j; a = Aaij->a;
582         row = aij->rstart;
583         for ( i=0; i<ai[m]; i++ ) {aj[i] += aij->cstart - 1;}
584         for ( i=0; i<m; i++ ) {
585           ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,InsertValues);
586           CHKERR(ierr);
587           row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
588         }
589         aj = Aaij->j;
590         for ( i=0; i<ai[m]; i++ ) {aj[i] -= aij->cstart - 1;}
591 
592         /* copy over the B part */
593         Aaij = (Mat_AIJ*) aij->B->data;
594         m = Aaij->m;  ai = Aaij->i; aj = Aaij->j; a = Aaij->a;
595         row = aij->rstart;
596         ct = cols = (int *) MALLOC( (ai[m]+1)*sizeof(int) ); CHKPTR(cols);
597         for ( i=0; i<ai[m]; i++ ) {cols[i] = aij->garray[aj[i]-1];}
598         for ( i=0; i<m; i++ ) {
599           ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,InsertValues);
600           CHKERR(ierr);
601           row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
602         }
603         FREE(ct);
604 
605         ierr = MatBeginAssembly(A); CHKERR(ierr);
606         ierr = MatEndAssembly(A); CHKERR(ierr);
607         if (!mytid) {
608           ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERR(ierr);
609         }
610         ierr = MatDestroy(A); CHKERR(ierr);
611       }
612     }
613   }
614   return 0;
615 }
616 
617 extern int MatMarkDiag_AIJ(Mat_AIJ  *);
618 /*
619     This has to provide several versions.
620 
621      1) per sequential
622      2) a) use only local smoothing updating outer values only once.
623         b) local smoothing updating outer values each inner iteration
624      3) color updating out values betwen colors.
625 */
626 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,int flag,double shift,
627                         int its,Vec xx)
628 {
629   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
630   Mat        AA = mat->A, BB = mat->B;
631   Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
632   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
633   int        ierr,*idx, *diag;
634   int        n = mat->n, m = mat->m, i;
635   Vec        tt;
636 
637   if (!mat->assembled) SETERR(1,"MatRelax_MPIAIJ: must assemble matrix first");
638 
639   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
640   xs = x -1; /* shift by one for index start of 1 */
641   ls--;
642   if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
643   diag = A->diag;
644   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
645     SETERR(1,"That option not yet support for parallel AIJ matrices");
646   }
647   if (flag & SOR_EISENSTAT) {
648     /* Let  A = L + U + D; where L is lower trianglar,
649     U is upper triangular, E is diagonal; This routine applies
650 
651             (L + E)^{-1} A (U + E)^{-1}
652 
653     to a vector efficiently using Eisenstat's trick. This is for
654     the case of SSOR preconditioner, so E is D/omega where omega
655     is the relaxation factor.
656     */
657     ierr = VecCreate(xx,&tt); CHKERR(ierr);
658     VecGetArray(tt,&t);
659     scale = (2.0/omega) - 1.0;
660     /*  x = (E + U)^{-1} b */
661     VecSet(&zero,mat->lvec);
662     ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
663                               mat->Mvctx); CHKERR(ierr);
664     for ( i=m-1; i>-1; i-- ) {
665       n    = A->i[i+1] - diag[i] - 1;
666       idx  = A->j + diag[i];
667       v    = A->a + diag[i];
668       sum  = b[i];
669       SPARSEDENSEMDOT(sum,xs,v,idx,n);
670       d    = shift + A->a[diag[i]-1];
671       n    = B->i[i+1] - B->i[i];
672       idx  = B->j + B->i[i] - 1;
673       v    = B->a + B->i[i] - 1;
674       SPARSEDENSEMDOT(sum,ls,v,idx,n);
675       x[i] = omega*(sum/d);
676     }
677     ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
678                             mat->Mvctx); CHKERR(ierr);
679 
680     /*  t = b - (2*E - D)x */
681     v = A->a;
682     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }
683 
684     /*  t = (E + L)^{-1}t */
685     ts = t - 1; /* shifted by one for index start of a or mat->j*/
686     diag = A->diag;
687     VecSet(&zero,mat->lvec);
688     ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown,
689                                                  mat->Mvctx); CHKERR(ierr);
690     for ( i=0; i<m; i++ ) {
691       n    = diag[i] - A->i[i];
692       idx  = A->j + A->i[i] - 1;
693       v    = A->a + A->i[i] - 1;
694       sum  = t[i];
695       SPARSEDENSEMDOT(sum,ts,v,idx,n);
696       d    = shift + A->a[diag[i]-1];
697       n    = B->i[i+1] - B->i[i];
698       idx  = B->j + B->i[i] - 1;
699       v    = B->a + B->i[i] - 1;
700       SPARSEDENSEMDOT(sum,ls,v,idx,n);
701       t[i] = omega*(sum/d);
702     }
703     ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown,
704                                                     mat->Mvctx); CHKERR(ierr);
705     /*  x = x + t */
706     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
707     VecDestroy(tt);
708     return 0;
709   }
710 
711 
712   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
713     if (flag & SOR_ZERO_INITIAL_GUESS) {
714       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
715     }
716     else {
717       ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
718       CHKERR(ierr);
719       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
720       CHKERR(ierr);
721     }
722     while (its--) {
723       /* go down through the rows */
724       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
725                               mat->Mvctx); CHKERR(ierr);
726       for ( i=0; i<m; i++ ) {
727         n    = A->i[i+1] - A->i[i];
728         idx  = A->j + A->i[i] - 1;
729         v    = A->a + A->i[i] - 1;
730         sum  = b[i];
731         SPARSEDENSEMDOT(sum,xs,v,idx,n);
732         d    = shift + A->a[diag[i]-1];
733         n    = B->i[i+1] - B->i[i];
734         idx  = B->j + B->i[i] - 1;
735         v    = B->a + B->i[i] - 1;
736         SPARSEDENSEMDOT(sum,ls,v,idx,n);
737         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
738       }
739       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
740                             mat->Mvctx); CHKERR(ierr);
741       /* come up through the rows */
742       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
743                               mat->Mvctx); CHKERR(ierr);
744       for ( i=m-1; i>-1; i-- ) {
745         n    = A->i[i+1] - A->i[i];
746         idx  = A->j + A->i[i] - 1;
747         v    = A->a + A->i[i] - 1;
748         sum  = b[i];
749         SPARSEDENSEMDOT(sum,xs,v,idx,n);
750         d    = shift + A->a[diag[i]-1];
751         n    = B->i[i+1] - B->i[i];
752         idx  = B->j + B->i[i] - 1;
753         v    = B->a + B->i[i] - 1;
754         SPARSEDENSEMDOT(sum,ls,v,idx,n);
755         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
756       }
757       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
758                             mat->Mvctx); CHKERR(ierr);
759     }
760   }
761   else if (flag & SOR_FORWARD_SWEEP){
762     if (flag & SOR_ZERO_INITIAL_GUESS) {
763       VecSet(&zero,mat->lvec);
764       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
765                               mat->Mvctx); CHKERR(ierr);
766       for ( i=0; i<m; i++ ) {
767         n    = diag[i] - A->i[i];
768         idx  = A->j + A->i[i] - 1;
769         v    = A->a + A->i[i] - 1;
770         sum  = b[i];
771         SPARSEDENSEMDOT(sum,xs,v,idx,n);
772         d    = shift + A->a[diag[i]-1];
773         n    = B->i[i+1] - B->i[i];
774         idx  = B->j + B->i[i] - 1;
775         v    = B->a + B->i[i] - 1;
776         SPARSEDENSEMDOT(sum,ls,v,idx,n);
777         x[i] = omega*(sum/d);
778       }
779       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
780                             mat->Mvctx); CHKERR(ierr);
781       its--;
782     }
783     while (its--) {
784       ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
785       CHKERR(ierr);
786       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
787       CHKERR(ierr);
788       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
789                               mat->Mvctx); CHKERR(ierr);
790       for ( i=0; i<m; i++ ) {
791         n    = A->i[i+1] - A->i[i];
792         idx  = A->j + A->i[i] - 1;
793         v    = A->a + A->i[i] - 1;
794         sum  = b[i];
795         SPARSEDENSEMDOT(sum,xs,v,idx,n);
796         d    = shift + A->a[diag[i]-1];
797         n    = B->i[i+1] - B->i[i];
798         idx  = B->j + B->i[i] - 1;
799         v    = B->a + B->i[i] - 1;
800         SPARSEDENSEMDOT(sum,ls,v,idx,n);
801         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
802       }
803       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
804                             mat->Mvctx); CHKERR(ierr);
805     }
806   }
807   else if (flag & SOR_BACKWARD_SWEEP){
808     if (flag & SOR_ZERO_INITIAL_GUESS) {
809       VecSet(&zero,mat->lvec);
810       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
811                               mat->Mvctx); CHKERR(ierr);
812       for ( i=m-1; i>-1; i-- ) {
813         n    = A->i[i+1] - diag[i] - 1;
814         idx  = A->j + diag[i];
815         v    = A->a + diag[i];
816         sum  = b[i];
817         SPARSEDENSEMDOT(sum,xs,v,idx,n);
818         d    = shift + A->a[diag[i]-1];
819         n    = B->i[i+1] - B->i[i];
820         idx  = B->j + B->i[i] - 1;
821         v    = B->a + B->i[i] - 1;
822         SPARSEDENSEMDOT(sum,ls,v,idx,n);
823         x[i] = omega*(sum/d);
824       }
825       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
826                             mat->Mvctx); CHKERR(ierr);
827       its--;
828     }
829     while (its--) {
830       ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown,
831                             mat->Mvctx); CHKERR(ierr);
832       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown,
833                             mat->Mvctx); CHKERR(ierr);
834       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
835                               mat->Mvctx); CHKERR(ierr);
836       for ( i=m-1; i>-1; i-- ) {
837         n    = A->i[i+1] - A->i[i];
838         idx  = A->j + A->i[i] - 1;
839         v    = A->a + A->i[i] - 1;
840         sum  = b[i];
841         SPARSEDENSEMDOT(sum,xs,v,idx,n);
842         d    = shift + A->a[diag[i]-1];
843         n    = B->i[i+1] - B->i[i];
844         idx  = B->j + B->i[i] - 1;
845         v    = B->a + B->i[i] - 1;
846         SPARSEDENSEMDOT(sum,ls,v,idx,n);
847         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
848       }
849       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
850                             mat->Mvctx); CHKERR(ierr);
851     }
852   }
853   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
854     if (flag & SOR_ZERO_INITIAL_GUESS) {
855       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
856     }
857     ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
858     CHKERR(ierr);
859     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
860     CHKERR(ierr);
861     while (its--) {
862       /* go down through the rows */
863       for ( i=0; i<m; i++ ) {
864         n    = A->i[i+1] - A->i[i];
865         idx  = A->j + A->i[i] - 1;
866         v    = A->a + A->i[i] - 1;
867         sum  = b[i];
868         SPARSEDENSEMDOT(sum,xs,v,idx,n);
869         d    = shift + A->a[diag[i]-1];
870         n    = B->i[i+1] - B->i[i];
871         idx  = B->j + B->i[i] - 1;
872         v    = B->a + B->i[i] - 1;
873         SPARSEDENSEMDOT(sum,ls,v,idx,n);
874         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
875       }
876       /* come up through the rows */
877       for ( i=m-1; i>-1; i-- ) {
878         n    = A->i[i+1] - A->i[i];
879         idx  = A->j + A->i[i] - 1;
880         v    = A->a + A->i[i] - 1;
881         sum  = b[i];
882         SPARSEDENSEMDOT(sum,xs,v,idx,n);
883         d    = shift + A->a[diag[i]-1];
884         n    = B->i[i+1] - B->i[i];
885         idx  = B->j + B->i[i] - 1;
886         v    = B->a + B->i[i] - 1;
887         SPARSEDENSEMDOT(sum,ls,v,idx,n);
888         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
889       }
890     }
891   }
892   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
893     if (flag & SOR_ZERO_INITIAL_GUESS) {
894       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
895     }
896     ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
897     CHKERR(ierr);
898     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
899     CHKERR(ierr);
900     while (its--) {
901       for ( i=0; i<m; i++ ) {
902         n    = A->i[i+1] - A->i[i];
903         idx  = A->j + A->i[i] - 1;
904         v    = A->a + A->i[i] - 1;
905         sum  = b[i];
906         SPARSEDENSEMDOT(sum,xs,v,idx,n);
907         d    = shift + A->a[diag[i]-1];
908         n    = B->i[i+1] - B->i[i];
909         idx  = B->j + B->i[i] - 1;
910         v    = B->a + B->i[i] - 1;
911         SPARSEDENSEMDOT(sum,ls,v,idx,n);
912         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
913       }
914     }
915   }
916   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
917     if (flag & SOR_ZERO_INITIAL_GUESS) {
918       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
919     }
920     ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,
921                             mat->Mvctx); CHKERR(ierr);
922     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,
923                             mat->Mvctx); CHKERR(ierr);
924     while (its--) {
925       for ( i=m-1; i>-1; i-- ) {
926         n    = A->i[i+1] - A->i[i];
927         idx  = A->j + A->i[i] - 1;
928         v    = A->a + A->i[i] - 1;
929         sum  = b[i];
930         SPARSEDENSEMDOT(sum,xs,v,idx,n);
931         d    = shift + A->a[diag[i]-1];
932         n    = B->i[i+1] - B->i[i];
933         idx  = B->j + B->i[i] - 1;
934         v    = B->a + B->i[i] - 1;
935         SPARSEDENSEMDOT(sum,ls,v,idx,n);
936         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
937       }
938     }
939   }
940   return 0;
941 }
942 static int MatInsOpt_MPIAIJ(Mat aijin,int op)
943 {
944   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
945 
946   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
947     MatSetOption(aij->A,op);
948     MatSetOption(aij->B,op);
949   }
950   else if (op == YES_NEW_NONZERO_LOCATIONS) {
951     MatSetOption(aij->A,op);
952     MatSetOption(aij->B,op);
953   }
954   else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
955   return 0;
956 }
957 
958 static int MatSize_MPIAIJ(Mat matin,int *m,int *n)
959 {
960   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
961   *m = mat->M; *n = mat->N;
962   return 0;
963 }
964 
965 static int MatLocalSize_MPIAIJ(Mat matin,int *m,int *n)
966 {
967   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
968   *m = mat->m; *n = mat->n;
969   return 0;
970 }
971 
972 static int MatRange_MPIAIJ(Mat matin,int *m,int *n)
973 {
974   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
975   *m = mat->rstart; *n = mat->rend;
976   return 0;
977 }
978 
979 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
980 {
981   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) matin->data;
982   Scalar     *vworkA, *vworkB;
983   int        ierr, *cworkA, *cworkB, lrow, cstart = aij->cstart;
984   int        nztot, nzA, nzB, i, rstart = aij->rstart, rend = aij->rend;
985 
986   if (!aij->assembled)
987     SETERR(1,"MatGetRow_MPIAIJ: Must assemble matrix first.");
988   if (row < rstart || row >= rend)
989     SETERR(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.")
990   lrow = row - rstart;
991   ierr = MatGetRow(aij->A,lrow,&nzA,&cworkA,&vworkA); CHKERR(ierr);
992   for (i=0; i<nzA; i++) cworkA[i] += cstart;
993   ierr = MatGetRow(aij->B,lrow,&nzB,&cworkB,&vworkB); CHKERR(ierr);
994   for (i=0; i<nzB; i++) cworkB[i] = aij->garray[cworkB[i]];
995 
996   if ((nztot = nzA + nzB)) {
997     *idx = (int *) MALLOC( (nztot)*sizeof(int) ); CHKPTR(*idx);
998     *v   = (Scalar *) MALLOC( (nztot)*sizeof(Scalar) ); CHKPTR(*v);
999     for ( i=0; i<nzA; i++ ) {
1000       (*idx)[i] = cworkA[i];
1001       (*v)[i] = vworkA[i];
1002     }
1003     for ( i=0; i<nzB; i++ ) {
1004       (*idx)[i+nzA] = cworkB[i];
1005       (*v)[i+nzA] = vworkB[i];
1006     }
1007   }
1008   else {*idx = 0; *v=0;}
1009   *nz = nztot;
1010   ierr = MatRestoreRow(aij->A,lrow,&nzA,&cworkA,&vworkA); CHKERR(ierr);
1011   ierr = MatRestoreRow(aij->B,lrow,&nzB,&cworkB,&vworkB); CHKERR(ierr);
1012   return 0;
1013 }
1014 
1015 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1016 {
1017   if (*idx) FREE(*idx);
1018   if (*v) FREE(*v);
1019   return 0;
1020 }
1021 
1022 static int MatCopy_MPIAIJ(Mat,Mat *);
1023 extern int MatConvert_MPIAIJ(Mat,MATTYPE,Mat *);
1024 
1025 /* -------------------------------------------------------------------*/
1026 static struct _MatOps MatOps = {MatInsertValues_MPIAIJ,
1027        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1028        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1029        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1030        0,0,0,0,
1031        0,0,
1032        MatRelax_MPIAIJ,
1033        0,
1034        0,0,0,
1035        MatCopy_MPIAIJ,
1036        MatGetDiag_MPIAIJ,0,0,
1037        MatBeginAssemble_MPIAIJ,MatEndAssemble_MPIAIJ,
1038        0,
1039        MatInsOpt_MPIAIJ,MatZero_MPIAIJ,MatZeroRows_MPIAIJ,0,
1040        0,0,0,0,
1041        MatSize_MPIAIJ,MatLocalSize_MPIAIJ,MatRange_MPIAIJ,
1042        0,0,
1043        0,MatConvert_MPIAIJ };
1044 
1045 /*@
1046 
1047       MatCreateMPIAIJ - Creates a sparse parallel matrix
1048                                  in AIJ format.
1049 
1050   Input Parameters:
1051 .   comm - MPI communicator
1052 .   m,n - number of local rows and columns (or -1 to have calculated)
1053 .   M,N - global rows and columns (or -1 to have calculated)
1054 .   d_nz - total number nonzeros in diagonal portion of matrix
1055 .   d_nzz - number of nonzeros per row in diagonal portion of matrix or null
1056 .           You must leave room for the diagonal entry even if it is zero.
1057 .   o_nz - total number nonzeros in off-diagonal portion of matrix
1058 .   o_nzz - number of nonzeros per row in off-diagonal portion of matrix
1059 .           or null. You must have at least one nonzero per row.
1060 
1061   Output parameters:
1062 .  newmat - the matrix
1063 
1064   Keywords: matrix, aij, compressed row, sparse, parallel
1065 @*/
1066 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1067                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
1068 {
1069   Mat          mat;
1070   Mat_MPIAIJ   *aij;
1071   int          ierr, i,sum[2],work[2];
1072   *newmat         = 0;
1073   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1074   PLogObjectCreate(mat);
1075   mat->data       = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij);
1076   mat->ops        = &MatOps;
1077   mat->destroy    = MatDestroy_MPIAIJ;
1078   mat->view       = MatView_MPIAIJ;
1079   mat->factor     = 0;
1080   mat->row        = 0;
1081   mat->col        = 0;
1082 
1083   mat->comm       = comm;
1084   aij->insertmode = NotSetValues;
1085   MPI_Comm_rank(comm,&aij->mytid);
1086   MPI_Comm_size(comm,&aij->numtids);
1087 
1088   if (M == -1 || N == -1) {
1089     work[0] = m; work[1] = n;
1090     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
1091     if (M == -1) M = sum[0];
1092     if (N == -1) N = sum[1];
1093   }
1094   if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
1095   if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
1096   aij->m       = m;
1097   aij->n       = n;
1098   aij->N       = N;
1099   aij->M       = M;
1100 
1101   /* build local table of row and column ownerships */
1102   aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
1103   CHKPTR(aij->rowners);
1104   aij->cowners = aij->rowners + aij->numtids + 1;
1105   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
1106   aij->rowners[0] = 0;
1107   for ( i=2; i<=aij->numtids; i++ ) {
1108     aij->rowners[i] += aij->rowners[i-1];
1109   }
1110   aij->rstart = aij->rowners[aij->mytid];
1111   aij->rend   = aij->rowners[aij->mytid+1];
1112   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
1113   aij->cowners[0] = 0;
1114   for ( i=2; i<=aij->numtids; i++ ) {
1115     aij->cowners[i] += aij->cowners[i-1];
1116   }
1117   aij->cstart = aij->cowners[aij->mytid];
1118   aij->cend   = aij->cowners[aij->mytid+1];
1119 
1120 
1121   ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
1122   PLogObjectParent(mat,aij->A);
1123   ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);
1124   PLogObjectParent(mat,aij->B);
1125 
1126   /* build cache for off array entries formed */
1127   aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */
1128   aij->stash.n    = 0;
1129   aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
1130                             sizeof(Scalar))); CHKPTR(aij->stash.array);
1131   aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
1132   aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);
1133   aij->colmap    = 0;
1134   aij->garray    = 0;
1135 
1136   /* stuff used for matrix vector multiply */
1137   aij->lvec      = 0;
1138   aij->Mvctx     = 0;
1139   aij->assembled = 0;
1140 
1141   *newmat = mat;
1142   return 0;
1143 }
1144 
1145 static int MatCopy_MPIAIJ(Mat matin,Mat *newmat)
1146 {
1147   Mat        mat;
1148   Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
1149   int        ierr;
1150   *newmat      = 0;
1151 
1152   if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
1153   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1154   PLogObjectCreate(mat);
1155   mat->data       = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij);
1156   mat->ops        = &MatOps;
1157   mat->destroy    = MatDestroy_MPIAIJ;
1158   mat->view       = MatView_MPIAIJ;
1159   mat->factor     = matin->factor;
1160   mat->row        = 0;
1161   mat->col        = 0;
1162 
1163   aij->m          = oldmat->m;
1164   aij->n          = oldmat->n;
1165   aij->M          = oldmat->M;
1166   aij->N          = oldmat->N;
1167 
1168   aij->assembled  = 1;
1169   aij->rstart     = oldmat->rstart;
1170   aij->rend       = oldmat->rend;
1171   aij->cstart     = oldmat->cstart;
1172   aij->cend       = oldmat->cend;
1173   aij->numtids    = oldmat->numtids;
1174   aij->mytid      = oldmat->mytid;
1175   aij->insertmode = NotSetValues;
1176 
1177   aij->rowners    = (int *) MALLOC( (aij->numtids+1)*sizeof(int) );
1178   CHKPTR(aij->rowners);
1179   MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1180   aij->stash.nmax = 0;
1181   aij->stash.n    = 0;
1182   aij->stash.array= 0;
1183   aij->colmap     = 0;
1184   aij->garray     = 0;
1185   mat->comm       = matin->comm;
1186 
1187   ierr =  VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr);
1188   PLogObjectParent(mat,aij->lvec);
1189   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr);
1190   PLogObjectParent(mat,aij->Mvctx);
1191   ierr =  MatCopy(oldmat->A,&aij->A); CHKERR(ierr);
1192   PLogObjectParent(mat,aij->A);
1193   ierr =  MatCopy(oldmat->B,&aij->B); CHKERR(ierr);
1194   PLogObjectParent(mat,aij->B);
1195   *newmat = mat;
1196   return 0;
1197 }
1198