xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 9e25ed09632ec91e2227f4e6f1ce3bfe794e20d8)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.9 1995/03/17 00:29:15 bsmith Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 #define CHUNCKSIZE   100
10 /*
11    This is a simple minded stash. Do a linear search to determine if
12  in stash, if not add to end.
13 */
14 static int StashValues(Stash *stash,int row,int n, int *idxn,
15                        Scalar *values,InsertMode addv)
16 {
17   int    i,j,N = stash->n,found,*n_idx, *n_idy;
18   Scalar val,*n_array;
19 
20   for ( i=0; i<n; i++ ) {
21     found = 0;
22     val = *values++;
23     for ( j=0; j<N; j++ ) {
24       if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) {
25         /* found a match */
26         if (addv == AddValues) stash->array[j] += val;
27         else stash->array[j] = val;
28         found = 1;
29         break;
30       }
31     }
32     if (!found) { /* not found so add to end */
33       if ( stash->n == stash->nmax ) {
34         /* allocate a larger stash */
35         n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*(
36                                      2*sizeof(int) + sizeof(Scalar)));
37         CHKPTR(n_array);
38         n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE);
39         n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE);
40         MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar));
41         MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int));
42         MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int));
43         if (stash->array) FREE(stash->array);
44         stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy;
45         stash->nmax += CHUNCKSIZE;
46       }
47       stash->array[stash->n]   = val;
48       stash->idx[stash->n]     = row;
49       stash->idy[stash->n++]   = idxn[i];
50     }
51   }
52   return 0;
53 }
54 
55 /* local utility routine that creates a mapping from the global column
56 number to the local number in the off-diagonal part of the local
57 storage of the matrix.  This is done in a non scable way since the
58 length of colmap equals the global matrix length.
59 */
60 static int CreateColmap(Mat mat)
61 {
62   Matimpiaij *aij = (Matimpiaij *) mat->data;
63   Matiaij    *B = (Matiaij*) aij->B->data;
64   int        n = B->n,i;
65   aij->colmap = (int *) MALLOC( aij->N*sizeof(int) ); CHKPTR(aij->colmap);
66   MEMSET(aij->colmap,0,aij->N*sizeof(int));
67   for ( i=0; i<n; i++ ) {
68     aij->colmap[aij->garray[i]] = i+1;
69   }
70   return 0;
71 }
72 
73 static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n,
74                             int *idxn,Scalar *v,InsertMode addv)
75 {
76   Matimpiaij *aij = (Matimpiaij *) mat->data;
77   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
78   int        cstart = aij->cstart, cend = aij->cend,row,col;
79 
80   if (aij->insertmode != NotSetValues && aij->insertmode != addv) {
81     SETERR(1,"You cannot mix inserts and adds");
82   }
83   aij->insertmode = addv;
84   for ( i=0; i<m; i++ ) {
85     if (idxm[i] < 0) SETERR(1,"Negative row index");
86     if (idxm[i] >= aij->M) SETERR(1,"Row index too large");
87     if (idxm[i] >= rstart && idxm[i] < rend) {
88       row = idxm[i] - rstart;
89       for ( j=0; j<n; j++ ) {
90         if (idxn[j] < 0) SETERR(1,"Negative column index");
91         if (idxn[j] >= aij->N) SETERR(1,"Column index too large");
92         if (idxn[j] >= cstart && idxn[j] < cend){
93           col = idxn[j] - cstart;
94           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
95         }
96         else {
97           if (aij->assembled) {
98             if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);}
99             col = aij->colmap[idxn[j]] - 1;
100             if (col < 0) {
101               SETERR(1,"Cannot insert new off diagonal block nonzero in\
102                      already\
103                      assembled matrix. Contact petsc-maint@mcs.anl.gov\
104                      if your need this feature");
105             }
106           }
107           else col = idxn[j];
108           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
109         }
110       }
111     }
112     else {
113       ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr);
114     }
115   }
116   return 0;
117 }
118 
119 /*
120     the assembly code is alot like the code for vectors, we should
121     sometime derive a single assembly code that can be used for
122     either case.
123 */
124 
125 static int MatiAIJBeginAssemble(Mat mat)
126 {
127   Matimpiaij  *aij = (Matimpiaij *) mat->data;
128   MPI_Comm    comm = mat->comm;
129   int         numtids = aij->numtids, *owners = aij->rowners;
130   int         mytid = aij->mytid;
131   MPI_Request *send_waits,*recv_waits;
132   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
133   int         tag = 50, *owner,*starts,count;
134   InsertMode  addv;
135   Scalar      *rvalues,*svalues;
136 
137   /* make sure all processors are either in INSERTMODE or ADDMODE */
138   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
139                 MPI_BOR,comm);
140   if (addv == (AddValues|InsertValues)) {
141     SETERR(1,"Some processors have inserted while others have added");
142   }
143   aij->insertmode = addv; /* in case this processor had no cache */
144 
145   /*  first count number of contributors to each processor */
146   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
147   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
148   owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner);
149   for ( i=0; i<aij->stash.n; i++ ) {
150     idx = aij->stash.idx[i];
151     for ( j=0; j<numtids; j++ ) {
152       if (idx >= owners[j] && idx < owners[j+1]) {
153         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
154       }
155     }
156   }
157   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
158 
159   /* inform other processors of number of messages and max length*/
160   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
161   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
162   nreceives = work[mytid];
163   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
164   nmax = work[mytid];
165   FREE(work);
166 
167   /* post receives:
168        1) each message will consist of ordered pairs
169      (global index,value) we store the global index as a double
170      to simplify the message passing.
171        2) since we don't know how long each individual message is we
172      allocate the largest needed buffer for each receive. Potentially
173      this is a lot of wasted space.
174 
175 
176        This could be done better.
177   */
178   rvalues = (Scalar *) MALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
179   CHKPTR(rvalues);
180   recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request));
181   CHKPTR(recv_waits);
182   for ( i=0; i<nreceives; i++ ) {
183     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag,
184               comm,recv_waits+i);
185   }
186 
187   /* do sends:
188       1) starts[i] gives the starting index in svalues for stuff going to
189          the ith processor
190   */
191   svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
192   CHKPTR(svalues);
193   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
194   CHKPTR(send_waits);
195   starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts);
196   starts[0] = 0;
197   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
198   for ( i=0; i<aij->stash.n; i++ ) {
199     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
200     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
201     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
202   }
203   FREE(owner);
204   starts[0] = 0;
205   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
206   count = 0;
207   for ( i=0; i<numtids; i++ ) {
208     if (procs[i]) {
209       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag,
210                 comm,send_waits+count++);
211     }
212   }
213   FREE(starts); FREE(nprocs);
214 
215   /* Free cache space */
216   aij->stash.nmax = aij->stash.n = 0;
217   if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;}
218 
219   aij->svalues    = svalues;       aij->rvalues = rvalues;
220   aij->nsends     = nsends;         aij->nrecvs = nreceives;
221   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
222   aij->rmax       = nmax;
223 
224   return 0;
225 }
226 extern int MPIAIJSetUpMultiply(Mat);
227 
228 static int MatiAIJEndAssemble(Mat mat)
229 {
230   int        ierr;
231   Matimpiaij *aij = (Matimpiaij *) mat->data;
232 
233   MPI_Status  *send_status,recv_status;
234   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
235   int         row,col;
236   Scalar      *values,val;
237   InsertMode  addv = aij->insertmode;
238 
239   /*  wait on receives */
240   while (count) {
241     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
242     /* unpack receives into our local space */
243     values = aij->rvalues + 3*imdex*aij->rmax;
244     MPI_Get_count(&recv_status,MPI_SCALAR,&n);
245     n = n/3;
246     for ( i=0; i<n; i++ ) {
247       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
248       col = (int) PETSCREAL(values[3*i+1]);
249       val = values[3*i+2];
250       if (col >= aij->cstart && col < aij->cend) {
251           col -= aij->cstart;
252         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
253       }
254       else {
255         if (aij->assembled) {
256           if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);}
257           col = aij->colmap[col] - 1;
258           if (col < 0) {
259             SETERR(1,"Cannot insert new off diagonal block nonzero in\
260                      already\
261                      assembled matrix. Contact petsc-maint@mcs.anl.gov\
262                      if your need this feature");
263           }
264         }
265         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
266       }
267     }
268     count--;
269   }
270   FREE(aij->recv_waits); FREE(aij->rvalues);
271 
272   /* wait on sends */
273   if (aij->nsends) {
274     send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) );
275     CHKPTR(send_status);
276     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
277     FREE(send_status);
278   }
279   FREE(aij->send_waits); FREE(aij->svalues);
280 
281   aij->insertmode = NotSetValues;
282   ierr = MatBeginAssembly(aij->A); CHKERR(ierr);
283   ierr = MatEndAssembly(aij->A); CHKERR(ierr);
284 
285   if (!aij->assembled) {
286     ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr);
287   }
288   ierr = MatBeginAssembly(aij->B); CHKERR(ierr);
289   ierr = MatEndAssembly(aij->B); CHKERR(ierr);
290 
291   aij->assembled = 1;
292   return 0;
293 }
294 
295 static int MatiZero(Mat A)
296 {
297   Matimpiaij *l = (Matimpiaij *) A->data;
298 
299   MatZeroEntries(l->A); MatZeroEntries(l->B);
300   return 0;
301 }
302 
303 /* again this uses the same basic stratagy as in the assembly and
304    scatter create routines, we should try to do it systemamatically
305    if we can figure out the proper level of generality. */
306 
307 /* the code does not do the diagonal entries correctly unless the
308    matrix is square and the column and row owerships are identical.
309    This is a BUG. The only way to fix it seems to be to access
310    aij->A and aij->B directly and not through the MatZeroRows()
311    routine.
312 */
313 static int MatiZerorows(Mat A,IS is,Scalar *diag)
314 {
315   Matimpiaij     *l = (Matimpiaij *) A->data;
316   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
317   int            *procs,*nprocs,j,found,idx,nsends,*work;
318   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
319   int            *rvalues,tag = 67,count,base,slen,n,*source;
320   int            *lens,imdex,*lrows,*values;
321   MPI_Comm       comm = A->comm;
322   MPI_Request    *send_waits,*recv_waits;
323   MPI_Status     recv_status,*send_status;
324   IS             istmp;
325 
326   if (!l->assembled) SETERR(1,"MatiZerorows: must assmble matrix first");
327   ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
328   ierr = ISGetIndices(is,&rows); CHKERR(ierr);
329 
330   /*  first count number of contributors to each processor */
331   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
332   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
333   owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/
334   for ( i=0; i<N; i++ ) {
335     idx = rows[i];
336     found = 0;
337     for ( j=0; j<numtids; j++ ) {
338       if (idx >= owners[j] && idx < owners[j+1]) {
339         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
340       }
341     }
342     if (!found) SETERR(1,"Imdex out of range");
343   }
344   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
345 
346   /* inform other processors of number of messages and max length*/
347   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
348   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
349   nrecvs = work[mytid];
350   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
351   nmax = work[mytid];
352   FREE(work);
353 
354   /* post receives:   */
355   rvalues = (int *) MALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
356   CHKPTR(rvalues);
357   recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
358   CHKPTR(recv_waits);
359   for ( i=0; i<nrecvs; i++ ) {
360     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
361               comm,recv_waits+i);
362   }
363 
364   /* do sends:
365       1) starts[i] gives the starting index in svalues for stuff going to
366          the ith processor
367   */
368   svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
369   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
370   CHKPTR(send_waits);
371   starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
372   starts[0] = 0;
373   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
374   for ( i=0; i<N; i++ ) {
375     svalues[starts[owner[i]]++] = rows[i];
376   }
377   ISRestoreIndices(is,&rows);
378 
379   starts[0] = 0;
380   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
381   count = 0;
382   for ( i=0; i<numtids; i++ ) {
383     if (procs[i]) {
384       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
385                 comm,send_waits+count++);
386     }
387   }
388   FREE(starts);
389 
390   base = owners[mytid];
391 
392   /*  wait on receives */
393   lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
394   source = lens + nrecvs;
395   count = nrecvs; slen = 0;
396   while (count) {
397     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
398     /* unpack receives into our local space */
399     MPI_Get_count(&recv_status,MPI_INT,&n);
400     source[imdex]  = recv_status.MPI_SOURCE;
401     lens[imdex]  = n;
402     slen += n;
403     count--;
404   }
405   FREE(recv_waits);
406 
407   /* move the data into the send scatter */
408   lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
409   count = 0;
410   for ( i=0; i<nrecvs; i++ ) {
411     values = rvalues + i*nmax;
412     for ( j=0; j<lens[i]; j++ ) {
413       lrows[count++] = values[j] - base;
414     }
415   }
416   FREE(rvalues); FREE(lens);
417   FREE(owner); FREE(nprocs);
418 
419   /* actually zap the local rows */
420   ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr);  FREE(lrows);
421   ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
422   ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
423   ierr = ISDestroy(istmp); CHKERR(ierr);
424 
425   /* wait on sends */
426   if (nsends) {
427     send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
428     CHKPTR(send_status);
429     MPI_Waitall(nsends,send_waits,send_status);
430     FREE(send_status);
431   }
432   FREE(send_waits); FREE(svalues);
433 
434 
435   return 0;
436 }
437 
438 static int MatiAIJMult(Mat aijin,Vec xx,Vec yy)
439 {
440   Matimpiaij *aij = (Matimpiaij *) aijin->data;
441   int        ierr;
442   if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
443   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
444   CHKERR(ierr);
445   ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
446   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
447   CHKERR(ierr);
448   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
449   return 0;
450 }
451 
452 static int MatiAIJMultadd(Mat aijin,Vec xx,Vec yy,Vec zz)
453 {
454   Matimpiaij *aij = (Matimpiaij *) aijin->data;
455   int        ierr;
456   if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
457   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
458   CHKERR(ierr);
459   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr);
460   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
461   CHKERR(ierr);
462   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr);
463   return 0;
464 }
465 
466 static int MatiAIJMultTrans(Mat aijin,Vec xx,Vec yy)
467 {
468   Matimpiaij *aij = (Matimpiaij *) aijin->data;
469   int        ierr;
470 
471   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
472   /* do nondiagonal part */
473   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
474   /* send it on its way */
475   ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues,
476                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
477   /* do local part */
478   ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr);
479   /* receive remote parts: note this assumes the values are not actually */
480   /* inserted in yy until the next line, which is true for my implementation*/
481   /* but is not perhaps always true. */
482   ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse,
483                          aij->Mvctx); CHKERR(ierr);
484   return 0;
485 }
486 
487 static int MatiAIJMultTransadd(Mat aijin,Vec xx,Vec yy,Vec zz)
488 {
489   Matimpiaij *aij = (Matimpiaij *) aijin->data;
490   int        ierr;
491 
492   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
493   /* do nondiagonal part */
494   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
495   /* send it on its way */
496   ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues,
497                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
498   /* do local part */
499   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr);
500   /* receive remote parts: note this assumes the values are not actually */
501   /* inserted in yy until the next line, which is true for my implementation*/
502   /* but is not perhaps always true. */
503   ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse,
504                          aij->Mvctx); CHKERR(ierr);
505   return 0;
506 }
507 
508 /*
509   This only works correctly for square matrices where the subblock A->A is the
510    diagonal block
511 */
512 static int MatiAIJgetdiag(Mat Ain,Vec v)
513 {
514   Matimpiaij *A = (Matimpiaij *) Ain->data;
515   if (!A->assembled) SETERR(1,"MatiAIJgetdiag: must assmble matrix first");
516   return MatGetDiagonal(A->A,v);
517 }
518 
519 static int MatiAIJdestroy(PetscObject obj)
520 {
521   Mat        mat = (Mat) obj;
522   Matimpiaij *aij = (Matimpiaij *) mat->data;
523   int        ierr;
524   FREE(aij->rowners);
525   ierr = MatDestroy(aij->A); CHKERR(ierr);
526   ierr = MatDestroy(aij->B); CHKERR(ierr);
527   if (aij->colmap) FREE(aij->colmap);
528   if (aij->garray) FREE(aij->garray);
529   if (aij->lvec) VecDestroy(aij->lvec);
530   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
531   FREE(aij); PETSCHEADERDESTROY(mat);
532   return 0;
533 }
534 
535 static int MatiView(PetscObject obj,Viewer viewer)
536 {
537   Mat        mat = (Mat) obj;
538   Matimpiaij *aij = (Matimpiaij *) mat->data;
539   int        ierr;
540 
541   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
542   MPE_Seq_begin(mat->comm,1);
543     printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
544           aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
545           aij->cend);
546     ierr = MatView(aij->A,0); CHKERR(ierr);
547     ierr = MatView(aij->B,0); CHKERR(ierr);
548     fflush(stdout);
549   MPE_Seq_end(mat->comm,1);
550   return 0;
551 }
552 
553 extern int MatiAIJmarkdiag(Matiaij  *);
554 /*
555     This has to provide several versions.
556 
557      1) per sequential
558      2) a) use only local smoothing updating outer values only once.
559         b) local smoothing updating outer values each inner iteration
560      3) color updating out values betwen colors.
561 */
562 static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift,
563                         int its,Vec xx)
564 {
565   Matimpiaij *mat = (Matimpiaij *) matin->data;
566   Mat        AA = mat->A, BB = mat->B;
567   Matiaij    *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data;
568   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
569   int        ierr,*idx, *diag;
570   int        n = mat->n, m = mat->m, i;
571   Vec        tt;
572 
573   if (!mat->assembled) SETERR(1,"MatiAIJRelax: must assmble matrix first");
574 
575   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
576   xs = x -1; /* shift by one for index start of 1 */
577   ls--;
578   if (!A->diag) {if ((ierr = MatiAIJmarkdiag(A))) return ierr;}
579   diag = A->diag;
580   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
581     SETERR(1,"That option not yet support for parallel AIJ matrices");
582   }
583   if (flag & SOR_EISENSTAT) {
584     /* Let  A = L + U + D; where L is lower trianglar,
585     U is upper triangular, E is diagonal; This routine applies
586 
587             (L + E)^{-1} A (U + E)^{-1}
588 
589     to a vector efficiently using Eisenstat's trick. This is for
590     the case of SSOR preconditioner, so E is D/omega where omega
591     is the relaxation factor.
592     */
593     ierr = VecCreate(xx,&tt); CHKERR(ierr);
594     VecGetArray(tt,&t);
595     scale = (2.0/omega) - 1.0;
596     /*  x = (E + U)^{-1} b */
597     VecSet(&zero,mat->lvec);
598     ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
599                               mat->Mvctx); CHKERR(ierr);
600     for ( i=m-1; i>-1; i-- ) {
601       n    = A->i[i+1] - diag[i] - 1;
602       idx  = A->j + diag[i];
603       v    = A->a + diag[i];
604       sum  = b[i];
605       SPARSEDENSEMDOT(sum,xs,v,idx,n);
606       d    = shift + A->a[diag[i]-1];
607       n    = B->i[i+1] - B->i[i];
608       idx  = B->j + B->i[i] - 1;
609       v    = B->a + B->i[i] - 1;
610       SPARSEDENSEMDOT(sum,ls,v,idx,n);
611       x[i] = omega*(sum/d);
612     }
613     ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
614                             mat->Mvctx); CHKERR(ierr);
615 
616     /*  t = b - (2*E - D)x */
617     v = A->a;
618     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }
619 
620     /*  t = (E + L)^{-1}t */
621     ts = t - 1; /* shifted by one for index start of a or mat->j*/
622     diag = A->diag;
623     VecSet(&zero,mat->lvec);
624     ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown,
625                                                  mat->Mvctx); CHKERR(ierr);
626     for ( i=0; i<m; i++ ) {
627       n    = diag[i] - A->i[i];
628       idx  = A->j + A->i[i] - 1;
629       v    = A->a + A->i[i] - 1;
630       sum  = t[i];
631       SPARSEDENSEMDOT(sum,ts,v,idx,n);
632       d    = shift + A->a[diag[i]-1];
633       n    = B->i[i+1] - B->i[i];
634       idx  = B->j + B->i[i] - 1;
635       v    = B->a + B->i[i] - 1;
636       SPARSEDENSEMDOT(sum,ls,v,idx,n);
637       t[i] = omega*(sum/d);
638     }
639     ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown,
640                                                     mat->Mvctx); CHKERR(ierr);
641     /*  x = x + t */
642     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
643     VecDestroy(tt);
644     return 0;
645   }
646 
647 
648   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
649     if (flag & SOR_ZERO_INITIAL_GUESS) {
650       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
651     }
652     else {
653       ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
654       CHKERR(ierr);
655       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
656       CHKERR(ierr);
657     }
658     while (its--) {
659       /* go down through the rows */
660       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
661                               mat->Mvctx); CHKERR(ierr);
662       for ( i=0; i<m; i++ ) {
663         n    = A->i[i+1] - A->i[i];
664         idx  = A->j + A->i[i] - 1;
665         v    = A->a + A->i[i] - 1;
666         sum  = b[i];
667         SPARSEDENSEMDOT(sum,xs,v,idx,n);
668         d    = shift + A->a[diag[i]-1];
669         n    = B->i[i+1] - B->i[i];
670         idx  = B->j + B->i[i] - 1;
671         v    = B->a + B->i[i] - 1;
672         SPARSEDENSEMDOT(sum,ls,v,idx,n);
673         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
674       }
675       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
676                             mat->Mvctx); CHKERR(ierr);
677       /* come up through the rows */
678       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
679                               mat->Mvctx); CHKERR(ierr);
680       for ( i=m-1; i>-1; i-- ) {
681         n    = A->i[i+1] - A->i[i];
682         idx  = A->j + A->i[i] - 1;
683         v    = A->a + A->i[i] - 1;
684         sum  = b[i];
685         SPARSEDENSEMDOT(sum,xs,v,idx,n);
686         d    = shift + A->a[diag[i]-1];
687         n    = B->i[i+1] - B->i[i];
688         idx  = B->j + B->i[i] - 1;
689         v    = B->a + B->i[i] - 1;
690         SPARSEDENSEMDOT(sum,ls,v,idx,n);
691         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
692       }
693       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
694                             mat->Mvctx); CHKERR(ierr);
695     }
696   }
697   else if (flag & SOR_FORWARD_SWEEP){
698     if (flag & SOR_ZERO_INITIAL_GUESS) {
699       VecSet(&zero,mat->lvec);
700       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
701                               mat->Mvctx); CHKERR(ierr);
702       for ( i=0; i<m; i++ ) {
703         n    = diag[i] - A->i[i];
704         idx  = A->j + A->i[i] - 1;
705         v    = A->a + A->i[i] - 1;
706         sum  = b[i];
707         SPARSEDENSEMDOT(sum,xs,v,idx,n);
708         d    = shift + A->a[diag[i]-1];
709         n    = B->i[i+1] - B->i[i];
710         idx  = B->j + B->i[i] - 1;
711         v    = B->a + B->i[i] - 1;
712         SPARSEDENSEMDOT(sum,ls,v,idx,n);
713         x[i] = omega*(sum/d);
714       }
715       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
716                             mat->Mvctx); CHKERR(ierr);
717       its--;
718     }
719     while (its--) {
720       ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
721       CHKERR(ierr);
722       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
723       CHKERR(ierr);
724       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
725                               mat->Mvctx); CHKERR(ierr);
726       for ( i=0; i<m; i++ ) {
727         n    = A->i[i+1] - A->i[i];
728         idx  = A->j + A->i[i] - 1;
729         v    = A->a + A->i[i] - 1;
730         sum  = b[i];
731         SPARSEDENSEMDOT(sum,xs,v,idx,n);
732         d    = shift + A->a[diag[i]-1];
733         n    = B->i[i+1] - B->i[i];
734         idx  = B->j + B->i[i] - 1;
735         v    = B->a + B->i[i] - 1;
736         SPARSEDENSEMDOT(sum,ls,v,idx,n);
737         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
738       }
739       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
740                             mat->Mvctx); CHKERR(ierr);
741     }
742   }
743   else if (flag & SOR_BACKWARD_SWEEP){
744     if (flag & SOR_ZERO_INITIAL_GUESS) {
745       VecSet(&zero,mat->lvec);
746       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
747                               mat->Mvctx); CHKERR(ierr);
748       for ( i=m-1; i>-1; i-- ) {
749         n    = A->i[i+1] - diag[i] - 1;
750         idx  = A->j + diag[i];
751         v    = A->a + diag[i];
752         sum  = b[i];
753         SPARSEDENSEMDOT(sum,xs,v,idx,n);
754         d    = shift + A->a[diag[i]-1];
755         n    = B->i[i+1] - B->i[i];
756         idx  = B->j + B->i[i] - 1;
757         v    = B->a + B->i[i] - 1;
758         SPARSEDENSEMDOT(sum,ls,v,idx,n);
759         x[i] = omega*(sum/d);
760       }
761       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
762                             mat->Mvctx); CHKERR(ierr);
763       its--;
764     }
765     while (its--) {
766       ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown,
767                             mat->Mvctx); CHKERR(ierr);
768       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown,
769                             mat->Mvctx); CHKERR(ierr);
770       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
771                               mat->Mvctx); CHKERR(ierr);
772       for ( i=m-1; i>-1; i-- ) {
773         n    = A->i[i+1] - A->i[i];
774         idx  = A->j + A->i[i] - 1;
775         v    = A->a + A->i[i] - 1;
776         sum  = b[i];
777         SPARSEDENSEMDOT(sum,xs,v,idx,n);
778         d    = shift + A->a[diag[i]-1];
779         n    = B->i[i+1] - B->i[i];
780         idx  = B->j + B->i[i] - 1;
781         v    = B->a + B->i[i] - 1;
782         SPARSEDENSEMDOT(sum,ls,v,idx,n);
783         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
784       }
785       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
786                             mat->Mvctx); CHKERR(ierr);
787     }
788   }
789   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
790     if (flag & SOR_ZERO_INITIAL_GUESS) {
791       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
792     }
793     ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
794     CHKERR(ierr);
795     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
796     CHKERR(ierr);
797     while (its--) {
798       /* go down through the rows */
799       for ( i=0; i<m; i++ ) {
800         n    = A->i[i+1] - A->i[i];
801         idx  = A->j + A->i[i] - 1;
802         v    = A->a + A->i[i] - 1;
803         sum  = b[i];
804         SPARSEDENSEMDOT(sum,xs,v,idx,n);
805         d    = shift + A->a[diag[i]-1];
806         n    = B->i[i+1] - B->i[i];
807         idx  = B->j + B->i[i] - 1;
808         v    = B->a + B->i[i] - 1;
809         SPARSEDENSEMDOT(sum,ls,v,idx,n);
810         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
811       }
812       /* come up through the rows */
813       for ( i=m-1; i>-1; i-- ) {
814         n    = A->i[i+1] - A->i[i];
815         idx  = A->j + A->i[i] - 1;
816         v    = A->a + A->i[i] - 1;
817         sum  = b[i];
818         SPARSEDENSEMDOT(sum,xs,v,idx,n);
819         d    = shift + A->a[diag[i]-1];
820         n    = B->i[i+1] - B->i[i];
821         idx  = B->j + B->i[i] - 1;
822         v    = B->a + B->i[i] - 1;
823         SPARSEDENSEMDOT(sum,ls,v,idx,n);
824         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
825       }
826     }
827   }
828   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
829     if (flag & SOR_ZERO_INITIAL_GUESS) {
830       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
831     }
832     ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
833     CHKERR(ierr);
834     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
835     CHKERR(ierr);
836     while (its--) {
837       for ( i=0; i<m; i++ ) {
838         n    = A->i[i+1] - A->i[i];
839         idx  = A->j + A->i[i] - 1;
840         v    = A->a + A->i[i] - 1;
841         sum  = b[i];
842         SPARSEDENSEMDOT(sum,xs,v,idx,n);
843         d    = shift + A->a[diag[i]-1];
844         n    = B->i[i+1] - B->i[i];
845         idx  = B->j + B->i[i] - 1;
846         v    = B->a + B->i[i] - 1;
847         SPARSEDENSEMDOT(sum,ls,v,idx,n);
848         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
849       }
850     }
851   }
852   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
853     if (flag & SOR_ZERO_INITIAL_GUESS) {
854       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
855     }
856     ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,
857                             mat->Mvctx); CHKERR(ierr);
858     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,
859                             mat->Mvctx); CHKERR(ierr);
860     while (its--) {
861       for ( i=m-1; i>-1; i-- ) {
862         n    = A->i[i+1] - A->i[i];
863         idx  = A->j + A->i[i] - 1;
864         v    = A->a + A->i[i] - 1;
865         sum  = b[i];
866         SPARSEDENSEMDOT(sum,xs,v,idx,n);
867         d    = shift + A->a[diag[i]-1];
868         n    = B->i[i+1] - B->i[i];
869         idx  = B->j + B->i[i] - 1;
870         v    = B->a + B->i[i] - 1;
871         SPARSEDENSEMDOT(sum,ls,v,idx,n);
872         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
873       }
874     }
875   }
876   return 0;
877 }
878 static int MatiAIJinsopt(Mat aijin,int op)
879 {
880   Matimpiaij *aij = (Matimpiaij *) aijin->data;
881 
882   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
883     MatSetOption(aij->A,op);
884     MatSetOption(aij->B,op);
885   }
886   else if (op == YES_NEW_NONZERO_LOCATIONS) {
887     MatSetOption(aij->A,op);
888     MatSetOption(aij->B,op);
889   }
890   else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
891   return 0;
892 }
893 
894 static int MatiAIJsize(Mat matin,int *m,int *n)
895 {
896   Matimpiaij *mat = (Matimpiaij *) matin->data;
897   *m = mat->M; *n = mat->N;
898   return 0;
899 }
900 
901 static int MatiAIJlocalsize(Mat matin,int *m,int *n)
902 {
903   Matimpiaij *mat = (Matimpiaij *) matin->data;
904   *m = mat->m; *n = mat->n;
905   return 0;
906 }
907 
908 static int MatiAIJrange(Mat matin,int *m,int *n)
909 {
910   Matimpiaij *mat = (Matimpiaij *) matin->data;
911   *m = mat->rstart; *n = mat->rend;
912   return 0;
913 }
914 
915 static int MatiCopy(Mat,Mat *);
916 
917 /* -------------------------------------------------------------------*/
918 static struct _MatOps MatOps = {MatiAIJInsertValues,
919        0, 0,
920        MatiAIJMult,MatiAIJMultadd,MatiAIJMultTrans,MatiAIJMultTransadd,
921        0,0,0,0,
922        0,0,
923        MatiAIJrelax,
924        0,
925        0,0,0,
926        MatiCopy,
927        MatiAIJgetdiag,0,0,
928        MatiAIJBeginAssemble,MatiAIJEndAssemble,
929        0,
930        MatiAIJinsopt,MatiZero,MatiZerorows,0,
931        0,0,0,0,
932        MatiAIJsize,MatiAIJlocalsize,MatiAIJrange };
933 
934 
935 
936 /*@
937 
938       MatCreateMPIAIJ - Creates a sparse parallel matrix
939                                  in AIJ format.
940 
941   Input Parameters:
942 .   comm - MPI communicator
943 .   m,n - number of local rows and columns (or -1 to have calculated)
944 .   M,N - global rows and columns (or -1 to have calculated)
945 .   d_nz - total number nonzeros in diagonal portion of matrix
946 .   d_nzz - number of nonzeros per row in diagonal portion of matrix or null
947 .           You must leave room for the diagonal entry even if it is zero.
948 .   o_nz - total number nonzeros in off-diagonal portion of matrix
949 .   o_nzz - number of nonzeros per row in off-diagonal portion of matrix
950 .           or null. You must have at least one nonzero per row.
951 
952   Output parameters:
953 .  newmat - the matrix
954 
955   Keywords: matrix, aij, compressed row, sparse, parallel
956 @*/
957 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
958                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
959 {
960   Mat          mat;
961   Matimpiaij   *aij;
962   int          ierr, i,sum[2],work[2];
963   *newmat         = 0;
964   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATAIJMPI,comm);
965   mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
966   mat->ops        = &MatOps;
967   mat->destroy    = MatiAIJdestroy;
968   mat->view       = MatiView;
969   mat->factor     = 0;
970   mat->row        = 0;
971   mat->col        = 0;
972 
973   mat->comm       = comm;
974   aij->insertmode = NotSetValues;
975   MPI_Comm_rank(comm,&aij->mytid);
976   MPI_Comm_size(comm,&aij->numtids);
977 
978   if (M == -1 || N == -1) {
979     work[0] = m; work[1] = n;
980     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
981     if (M == -1) M = sum[0];
982     if (N == -1) N = sum[1];
983   }
984   if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
985   if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
986   aij->m       = m;
987   aij->n       = n;
988   aij->N       = N;
989   aij->M       = M;
990 
991   /* build local table of row and column ownerships */
992   aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
993   CHKPTR(aij->rowners);
994   aij->cowners = aij->rowners + aij->numtids + 1;
995   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
996   aij->rowners[0] = 0;
997   for ( i=2; i<=aij->numtids; i++ ) {
998     aij->rowners[i] += aij->rowners[i-1];
999   }
1000   aij->rstart = aij->rowners[aij->mytid];
1001   aij->rend   = aij->rowners[aij->mytid+1];
1002   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
1003   aij->cowners[0] = 0;
1004   for ( i=2; i<=aij->numtids; i++ ) {
1005     aij->cowners[i] += aij->cowners[i-1];
1006   }
1007   aij->cstart = aij->cowners[aij->mytid];
1008   aij->cend   = aij->cowners[aij->mytid+1];
1009 
1010 
1011   ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
1012   ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);
1013 
1014   /* build cache for off array entries formed */
1015   aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */
1016   aij->stash.n    = 0;
1017   aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
1018                             sizeof(Scalar))); CHKPTR(aij->stash.array);
1019   aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
1020   aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);
1021   aij->colmap    = 0;
1022   aij->garray    = 0;
1023 
1024   /* stuff used for matrix vector multiply */
1025   aij->lvec      = 0;
1026   aij->Mvctx     = 0;
1027   aij->assembled = 0;
1028 
1029   *newmat = mat;
1030   return 0;
1031 }
1032 
1033 static int MatiCopy(Mat matin,Mat *newmat)
1034 {
1035   Mat        mat;
1036   Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data;
1037   int        ierr;
1038   *newmat      = 0;
1039 
1040   if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
1041   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATAIJMPI,matin->comm);
1042   mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
1043   mat->ops        = &MatOps;
1044   mat->destroy    = MatiAIJdestroy;
1045   mat->view       = MatiView;
1046   mat->factor     = matin->factor;
1047   mat->row        = 0;
1048   mat->col        = 0;
1049 
1050   aij->m          = oldmat->m;
1051   aij->n          = oldmat->n;
1052   aij->M          = oldmat->M;
1053   aij->N          = oldmat->N;
1054 
1055   aij->assembled  = 1;
1056   aij->rstart     = oldmat->rstart;
1057   aij->rend       = oldmat->rend;
1058   aij->cstart     = oldmat->cstart;
1059   aij->cend       = oldmat->cend;
1060   aij->numtids    = oldmat->numtids;
1061   aij->mytid      = oldmat->mytid;
1062   aij->insertmode = NotSetValues;
1063 
1064   aij->rowners    = (int *) MALLOC( (aij->numtids+1)*sizeof(int) );
1065   CHKPTR(aij->rowners);
1066   MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1067   aij->stash.nmax = 0;
1068   aij->stash.n    = 0;
1069   aij->stash.array= 0;
1070   aij->colmap     = 0;
1071   aij->garray     = 0;
1072   mat->comm       = matin->comm;
1073 
1074   ierr =  VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr);
1075   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr);
1076   ierr =  MatCopy(oldmat->A,&aij->A); CHKERR(ierr);
1077   ierr =  MatCopy(oldmat->B,&aij->B); CHKERR(ierr);
1078   *newmat = mat;
1079   return 0;
1080 }
1081