xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 0f5bd95cca42d693ada6d048329ab39533680180)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpidense.c,v 1.127 1999/10/04 18:51:01 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 EXTERN_C_BEGIN
13 #undef __FUNC__
14 #define __FUNC__ "MatGetDiagonalBlock_MPIDense"
15 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
16 {
17   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
18   int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
19   Scalar       *array;
20   MPI_Comm     comm;
21 
22   PetscFunctionBegin;
23   if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");
24 
25   /* The reuse aspect is not implemented efficiently */
26   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
27 
28   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
29   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
30   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
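  /* the local block is an m x N column-major array, so the m x m diagonal
     block (global columns rstart..rstart+m-1) begins at offset m*rstart and
     its columns are contiguous; the sequential matrix created below uses
     that storage in place */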
31   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
32   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
33   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
34   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
35 
36   *iscopy = PETSC_TRUE;
37   PetscFunctionReturn(0);
38 }
39 EXTERN_C_END
40 
41 extern int MatSetUpMultiply_MPIDense(Mat);
42 
43 #undef __FUNC__
44 #define __FUNC__ "MatSetValues_MPIDense"
45 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
46 {
47   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
48   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
49   int          roworiented = A->roworiented;
50 
51   PetscFunctionBegin;
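  /* locally owned rows are inserted directly into the sequential block;
     rows owned elsewhere are stashed (unless MAT_IGNORE_OFF_PROC_ENTRIES
     was set) and communicated at assembly time.  Note the two layouts of v:
     row oriented, entry (i,j) is v[i*n+j]; column oriented, it is v[i+j*m] */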
52   for ( i=0; i<m; i++ ) {
53     if (idxm[i] < 0) continue;
54     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
55     if (idxm[i] >= rstart && idxm[i] < rend) {
56       row = idxm[i] - rstart;
57       if (roworiented) {
58         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
59       } else {
60         for ( j=0; j<n; j++ ) {
61           if (idxn[j] < 0) continue;
62           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
63           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
64         }
65       }
66     } else {
67       if ( !A->donotstash) {
68         if (roworiented) {
69           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
70         } else {
71           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
72         }
73       }
74     }
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 #undef __FUNC__
80 #define __FUNC__ "MatGetValues_MPIDense"
81 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
82 {
83   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
84   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
85 
86   PetscFunctionBegin;
87   for ( i=0; i<m; i++ ) {
88     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
89     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
90     if (idxm[i] >= rstart && idxm[i] < rend) {
91       row = idxm[i] - rstart;
92       for ( j=0; j<n; j++ ) {
93         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
94         if (idxn[j] >= mdn->N) {
95           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
96         }
97         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
98       }
99     } else {
100       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
101     }
102   }
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNC__
107 #define __FUNC__ "MatGetArray_MPIDense"
108 int MatGetArray_MPIDense(Mat A,Scalar **array)
109 {
110   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
111   int          ierr;
112 
113   PetscFunctionBegin;
114   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
115   PetscFunctionReturn(0);
116 }
117 
118 #undef __FUNC__
119 #define __FUNC__ "MatGetSubMatrix_MPIDense"
120 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
121 {
122   Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
123   Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
124   int          i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank;
125   Scalar       *av, *bv, *v = lmat->v;
126   Mat          newmat;
127 
128   PetscFunctionBegin;
129   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
130   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
131   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
132   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
133   ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);
134 
135   /* No parallel redistribution is currently supported! We should really check each
136      index set to confirm that it is OK. Currently only submatrices with the same row
137      partitioning as the original matrix are supported! */
138 
139   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
140   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
141 
142   /* Check submatrix call */
143   if (scall == MAT_REUSE_MATRIX) {
144     /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
145     /* Really need to test rows and column sizes! */
146     newmat = *B;
147   } else {
148     /* Create and fill new matrix */
149     ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
150   }
151 
152   /* Now extract the data pointers and do the copy, column at a time */
153   newmatd = (Mat_MPIDense *) newmat->data;
154   bv = ((Mat_SeqDense *)newmatd->A->data)->v;
155 
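  /* both local blocks are column major: column icol[i] of the original
     block starts at v + nlrows*icol[i]; the requested rows are gathered
     from it in order, filling the new block one column at a time */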
156   for ( i=0; i<ncols; i++ ) {
157     av = v + nlrows*icol[i];
158     for (j=0; j<nrows; j++ ) {
159       *bv++ = av[irow[j] - rstart];
160     }
161   }
162 
163   /* Assemble the matrices so that the correct flags are set */
164   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
165   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
166 
167   /* Free work space */
168   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
169   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
170   *B = newmat;
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNC__
175 #define __FUNC__ "MatRestoreArray_MPIDense"
176 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
177 {
178   PetscFunctionBegin;
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNC__
183 #define __FUNC__ "MatAssemblyBegin_MPIDense"
184 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
185 {
186   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
187   MPI_Comm     comm = mat->comm;
188   int          ierr,nstash,reallocs;
189   InsertMode   addv;
190 
191   PetscFunctionBegin;
192   /* make sure all processors are either in INSERTMODE or ADDMODE */
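  /* this works because ADD_VALUES and INSERT_VALUES are distinct nonzero
     values, so the bitwise OR over all processes equals
     (ADD_VALUES|INSERT_VALUES) exactly when both modes were used */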
193   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
194   if (addv == (ADD_VALUES|INSERT_VALUES)) {
195     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
196   }
197   mat->insertmode = addv; /* in case this processor had no cache */
198 
199   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
200   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
201   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
202            nstash,reallocs);
203   PetscFunctionReturn(0);
204 }
205 
206 #undef __FUNC__
207 #define __FUNC__ "MatAssemblyEnd_MPIDense"
208 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
209 {
210   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
211   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
212   Scalar       *val;
213   InsertMode   addv=mat->insertmode;
214 
215   PetscFunctionBegin;
216   /*  wait on receives */
217   while (1) {
218     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
219     if (!flg) break;
220 
221     for ( i=0; i<n; ) {
222       /* Now identify the consecutive vals belonging to the same row */
223       for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
224       if (j < n) ncols = j-i;
225       else       ncols = n-i;
226       /* Now assemble all these values with a single function call */
227       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
228       i = j;
229     }
230   }
231   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
232 
233   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
234   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
235 
236   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
237     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
238   }
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNC__
243 #define __FUNC__ "MatZeroEntries_MPIDense"
244 int MatZeroEntries_MPIDense(Mat A)
245 {
246   int          ierr;
247   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
248 
249   PetscFunctionBegin;
250   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNC__
255 #define __FUNC__ "MatGetBlockSize_MPIDense"
256 int MatGetBlockSize_MPIDense(Mat A,int *bs)
257 {
258   PetscFunctionBegin;
259   *bs = 1;
260   PetscFunctionReturn(0);
261 }
262 
263 /* This code does not handle the diagonal entries correctly unless the
264    matrix is square and the column and row ownerships are identical.
265    This is a BUG. The only way to fix it seems to be to access
266    mdn->A and mdn->B directly and not through the MatZeroRows()
267    routine.
268 */
269 #undef __FUNC__
270 #define __FUNC__ "MatZeroRows_MPIDense"
271 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
272 {
273   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
274   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
275   int            *procs,*nprocs,j,found,idx,nsends,*work;
276   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
277   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
278   int            *lens,imdex,*lrows,*values;
279   MPI_Comm       comm = A->comm;
280   MPI_Request    *send_waits,*recv_waits;
281   MPI_Status     recv_status,*send_status;
282   IS             istmp;
283 
284   PetscFunctionBegin;
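  /*
     Each process may list rows it does not own, so the row numbers are first
     routed to their owners:
       1) count how many of the given rows belong to each process;
       2) one MPI_Allreduce (MPI_SUM over the 0/1 flags) tells each process
          how many messages it will receive, a second one (MPI_MAX over the
          counts) bounds the longest message;
       3) receives are posted, the row numbers are packed by owner and sent;
       4) MPI_Waitany unpacks the arrivals, and the locally owned rows are
          zeroed via MatZeroRows() on the sequential block.
  */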
285   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
286   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
287 
288   /*  first count number of contributors to each processor */
289   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
290   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
291   procs  = nprocs + size;
292   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
293   for ( i=0; i<N; i++ ) {
294     idx = rows[i];
295     found = 0;
296     for ( j=0; j<size; j++ ) {
297       if (idx >= owners[j] && idx < owners[j+1]) {
298         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
299       }
300     }
301     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
302   }
303   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
304 
305   /* inform other processors of number of messages and max length*/
306   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
307   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
308   nrecvs = work[rank];
309   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
310   nmax   = work[rank];
311   ierr = PetscFree(work);CHKERRQ(ierr);
312 
313   /* post receives:   */
314   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
315   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
316   for ( i=0; i<nrecvs; i++ ) {
317     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
318   }
319 
320   /* do sends:
321       1) starts[i] gives the starting index in svalues for stuff going to
322          the ith processor
323   */
324   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
325   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
326   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
327   starts[0]  = 0;
328   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
329   for ( i=0; i<N; i++ ) {
330     svalues[starts[owner[i]]++] = rows[i];
331   }
332   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
333 
334   starts[0] = 0;
335   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
336   count = 0;
337   for ( i=0; i<size; i++ ) {
338     if (procs[i]) {
339       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
340     }
341   }
342   ierr = PetscFree(starts);CHKERRQ(ierr);
343 
344   base = owners[rank];
345 
346   /*  wait on receives */
347   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
348   source = lens + nrecvs;
349   count  = nrecvs; slen = 0;
350   while (count) {
351     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
352     /* unpack receives into our local space */
353     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
354     source[imdex]  = recv_status.MPI_SOURCE;
355     lens[imdex]  = n;
356     slen += n;
357     count--;
358   }
359   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
360 
361   /* move the data into the send scatter */
362   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
363   count = 0;
364   for ( i=0; i<nrecvs; i++ ) {
365     values = rvalues + i*nmax;
366     for ( j=0; j<lens[i]; j++ ) {
367       lrows[count++] = values[j] - base;
368     }
369   }
370   ierr = PetscFree(rvalues);CHKERRQ(ierr);
371   ierr = PetscFree(lens);CHKERRQ(ierr);
372   ierr = PetscFree(owner);CHKERRQ(ierr);
373   ierr = PetscFree(nprocs);CHKERRQ(ierr);
374 
375   /* actually zap the local rows */
376   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
377   PLogObjectParent(A,istmp);
378   ierr = PetscFree(lrows);CHKERRQ(ierr);
379   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
380   ierr = ISDestroy(istmp);CHKERRQ(ierr);
381 
382   /* wait on sends */
383   if (nsends) {
384     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
385     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
386     ierr = PetscFree(send_status);CHKERRQ(ierr);
387   }
388   ierr = PetscFree(send_waits);CHKERRQ(ierr);
389   ierr = PetscFree(svalues);CHKERRQ(ierr);
390 
391   PetscFunctionReturn(0);
392 }
393 
394 #undef __FUNC__
395 #define __FUNC__ "MatMult_MPIDense"
396 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
397 {
398   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
399   int          ierr;
400 
401   PetscFunctionBegin;
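  /* every process stores complete rows of the matrix, so all of xx is
     gathered into the work vector lvec (length N) before the purely local
     product with the sequential block */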
402   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
403   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
404   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNC__
409 #define __FUNC__ "MatMultAdd_MPIDense"
410 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
411 {
412   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
413   int          ierr;
414 
415   PetscFunctionBegin;
416   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
417   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
418   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 #undef __FUNC__
423 #define __FUNC__ "MatMultTrans_MPIDense"
424 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
425 {
426   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
427   int          ierr;
428   Scalar       zero = 0.0;
429 
430   PetscFunctionBegin;
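  /* the transpose product reverses the communication of MatMult: each
     process computes its local contribution A_loc^T * xx into lvec, and the
     reverse scatter accumulates (ADD_VALUES) the overlapping pieces into yy */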
431   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
432   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
433   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
434   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
435   PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNC__
439 #define __FUNC__ "MatMultTransAdd_MPIDense"
440 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
441 {
442   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
443   int          ierr;
444 
445   PetscFunctionBegin;
446   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
447   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
448   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
449   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 #undef __FUNC__
454 #define __FUNC__ "MatGetDiagonal_MPIDense"
455 int MatGetDiagonal_MPIDense(Mat A,Vec v)
456 {
457   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
458   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
459   int          ierr, len, i, n, m = a->m, radd;
460   Scalar       *x, zero = 0.0;
461 
462   PetscFunctionBegin;
463   ierr = VecSet(&zero,v);CHKERRQ(ierr);
464   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
465   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
466   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
467   len = PetscMin(aloc->m,aloc->n);
468   radd = a->rstart*m;
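  /* aloc->v is column major with leading dimension m; local row i holds the
     global diagonal entry in global column rstart+i, i.e. at offset
     (rstart+i)*m + i = radd + i*m + i */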
469   for ( i=0; i<len; i++ ) {
470     x[i] = aloc->v[radd + i*m + i];
471   }
472   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNC__
477 #define __FUNC__ "MatDestroy_MPIDense"
478 int MatDestroy_MPIDense(Mat mat)
479 {
480   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
481   int          ierr;
482 
483   PetscFunctionBegin;
484 
485   if (mat->mapping) {
486     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
487   }
488   if (mat->bmapping) {
489     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
490   }
491 #if defined(PETSC_USE_LOG)
492   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
493 #endif
494   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
495   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
496   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
497   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
498   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
499   if (mdn->factor) {
500     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
501     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
502     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
503     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
504   }
505   ierr = PetscFree(mdn);CHKERRQ(ierr);
506   if (mat->rmap) {
507     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
508   }
509   if (mat->cmap) {
510     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
511   }
512   PLogObjectDestroy(mat);
513   PetscHeaderDestroy(mat);
514   PetscFunctionReturn(0);
515 }
516 
517 #undef __FUNC__
518 #define __FUNC__ "MatView_MPIDense_Binary"
519 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
520 {
521   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
522   int          ierr;
523 
524   PetscFunctionBegin;
525   if (mdn->size == 1) {
526     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
527   }
528   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
529   PetscFunctionReturn(0);
530 }
531 
532 #undef __FUNC__
533 #define __FUNC__ "MatView_MPIDense_ASCII"
534 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
535 {
536   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
537   int          ierr, format, size = mdn->size, rank = mdn->rank;
538   FILE         *fd;
539   ViewerType   vtype;
540 
541   PetscFunctionBegin;
542   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
543   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
544   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
545   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
546     MatInfo info;
547     ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
548     ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
549       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
550          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
551       fflush(fd);
552     ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
553     ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
554     PetscFunctionReturn(0);
555   } else if (format == VIEWER_FORMAT_ASCII_INFO) {
556     PetscFunctionReturn(0);
557   }
558 
559   if (size == 1) {
560     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
561   } else {
562     /* assemble the entire matrix onto first processor. */
563     Mat          A;
564     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
565     Scalar       *vals;
566     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
567 
568     if (!rank) {
569       ierr = MatCreateMPIDense(mat->comm,M,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
570     } else {
571       ierr = MatCreateMPIDense(mat->comm,0,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
572     }
573     PLogObjectParent(mat,A);
574 
575     /* Copy the matrix ... This isn't the most efficient means,
576        but it's quick for now */
577     row = mdn->rstart; m = Amdn->m;
578     for ( i=0; i<m; i++ ) {
579       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
580       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
581       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
582       row++;
583     }
584 
585     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
586     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
587     if (!rank) {
588       ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
589     }
590     ierr = MatDestroy(A);CHKERRQ(ierr);
591   }
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNC__
596 #define __FUNC__ "MatView_MPIDense"
597 int MatView_MPIDense(Mat mat,Viewer viewer)
598 {
599   int ierr;
600   int isascii,isbinary;
601 
602   PetscFunctionBegin;
603 
604   isascii  = PetscTypeCompare(viewer,ASCII_VIEWER);
605   isbinary = PetscTypeCompare(viewer,BINARY_VIEWER);
606 
607   if (isascii) {
608     ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
609   } else if (isbinary) {
610     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
611   } else {
612     SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
613   }
614   PetscFunctionReturn(0);
615 }
616 
617 #undef __FUNC__
618 #define __FUNC__ "MatGetInfo_MPIDense"
619 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
620 {
621   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
622   Mat          mdn = mat->A;
623   int          ierr;
624   double       isend[5], irecv[5];
625 
626   PetscFunctionBegin;
627   info->rows_global    = (double)mat->M;
628   info->columns_global = (double)mat->N;
629   info->rows_local     = (double)mat->m;
630   info->columns_local  = (double)mat->N;
631   info->block_size     = 1.0;
632   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
633   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
634   isend[3] = info->memory;  isend[4] = info->mallocs;
635   if (flag == MAT_LOCAL) {
636     info->nz_used      = isend[0];
637     info->nz_allocated = isend[1];
638     info->nz_unneeded  = isend[2];
639     info->memory       = isend[3];
640     info->mallocs      = isend[4];
641   } else if (flag == MAT_GLOBAL_MAX) {
642     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
643     info->nz_used      = irecv[0];
644     info->nz_allocated = irecv[1];
645     info->nz_unneeded  = irecv[2];
646     info->memory       = irecv[3];
647     info->mallocs      = irecv[4];
648   } else if (flag == MAT_GLOBAL_SUM) {
649     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
650     info->nz_used      = irecv[0];
651     info->nz_allocated = irecv[1];
652     info->nz_unneeded  = irecv[2];
653     info->memory       = irecv[3];
654     info->mallocs      = irecv[4];
655   }
656   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
657   info->fill_ratio_needed = 0;
658   info->factor_mallocs    = 0;
659   PetscFunctionReturn(0);
660 }
661 
662 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
663    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
664    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
665    extern int MatSolve_MPIDense(Mat,Vec,Vec);
666    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
667    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
668    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
669 
670 #undef __FUNC__
671 #define __FUNC__ "MatSetOption_MPIDense"
672 int MatSetOption_MPIDense(Mat A,MatOption op)
673 {
674   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
675 
676   PetscFunctionBegin;
677   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
678       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
679       op == MAT_NEW_NONZERO_LOCATION_ERR ||
680       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
681       op == MAT_COLUMNS_SORTED ||
682       op == MAT_COLUMNS_UNSORTED) {
683     MatSetOption(a->A,op);
684   } else if (op == MAT_ROW_ORIENTED) {
685     a->roworiented = 1;
686     MatSetOption(a->A,op);
687   } else if (op == MAT_ROWS_SORTED ||
688              op == MAT_ROWS_UNSORTED ||
689              op == MAT_SYMMETRIC ||
690              op == MAT_STRUCTURALLY_SYMMETRIC ||
691              op == MAT_YES_NEW_DIAGONALS ||
692              op == MAT_USE_HASH_TABLE) {
693     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
694   } else if (op == MAT_COLUMN_ORIENTED) {
695     a->roworiented = 0; MatSetOption(a->A,op);
696   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
697     a->donotstash = 1;
698   } else if (op == MAT_NO_NEW_DIAGONALS) {
699     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
700   } else {
701     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
702   }
703   PetscFunctionReturn(0);
704 }
705 
706 #undef __FUNC__
707 #define __FUNC__ "MatGetSize_MPIDense"
708 int MatGetSize_MPIDense(Mat A,int *m,int *n)
709 {
710   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
711 
712   PetscFunctionBegin;
713   *m = mat->M; *n = mat->N;
714   PetscFunctionReturn(0);
715 }
716 
717 #undef __FUNC__
718 #define __FUNC__ "MatGetLocalSize_MPIDense"
719 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
720 {
721   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
722 
723   PetscFunctionBegin;
724   *m = mat->m; *n = mat->N;
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNC__
729 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
730 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
731 {
732   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
733 
734   PetscFunctionBegin;
735   *m = mat->rstart; *n = mat->rend;
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNC__
740 #define __FUNC__ "MatGetRow_MPIDense"
741 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
742 {
743   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
744   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
745 
746   PetscFunctionBegin;
747   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
748   lrow = row - rstart;
749   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNC__
754 #define __FUNC__ "MatRestoreRow_MPIDense"
755 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
756 {
757   int ierr;
758 
759   PetscFunctionBegin;
760   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
761   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
762   PetscFunctionReturn(0);
763 }
764 
765 #undef __FUNC__
766 #define __FUNC__ "MatDiagonalScale_MPIDense"
767 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
768 {
769   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
770   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
771   Scalar       *l,*r,x,*v;
772   int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;
773 
774   PetscFunctionBegin;
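  /* left scaling multiplies local row i by ll[i], stepping with stride m
     through the column-major array; right scaling needs all of rr, so it is
     first gathered into lvec and then column j is scaled by r[j] */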
775   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
776   if (ll) {
777     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
778     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
779     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
780     for ( i=0; i<m; i++ ) {
781       x = l[i];
782       v = mat->v + i;
783       for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;}
784     }
785     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
786     PLogFlops(n*m);
787   }
788   if (rr) {
789     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
790     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
791     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
792     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
793     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
794     for ( i=0; i<n; i++ ) {
795       x = r[i];
796       v = mat->v + i*m;
797       for ( j=0; j<m; j++ ) { (*v++) *= x;}
798     }
799     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
800     PLogFlops(n*m);
801   }
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNC__
806 #define __FUNC__ "MatNorm_MPIDense"
807 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
808 {
809   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
810   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
811   int          ierr, i, j;
812   double       sum = 0.0;
813   Scalar       *v = mat->v;
814 
815   PetscFunctionBegin;
816   if (mdn->size == 1) {
817     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
818   } else {
819     if (type == NORM_FROBENIUS) {
820       for (i=0; i<mat->n*mat->m; i++ ) {
821 #if defined(PETSC_USE_COMPLEX)
822         sum += PetscReal(PetscConj(*v)*(*v)); v++;
823 #else
824         sum += (*v)*(*v); v++;
825 #endif
826       }
827       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
828       *norm = sqrt(*norm);
829       PLogFlops(2*mat->n*mat->m);
830     } else if (type == NORM_1) {
831       double *tmp, *tmp2;
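      /* the 1-norm is the largest absolute column sum: tmp[j] accumulates
         this process's share of column j, the sums are combined with
         MPI_Allreduce, and the maximum over the columns is taken */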
832       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
833       tmp2 = tmp + mdn->N;
834       ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
835       *norm = 0.0;
836       v = mat->v;
837       for ( j=0; j<mat->n; j++ ) {
838         for ( i=0; i<mat->m; i++ ) {
839           tmp[j] += PetscAbsScalar(*v);  v++;
840         }
841       }
842       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
843       for ( j=0; j<mdn->N; j++ ) {
844         if (tmp2[j] > *norm) *norm = tmp2[j];
845       }
846       ierr = PetscFree(tmp);CHKERRQ(ierr);
847       PLogFlops(mat->n*mat->m);
848     } else if (type == NORM_INFINITY) { /* max row norm */
849       double ntemp;
850       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
851       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
852     } else {
853       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
854     }
855   }
856   PetscFunctionReturn(0);
857 }
858 
859 #undef __FUNC__
860 #define __FUNC__ "MatTranspose_MPIDense"
861 int MatTranspose_MPIDense(Mat A,Mat *matout)
862 {
863   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
864   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
865   Mat          B;
866   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
867   int          j, i, ierr;
868   Scalar       *v;
869 
870   PetscFunctionBegin;
871   if (matout == PETSC_NULL && M != N) {
872     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
873   }
874   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
875 
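  /*
     The local block of A is column major, so column j is contiguous in
     memory and supplies entries (j, rstart..rstart+m-1) of B; since every
     process stores all N columns, j ranges over all global rows of B.
  */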
876   m = Aloc->m; n = Aloc->n; v = Aloc->v;
877   rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork); /* m indices are filled below (an n-sized allocation overflows when m > n) */
878   for (i=0; i<m; i++) rwork[i] = rstart + i;  /* global rows of A = global columns of B; the same for every column j */
879   for ( j=0; j<n; j++ ) {
880     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
881     v   += m;
882   }
883   ierr = PetscFree(rwork);CHKERRQ(ierr);
884   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
885   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
886   if (matout != PETSC_NULL) {
887     *matout = B;
888   } else {
889     PetscOps *Abops;
890     MatOps   Aops;
891 
892     /* This isn't really an in-place transpose; we free a's data structures and copy B's header into A below */
893     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
894     ierr = MatDestroy(a->A);CHKERRQ(ierr);
895     if (a->lvec) {ierr = VecDestroy(a->lvec);CHKERRQ(ierr);}
896     if (a->Mvctx) {ierr = VecScatterDestroy(a->Mvctx);CHKERRQ(ierr);}
897     ierr = PetscFree(a);CHKERRQ(ierr);
898 
899     /*
900          This is horrible, horrible code. We need to keep the
901       A pointers for the bops and ops but copy everything
902       else from C.
903     */
904     Abops   = A->bops;
905     Aops    = A->ops;
906     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
907     A->bops = Abops;
908     A->ops  = Aops;
909 
910     PetscHeaderDestroy(B);
911   }
912   PetscFunctionReturn(0);
913 }
914 
915 #include "pinclude/blaslapack.h"
916 #undef __FUNC__
917 #define __FUNC__ "MatScale_MPIDense"
918 int MatScale_MPIDense(Scalar *alpha,Mat inA)
919 {
920   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
921   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
922   int          one = 1, nz;
923 
924   PetscFunctionBegin;
925   nz = a->m*a->n;
926   BLscal_( &nz, alpha, a->v, &one );
927   PLogFlops(nz);
928   PetscFunctionReturn(0);
929 }
930 
931 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
932 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
933 
934 /* -------------------------------------------------------------------*/
935 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
936        MatGetRow_MPIDense,
937        MatRestoreRow_MPIDense,
938        MatMult_MPIDense,
939        MatMultAdd_MPIDense,
940        MatMultTrans_MPIDense,
941        MatMultTransAdd_MPIDense,
942        0,
943        0,
944        0,
945        0,
946        0,
947        0,
948        0,
949        MatTranspose_MPIDense,
950        MatGetInfo_MPIDense,0,
951        MatGetDiagonal_MPIDense,
952        MatDiagonalScale_MPIDense,
953        MatNorm_MPIDense,
954        MatAssemblyBegin_MPIDense,
955        MatAssemblyEnd_MPIDense,
956        0,
957        MatSetOption_MPIDense,
958        MatZeroEntries_MPIDense,
959        MatZeroRows_MPIDense,
960        0,
961        0,
962        0,
963        0,
964        MatGetSize_MPIDense,
965        MatGetLocalSize_MPIDense,
966        MatGetOwnershipRange_MPIDense,
967        0,
968        0,
969        MatGetArray_MPIDense,
970        MatRestoreArray_MPIDense,
971        MatDuplicate_MPIDense,
972        0,
973        0,
974        0,
975        0,
976        0,
977        MatGetSubMatrices_MPIDense,
978        0,
979        MatGetValues_MPIDense,
980        0,
981        0,
982        MatScale_MPIDense,
983        0,
984        0,
985        0,
986        MatGetBlockSize_MPIDense,
987        0,
988        0,
989        0,
990        0,
991        0,
992        0,
993        0,
994        0,
995        0,
996        MatGetSubMatrix_MPIDense,
997        0,
998        0,
999        MatGetMaps_Petsc};
1000 
1001 #undef __FUNC__
1002 #define __FUNC__ "MatCreateMPIDense"
1003 /*@C
1004    MatCreateMPIDense - Creates a parallel matrix in dense format.
1005 
1006    Collective on MPI_Comm
1007 
1008    Input Parameters:
1009 +  comm - MPI communicator
1010 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1011 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1012 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1013 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1014 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1015    to control all matrix memory allocation.
1016 
1017    Output Parameter:
1018 .  A - the matrix
1019 
1020    Notes:
1021    The dense format is fully compatible with standard Fortran 77
1022    storage by columns.
1023 
1024    The data input variable is intended primarily for Fortran programmers
1025    who wish to allocate their own matrix memory space.  Most users should
1026    set data=PETSC_NULL.
1027 
1028    The user MUST specify either the local or global matrix dimensions
1029    (possibly both).
1030 
1031    Level: intermediate
1032 
1033 .keywords: matrix, dense, parallel
1034 
1035 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1036 @*/
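
/*
   A minimal usage sketch (the sizes here are hypothetical and error handling
   is abbreviated); both local dimensions are left for PETSc to choose:

      Mat    A;
      int    ierr, row = 0, col = 0;
      Scalar v = 1.0;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                               100,100,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/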
1037 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1038 {
1039   Mat          mat;
1040   Mat_MPIDense *a;
1041   int          ierr, i,flg;
1042 
1043   PetscFunctionBegin;
1044   /* Note:  For now, when data is specified above, this assumes the user correctly
1045    allocates the local dense storage space.  We should add error checking. */
1046 
1047   *A = 0;
1048   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
1049   PLogObjectCreate(mat);
1050   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1051   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1052   mat->ops->destroy = MatDestroy_MPIDense;
1053   mat->ops->view    = MatView_MPIDense;
1054   mat->factor       = 0;
1055   mat->mapping      = 0;
1056 
1057   a->factor       = 0;
1058   mat->insertmode = NOT_SET_VALUES;
1059   ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
1060   ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);
1061 
1062   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1063   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1065   a->nvec = n;
1066 
1067   /* each row stores all columns */
1068   a->N = mat->N = N;
1069   a->M = mat->M = M;
1070   a->m = mat->m = m;
1071   a->n = mat->n = N;   /* NOTE: n == N */
1072 
1073   /* the information in the maps duplicates the information computed below, eventually
1074      we should remove the duplicate information that is not contained in the maps */
1075   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
1076   ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);
1077 
1078   /* build local table of row and column ownerships */
1079   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1080   a->cowners = a->rowners + a->size + 1;
1081   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1082   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1083   a->rowners[0] = 0;
1084   for ( i=2; i<=a->size; i++ ) {
1085     a->rowners[i] += a->rowners[i-1];
1086   }
1087   a->rstart = a->rowners[a->rank];
1088   a->rend   = a->rowners[a->rank+1];
1089   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1090   a->cowners[0] = 0;
1091   for ( i=2; i<=a->size; i++ ) {
1092     a->cowners[i] += a->cowners[i-1];
1093   }
1094 
1095   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
1096   PLogObjectParent(mat,a->A);
1097 
1098   /* build cache for off array entries formed */
1099   a->donotstash = 0;
1100   ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);
1101 
1102   /* stuff used for matrix vector multiply */
1103   a->lvec        = 0;
1104   a->Mvctx       = 0;
1105   a->roworiented = 1;
1106 
1107   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",
1108                                      "MatGetDiagonalBlock_MPIDense",
1109                                      (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1110 
1111   *A = mat;
1112   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1113   if (flg) {
1114     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1115   }
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNC__
1120 #define __FUNC__ "MatDuplicate_MPIDense"
1121 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1122 {
1123   Mat          mat;
1124   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
1125   int          ierr;
1126   FactorCtx    *factor;
1127 
1128   PetscFunctionBegin;
1129   *newmat       = 0;
1130   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1131   PLogObjectCreate(mat);
1132   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1133   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1134   mat->ops->destroy = MatDestroy_MPIDense;
1135   mat->ops->view    = MatView_MPIDense;
1136   mat->factor       = A->factor;
1137   mat->assembled    = PETSC_TRUE;
1138 
1139   a->m = mat->m = oldmat->m;
1140   a->n = mat->n = oldmat->n;
1141   a->M = mat->M = oldmat->M;
1142   a->N = mat->N = oldmat->N;
1143   if (oldmat->factor) {
1144     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1145     /* copy factor contents ... add this code! */
1146   } else a->factor = 0;
1147 
1148   a->rstart       = oldmat->rstart;
1149   a->rend         = oldmat->rend;
1150   a->size         = oldmat->size;
1151   a->rank         = oldmat->rank;
1152   mat->insertmode = NOT_SET_VALUES;
1153   a->donotstash   = oldmat->donotstash;
1154   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1155   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1156   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1157   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1158 
1159   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1160   PLogObjectParent(mat,a->lvec);
1161   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1162   PLogObjectParent(mat,a->Mvctx);
1163   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1164   PLogObjectParent(mat,a->A);
1165   *newmat = mat;
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 #include "sys.h"
1170 
1171 #undef __FUNC__
1172 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1173 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1174 {
1175   int        *rowners, i,size,rank,m,ierr,nz,j;
1176   Scalar     *array,*vals,*vals_ptr;
1177   MPI_Status status;
1178 
1179   PetscFunctionBegin;
1180   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1181   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1182 
1183   /* determine ownership of all rows */
1184   m          = M/size + ((M % size) > rank);
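  /* split the M rows as evenly as possible: the first M % size processes
     receive one extra row (e.g. M=10, size=4 gives 3,3,2,2) */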
1185   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1186   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1187   rowners[0] = 0;
1188   for ( i=2; i<=size; i++ ) {
1189     rowners[i] += rowners[i-1];
1190   }
1191 
1192   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1193   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1194 
1195   if (!rank) {
1196     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1197 
1198     /* read in my part of the matrix numerical values  */
1199     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1200 
1201     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1202     vals_ptr = vals;
1203     for ( i=0; i<m; i++ ) {
1204       for ( j=0; j<N; j++ ) {
1205         array[i + j*m] = *vals_ptr++;
1206       }
1207     }
1208 
1209     /* read in the other processors' parts and ship them out */
1210     for ( i=1; i<size; i++ ) {
1211       nz   = (rowners[i+1] - rowners[i])*N;
1212       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1213       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1214     }
1215   } else {
1216     /* receive numeric values */
1217     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1218 
1219     /* receive message of values*/
1220     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1221 
1222     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1223     vals_ptr = vals;
1224     for ( i=0; i<m; i++ ) {
1225       for ( j=0; j<N; j++ ) {
1226         array[i + j*m] = *vals_ptr++;
1227       }
1228     }
1229   }
1230   ierr = PetscFree(rowners);CHKERRQ(ierr);
1231   ierr = PetscFree(vals);CHKERRQ(ierr);
1232   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1233   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 
1238 #undef __FUNC__
1239 #define __FUNC__ "MatLoad_MPIDense"
1240 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1241 {
1242   Mat          A;
1243   Scalar       *vals,*svals;
1244   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1245   MPI_Status   status;
1246   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1247   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1248   int          tag = ((PetscObject)viewer)->tag;
1249   int          i, nz, ierr, j,rstart, rend, fd;
1250 
1251   PetscFunctionBegin;
1252   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1253   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1254   if (!rank) {
1255     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1256     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1257     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1258   }
1259 
1260   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1261   M = header[1]; N = header[2]; nz = header[3];
1262 
1263   /*
1264        Handle case where matrix is stored on disk as a dense matrix
1265   */
1266   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1267     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1268     PetscFunctionReturn(0);
1269   }
1270 
1271   /* determine ownership of all rows */
1272   m          = M/size + ((M % size) > rank);
1273   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1274   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1275   rowners[0] = 0;
1276   for ( i=2; i<=size; i++ ) {
1277     rowners[i] += rowners[i-1];
1278   }
1279   rstart = rowners[rank];
1280   rend   = rowners[rank+1];
1281 
1282   /* distribute row lengths to all processors */
1283   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1284   offlens = ourlens + (rend-rstart);
1285   if (!rank) {
1286     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1287     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1288     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1289     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1290     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1291     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1292   } else {
1293     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1294   }
1295 
1296   if (!rank) {
1297     /* calculate the number of nonzeros on each processor */
1298     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1299     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1300     for ( i=0; i<size; i++ ) {
1301       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1302         procsnz[i] += rowlengths[j];
1303       }
1304     }
1305     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1306 
1307     /* determine max buffer needed and allocate it */
1308     maxnz = 0;
1309     for ( i=0; i<size; i++ ) {
1310       maxnz = PetscMax(maxnz,procsnz[i]);
1311     }
1312     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1313 
1314     /* read in my part of the matrix column indices  */
1315     nz = procsnz[0];
1316     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1317     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1318 
1319     /* read in everyone else's part and ship it off */
1320     for ( i=1; i<size; i++ ) {
1321       nz   = procsnz[i];
1322       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1323       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1324     }
1325     ierr = PetscFree(cols);CHKERRQ(ierr);
1326   } else {
1327     /* determine buffer space needed for message */
1328     nz = 0;
1329     for ( i=0; i<m; i++ ) {
1330       nz += ourlens[i];
1331     }
1332     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1333 
1334     /* receive message of column indices*/
1335     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1336     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1337     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1338   }
1339 
1340   /* loop over local rows, determining number of off diagonal entries */
1341   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1342   jj = 0;
1343   for ( i=0; i<m; i++ ) {
1344     for ( j=0; j<ourlens[i]; j++ ) {
1345       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1346       jj++;
1347     }
1348   }
1349 
1350   /* create our matrix */
1351   for ( i=0; i<m; i++ ) {
1352     ourlens[i] -= offlens[i];
1353   }
1354   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1355   A = *newmat;
1356   for ( i=0; i<m; i++ ) {
1357     ourlens[i] += offlens[i];
1358   }
1359 
1360   if (!rank) {
1361     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
1362 
1363     /* read in my part of the matrix numerical values  */
1364     nz = procsnz[0];
1365     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1366 
1367     /* insert into matrix */
1368     jj      = rstart;
1369     smycols = mycols;
1370     svals   = vals;
1371     for ( i=0; i<m; i++ ) {
1372       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1373       smycols += ourlens[i];
1374       svals   += ourlens[i];
1375       jj++;
1376     }
1377 
1378     /* read in the other processors' parts and ship them out */
1379     for ( i=1; i<size; i++ ) {
1380       nz   = procsnz[i];
1381       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1382       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1383     }
1384     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1385   } else {
1386     /* receive numeric values */
1387     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
1388 
1389     /* receive message of values*/
1390     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1391     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1392     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1393 
1394     /* insert into matrix */
1395     jj      = rstart;
1396     smycols = mycols;
1397     svals   = vals;
1398     for ( i=0; i<m; i++ ) {
1399       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1400       smycols += ourlens[i];
1401       svals   += ourlens[i];
1402       jj++;
1403     }
1404   }
1405   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1406   ierr = PetscFree(vals);CHKERRQ(ierr);
1407   ierr = PetscFree(mycols);CHKERRQ(ierr);
1408   ierr = PetscFree(rowners);CHKERRQ(ierr);
1409 
1410   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
1419