xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 9775878a801383c4a9b7610408cc59e6b425f90a)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatDenseGetLocalMatrix"
13 /*@
14 
15       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
16               matrix that represents the operator. For sequential matrices it returns itself.
17 
18     Input Parameter:
19 .      A - the Seq or MPI dense matrix
20 
21     Output Parameter:
22 .      B - the inner matrix
23 
24     Level: intermediate
25 
26 @*/
27 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
28 {
29   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
30   PetscErrorCode ierr;
31   PetscBool      flg;
32 
33   PetscFunctionBegin;
34   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
35   if (flg) *B = mat->A;
36   else *B = A;
37   PetscFunctionReturn(0);
38 }
39 
40 #undef __FUNCT__
41 #define __FUNCT__ "MatGetRow_MPIDense"
42 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
43 {
44   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
45   PetscErrorCode ierr;
46   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
47 
48   PetscFunctionBegin;
49   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
50   lrow = row - rstart;
51   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
52   PetscFunctionReturn(0);
53 }
54 
55 #undef __FUNCT__
56 #define __FUNCT__ "MatRestoreRow_MPIDense"
57 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
58 {
59   PetscErrorCode ierr;
60 
61   PetscFunctionBegin;
62   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
63   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
69 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
70 {
71   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
72   PetscErrorCode ierr;
73   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
74   PetscScalar    *array;
75   MPI_Comm       comm;
76   Mat            B;
77 
78   PetscFunctionBegin;
79   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
80 
81   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
82   if (!B) {
83     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
84     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
85     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
86     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
87     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
88     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
89     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
90     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
91     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
92     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
93     *a   = B;
94     ierr = MatDestroy(&B);CHKERRQ(ierr);
95   } else *a = B;
96   PetscFunctionReturn(0);
97 }
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "MatSetValues_MPIDense"
101 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
102 {
103   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
104   PetscErrorCode ierr;
105   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
106   PetscBool      roworiented = A->roworiented;
107 
108   PetscFunctionBegin;
109   for (i=0; i<m; i++) {
110     if (idxm[i] < 0) continue;
111     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
112     if (idxm[i] >= rstart && idxm[i] < rend) {
113       row = idxm[i] - rstart;
114       if (roworiented) {
115         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
116       } else {
117         for (j=0; j<n; j++) {
118           if (idxn[j] < 0) continue;
119           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
120           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
121         }
122       }
123     } else if (!A->donotstash) {
124       mat->assembled = PETSC_FALSE;
125       if (roworiented) {
126         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
127       } else {
128         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
129       }
130     }
131   }
132   PetscFunctionReturn(0);
133 }
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatGetValues_MPIDense"
137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
138 {
139   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
140   PetscErrorCode ierr;
141   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
142 
143   PetscFunctionBegin;
144   for (i=0; i<m; i++) {
145     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
146     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
147     if (idxm[i] >= rstart && idxm[i] < rend) {
148       row = idxm[i] - rstart;
149       for (j=0; j<n; j++) {
150         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
151         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
152         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
153       }
154     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
155   }
156   PetscFunctionReturn(0);
157 }
158 
159 #undef __FUNCT__
160 #define __FUNCT__ "MatDenseGetArray_MPIDense"
161 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
162 {
163   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
164   PetscErrorCode ierr;
165 
166   PetscFunctionBegin;
167   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
173 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
174 {
175   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
176   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
177   PetscErrorCode ierr;
178   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
179   const PetscInt *irow,*icol;
180   PetscScalar    *av,*bv,*v = lmat->v;
181   Mat            newmat;
182   IS             iscol_local;
183 
184   PetscFunctionBegin;
185   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
186   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
187   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
188   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
189   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
190   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
191 
192   /* No parallel redistribution currently supported! Should really check each index set
193      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
194      original matrix! */
195 
196   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
197   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
198 
199   /* Check submatrix call */
200   if (scall == MAT_REUSE_MATRIX) {
201     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
202     /* Really need to test rows and column sizes! */
203     newmat = *B;
204   } else {
205     /* Create and fill new matrix */
206     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
207     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
208     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
209     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
210   }
211 
212   /* Now extract the data pointers and do the copy, column at a time */
213   newmatd = (Mat_MPIDense*)newmat->data;
214   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
215 
216   for (i=0; i<Ncols; i++) {
217     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
218     for (j=0; j<nrows; j++) {
219       *bv++ = av[irow[j] - rstart];
220     }
221   }
222 
223   /* Assemble the matrices so that the correct flags are set */
224   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
225   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
226 
227   /* Free work space */
228   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
229   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
230   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
231   *B   = newmat;
232   PetscFunctionReturn(0);
233 }
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
237 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
238 {
239   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
240   PetscErrorCode ierr;
241 
242   PetscFunctionBegin;
243   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
250 {
251   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
252   MPI_Comm       comm;
253   PetscErrorCode ierr;
254   PetscInt       nstash,reallocs;
255   InsertMode     addv;
256 
257   PetscFunctionBegin;
258   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
259   /* make sure all processors are either in INSERTMODE or ADDMODE */
260   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
261   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262   mat->insertmode = addv; /* in case this processor had no cache */
263 
264   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
265   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
266   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
272 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
273 {
274   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
275   PetscErrorCode ierr;
276   PetscInt       i,*row,*col,flg,j,rstart,ncols;
277   PetscMPIInt    n;
278   PetscScalar    *val;
279 
280   PetscFunctionBegin;
281   /*  wait on receives */
282   while (1) {
283     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
284     if (!flg) break;
285 
286     for (i=0; i<n;) {
287       /* Now identify the consecutive vals belonging to the same row */
288       for (j=i,rstart=row[j]; j<n; j++) {
289         if (row[j] != rstart) break;
290       }
291       if (j < n) ncols = j-i;
292       else       ncols = n-i;
293       /* Now assemble all these values with a single function call */
294       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
295       i    = j;
296     }
297   }
298   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
299 
300   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
301   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
302 
303   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
304     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
305   }
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatZeroEntries_MPIDense"
311 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312 {
313   PetscErrorCode ierr;
314   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
315 
316   PetscFunctionBegin;
317   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
321 /* the code does not do the diagonal entries correctly unless the
322    matrix is square and the column and row owerships are identical.
323    This is a BUG. The only way to fix it seems to be to access
324    mdn->A and mdn->B directly and not through the MatZeroRows()
325    routine.
326 */
327 #undef __FUNCT__
328 #define __FUNCT__ "MatZeroRows_MPIDense"
329 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
330 {
331   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
332   PetscErrorCode    ierr;
333   PetscInt          i,*owners = A->rmap->range;
334   PetscInt          *sizes,j,idx,nsends;
335   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
336   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
337   PetscInt          *lens,*lrows,*values;
338   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
339   MPI_Comm          comm;
340   MPI_Request       *send_waits,*recv_waits;
341   MPI_Status        recv_status,*send_status;
342   PetscBool         found;
343   const PetscScalar *xx;
344   PetscScalar       *bb;
345 
346   PetscFunctionBegin;
347   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
348   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
349   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
350   /*  first count number of contributors to each processor */
351   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
352   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
353   for (i=0; i<N; i++) {
354     idx   = rows[i];
355     found = PETSC_FALSE;
356     for (j=0; j<size; j++) {
357       if (idx >= owners[j] && idx < owners[j+1]) {
358         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
359       }
360     }
361     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
362   }
363   nsends = 0;
364   for (i=0; i<size; i++) nsends += sizes[2*i+1];
365 
366   /* inform other processors of number of messages and max length*/
367   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
368 
369   /* post receives:   */
370   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
371   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
372   for (i=0; i<nrecvs; i++) {
373     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
374   }
375 
376   /* do sends:
377       1) starts[i] gives the starting index in svalues for stuff going to
378          the ith processor
379   */
380   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
381   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
382   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
383 
384   starts[0] = 0;
385   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
386   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
387 
388   starts[0] = 0;
389   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
390   count = 0;
391   for (i=0; i<size; i++) {
392     if (sizes[2*i+1]) {
393       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
394     }
395   }
396   ierr = PetscFree(starts);CHKERRQ(ierr);
397 
398   base = owners[rank];
399 
400   /*  wait on receives */
401   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
402   count = nrecvs;
403   slen  = 0;
404   while (count) {
405     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
406     /* unpack receives into our local space */
407     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
408 
409     source[imdex] = recv_status.MPI_SOURCE;
410     lens[imdex]   = n;
411     slen += n;
412     count--;
413   }
414   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
415 
416   /* move the data into the send scatter */
417   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
418   count = 0;
419   for (i=0; i<nrecvs; i++) {
420     values = rvalues + i*nmax;
421     for (j=0; j<lens[i]; j++) {
422       lrows[count++] = values[j] - base;
423     }
424   }
425   ierr = PetscFree(rvalues);CHKERRQ(ierr);
426   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
427   ierr = PetscFree(owner);CHKERRQ(ierr);
428   ierr = PetscFree(sizes);CHKERRQ(ierr);
429 
430   /* fix right hand side if needed */
431   if (x && b) {
432     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
433     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
434     for (i=0; i<slen; i++) {
435       bb[lrows[i]] = diag*xx[lrows[i]];
436     }
437     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
438     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
439   }
440 
441   /* actually zap the local rows */
442   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
443   if (diag != 0.0) {
444     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
445     PetscInt     m   = ll->lda, i;
446 
447     for (i=0; i<slen; i++) {
448       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
449     }
450   }
451   ierr = PetscFree(lrows);CHKERRQ(ierr);
452 
453   /* wait on sends */
454   if (nsends) {
455     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
456     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
457     ierr = PetscFree(send_status);CHKERRQ(ierr);
458   }
459   ierr = PetscFree(send_waits);CHKERRQ(ierr);
460   ierr = PetscFree(svalues);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatMult_MPIDense"
466 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
467 {
468   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
469   PetscErrorCode ierr;
470 
471   PetscFunctionBegin;
472   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
473   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
474   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatMultAdd_MPIDense"
480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
481 {
482   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
483   PetscErrorCode ierr;
484 
485   PetscFunctionBegin;
486   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
487   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
488   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
489   PetscFunctionReturn(0);
490 }
491 
492 #undef __FUNCT__
493 #define __FUNCT__ "MatMultTranspose_MPIDense"
494 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
495 {
496   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
497   PetscErrorCode ierr;
498   PetscScalar    zero = 0.0;
499 
500   PetscFunctionBegin;
501   ierr = VecSet(yy,zero);CHKERRQ(ierr);
502   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
503   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
504   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
510 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
511 {
512   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
513   PetscErrorCode ierr;
514 
515   PetscFunctionBegin;
516   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
517   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
518   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
519   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 
523 #undef __FUNCT__
524 #define __FUNCT__ "MatGetDiagonal_MPIDense"
525 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
526 {
527   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
528   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
529   PetscErrorCode ierr;
530   PetscInt       len,i,n,m = A->rmap->n,radd;
531   PetscScalar    *x,zero = 0.0;
532 
533   PetscFunctionBegin;
534   ierr = VecSet(v,zero);CHKERRQ(ierr);
535   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
536   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
537   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
538   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
539   radd = A->rmap->rstart*m;
540   for (i=0; i<len; i++) {
541     x[i] = aloc->v[radd + i*m + i];
542   }
543   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatDestroy_MPIDense"
549 PetscErrorCode MatDestroy_MPIDense(Mat mat)
550 {
551   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555 #if defined(PETSC_USE_LOG)
556   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
557 #endif
558   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
559   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
560   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
561   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
562 
563   ierr = PetscFree(mat->data);CHKERRQ(ierr);
564   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
565 
566   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
567   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
568 #if defined(PETSC_HAVE_ELEMENTAL)
569   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
570 #endif
571   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
572   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
573   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
574   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
577   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatView_MPIDense_Binary"
584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
585 {
586   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
587   PetscErrorCode    ierr;
588   PetscViewerFormat format;
589   int               fd;
590   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
591   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
592   PetscScalar       *work,*v,*vv;
593   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
594 
595   PetscFunctionBegin;
596   if (mdn->size == 1) {
597     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
598   } else {
599     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
600     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
601     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
602 
603     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
604     if (format == PETSC_VIEWER_NATIVE) {
605 
606       if (!rank) {
607         /* store the matrix as a dense matrix */
608         header[0] = MAT_FILE_CLASSID;
609         header[1] = mat->rmap->N;
610         header[2] = N;
611         header[3] = MATRIX_BINARY_FORMAT_DENSE;
612         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
613 
614         /* get largest work array needed for transposing array */
615         mmax = mat->rmap->n;
616         for (i=1; i<size; i++) {
617           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
618         }
619         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
620 
621         /* write out local array, by rows */
622         m = mat->rmap->n;
623         v = a->v;
624         for (j=0; j<N; j++) {
625           for (i=0; i<m; i++) {
626             work[j + i*N] = *v++;
627           }
628         }
629         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
630         /* get largest work array to receive messages from other processes, excludes process zero */
631         mmax = 0;
632         for (i=1; i<size; i++) {
633           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
634         }
635         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
636         for (k = 1; k < size; k++) {
637           v    = vv;
638           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
639           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640 
641           for (j = 0; j < N; j++) {
642             for (i = 0; i < m; i++) {
643               work[j + i*N] = *v++;
644             }
645           }
646           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
647         }
648         ierr = PetscFree(work);CHKERRQ(ierr);
649         ierr = PetscFree(vv);CHKERRQ(ierr);
650       } else {
651         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
652       }
653     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
654   }
655   PetscFunctionReturn(0);
656 }
657 
658 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
659 #include <petscdraw.h>
660 #undef __FUNCT__
661 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
662 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
663 {
664   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
665   PetscErrorCode    ierr;
666   PetscMPIInt       rank = mdn->rank;
667   PetscViewerType   vtype;
668   PetscBool         iascii,isdraw;
669   PetscViewer       sviewer;
670   PetscViewerFormat format;
671 
672   PetscFunctionBegin;
673   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
674   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
675   if (iascii) {
676     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
677     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
678     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
679       MatInfo info;
680       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
681       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
682       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
683       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
684       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
685       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
686       PetscFunctionReturn(0);
687     } else if (format == PETSC_VIEWER_ASCII_INFO) {
688       PetscFunctionReturn(0);
689     }
690   } else if (isdraw) {
691     PetscDraw draw;
692     PetscBool isnull;
693 
694     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
695     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
696     if (isnull) PetscFunctionReturn(0);
697   }
698 
699   {
700     /* assemble the entire matrix onto first processor. */
701     Mat         A;
702     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
703     PetscInt    *cols;
704     PetscScalar *vals;
705 
706     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
707     if (!rank) {
708       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
709     } else {
710       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
711     }
712     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
713     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
714     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
715     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
716 
717     /* Copy the matrix ... This isn't the most efficient means,
718        but it's quick for now */
719     A->insertmode = INSERT_VALUES;
720 
721     row = mat->rmap->rstart;
722     m   = mdn->A->rmap->n;
723     for (i=0; i<m; i++) {
724       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
725       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
726       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
727       row++;
728     }
729 
730     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
731     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
732     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
733     if (!rank) {
734         ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
735     }
736     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
737     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
738     ierr = MatDestroy(&A);CHKERRQ(ierr);
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatView_MPIDense"
745 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
746 {
747   PetscErrorCode ierr;
748   PetscBool      iascii,isbinary,isdraw,issocket;
749 
750   PetscFunctionBegin;
751   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
752   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
753   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
754   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
755 
756   if (iascii || issocket || isdraw) {
757     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
758   } else if (isbinary) {
759     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
760   }
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNCT__
765 #define __FUNCT__ "MatGetInfo_MPIDense"
766 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
767 {
768   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
769   Mat            mdn  = mat->A;
770   PetscErrorCode ierr;
771   PetscReal      isend[5],irecv[5];
772 
773   PetscFunctionBegin;
774   info->block_size = 1.0;
775 
776   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
777 
778   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
779   isend[3] = info->memory;  isend[4] = info->mallocs;
780   if (flag == MAT_LOCAL) {
781     info->nz_used      = isend[0];
782     info->nz_allocated = isend[1];
783     info->nz_unneeded  = isend[2];
784     info->memory       = isend[3];
785     info->mallocs      = isend[4];
786   } else if (flag == MAT_GLOBAL_MAX) {
787     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
788 
789     info->nz_used      = irecv[0];
790     info->nz_allocated = irecv[1];
791     info->nz_unneeded  = irecv[2];
792     info->memory       = irecv[3];
793     info->mallocs      = irecv[4];
794   } else if (flag == MAT_GLOBAL_SUM) {
795     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
796 
797     info->nz_used      = irecv[0];
798     info->nz_allocated = irecv[1];
799     info->nz_unneeded  = irecv[2];
800     info->memory       = irecv[3];
801     info->mallocs      = irecv[4];
802   }
803   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
804   info->fill_ratio_needed = 0;
805   info->factor_mallocs    = 0;
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "MatSetOption_MPIDense"
811 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
812 {
813   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
814   PetscErrorCode ierr;
815 
816   PetscFunctionBegin;
817   switch (op) {
818   case MAT_NEW_NONZERO_LOCATIONS:
819   case MAT_NEW_NONZERO_LOCATION_ERR:
820   case MAT_NEW_NONZERO_ALLOCATION_ERR:
821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
822     break;
823   case MAT_ROW_ORIENTED:
824     a->roworiented = flg;
825 
826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
827     break;
828   case MAT_NEW_DIAGONALS:
829   case MAT_KEEP_NONZERO_PATTERN:
830   case MAT_USE_HASH_TABLE:
831     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
832     break;
833   case MAT_IGNORE_OFF_PROC_ENTRIES:
834     a->donotstash = flg;
835     break;
836   case MAT_SYMMETRIC:
837   case MAT_STRUCTURALLY_SYMMETRIC:
838   case MAT_HERMITIAN:
839   case MAT_SYMMETRY_ETERNAL:
840   case MAT_IGNORE_LOWER_TRIANGULAR:
841     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
842     break;
843   default:
844     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
845   }
846   PetscFunctionReturn(0);
847 }
848 
849 
850 #undef __FUNCT__
851 #define __FUNCT__ "MatDiagonalScale_MPIDense"
852 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
853 {
854   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
855   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
856   PetscScalar    *l,*r,x,*v;
857   PetscErrorCode ierr;
858   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
859 
860   PetscFunctionBegin;
861   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
862   if (ll) {
863     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
864     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
865     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
866     for (i=0; i<m; i++) {
867       x = l[i];
868       v = mat->v + i;
869       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
870     }
871     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
872     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
873   }
874   if (rr) {
875     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
876     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
877     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
880     for (i=0; i<n; i++) {
881       x = r[i];
882       v = mat->v + i*m;
883       for (j=0; j<m; j++) (*v++) *= x;
884     }
885     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
886     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
887   }
888   PetscFunctionReturn(0);
889 }
890 
891 #undef __FUNCT__
892 #define __FUNCT__ "MatNorm_MPIDense"
893 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
894 {
895   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
896   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
897   PetscErrorCode ierr;
898   PetscInt       i,j;
899   PetscReal      sum = 0.0;
900   PetscScalar    *v  = mat->v;
901 
902   PetscFunctionBegin;
903   if (mdn->size == 1) {
904     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
905   } else {
906     if (type == NORM_FROBENIUS) {
907       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
909       }
910       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
911       *nrm = PetscSqrtReal(*nrm);
912       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
913     } else if (type == NORM_1) {
914       PetscReal *tmp,*tmp2;
915       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
916       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
917       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
918       *nrm = 0.0;
919       v    = mat->v;
920       for (j=0; j<mdn->A->cmap->n; j++) {
921         for (i=0; i<mdn->A->rmap->n; i++) {
922           tmp[j] += PetscAbsScalar(*v);  v++;
923         }
924       }
925       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
926       for (j=0; j<A->cmap->N; j++) {
927         if (tmp2[j] > *nrm) *nrm = tmp2[j];
928       }
929       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
930       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
931     } else if (type == NORM_INFINITY) { /* max row norm */
932       PetscReal ntemp;
933       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
934       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
935     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
936   }
937   PetscFunctionReturn(0);
938 }
939 
940 #undef __FUNCT__
941 #define __FUNCT__ "MatTranspose_MPIDense"
942 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
943 {
944   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
945   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
946   Mat            B;
947   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
948   PetscErrorCode ierr;
949   PetscInt       j,i;
950   PetscScalar    *v;
951 
952   PetscFunctionBegin;
953   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
954   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
955     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
956     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
957     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
958     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
959   } else {
960     B = *matout;
961   }
962 
963   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
964   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
965   for (i=0; i<m; i++) rwork[i] = rstart + i;
966   for (j=0; j<n; j++) {
967     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
968     v   += m;
969   }
970   ierr = PetscFree(rwork);CHKERRQ(ierr);
971   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
972   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
974     *matout = B;
975   } else {
976     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
977   }
978   PetscFunctionReturn(0);
979 }
980 
981 
982 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
983 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
984 
985 #undef __FUNCT__
986 #define __FUNCT__ "MatSetUp_MPIDense"
987 PetscErrorCode MatSetUp_MPIDense(Mat A)
988 {
989   PetscErrorCode ierr;
990 
991   PetscFunctionBegin;
992   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatAXPY_MPIDense"
998 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
999 {
1000   PetscErrorCode ierr;
1001   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1002 
1003   PetscFunctionBegin;
1004   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1005   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatConjugate_MPIDense"
1011 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1012 {
1013   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1014   PetscErrorCode ierr;
1015 
1016   PetscFunctionBegin;
1017   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatRealPart_MPIDense"
1023 PetscErrorCode MatRealPart_MPIDense(Mat A)
1024 {
1025   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1026   PetscErrorCode ierr;
1027 
1028   PetscFunctionBegin;
1029   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1030   PetscFunctionReturn(0);
1031 }
1032 
1033 #undef __FUNCT__
1034 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1035 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1036 {
1037   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1038   PetscErrorCode ierr;
1039 
1040   PetscFunctionBegin;
1041   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1048 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1049 {
1050   PetscErrorCode ierr;
1051   PetscInt       i,n;
1052   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1053   PetscReal      *work;
1054 
1055   PetscFunctionBegin;
1056   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1057   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1058   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1059   if (type == NORM_2) {
1060     for (i=0; i<n; i++) work[i] *= work[i];
1061   }
1062   if (type == NORM_INFINITY) {
1063     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1064   } else {
1065     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1066   }
1067   ierr = PetscFree(work);CHKERRQ(ierr);
1068   if (type == NORM_2) {
1069     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1070   }
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatSetRandom_MPIDense"
1076 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1077 {
1078   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1079   PetscErrorCode ierr;
1080   PetscScalar    *a;
1081   PetscInt       m,n,i;
1082 
1083   PetscFunctionBegin;
1084   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1085   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1086   for (i=0; i<m*n; i++) {
1087     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1088   }
1089   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatMissingDiagonal_MPIDense"
1097 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1098 {
1099   PetscFunctionBegin;
1100   *missing = PETSC_FALSE;
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 /* -------------------------------------------------------------------*/
1105 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1106                                         MatGetRow_MPIDense,
1107                                         MatRestoreRow_MPIDense,
1108                                         MatMult_MPIDense,
1109                                 /*  4*/ MatMultAdd_MPIDense,
1110                                         MatMultTranspose_MPIDense,
1111                                         MatMultTransposeAdd_MPIDense,
1112                                         0,
1113                                         0,
1114                                         0,
1115                                 /* 10*/ 0,
1116                                         0,
1117                                         0,
1118                                         0,
1119                                         MatTranspose_MPIDense,
1120                                 /* 15*/ MatGetInfo_MPIDense,
1121                                         MatEqual_MPIDense,
1122                                         MatGetDiagonal_MPIDense,
1123                                         MatDiagonalScale_MPIDense,
1124                                         MatNorm_MPIDense,
1125                                 /* 20*/ MatAssemblyBegin_MPIDense,
1126                                         MatAssemblyEnd_MPIDense,
1127                                         MatSetOption_MPIDense,
1128                                         MatZeroEntries_MPIDense,
1129                                 /* 24*/ MatZeroRows_MPIDense,
1130                                         0,
1131                                         0,
1132                                         0,
1133                                         0,
1134                                 /* 29*/ MatSetUp_MPIDense,
1135                                         0,
1136                                         0,
1137                                         0,
1138                                         0,
1139                                 /* 34*/ MatDuplicate_MPIDense,
1140                                         0,
1141                                         0,
1142                                         0,
1143                                         0,
1144                                 /* 39*/ MatAXPY_MPIDense,
1145                                         MatGetSubMatrices_MPIDense,
1146                                         0,
1147                                         MatGetValues_MPIDense,
1148                                         0,
1149                                 /* 44*/ 0,
1150                                         MatScale_MPIDense,
1151                                         MatShift_Basic,
1152                                         0,
1153                                         0,
1154                                 /* 49*/ MatSetRandom_MPIDense,
1155                                         0,
1156                                         0,
1157                                         0,
1158                                         0,
1159                                 /* 54*/ 0,
1160                                         0,
1161                                         0,
1162                                         0,
1163                                         0,
1164                                 /* 59*/ MatGetSubMatrix_MPIDense,
1165                                         MatDestroy_MPIDense,
1166                                         MatView_MPIDense,
1167                                         0,
1168                                         0,
1169                                 /* 64*/ 0,
1170                                         0,
1171                                         0,
1172                                         0,
1173                                         0,
1174                                 /* 69*/ 0,
1175                                         0,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                 /* 74*/ 0,
1180                                         0,
1181                                         0,
1182                                         0,
1183                                         0,
1184                                 /* 79*/ 0,
1185                                         0,
1186                                         0,
1187                                         0,
1188                                 /* 83*/ MatLoad_MPIDense,
1189                                         0,
1190                                         0,
1191                                         0,
1192                                         0,
1193                                         0,
1194 #if defined(PETSC_HAVE_ELEMENTAL)
1195                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1196                                         MatMatMultSymbolic_MPIDense_MPIDense,
1197 #else
1198                                 /* 89*/ 0,
1199                                         0,
1200 #endif
1201                                         MatMatMultNumeric_MPIDense,
1202                                         0,
1203                                         0,
1204                                 /* 94*/ 0,
1205                                         0,
1206                                         0,
1207                                         0,
1208                                         0,
1209                                 /* 99*/ 0,
1210                                         0,
1211                                         0,
1212                                         MatConjugate_MPIDense,
1213                                         0,
1214                                 /*104*/ 0,
1215                                         MatRealPart_MPIDense,
1216                                         MatImaginaryPart_MPIDense,
1217                                         0,
1218                                         0,
1219                                 /*109*/ 0,
1220                                         0,
1221                                         0,
1222                                         0,
1223                                         MatMissingDiagonal_MPIDense,
1224                                 /*114*/ 0,
1225                                         0,
1226                                         0,
1227                                         0,
1228                                         0,
1229                                 /*119*/ 0,
1230                                         0,
1231                                         0,
1232                                         0,
1233                                         0,
1234                                 /*124*/ 0,
1235                                         MatGetColumnNorms_MPIDense,
1236                                         0,
1237                                         0,
1238                                         0,
1239                                 /*129*/ 0,
1240                                         MatTransposeMatMult_MPIDense_MPIDense,
1241                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1242                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1243                                         0,
1244                                 /*134*/ 0,
1245                                         0,
1246                                         0,
1247                                         0,
1248                                         0,
1249                                 /*139*/ 0,
1250                                         0,
1251                                         0
1252 };
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1256 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1257 {
1258   Mat_MPIDense   *a;
1259   PetscErrorCode ierr;
1260 
1261   PetscFunctionBegin;
1262   mat->preallocated = PETSC_TRUE;
1263   /* Note:  For now, when data is specified above, this assumes the user correctly
1264    allocates the local dense storage space.  We should add error checking. */
1265 
1266   a       = (Mat_MPIDense*)mat->data;
1267   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1268   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1269   a->nvec = mat->cmap->n;
1270 
1271   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1272   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1273   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1274   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1275   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #if defined(PETSC_HAVE_ELEMENTAL)
1280 #undef __FUNCT__
1281 #define __FUNCT__ "MatConvert_MPIDense_Elemental"
1282 PETSC_EXTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1283 {
1284   Mat            mat_elemental;
1285   PetscErrorCode ierr;
1286   PetscScalar    *array,*v_rowwise;
1287   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,j,k,*rows,*cols;
1288 
1289   PetscFunctionBegin;
1290   ierr = PetscMalloc3(m*N,&v_rowwise,m,&rows,N,&cols);CHKERRQ(ierr);
1291   ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr);
1292   /* convert column-wise array into row-wise v_rowwise, see MatSetValues_Elemental() */
1293   k = 0;
1294   for (j=0; j<N; j++) {
1295     cols[j] = j;
1296     for (i=0; i<m; i++) {
1297       v_rowwise[i*N+j] = array[k++];
1298     }
1299   }
1300   for (i=0; i<m; i++) {
1301     rows[i] = rstart + i;
1302   }
1303   ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr);
1304 
1305   ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1306   ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1307   ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1308   ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1309 
1310   /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1311   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v_rowwise,ADD_VALUES);CHKERRQ(ierr);
1312   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1313   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1314   ierr = PetscFree3(v_rowwise,rows,cols);CHKERRQ(ierr);
1315 
1316   if (reuse == MAT_INPLACE_MATRIX) {
1317     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1318   } else {
1319     *newmat = mat_elemental;
1320   }
1321   PetscFunctionReturn(0);
1322 }
1323 #endif
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatCreate_MPIDense"
1327 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1328 {
1329   Mat_MPIDense   *a;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1334   mat->data = (void*)a;
1335   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1336 
1337   mat->insertmode = NOT_SET_VALUES;
1338   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1339   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1340 
1341   /* build cache for off array entries formed */
1342   a->donotstash = PETSC_FALSE;
1343 
1344   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1345 
1346   /* stuff used for matrix vector multiply */
1347   a->lvec        = 0;
1348   a->Mvctx       = 0;
1349   a->roworiented = PETSC_TRUE;
1350 
1351   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1352   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1353 #if defined(PETSC_HAVE_ELEMENTAL)
1354   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1355 #endif
1356   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1357   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1358   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1359   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1360   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1361 
1362   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1363   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1364   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1365   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 /*MC
1370    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1371 
1372    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1373    and MATMPIDENSE otherwise.
1374 
1375    Options Database Keys:
1376 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1377 
1378   Level: beginner
1379 
1380 
1381 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1382 M*/
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1386 /*@C
1387    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1388 
1389    Not collective
1390 
1391    Input Parameters:
1392 .  B - the matrix
1393 -  data - optional location of matrix data.  Set data=NULL for PETSc
1394    to control all matrix memory allocation.
1395 
1396    Notes:
1397    The dense format is fully compatible with standard Fortran 77
1398    storage by columns.
1399 
1400    The data input variable is intended primarily for Fortran programmers
1401    who wish to allocate their own matrix memory space.  Most users should
1402    set data=NULL.
1403 
1404    Level: intermediate
1405 
1406 .keywords: matrix,dense, parallel
1407 
1408 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1409 @*/
1410 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1411 {
1412   PetscErrorCode ierr;
1413 
1414   PetscFunctionBegin;
1415   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatCreateDense"
1421 /*@C
1422    MatCreateDense - Creates a parallel matrix in dense format.
1423 
1424    Collective on MPI_Comm
1425 
1426    Input Parameters:
1427 +  comm - MPI communicator
1428 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1429 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1430 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1431 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1432 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1433    to control all matrix memory allocation.
1434 
1435    Output Parameter:
1436 .  A - the matrix
1437 
1438    Notes:
1439    The dense format is fully compatible with standard Fortran 77
1440    storage by columns.
1441 
1442    The data input variable is intended primarily for Fortran programmers
1443    who wish to allocate their own matrix memory space.  Most users should
1444    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1445 
1446    The user MUST specify either the local or global matrix dimensions
1447    (possibly both).
1448 
1449    Level: intermediate
1450 
1451 .keywords: matrix,dense, parallel
1452 
1453 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1454 @*/
1455 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1456 {
1457   PetscErrorCode ierr;
1458   PetscMPIInt    size;
1459 
1460   PetscFunctionBegin;
1461   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1462   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1463   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1464   if (size > 1) {
1465     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1466     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1467     if (data) {  /* user provided data array, so no need to assemble */
1468       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1469       (*A)->assembled = PETSC_TRUE;
1470     }
1471   } else {
1472     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1473     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1474   }
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatDuplicate_MPIDense"
1480 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1481 {
1482   Mat            mat;
1483   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1484   PetscErrorCode ierr;
1485 
1486   PetscFunctionBegin;
1487   *newmat = 0;
1488   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1489   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1490   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1491   a       = (Mat_MPIDense*)mat->data;
1492   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1493 
1494   mat->factortype   = A->factortype;
1495   mat->assembled    = PETSC_TRUE;
1496   mat->preallocated = PETSC_TRUE;
1497 
1498   a->size         = oldmat->size;
1499   a->rank         = oldmat->rank;
1500   mat->insertmode = NOT_SET_VALUES;
1501   a->nvec         = oldmat->nvec;
1502   a->donotstash   = oldmat->donotstash;
1503 
1504   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1505   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1506 
1507   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1508   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1509   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1510 
1511   *newmat = mat;
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 #undef __FUNCT__
1516 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1517 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1518 {
1519   PetscErrorCode ierr;
1520   PetscMPIInt    rank,size;
1521   const PetscInt *rowners;
1522   PetscInt       i,m,n,nz,j,mMax;
1523   PetscScalar    *array,*vals,*vals_ptr;
1524   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1525 
1526   PetscFunctionBegin;
1527   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1528   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1529 
1530   /* determine ownership of rows and columns */
1531   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1532   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1533 
1534   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1535   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1536     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1537   }
1538   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1539   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1540   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1541   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1542   if (!rank) {
1543     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1544 
1545     /* read in my part of the matrix numerical values  */
1546     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1547 
1548     /* insert into matrix-by row (this is why cannot directly read into array */
1549     vals_ptr = vals;
1550     for (i=0; i<m; i++) {
1551       for (j=0; j<N; j++) {
1552         array[i + j*m] = *vals_ptr++;
1553       }
1554     }
1555 
1556     /* read in other processors and ship out */
1557     for (i=1; i<size; i++) {
1558       nz   = (rowners[i+1] - rowners[i])*N;
1559       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1560       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1561     }
1562   } else {
1563     /* receive numeric values */
1564     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1565 
1566     /* receive message of values*/
1567     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1568 
1569     /* insert into matrix-by row (this is why cannot directly read into array */
1570     vals_ptr = vals;
1571     for (i=0; i<m; i++) {
1572       for (j=0; j<N; j++) {
1573         array[i + j*m] = *vals_ptr++;
1574       }
1575     }
1576   }
1577   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1578   ierr = PetscFree(vals);CHKERRQ(ierr);
1579   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1580   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatLoad_MPIDense"
1586 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1587 {
1588   Mat_MPIDense   *a;
1589   PetscScalar    *vals,*svals;
1590   MPI_Comm       comm;
1591   MPI_Status     status;
1592   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1593   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1594   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1595   PetscInt       i,nz,j,rstart,rend;
1596   int            fd;
1597   PetscErrorCode ierr;
1598 
1599   PetscFunctionBegin;
1600   /* force binary viewer to load .info file if it has not yet done so */
1601   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1602   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1603   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1604   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1605   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1606   if (!rank) {
1607     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1608     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1609   }
1610   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1611   M    = header[1]; N = header[2]; nz = header[3];
1612 
1613   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1614   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1615   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1616 
1617   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1618   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1619 
1620   /*
1621        Handle case where matrix is stored on disk as a dense matrix
1622   */
1623   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1624     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1625     PetscFunctionReturn(0);
1626   }
1627 
1628   /* determine ownership of all rows */
1629   if (newmat->rmap->n < 0) {
1630     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1631   } else {
1632     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1633   }
1634   if (newmat->cmap->n < 0) {
1635     n = PETSC_DECIDE;
1636   } else {
1637     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1638   }
1639 
1640   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1641   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1642   rowners[0] = 0;
1643   for (i=2; i<=size; i++) {
1644     rowners[i] += rowners[i-1];
1645   }
1646   rstart = rowners[rank];
1647   rend   = rowners[rank+1];
1648 
1649   /* distribute row lengths to all processors */
1650   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1651   if (!rank) {
1652     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1653     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1654     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1655     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1656     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1657     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1658   } else {
1659     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1660   }
1661 
1662   if (!rank) {
1663     /* calculate the number of nonzeros on each processor */
1664     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1665     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1666     for (i=0; i<size; i++) {
1667       for (j=rowners[i]; j< rowners[i+1]; j++) {
1668         procsnz[i] += rowlengths[j];
1669       }
1670     }
1671     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1672 
1673     /* determine max buffer needed and allocate it */
1674     maxnz = 0;
1675     for (i=0; i<size; i++) {
1676       maxnz = PetscMax(maxnz,procsnz[i]);
1677     }
1678     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1679 
1680     /* read in my part of the matrix column indices  */
1681     nz   = procsnz[0];
1682     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1683     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1684 
1685     /* read in every one elses and ship off */
1686     for (i=1; i<size; i++) {
1687       nz   = procsnz[i];
1688       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1689       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1690     }
1691     ierr = PetscFree(cols);CHKERRQ(ierr);
1692   } else {
1693     /* determine buffer space needed for message */
1694     nz = 0;
1695     for (i=0; i<m; i++) {
1696       nz += ourlens[i];
1697     }
1698     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1699 
1700     /* receive message of column indices*/
1701     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1702     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1703     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1704   }
1705 
1706   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1707   a = (Mat_MPIDense*)newmat->data;
1708   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1709     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1710   }
1711 
1712   if (!rank) {
1713     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1714 
1715     /* read in my part of the matrix numerical values  */
1716     nz   = procsnz[0];
1717     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1718 
1719     /* insert into matrix */
1720     jj      = rstart;
1721     smycols = mycols;
1722     svals   = vals;
1723     for (i=0; i<m; i++) {
1724       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1725       smycols += ourlens[i];
1726       svals   += ourlens[i];
1727       jj++;
1728     }
1729 
1730     /* read in other processors and ship out */
1731     for (i=1; i<size; i++) {
1732       nz   = procsnz[i];
1733       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1734       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1735     }
1736     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1737   } else {
1738     /* receive numeric values */
1739     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1740 
1741     /* receive message of values*/
1742     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1743     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1744     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1745 
1746     /* insert into matrix */
1747     jj      = rstart;
1748     smycols = mycols;
1749     svals   = vals;
1750     for (i=0; i<m; i++) {
1751       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1752       smycols += ourlens[i];
1753       svals   += ourlens[i];
1754       jj++;
1755     }
1756   }
1757   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1758   ierr = PetscFree(vals);CHKERRQ(ierr);
1759   ierr = PetscFree(mycols);CHKERRQ(ierr);
1760   ierr = PetscFree(rowners);CHKERRQ(ierr);
1761 
1762   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1763   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1764   PetscFunctionReturn(0);
1765 }
1766 
1767 #undef __FUNCT__
1768 #define __FUNCT__ "MatEqual_MPIDense"
1769 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1770 {
1771   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1772   Mat            a,b;
1773   PetscBool      flg;
1774   PetscErrorCode ierr;
1775 
1776   PetscFunctionBegin;
1777   a    = matA->A;
1778   b    = matB->A;
1779   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1780   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 #undef __FUNCT__
1785 #define __FUNCT__ "MatDestroy_MatTransMatMult_MPIDense_MPIDense"
1786 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1787 {
1788   PetscErrorCode        ierr;
1789   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1790   Mat_TransMatMultDense *atb = a->atbdense;
1791 
1792   PetscFunctionBegin;
1793   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1794   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1795   ierr = PetscFree(atb);CHKERRQ(ierr);
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatTransposeMatMultNumeric_MPIDense_MPIDense"
1801 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1802 {
1803   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1804   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1805   Mat_TransMatMultDense *atb = c->atbdense;
1806   PetscErrorCode ierr;
1807   MPI_Comm       comm;
1808   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1809   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1810   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1811   PetscScalar    _DOne=1.0,_DZero=0.0;
1812   PetscBLASInt   am,an,bn,aN;
1813   const PetscInt *ranges;
1814 
1815   PetscFunctionBegin;
1816   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1817   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1818   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1819 
1820   /* compute atbarray = aseq^T * bseq */
1821   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1822   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1823   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1824   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1825   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1826 
1827   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1828   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1829 
1830   /* arrange atbarray into sendbuf */
1831   k = 0;
1832   for (proc=0; proc<size; proc++) {
1833     for (j=0; j<cN; j++) {
1834       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1835     }
1836   }
1837   /* sum all atbarray to local values of C */
1838   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1839   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1840   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 #undef __FUNCT__
1845 #define __FUNCT__ "MatTransposeMatMultSymbolic_MPIDense_MPIDense"
1846 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1847 {
1848   PetscErrorCode        ierr;
1849   Mat                   Cdense;
1850   MPI_Comm              comm;
1851   PetscMPIInt           size;
1852   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1853   Mat_MPIDense          *c;
1854   Mat_TransMatMultDense *atb;
1855 
1856   PetscFunctionBegin;
1857   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1858   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1859     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1860   }
1861 
1862   /* create matrix product Cdense */
1863   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1864   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1865   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1866   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1867   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1868   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1869   *C   = Cdense;
1870 
1871   /* create data structure for reuse Cdense */
1872   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1873   ierr = PetscNew(&atb);CHKERRQ(ierr);
1874   cM = Cdense->rmap->N;
1875   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1876 
1877   c                    = (Mat_MPIDense*)Cdense->data;
1878   c->atbdense          = atb;
1879   atb->destroy         = Cdense->ops->destroy;
1880   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatTransposeMatMult_MPIDense_MPIDense"
1886 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1887 {
1888   PetscErrorCode ierr;
1889 
1890   PetscFunctionBegin;
1891   if (scall == MAT_INITIAL_MATRIX) {
1892     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1893   }
1894   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatDestroy_MatMatMult_MPIDense_MPIDense"
1900 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1901 {
1902   PetscErrorCode   ierr;
1903   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1904   Mat_MatMultDense *ab = a->abdense;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1908   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1909   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1910 
1911   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1912   ierr = PetscFree(ab);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 #if defined(PETSC_HAVE_ELEMENTAL)
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1919 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1920 {
1921   PetscErrorCode   ierr;
1922   Mat              Cnew;
1923   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1924   Mat_MatMultDense *ab=c->abdense;
1925 
1926   PetscFunctionBegin;
1927   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1928   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1929 
1930   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &ab->Ae);CHKERRQ(ierr);
1931   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &ab->Be);CHKERRQ(ierr);
1932   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1933 
1934   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,&Cnew);CHKERRQ(ierr);
1935 
1936   C->ops->destroy = ab->destroy; /* prevent struct ab being destroyeed by MatHeaderReplace() */
1937   ierr = MatHeaderReplace(C,&Cnew);CHKERRQ(ierr);
1938   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1939   C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1940   c                      = (Mat_MPIDense*)C->data;
1941   c->abdense             = ab;
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 #undef __FUNCT__
1946 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1947 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1948 {
1949   PetscErrorCode   ierr;
1950   MPI_Comm         comm;
1951   Mat              Ae,Be,Ce;
1952   Mat_MPIDense     *c;
1953   Mat_MatMultDense *ab;
1954 
1955   PetscFunctionBegin;
1956   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1957   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1958     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1959   }
1960 
1961   /* convert A and B to Elemental matrices Ae and Be */
1962   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1963   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1964 
1965   /* Ce = Ae*Be */
1966   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1967   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1968 
1969   /* convert Ce to C */
1970   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1971 
1972   /* create data structure for reuse Cdense */
1973   ierr = PetscNew(&ab);CHKERRQ(ierr);
1974   c                  = (Mat_MPIDense*)(*C)->data;
1975   c->abdense         = ab;
1976 
1977   ab->Ae             = Ae;
1978   ab->Be             = Be;
1979   ab->Ce             = Ce;
1980   ab->destroy        = (*C)->ops->destroy;
1981   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1982   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1983   PetscFunctionReturn(0);
1984 }
1985 
1986 #undef __FUNCT__
1987 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1988 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1989 {
1990   PetscErrorCode ierr;
1991 
1992   PetscFunctionBegin;
1993   if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */
1994     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1995   } else {
1996     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1997   }
1998   PetscFunctionReturn(0);
1999 }
2000 #endif
2001