xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 89583661dbdda284bc23265230c2f308532cda40)
1 
2 /*
3    Basic functions for parallel (MPI) dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
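/*
   A minimal creation sketch for this matrix type (illustrative only; the global sizes M and N
   and the use of PETSC_COMM_WORLD are assumptions, not taken from this file):

     Mat A;
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValue(A,0,0,1.0,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/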
10 
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14               matrix that stores the locally owned rows of the operator. For a sequential matrix it returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner sequential (MATSEQDENSE) matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
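/*
   Usage sketch for MatDenseGetLocalMatrix() (illustrative; A is assumed to be an assembled
   MATMPIDENSE or MATSEQDENSE matrix created elsewhere).  The returned matrix is not a copy,
   so the caller must not destroy it.

     Mat      Aloc;
     PetscInt lda;
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatDenseGetLDA(Aloc,&lda);CHKERRQ(ierr);
*/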
37 
38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39 {
40   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
41   PetscErrorCode ierr;
42   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
43 
44   PetscFunctionBegin;
45   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows may be accessed");
46   lrow = row - rstart;
47   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
48   PetscFunctionReturn(0);
49 }
50 
51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52 {
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
57   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
62 {
63   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
64   PetscErrorCode ierr;
65   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
66   PetscScalar    *array;
67   MPI_Comm       comm;
68   Mat            B;
69 
70   PetscFunctionBegin;
71   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
72 
73   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
74   if (!B) {
75     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
76     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
77     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
78     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
79     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
80     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
81     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
82     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
83     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
84     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
85     *a   = B;
86     ierr = MatDestroy(&B);CHKERRQ(ierr);
87   } else *a = B;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
92 {
93   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
94   PetscErrorCode ierr;
95   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
96   PetscBool      roworiented = A->roworiented;
97 
98   PetscFunctionBegin;
99   for (i=0; i<m; i++) {
100     if (idxm[i] < 0) continue;
101     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102     if (idxm[i] >= rstart && idxm[i] < rend) {
103       row = idxm[i] - rstart;
104       if (roworiented) {
105         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
106       } else {
107         for (j=0; j<n; j++) {
108           if (idxn[j] < 0) continue;
109           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
111         }
112       }
113     } else if (!A->donotstash) {
114       mat->assembled = PETSC_FALSE;
115       if (roworiented) {
116         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
117       } else {
118         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
119       }
120     }
121   }
122   PetscFunctionReturn(0);
123 }
124 
125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126 {
127   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128   PetscErrorCode ierr;
129   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135     if (idxm[i] >= rstart && idxm[i] < rend) {
136       row = idxm[i] - rstart;
137       for (j=0; j<n; j++) {
138         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
141       }
142     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143   }
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
148 {
149   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
150   PetscErrorCode ierr;
151 
152   PetscFunctionBegin;
153   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
158 {
159   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
160   PetscErrorCode ierr;
161 
162   PetscFunctionBegin;
163   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
168 {
169   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
170   PetscErrorCode ierr;
171 
172   PetscFunctionBegin;
173   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
174   PetscFunctionReturn(0);
175 }
176 
177 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
178 {
179   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
180   PetscErrorCode ierr;
181 
182   PetscFunctionBegin;
183   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
184   PetscFunctionReturn(0);
185 }
186 
187 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
188 {
189   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
190   PetscErrorCode ierr;
191 
192   PetscFunctionBegin;
193   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
194   PetscFunctionReturn(0);
195 }
196 
197 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
198 {
199   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
200   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
201   PetscErrorCode ierr;
202   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
203   const PetscInt *irow,*icol;
204   PetscScalar    *av,*bv,*v = lmat->v;
205   Mat            newmat;
206   IS             iscol_local;
207   MPI_Comm       comm_is,comm_mat;
208 
209   PetscFunctionBegin;
210   ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
211   ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
212   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
213 
214   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
215   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
216   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
217   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
218   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
219   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
220 
221   /* No parallel redistribution is currently supported! We should really check each index set
222      to confirm that it is OK.  ... Currently this supports only submatrices with the same row
223      partitioning as the original matrix (see the caller-side sketch after this routine). */
224 
225   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
226   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
227 
228   /* Check submatrix call */
229   if (scall == MAT_REUSE_MATRIX) {
230     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
231     /* Really need to test rows and column sizes! */
232     newmat = *B;
233   } else {
234     /* Create and fill new matrix */
235     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
236     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
237     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
238     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
239   }
240 
241   /* Now extract the data pointers and do the copy, column at a time */
242   newmatd = (Mat_MPIDense*)newmat->data;
243   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
244 
245   for (i=0; i<Ncols; i++) {
246     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
247     for (j=0; j<nrows; j++) {
248       *bv++ = av[irow[j] - rstart];
249     }
250   }
251 
252   /* Assemble the matrices so that the correct flags are set */
253   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
254   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
255 
256   /* Free work space */
257   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
258   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
259   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
260   *B   = newmat;
261   PetscFunctionReturn(0);
262 }
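/*
   Caller-side sketch (illustrative) for the restriction noted above: the row index set must
   follow the original row partitioning.  Here isrow selects each process's owned rows and
   iscol selects each process's owned columns, so Asub keeps the same layout as A.

     PetscInt rstart,rend,cstart,cend;
     IS       isrow,iscol;
     Mat      Asub;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/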
263 
264 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
265 {
266   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
267   PetscErrorCode ierr;
268 
269   PetscFunctionBegin;
270   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
275 {
276   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
284 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
287   MPI_Comm       comm;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
294   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
295   ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
296   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
297   mat->insertmode = addv; /* in case this processor had no cache */
298 
299   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
300   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
301   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
302   PetscFunctionReturn(0);
303 }
304 
305 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
306 {
307   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
308   PetscErrorCode ierr;
309   PetscInt       i,*row,*col,flg,j,rstart,ncols;
310   PetscMPIInt    n;
311   PetscScalar    *val;
312 
313   PetscFunctionBegin;
314   /*  wait on receives */
315   while (1) {
316     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
317     if (!flg) break;
318 
319     for (i=0; i<n;) {
320       /* Now identify the consecutive vals belonging to the same row */
321       for (j=i,rstart=row[j]; j<n; j++) {
322         if (row[j] != rstart) break;
323       }
324       if (j < n) ncols = j-i;
325       else       ncols = n-i;
326       /* Now assemble all these values with a single function call */
327       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
328       i    = j;
329     }
330   }
331   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
332 
333   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
334   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
335 
336   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
337     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
338   }
339   PetscFunctionReturn(0);
340 }
341 
342 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
343 {
344   PetscErrorCode ierr;
345   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
346 
347   PetscFunctionBegin;
348   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 /* This code does not handle the diagonal entries correctly unless the
353    matrix is square and the column and row ownerships are identical.
354    This is a BUG. The only way to fix it seems to be to access
355    the local dense storage directly and not go through the MatZeroRows()
356    routine.
357 */
358 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
359 {
360   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
361   PetscErrorCode    ierr;
362   PetscInt          i,*owners = A->rmap->range;
363   PetscInt          *sizes,j,idx,nsends;
364   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
365   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
366   PetscInt          *lens,*lrows,*values;
367   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
368   MPI_Comm          comm;
369   MPI_Request       *send_waits,*recv_waits;
370   MPI_Status        recv_status,*send_status;
371   PetscBool         found;
372   const PetscScalar *xx;
373   PetscScalar       *bb;
374 
375   PetscFunctionBegin;
376   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
377   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
378   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
379   /*  first count number of contributors to each processor */
380   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
381   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
382   for (i=0; i<N; i++) {
383     idx   = rows[i];
384     found = PETSC_FALSE;
385     for (j=0; j<size; j++) {
386       if (idx >= owners[j] && idx < owners[j+1]) {
387         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
388       }
389     }
390     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
391   }
392   nsends = 0;
393   for (i=0; i<size; i++) nsends += sizes[2*i+1];
394 
395   /* inform other processors of number of messages and max length*/
396   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
397 
398   /* post receives:   */
399   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
400   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
401   for (i=0; i<nrecvs; i++) {
402     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
403   }
404 
405   /* do sends:
406       1) starts[i] gives the starting index in svalues for stuff going to
407          the ith processor
408   */
409   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
410   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
411   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
412 
413   starts[0] = 0;
414   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
415   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
416 
417   starts[0] = 0;
418   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
419   count = 0;
420   for (i=0; i<size; i++) {
421     if (sizes[2*i+1]) {
422       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
423     }
424   }
425   ierr = PetscFree(starts);CHKERRQ(ierr);
426 
427   base = owners[rank];
428 
429   /*  wait on receives */
430   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
431   count = nrecvs;
432   slen  = 0;
433   while (count) {
434     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
435     /* unpack receives into our local space */
436     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
437 
438     source[imdex] = recv_status.MPI_SOURCE;
439     lens[imdex]   = n;
440     slen += n;
441     count--;
442   }
443   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
444 
445   /* convert the received global row indices to local numbering */
446   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
447   count = 0;
448   for (i=0; i<nrecvs; i++) {
449     values = rvalues + i*nmax;
450     for (j=0; j<lens[i]; j++) {
451       lrows[count++] = values[j] - base;
452     }
453   }
454   ierr = PetscFree(rvalues);CHKERRQ(ierr);
455   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
456   ierr = PetscFree(owner);CHKERRQ(ierr);
457   ierr = PetscFree(sizes);CHKERRQ(ierr);
458 
459   /* fix right hand side if needed */
460   if (x && b) {
461     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
462     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
463     for (i=0; i<slen; i++) {
464       bb[lrows[i]] = diag*xx[lrows[i]];
465     }
466     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
467     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
468   }
469 
470   /* actually zap the local rows */
471   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
472   if (diag != 0.0) {
473     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
474     PetscInt     m   = ll->lda, i;
475 
476     for (i=0; i<slen; i++) {
477       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
478     }
479   }
480   ierr = PetscFree(lrows);CHKERRQ(ierr);
481 
482   /* wait on sends */
483   if (nsends) {
484     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
485     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
486     ierr = PetscFree(send_status);CHKERRQ(ierr);
487   }
488   ierr = PetscFree(send_waits);CHKERRQ(ierr);
489   ierr = PetscFree(svalues);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
494 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
495 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
496 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
497 
498 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
501   PetscErrorCode ierr;
502 
503   PetscFunctionBegin;
504   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
505   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
506   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
511 {
512   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
513   PetscErrorCode ierr;
514 
515   PetscFunctionBegin;
516   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
517   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
518   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
523 {
524   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
525   PetscErrorCode ierr;
526   PetscScalar    zero = 0.0;
527 
528   PetscFunctionBegin;
529   ierr = VecSet(yy,zero);CHKERRQ(ierr);
530   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
531   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
532   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
537 {
538   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
539   PetscErrorCode ierr;
540 
541   PetscFunctionBegin;
542   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
543   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
544   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
545   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
546   PetscFunctionReturn(0);
547 }
548 
549 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
550 {
551   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
552   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
553   PetscErrorCode ierr;
554   PetscInt       len,i,n,m = A->rmap->n,radd;
555   PetscScalar    *x,zero = 0.0;
556 
557   PetscFunctionBegin;
558   ierr = VecSet(v,zero);CHKERRQ(ierr);
559   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
560   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
561   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
562   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
563   radd = A->rmap->rstart*m;
564   for (i=0; i<len; i++) {
565     x[i] = aloc->v[radd + i*m + i];
566   }
567   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 
571 PetscErrorCode MatDestroy_MPIDense(Mat mat)
572 {
573   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
574   PetscErrorCode ierr;
575 
576   PetscFunctionBegin;
577 #if defined(PETSC_USE_LOG)
578   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
579 #endif
580   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
581   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
582   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
583   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
584 
585   ierr = PetscFree(mat->data);CHKERRQ(ierr);
586   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
587 
588   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
589   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
590   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
591   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
592   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
593   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
594   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
595 #if defined(PETSC_HAVE_ELEMENTAL)
596   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
597 #endif
598   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
599   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
600   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
601   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
602   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
603   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
604   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
605   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
606   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
607   PetscFunctionReturn(0);
608 }
609 
610 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
611 {
612   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
613   PetscErrorCode    ierr;
614   PetscViewerFormat format;
615   int               fd;
616   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
617   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
618   PetscScalar       *work,*v,*vv;
619   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
620 
621   PetscFunctionBegin;
622   if (mdn->size == 1) {
623     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
624   } else {
625     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
626     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
627     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
628 
629     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
630     if (format == PETSC_VIEWER_NATIVE) {
631 
632       if (!rank) {
633         /* store the matrix as a dense matrix */
634         header[0] = MAT_FILE_CLASSID;
635         header[1] = mat->rmap->N;
636         header[2] = N;
637         header[3] = MATRIX_BINARY_FORMAT_DENSE;
638         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
639 
640         /* get largest work array needed for transposing array */
641         mmax = mat->rmap->n;
642         for (i=1; i<size; i++) {
643           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
644         }
645         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
646 
647         /* write out local array, by rows */
648         m = mat->rmap->n;
649         v = a->v;
650         for (j=0; j<N; j++) {
651           for (i=0; i<m; i++) {
652             work[j + i*N] = *v++;
653           }
654         }
655         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
656         /* get largest work array to receive messages from other processes, excludes process zero */
657         mmax = 0;
658         for (i=1; i<size; i++) {
659           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
660         }
661         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
662         for (k = 1; k < size; k++) {
663           v    = vv;
664           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
665           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
666 
667           for (j = 0; j < N; j++) {
668             for (i = 0; i < m; i++) {
669               work[j + i*N] = *v++;
670             }
671           }
672           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
673         }
674         ierr = PetscFree(work);CHKERRQ(ierr);
675         ierr = PetscFree(vv);CHKERRQ(ierr);
676       } else {
677         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
678       }
679     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
680   }
681   PetscFunctionReturn(0);
682 }
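/*
   Reader-side sketch (illustrative): a matrix written above with the PETSC_VIEWER_NATIVE
   format can be read back with MatLoad().  The file name "matrix.dat" is an assumption.

     PetscViewer viewer;
     Mat         B;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/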
683 
684 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
685 #include <petscdraw.h>
686 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
687 {
688   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
689   PetscErrorCode    ierr;
690   PetscMPIInt       rank = mdn->rank;
691   PetscViewerType   vtype;
692   PetscBool         iascii,isdraw;
693   PetscViewer       sviewer;
694   PetscViewerFormat format;
695 
696   PetscFunctionBegin;
697   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
698   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
699   if (iascii) {
700     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
701     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
702     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
703       MatInfo info;
704       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
705       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
706       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
707       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
708       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
709       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
710       PetscFunctionReturn(0);
711     } else if (format == PETSC_VIEWER_ASCII_INFO) {
712       PetscFunctionReturn(0);
713     }
714   } else if (isdraw) {
715     PetscDraw draw;
716     PetscBool isnull;
717 
718     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
719     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
720     if (isnull) PetscFunctionReturn(0);
721   }
722 
723   {
724     /* assemble the entire matrix onto first processor. */
725     Mat         A;
726     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
727     PetscInt    *cols;
728     PetscScalar *vals;
729 
730     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
731     if (!rank) {
732       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
733     } else {
734       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
735     }
736     /* Since this is a temporary matrix, hard-coding MATMPIDENSE instead of using ((PetscObject)mat)->type_name is probably acceptable. */
737     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
738     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
739     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
740 
741     /* Copy the matrix ... This isn't the most efficient means,
742        but it's quick for now */
743     A->insertmode = INSERT_VALUES;
744 
745     row = mat->rmap->rstart;
746     m   = mdn->A->rmap->n;
747     for (i=0; i<m; i++) {
748       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
749       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
750       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
751       row++;
752     }
753 
754     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
755     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
756     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
757     if (!rank) {
758       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
759       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
760     }
761     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
762     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
763     ierr = MatDestroy(&A);CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
769 {
770   PetscErrorCode ierr;
771   PetscBool      iascii,isbinary,isdraw,issocket;
772 
773   PetscFunctionBegin;
774   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
775   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
776   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
777   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
778 
779   if (iascii || issocket || isdraw) {
780     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
781   } else if (isbinary) {
782     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
783   }
784   PetscFunctionReturn(0);
785 }
786 
787 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
788 {
789   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
790   Mat            mdn  = mat->A;
791   PetscErrorCode ierr;
792   PetscReal      isend[5],irecv[5];
793 
794   PetscFunctionBegin;
795   info->block_size = 1.0;
796 
797   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
798 
799   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
800   isend[3] = info->memory;  isend[4] = info->mallocs;
801   if (flag == MAT_LOCAL) {
802     info->nz_used      = isend[0];
803     info->nz_allocated = isend[1];
804     info->nz_unneeded  = isend[2];
805     info->memory       = isend[3];
806     info->mallocs      = isend[4];
807   } else if (flag == MAT_GLOBAL_MAX) {
808     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
809 
810     info->nz_used      = irecv[0];
811     info->nz_allocated = irecv[1];
812     info->nz_unneeded  = irecv[2];
813     info->memory       = irecv[3];
814     info->mallocs      = irecv[4];
815   } else if (flag == MAT_GLOBAL_SUM) {
816     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
817 
818     info->nz_used      = irecv[0];
819     info->nz_allocated = irecv[1];
820     info->nz_unneeded  = irecv[2];
821     info->memory       = irecv[3];
822     info->mallocs      = irecv[4];
823   }
824   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
825   info->fill_ratio_needed = 0;
826   info->factor_mallocs    = 0;
827   PetscFunctionReturn(0);
828 }
829 
830 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
831 {
832   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
833   PetscErrorCode ierr;
834 
835   PetscFunctionBegin;
836   switch (op) {
837   case MAT_NEW_NONZERO_LOCATIONS:
838   case MAT_NEW_NONZERO_LOCATION_ERR:
839   case MAT_NEW_NONZERO_ALLOCATION_ERR:
840     MatCheckPreallocated(A,1);
841     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
842     break;
843   case MAT_ROW_ORIENTED:
844     MatCheckPreallocated(A,1);
845     a->roworiented = flg;
846     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
847     break;
848   case MAT_NEW_DIAGONALS:
849   case MAT_KEEP_NONZERO_PATTERN:
850   case MAT_USE_HASH_TABLE:
851     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
852     break;
853   case MAT_IGNORE_OFF_PROC_ENTRIES:
854     a->donotstash = flg;
855     break;
856   case MAT_SYMMETRIC:
857   case MAT_STRUCTURALLY_SYMMETRIC:
858   case MAT_HERMITIAN:
859   case MAT_SYMMETRY_ETERNAL:
860   case MAT_IGNORE_LOWER_TRIANGULAR:
861   case MAT_IGNORE_ZERO_ENTRIES:
862     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
863     break;
864   default:
865     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
866   }
867   PetscFunctionReturn(0);
868 }
869 
870 
871 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
872 {
873   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
874   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
875   const PetscScalar *l,*r;
876   PetscScalar       x,*v;
877   PetscErrorCode    ierr;
878   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
879 
880   PetscFunctionBegin;
881   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
882   if (ll) {
883     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
884     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
885     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
886     for (i=0; i<m; i++) {
887       x = l[i];
888       v = mat->v + i;
889       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
890     }
891     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
892     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
893   }
894   if (rr) {
895     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
896     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
897     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
898     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
899     ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
900     for (i=0; i<n; i++) {
901       x = r[i];
902       v = mat->v + i*m;
903       for (j=0; j<m; j++) (*v++) *= x;
904     }
905     ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
906     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
907   }
908   PetscFunctionReturn(0);
909 }
910 
911 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
912 {
913   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
914   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
915   PetscErrorCode ierr;
916   PetscInt       i,j;
917   PetscReal      sum = 0.0;
918   PetscScalar    *v  = mat->v;
919 
920   PetscFunctionBegin;
921   if (mdn->size == 1) {
922     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
923   } else {
924     if (type == NORM_FROBENIUS) {
925       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
926         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
927       }
928       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929       *nrm = PetscSqrtReal(*nrm);
930       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
931     } else if (type == NORM_1) {
932       PetscReal *tmp,*tmp2;
933       ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
934       *nrm = 0.0;
935       v    = mat->v;
936       for (j=0; j<mdn->A->cmap->n; j++) {
937         for (i=0; i<mdn->A->rmap->n; i++) {
938           tmp[j] += PetscAbsScalar(*v);  v++;
939         }
940       }
941       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
942       for (j=0; j<A->cmap->N; j++) {
943         if (tmp2[j] > *nrm) *nrm = tmp2[j];
944       }
945       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
946       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
947     } else if (type == NORM_INFINITY) { /* max row norm */
948       PetscReal ntemp;
949       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
950       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
951     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
952   }
953   PetscFunctionReturn(0);
954 }
955 
956 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
957 {
958   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
959   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
960   Mat            B;
961   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
962   PetscErrorCode ierr;
963   PetscInt       j,i;
964   PetscScalar    *v;
965 
966   PetscFunctionBegin;
967   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
968     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
969     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
970     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
971     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
972   } else {
973     B = *matout;
974   }
975 
976   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
977   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
978   for (i=0; i<m; i++) rwork[i] = rstart + i;
979   for (j=0; j<n; j++) {
980     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
981     v   += m;
982   }
983   ierr = PetscFree(rwork);CHKERRQ(ierr);
984   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
985   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
986   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
987     *matout = B;
988   } else {
989     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
990   }
991   PetscFunctionReturn(0);
992 }
993 
994 
995 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
996 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
997 
998 PetscErrorCode MatSetUp_MPIDense(Mat A)
999 {
1000   PetscErrorCode ierr;
1001 
1002   PetscFunctionBegin;
1003   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1008 {
1009   PetscErrorCode ierr;
1010   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1011 
1012   PetscFunctionBegin;
1013   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1014   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1019 {
1020   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1021   PetscErrorCode ierr;
1022 
1023   PetscFunctionBegin;
1024   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 PetscErrorCode MatRealPart_MPIDense(Mat A)
1029 {
1030   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1031   PetscErrorCode ierr;
1032 
1033   PetscFunctionBegin;
1034   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1039 {
1040   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1041   PetscErrorCode ierr;
1042 
1043   PetscFunctionBegin;
1044   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1049 {
1050   PetscErrorCode    ierr;
1051   PetscScalar       *x;
1052   const PetscScalar *a;
1053   PetscInt          lda;
1054 
1055   PetscFunctionBegin;
1056   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1057   ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
1058   ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
1059   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1060   ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr);
1061   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1062   ierr = MatDenseRestoreArrayRead(A,&a);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1067 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1068 {
1069   PetscErrorCode ierr;
1070   PetscInt       i,n;
1071   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1072   PetscReal      *work;
1073 
1074   PetscFunctionBegin;
1075   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1076   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1077   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1078   if (type == NORM_2) {
1079     for (i=0; i<n; i++) work[i] *= work[i];
1080   }
1081   if (type == NORM_INFINITY) {
1082     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1083   } else {
1084     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1085   }
1086   ierr = PetscFree(work);CHKERRQ(ierr);
1087   if (type == NORM_2) {
1088     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1089   }
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1094 {
1095   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1096   PetscErrorCode ierr;
1097   PetscScalar    *a;
1098   PetscInt       m,n,i;
1099 
1100   PetscFunctionBegin;
1101   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1102   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1103   for (i=0; i<m*n; i++) {
1104     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1105   }
1106   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1111 
1112 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1113 {
1114   PetscFunctionBegin;
1115   *missing = PETSC_FALSE;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
1120 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
1121 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1122 
1123 /* -------------------------------------------------------------------*/
1124 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1125                                         MatGetRow_MPIDense,
1126                                         MatRestoreRow_MPIDense,
1127                                         MatMult_MPIDense,
1128                                 /*  4*/ MatMultAdd_MPIDense,
1129                                         MatMultTranspose_MPIDense,
1130                                         MatMultTransposeAdd_MPIDense,
1131                                         0,
1132                                         0,
1133                                         0,
1134                                 /* 10*/ 0,
1135                                         0,
1136                                         0,
1137                                         0,
1138                                         MatTranspose_MPIDense,
1139                                 /* 15*/ MatGetInfo_MPIDense,
1140                                         MatEqual_MPIDense,
1141                                         MatGetDiagonal_MPIDense,
1142                                         MatDiagonalScale_MPIDense,
1143                                         MatNorm_MPIDense,
1144                                 /* 20*/ MatAssemblyBegin_MPIDense,
1145                                         MatAssemblyEnd_MPIDense,
1146                                         MatSetOption_MPIDense,
1147                                         MatZeroEntries_MPIDense,
1148                                 /* 24*/ MatZeroRows_MPIDense,
1149                                         0,
1150                                         0,
1151                                         0,
1152                                         0,
1153                                 /* 29*/ MatSetUp_MPIDense,
1154                                         0,
1155                                         0,
1156                                         MatGetDiagonalBlock_MPIDense,
1157                                         0,
1158                                 /* 34*/ MatDuplicate_MPIDense,
1159                                         0,
1160                                         0,
1161                                         0,
1162                                         0,
1163                                 /* 39*/ MatAXPY_MPIDense,
1164                                         MatCreateSubMatrices_MPIDense,
1165                                         0,
1166                                         MatGetValues_MPIDense,
1167                                         0,
1168                                 /* 44*/ 0,
1169                                         MatScale_MPIDense,
1170                                         MatShift_Basic,
1171                                         0,
1172                                         0,
1173                                 /* 49*/ MatSetRandom_MPIDense,
1174                                         0,
1175                                         0,
1176                                         0,
1177                                         0,
1178                                 /* 54*/ 0,
1179                                         0,
1180                                         0,
1181                                         0,
1182                                         0,
1183                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1184                                         MatDestroy_MPIDense,
1185                                         MatView_MPIDense,
1186                                         0,
1187                                         0,
1188                                 /* 64*/ 0,
1189                                         0,
1190                                         0,
1191                                         0,
1192                                         0,
1193                                 /* 69*/ 0,
1194                                         0,
1195                                         0,
1196                                         0,
1197                                         0,
1198                                 /* 74*/ 0,
1199                                         0,
1200                                         0,
1201                                         0,
1202                                         0,
1203                                 /* 79*/ 0,
1204                                         0,
1205                                         0,
1206                                         0,
1207                                 /* 83*/ MatLoad_MPIDense,
1208                                         0,
1209                                         0,
1210                                         0,
1211                                         0,
1212                                         0,
1213 #if defined(PETSC_HAVE_ELEMENTAL)
1214                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1215                                         MatMatMultSymbolic_MPIDense_MPIDense,
1216 #else
1217                                 /* 89*/ 0,
1218                                         0,
1219 #endif
1220                                         MatMatMultNumeric_MPIDense,
1221                                         0,
1222                                         0,
1223                                 /* 94*/ 0,
1224                                         MatMatTransposeMult_MPIDense_MPIDense,
1225                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1226                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1227                                         0,
1228                                 /* 99*/ 0,
1229                                         0,
1230                                         0,
1231                                         MatConjugate_MPIDense,
1232                                         0,
1233                                 /*104*/ 0,
1234                                         MatRealPart_MPIDense,
1235                                         MatImaginaryPart_MPIDense,
1236                                         0,
1237                                         0,
1238                                 /*109*/ 0,
1239                                         0,
1240                                         0,
1241                                         MatGetColumnVector_MPIDense,
1242                                         MatMissingDiagonal_MPIDense,
1243                                 /*114*/ 0,
1244                                         0,
1245                                         0,
1246                                         0,
1247                                         0,
1248                                 /*119*/ 0,
1249                                         0,
1250                                         0,
1251                                         0,
1252                                         0,
1253                                 /*124*/ 0,
1254                                         MatGetColumnNorms_MPIDense,
1255                                         0,
1256                                         0,
1257                                         0,
1258                                 /*129*/ 0,
1259                                         MatTransposeMatMult_MPIDense_MPIDense,
1260                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1261                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1262                                         0,
1263                                 /*134*/ 0,
1264                                         0,
1265                                         0,
1266                                         0,
1267                                         0,
1268                                 /*139*/ 0,
1269                                         0,
1270                                         0,
1271                                         0,
1272                                         0,
1273                                 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
1274 };
1275 
1276 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1277 {
1278   Mat_MPIDense   *a;
1279   PetscErrorCode ierr;
1280 
1281   PetscFunctionBegin;
1282   if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
1283   mat->preallocated = PETSC_TRUE;
1284   /* Note:  For now, when a data array is provided, this assumes the user has correctly
1285    allocated the local dense storage space (see the caller-side sketch after this routine).  We should add error checking. */
1286 
1287   a       = (Mat_MPIDense*)mat->data;
1288   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1289   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1290   a->nvec = mat->cmap->n;
1291 
1292   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1293   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1294   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1295   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1296   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1297   PetscFunctionReturn(0);
1298 }
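/*
   Caller-side sketch (illustrative) of supplying user-allocated storage for the local block,
   the case the note above refers to.  The sizes m, n, M, N are assumptions; the local block
   holds the m local rows by all N global columns in column-major order, and the array is not
   freed by the matrix.

     PetscScalar *localvals;
     Mat         A;
     ierr = PetscMalloc1(m*N,&localvals);CHKERRQ(ierr);
     ierr = MatCreateDense(PETSC_COMM_WORLD,m,n,M,N,localvals,&A);CHKERRQ(ierr);
*/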
1299 
1300 #if defined(PETSC_HAVE_ELEMENTAL)
1301 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1302 {
1303   Mat            mat_elemental;
1304   PetscErrorCode ierr;
1305   PetscScalar    *v;
1306   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1307 
1308   PetscFunctionBegin;
1309   if (reuse == MAT_REUSE_MATRIX) {
1310     mat_elemental = *newmat;
1311     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1312   } else {
1313     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1314     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1315     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1316     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1317     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1318   }
1319 
1320   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1321   for (i=0; i<N; i++) cols[i] = i;
1322   for (i=0; i<m; i++) rows[i] = rstart + i;
1323 
1324   /* The PETSc-Elemental interface uses axpy for setting off-process entries, so only ADD_VALUES is allowed */
1325   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1326   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1327   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1328   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1329   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1330   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1331 
1332   if (reuse == MAT_INPLACE_MATRIX) {
1333     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1334   } else {
1335     *newmat = mat_elemental;
1336   }
1337   PetscFunctionReturn(0);
1338 }
1339 #endif
1340 
1341 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1342 {
1343   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1344   PetscErrorCode ierr;
1345 
1346   PetscFunctionBegin;
1347   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1352 {
1353   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1354   PetscErrorCode ierr;
1355 
1356   PetscFunctionBegin;
1357   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1362 {
1363   PetscErrorCode ierr;
1364   Mat_MPIDense   *mat;
1365   PetscInt       m,nloc,N;
1366 
1367   PetscFunctionBegin;
1368   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1369   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1370   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1371     PetscInt sum;
1372 
1373     if (n == PETSC_DECIDE) {
1374       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1375     }
1376     /* Check sum(n) = N */
1377     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1378     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1379 
1380     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1381   }
1382 
1383   /* numeric phase */
1384   mat = (Mat_MPIDense*)(*outmat)->data;
1385   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1386   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1387   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1392 {
1393   Mat_MPIDense   *a;
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1398   mat->data = (void*)a;
1399   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1400 
1401   mat->insertmode = NOT_SET_VALUES;
1402   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1403   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1404 
1405   /* build cache for off-process entries set before assembly */
1406   a->donotstash = PETSC_FALSE;
1407 
1408   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1409 
1410   /* data structures used for matrix-vector multiply */
1411   a->lvec        = 0;
1412   a->Mvctx       = 0;
1413   a->roworiented = PETSC_TRUE;
1414 
1415   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1416   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1417   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1418   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1419   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1420   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1421   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1422 #if defined(PETSC_HAVE_ELEMENTAL)
1423   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1424 #endif
1425   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1426   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1427   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1428   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1429 
1430   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1431   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1432   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1433   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1434   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1435   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 /*MC
1440    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1441 
1442    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1443    and MATMPIDENSE otherwise.
1444 
1445    Options Database Keys:
1446 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1447 
1448   Level: beginner
1449 
1450 
1451 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1452 M*/
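
/*
   A minimal sketch (illustrative only) of creating a MATDENSE matrix, with -mat_type dense
   honored through MatSetFromOptions(); the global sizes M and N are assumed to be defined
   by the caller.

       Mat            A;
       PetscErrorCode ierr;

       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
       ierr = MatSetFromOptions(A);CHKERRQ(ierr);
       ierr = MatSetUp(A);CHKERRQ(ierr);
       ... insert values with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
       ierr = MatDestroy(&A);CHKERRQ(ierr);
*/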
1453 
1454 /*@C
1455    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1456 
1457    Collective on Mat
1458 
1459    Input Parameters:
1460 +  B - the matrix
1461 -  data - optional location of matrix data.  Set data=NULL for PETSc
1462    to control all matrix memory allocation.
1463 
1464    Notes:
1465    The dense format is fully compatible with standard Fortran 77
1466    storage by columns.
1467 
1468    The data input variable is intended primarily for Fortran programmers
1469    who wish to allocate their own matrix memory space.  Most users should
1470    set data=NULL.
1471 
1472    Level: intermediate
1473 
1474 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1475 @*/
1476 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1477 {
1478   PetscErrorCode ierr;
1479 
1480   PetscFunctionBegin;
1481   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
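
/*
   A minimal usage sketch (illustrative only) in which the caller supplies its own storage
   for the local m-by-N block in column-major (Fortran) order; m, N, and comm are assumed
   to be defined by the caller, and the array remains owned by the caller.

       Mat         B;
       PetscScalar *mydata;

       ierr = MatCreate(comm,&B);CHKERRQ(ierr);
       ierr = MatSetSizes(B,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
       ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
       ierr = PetscMalloc1(m*N,&mydata);CHKERRQ(ierr);
       ierr = MatMPIDenseSetPreallocation(B,mydata);CHKERRQ(ierr);
*/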
1484 
1485 /*@
1486    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1487    array provided by the user. This is useful to avoid copying an array
1488    into a matrix.
1489 
1490    Not Collective
1491 
1492    Input Parameters:
1493 +  mat - the matrix
1494 -  array - the array in column major order
1495 
1496    Notes:
1497    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1498    freed when the matrix is destroyed.
1499 
1500    Level: developer
1501 
1502 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1503 
1504 @*/
1505 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1506 {
1507   PetscErrorCode ierr;
1508   PetscFunctionBegin;
1509   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1510   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1511   PetscFunctionReturn(0);
1512 }
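
/*
   A minimal sketch (illustrative only) of the place/reset pattern, paired with
   MatDenseResetArray() documented below; A is assumed to be an existing dense matrix and
   myvalues a user-owned column-major buffer matching the size of the local block.

       PetscScalar *myvalues;

       ierr = MatDensePlaceArray(A,myvalues);CHKERRQ(ierr);
       ... use A, e.g. MatMult(A,x,y) ...
       ierr = MatDenseResetArray(A);CHKERRQ(ierr);
*/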
1513 
1514 /*@
1515    MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()
1516 
1517    Not Collective
1518 
1519    Input Parameter:
1520 .  mat - the matrix
1521 
1522    Notes:
1523    You can only call this after a call to MatDensePlaceArray()
1524 
1525    Level: developer
1526 
1527 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1528 
1529 @*/
1530 PetscErrorCode  MatDenseResetArray(Mat mat)
1531 {
1532   PetscErrorCode ierr;
1533   PetscFunctionBegin;
1534   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1535   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 /*@C
1540    MatCreateDense - Creates a parallel matrix in dense format.
1541 
1542    Collective
1543 
1544    Input Parameters:
1545 +  comm - MPI communicator
1546 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1547 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1548 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1549 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1550 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1551    to control all matrix memory allocation.
1552 
1553    Output Parameter:
1554 .  A - the matrix
1555 
1556    Notes:
1557    The dense format is fully compatible with standard Fortran 77
1558    storage by columns.
1559 
1560    The data input variable is intended primarily for Fortran programmers
1561    who wish to allocate their own matrix memory space.  Most users should
1562    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1563 
1564    The user MUST specify either the local or global matrix dimensions
1565    (possibly both).
1566 
1567    Level: intermediate
1568 
1569 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1570 @*/
1571 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1572 {
1573   PetscErrorCode ierr;
1574   PetscMPIInt    size;
1575 
1576   PetscFunctionBegin;
1577   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1578   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1579   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1580   if (size > 1) {
1581     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1582     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1583     if (data) {  /* user provided data array, so no need to assemble */
1584       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1585       (*A)->assembled = PETSC_TRUE;
1586     }
1587   } else {
1588     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1589     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1590   }
1591   PetscFunctionReturn(0);
1592 }
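
/*
   A minimal sketch (illustrative only) of the most common call, letting PETSc choose the
   local sizes and allocate the storage; M and N are assumed global dimensions.

       Mat A;

       ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
       ... insert values with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
       ierr = MatDestroy(&A);CHKERRQ(ierr);
*/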
1593 
1594 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1595 {
1596   Mat            mat;
1597   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1598   PetscErrorCode ierr;
1599 
1600   PetscFunctionBegin;
1601   *newmat = 0;
1602   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1603   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1604   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1605   a       = (Mat_MPIDense*)mat->data;
1606 
1607   mat->factortype   = A->factortype;
1608   mat->assembled    = PETSC_TRUE;
1609   mat->preallocated = PETSC_TRUE;
1610 
1611   a->size         = oldmat->size;
1612   a->rank         = oldmat->rank;
1613   mat->insertmode = NOT_SET_VALUES;
1614   a->nvec         = oldmat->nvec;
1615   a->donotstash   = oldmat->donotstash;
1616 
1617   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1618   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1619 
1620   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1621   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1622   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1623 
1624   *newmat = mat;
1625   PetscFunctionReturn(0);
1626 }
1627 
1628 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1629 {
1630   PetscErrorCode ierr;
1631   PetscMPIInt    rank,size;
1632   const PetscInt *rowners;
1633   PetscInt       i,m,n,nz,j,mMax;
1634   PetscScalar    *array,*vals,*vals_ptr;
1635   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1636 
1637   PetscFunctionBegin;
1638   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1639   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1640 
1641   /* determine ownership of rows and columns */
1642   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1643   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1644 
1645   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1646   if (!a->A) {
1647     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1648   }
1649   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1650   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1651   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1652   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1653   if (!rank) {
1654     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1655 
1656     /* read in my part of the matrix numerical values  */
1657     ierr = PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1658 
1659     /* insert into matrix by row (this is why we cannot read directly into array) */
1660     vals_ptr = vals;
1661     for (i=0; i<m; i++) {
1662       for (j=0; j<N; j++) {
1663         array[i + j*m] = *vals_ptr++;
1664       }
1665     }
1666 
1667     /* read in other processors and ship out */
1668     for (i=1; i<size; i++) {
1669       nz   = (rowners[i+1] - rowners[i])*N;
1670       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1671       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1672     }
1673   } else {
1674     /* receive numeric values */
1675     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1676 
1677     /* receive message of values */
1678     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1679 
1680     /* insert into matrix by row (this is why we cannot read directly into array) */
1681     vals_ptr = vals;
1682     for (i=0; i<m; i++) {
1683       for (j=0; j<N; j++) {
1684         array[i + j*m] = *vals_ptr++;
1685       }
1686     }
1687   }
1688   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1689   ierr = PetscFree(vals);CHKERRQ(ierr);
1690   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1696 {
1697   Mat_MPIDense   *a;
1698   PetscScalar    *vals,*svals;
1699   MPI_Comm       comm;
1700   MPI_Status     status;
1701   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1702   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1703   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1704   PetscInt       i,nz,j,rstart,rend;
1705   int            fd;
1706   PetscBool      isbinary;
1707   PetscErrorCode ierr;
1708 
1709   PetscFunctionBegin;
1710   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1711   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
1712 
1713   /* force binary viewer to load .info file if it has not yet done so */
1714   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1715   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1716   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1717   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1718   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1719   if (!rank) {
1720     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
1721     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1722   }
1723   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1724   M    = header[1]; N = header[2]; nz = header[3];
1725 
1726   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1727   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1728   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1729 
1730   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1731   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1732 
1733   /*
1734        Handle case where matrix is stored on disk as a dense matrix
1735   */
1736   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1737     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1738     PetscFunctionReturn(0);
1739   }
1740 
1741   /* determine ownership of all rows */
1742   if (newmat->rmap->n < 0) {
1743     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1744   } else {
1745     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1746   }
1747   if (newmat->cmap->n < 0) {
1748     n = PETSC_DECIDE;
1749   } else {
1750     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1751   }
1752 
1753   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1754   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1755   rowners[0] = 0;
1756   for (i=2; i<=size; i++) {
1757     rowners[i] += rowners[i-1];
1758   }
1759   rstart = rowners[rank];
1760   rend   = rowners[rank+1];
1761 
1762   /* distribute row lengths to all processors */
1763   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1764   if (!rank) {
1765     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1766     ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
1767     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1768     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1769     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1770     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1771   } else {
1772     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1773   }
1774 
1775   if (!rank) {
1776     /* calculate the number of nonzeros on each processor */
1777     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
1778     for (i=0; i<size; i++) {
1779       for (j=rowners[i]; j< rowners[i+1]; j++) {
1780         procsnz[i] += rowlengths[j];
1781       }
1782     }
1783     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1784 
1785     /* determine max buffer needed and allocate it */
1786     maxnz = 0;
1787     for (i=0; i<size; i++) {
1788       maxnz = PetscMax(maxnz,procsnz[i]);
1789     }
1790     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1791 
1792     /* read in my part of the matrix column indices  */
1793     nz   = procsnz[0];
1794     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1795     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
1796 
1797     /* read in everyone else's part and ship it off */
1798     for (i=1; i<size; i++) {
1799       nz   = procsnz[i];
1800       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
1801       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1802     }
1803     ierr = PetscFree(cols);CHKERRQ(ierr);
1804   } else {
1805     /* determine buffer space needed for message */
1806     nz = 0;
1807     for (i=0; i<m; i++) {
1808       nz += ourlens[i];
1809     }
1810     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1811 
1812     /* receive message of column indices */
1813     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1814     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1815     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1816   }
1817 
1818   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1819   a = (Mat_MPIDense*)newmat->data;
1820   if (!a->A) {
1821     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1822   }
1823 
1824   if (!rank) {
1825     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1826 
1827     /* read in my part of the matrix numerical values  */
1828     nz   = procsnz[0];
1829     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1830 
1831     /* insert into matrix */
1832     jj      = rstart;
1833     smycols = mycols;
1834     svals   = vals;
1835     for (i=0; i<m; i++) {
1836       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1837       smycols += ourlens[i];
1838       svals   += ourlens[i];
1839       jj++;
1840     }
1841 
1842     /* read in other processors and ship out */
1843     for (i=1; i<size; i++) {
1844       nz   = procsnz[i];
1845       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1846       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1847     }
1848     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1849   } else {
1850     /* receive numeric values */
1851     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1852 
1853     /* receive message of values */
1854     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1855     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1856     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1857 
1858     /* insert into matrix */
1859     jj      = rstart;
1860     smycols = mycols;
1861     svals   = vals;
1862     for (i=0; i<m; i++) {
1863       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1864       smycols += ourlens[i];
1865       svals   += ourlens[i];
1866       jj++;
1867     }
1868   }
1869   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1870   ierr = PetscFree(vals);CHKERRQ(ierr);
1871   ierr = PetscFree(mycols);CHKERRQ(ierr);
1872   ierr = PetscFree(rowners);CHKERRQ(ierr);
1873 
1874   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1875   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1880 {
1881   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1882   Mat            a,b;
1883   PetscBool      flg;
1884   PetscErrorCode ierr;
1885 
1886   PetscFunctionBegin;
1887   a    = matA->A;
1888   b    = matB->A;
1889   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1890   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1895 {
1896   PetscErrorCode        ierr;
1897   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1898   Mat_TransMatMultDense *atb = a->atbdense;
1899 
1900   PetscFunctionBegin;
1901   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1902   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1903   ierr = PetscFree(atb);CHKERRQ(ierr);
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1908 {
1909   PetscErrorCode        ierr;
1910   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1911   Mat_MatTransMultDense *abt = a->abtdense;
1912 
1913   PetscFunctionBegin;
1914   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1915   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
1916   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1917   ierr = PetscFree(abt);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1922 {
1923   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1924   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1925   Mat_TransMatMultDense *atb = c->atbdense;
1926   PetscErrorCode ierr;
1927   MPI_Comm       comm;
1928   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1929   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1930   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1931   PetscScalar    _DOne=1.0,_DZero=0.0;
1932   PetscBLASInt   am,an,bn,aN;
1933   const PetscInt *ranges;
1934 
1935   PetscFunctionBegin;
1936   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1937   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1938   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1939 
1940   /* compute atbarray = aseq^T * bseq */
1941   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1942   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1943   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1944   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1945   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1946 
1947   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1948   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1949 
1950   /* arrange atbarray into sendbuf */
1951   k = 0;
1952   for (proc=0; proc<size; proc++) {
1953     for (j=0; j<cN; j++) {
1954       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1955     }
1956   }
1957   /* sum the contributions in atbarray into the local values of C */
1958   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1959   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1960   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1965 {
1966   PetscErrorCode        ierr;
1967   Mat                   Cdense;
1968   MPI_Comm              comm;
1969   PetscMPIInt           size;
1970   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1971   Mat_MPIDense          *c;
1972   Mat_TransMatMultDense *atb;
1973 
1974   PetscFunctionBegin;
1975   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1976   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1977     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1978   }
1979 
1980   /* create matrix product Cdense */
1981   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1982   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1983   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1984   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1985   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1986   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1987   *C   = Cdense;
1988 
1989   /* create data structure for reuse of Cdense */
1990   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1991   ierr = PetscNew(&atb);CHKERRQ(ierr);
1992   cM = Cdense->rmap->N;
1993   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1994 
1995   c                    = (Mat_MPIDense*)Cdense->data;
1996   c->atbdense          = atb;
1997   atb->destroy         = Cdense->ops->destroy;
1998   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2003 {
2004   PetscErrorCode ierr;
2005 
2006   PetscFunctionBegin;
2007   if (scall == MAT_INITIAL_MATRIX) {
2008     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2009   }
2010   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
2015 {
2016   PetscErrorCode        ierr;
2017   Mat                   Cdense;
2018   MPI_Comm              comm;
2019   PetscMPIInt           i, size;
2020   PetscInt              maxRows, bufsiz;
2021   Mat_MPIDense          *c;
2022   PetscMPIInt           tag;
2023   const char            *algTypes[2] = {"allgatherv","cyclic"};
2024   PetscInt              alg, nalg = 2;
2025   Mat_MatTransMultDense *abt;
2026 
2027   PetscFunctionBegin;
2028   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2029   alg = 0; /* default is allgatherv */
2030   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2031   ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
2032   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2033   if (A->cmap->N != B->cmap->N) {
2034     SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
2035   }
2036 
2037   /* create matrix product Cdense */
2038   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
2039   ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
2040   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
2041   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
2042   ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
2043   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2044   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2045   *C   = Cdense;
2046 
2047   /* create data structure for reuse of Cdense */
2048   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2049   ierr = PetscNew(&abt);CHKERRQ(ierr);
2050   abt->tag = tag;
2051   abt->alg = alg;
2052   switch (alg) {
2053   case 1:
2054     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2055     bufsiz = A->cmap->N * maxRows;
2056     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2057     break;
2058   default:
2059     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
2060     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
2061     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2062     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2063     break;
2064   }
2065 
2066   c                    = (Mat_MPIDense*)Cdense->data;
2067   c->abtdense          = abt;
2068   abt->destroy         = Cdense->ops->destroy;
2069   Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2074 {
2075   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2076   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2077   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2078   Mat_MatTransMultDense *abt = c->abtdense;
2079   PetscErrorCode ierr;
2080   MPI_Comm       comm;
2081   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2082   PetscScalar    *sendbuf, *recvbuf=0, *carray;
2083   PetscInt       i,cK=A->cmap->N,k,j,bn;
2084   PetscScalar    _DOne=1.0,_DZero=0.0;
2085   PetscBLASInt   cm, cn, ck;
2086   MPI_Request    reqs[2];
2087   const PetscInt *ranges;
2088 
2089   PetscFunctionBegin;
2090   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2091   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2092   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2093 
2094   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2095   bn = B->rmap->n;
2096   if (bseq->lda == bn) {
2097     sendbuf = bseq->v;
2098   } else {
2099     sendbuf = abt->buf[0];
2100     for (k = 0, i = 0; i < cK; i++) {
2101       for (j = 0; j < bn; j++, k++) {
2102         sendbuf[k] = bseq->v[i * bseq->lda + j];
2103       }
2104     }
2105   }
2106   if (size > 1) {
2107     sendto = (rank + size - 1) % size;
2108     recvfrom = (rank + size + 1) % size;
2109   } else {
2110     sendto = recvfrom = 0;
2111   }
2112   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2113   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2114   recvisfrom = rank;
2115   for (i = 0; i < size; i++) {
2116     /* we have finished receiving and sending, so the buffers can be read/modified */
2117     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2118     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2119 
2120     if (nextrecvisfrom != rank) {
2121       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2122       sendsiz = cK * bn;
2123       recvsiz = cK * nextbn;
2124       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2125       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
2126       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
2127     }
2128 
2129     /* local aseq * sendbuf^T */
2130     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
2131     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
2132     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
2133 
2134     if (nextrecvisfrom != rank) {
2135       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2136       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2137     }
2138     bn = nextbn;
2139     recvisfrom = nextrecvisfrom;
2140     sendbuf = recvbuf;
2141   }
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2146 {
2147   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2148   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2149   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2150   Mat_MatTransMultDense *abt = c->abtdense;
2151   PetscErrorCode ierr;
2152   MPI_Comm       comm;
2153   PetscMPIInt    rank,size;
2154   PetscScalar    *sendbuf, *recvbuf;
2155   PetscInt       i,cK=A->cmap->N,k,j,bn;
2156   PetscScalar    _DOne=1.0,_DZero=0.0;
2157   PetscBLASInt   cm, cn, ck;
2158 
2159   PetscFunctionBegin;
2160   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2161   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2162   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2163 
2164   /* copy transpose of B into buf[0] */
2165   bn      = B->rmap->n;
2166   sendbuf = abt->buf[0];
2167   recvbuf = abt->buf[1];
2168   for (k = 0, j = 0; j < bn; j++) {
2169     for (i = 0; i < cK; i++, k++) {
2170       sendbuf[k] = bseq->v[i * bseq->lda + j];
2171     }
2172   }
2173   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2174   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2175   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2176   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2177   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2182 {
2183   Mat_MPIDense   *c=(Mat_MPIDense*)C->data;
2184   Mat_MatTransMultDense *abt = c->abtdense;
2185   PetscErrorCode ierr;
2186 
2187   PetscFunctionBegin;
2188   switch (abt->alg) {
2189   case 1:
2190     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2191     break;
2192   default:
2193     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2194     break;
2195   }
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
2200 {
2201   PetscErrorCode ierr;
2202 
2203   PetscFunctionBegin;
2204   if (scall == MAT_INITIAL_MATRIX) {
2205     ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2206   }
2207   ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2208   PetscFunctionReturn(0);
2209 }
2210 
2211 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2212 {
2213   PetscErrorCode   ierr;
2214   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2215   Mat_MatMultDense *ab = a->abdense;
2216 
2217   PetscFunctionBegin;
2218   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2219   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2220   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2221 
2222   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2223   ierr = PetscFree(ab);CHKERRQ(ierr);
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 #if defined(PETSC_HAVE_ELEMENTAL)
2228 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2229 {
2230   PetscErrorCode   ierr;
2231   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2232   Mat_MatMultDense *ab=c->abdense;
2233 
2234   PetscFunctionBegin;
2235   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2236   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2237   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2238   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2239   PetscFunctionReturn(0);
2240 }
2241 
2242 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2243 {
2244   PetscErrorCode   ierr;
2245   Mat              Ae,Be,Ce;
2246   Mat_MPIDense     *c;
2247   Mat_MatMultDense *ab;
2248 
2249   PetscFunctionBegin;
2250   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2251     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2252   }
2253 
2254   /* convert A and B to Elemental matrices Ae and Be */
2255   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2256   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2257 
2258   /* Ce = Ae*Be */
2259   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2260   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2261 
2262   /* convert Ce to C */
2263   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2264 
2265   /* create data structure for reuse of C */
2266   ierr = PetscNew(&ab);CHKERRQ(ierr);
2267   c                  = (Mat_MPIDense*)(*C)->data;
2268   c->abdense         = ab;
2269 
2270   ab->Ae             = Ae;
2271   ab->Be             = Be;
2272   ab->Ce             = Ce;
2273   ab->destroy        = (*C)->ops->destroy;
2274   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2275   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2280 {
2281   PetscErrorCode ierr;
2282 
2283   PetscFunctionBegin;
2284   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
2285     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2286     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2287     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2288   } else {
2289     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2290     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2291     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2292   }
2293   PetscFunctionReturn(0);
2294 }
2295 #endif
2296 
2297