
/*
   Basic functions for parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the local portion of the operator. For sequential matrices it returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
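
/*
   A minimal usage sketch (the matrix A here is illustrative, assumed already created
   and assembled as MATMPIDENSE). The returned inner matrix is borrowed, not
   reference-counted, so the caller must not destroy it:

     Mat Aloc;
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     /+ Aloc is the MATSEQDENSE matrix holding this process's local rows +/
*/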

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
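    /* PetscObjectCompose() above took a reference to B, so destroying the local
       handle here leaves exactly one reference, owned by the composition on A */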
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
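
/*
   Sketch of the two value layouts handled above (the entries shown are
   illustrative). For an m=2 by n=2 block with rows {r0,r1} and columns {c0,c1},
   the input array v is interpreted as

     row-oriented (default):  v = { a(r0,c0), a(r0,c1), a(r1,c0), a(r1,c1) }
     column-oriented:         v = { a(r0,c0), a(r1,c0), a(r0,c1), a(r1,c1) }

   hence the v+i*n indexing in the row-oriented branch and v+i+j*m in the
   column-oriented branch.
*/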

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
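    /* the local storage is column major with leading dimension lda, so global
       column icol[i] starts at offset lda*icol[i] in the local array */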
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
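  /* INSERT_VALUES == 1 and ADD_VALUES == 2, so the bitwise OR over all ranks
     equals (ADD_VALUES|INSERT_VALUES) == 3 exactly when the two modes are mixed */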
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
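/* Overview of the communication below: each process (1) bins the requested
   global rows by owning process, (2) learns how many messages to expect and
   their maximum length via PetscMaxSum(), (3) exchanges the row indices with
   nonblocking MPI_Irecv()/MPI_Isend(), and (4) zeros its now-local rows,
   fixing the right-hand side and the diagonal as requested. */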
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

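/* Each process stores its A->rmap->n by A->cmap->N slice of rows as a single
   sequential dense matrix, so y = A*x needs all of x locally: the scatter in
   MatMult below gathers the full vector into mdn->lvec before the sequential
   multiply. The transpose products reverse this pattern, scatter-adding the
   partial local results back into the distributed result vector. */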
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
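    /* storage is column major, so the entries of local row i sit at positions
       i, i+m, i+2*m, ... ; hence the stride-m walk in the inner loop below */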
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode    ierr;
  PetscScalar       *x;
  const PetscScalar *a;
  PetscInt          lda;

  PetscFunctionBegin;
  if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(x,a+col*lda,A->rmap->n*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&a);CHKERRQ(ierr); /* restore pairs with the get above */
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
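  /* 2-norms must be reduced as sums of squares: square the local norms here,
     MPI-sum them, then take square roots after the reduction below */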
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        MatMatTransposeMult_MPIDense_MPIDense,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
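
/*
   A minimal creation sketch (the communicator comm and local sizes m, n are
   illustrative). Either pass NULL so PETSc allocates the local storage, or
   hand in a user array of size at least (local rows) x (global columns):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
*/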

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;
  /* the PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
1327   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1328   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1329   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1330   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1331   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1332   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1333 
1334   if (reuse == MAT_INPLACE_MATRIX) {
1335     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1336   } else {
1337     *newmat = mat_elemental;
1338   }
1339   PetscFunctionReturn(0);
1340 }
1341 #endif
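/*
   Editor's usage sketch (illustrative, not part of this source): the conversion above is
   normally reached through the public MatConvert() interface, which dispatches to
   MatConvert_MPIDense_Elemental() via the composed "MatConvert_mpidense_elemental_C" method.
   Assumes A is an assembled MATMPIDENSE matrix and PETSc was configured with Elemental.

     Mat Ae;
     ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
     ... use Ae ...
     ierr = MatDestroy(&Ae);CHKERRQ(ierr);
*/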
1342 
1343 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1344 {
1345   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1346   PetscErrorCode ierr;
1347 
1348   PetscFunctionBegin;
1349   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1354 {
1355   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1356   PetscErrorCode ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1364 {
1365   PetscErrorCode ierr;
1366   Mat_MPIDense   *mat;
1367   PetscInt       m,nloc,N;
1368 
1369   PetscFunctionBegin;
1370   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1371   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1372   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1373     PetscInt sum;
1374 
1375     if (n == PETSC_DECIDE) {
1376       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1377     }
1378     /* Check sum(n) = N */
1379     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1380     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1381 
1382     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1383   }
1384 
1385   /* numeric phase */
1386   mat = (Mat_MPIDense*)(*outmat)->data;
1387   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1388   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1389   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390   PetscFunctionReturn(0);
1391 }
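/*
   Editor's usage sketch (illustrative): users normally reach the routine above through the
   public MatCreateMPIMatConcatenateSeqMat(), which stacks each rank's sequential dense block
   in rank order. The local sizes m and N below are placeholders.

     Mat seq,mpi;
     ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,NULL,&seq);CHKERRQ(ierr);
     ... fill seq and assemble it ...
     ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpi);CHKERRQ(ierr);
*/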
1392 
1393 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1394 {
1395   Mat_MPIDense   *a;
1396   PetscErrorCode ierr;
1397 
1398   PetscFunctionBegin;
1399   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1400   mat->data = (void*)a;
1401   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1402 
1403   mat->insertmode = NOT_SET_VALUES;
1404   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1405   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1406 
1407   /* build cache for off-process entries formed during MatSetValues() */
1408   a->donotstash = PETSC_FALSE;
1409 
1410   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1411 
1412   /* data structures used for matrix-vector products */
1413   a->lvec        = 0;
1414   a->Mvctx       = 0;
1415   a->roworiented = PETSC_TRUE;
1416 
1417   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1418   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1419   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1420   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1421   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1422   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1423   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1424 #if defined(PETSC_HAVE_ELEMENTAL)
1425   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1426 #endif
1427   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1428   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1429   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1430   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1431 
1432   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1433   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1434   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1435   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1436   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1437   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 /*MC
1442    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1443 
1444    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1445    and MATMPIDENSE otherwise.
1446 
1447    Options Database Keys:
1448 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1449 
1450   Level: beginner
1451 
1452 
1453 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1454 M*/
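/*
   Editor's usage sketch (illustrative): selecting the "dense" type from the options
   database. The global sizes M and N are placeholders. On a communicator with more than
   one process this yields a MATMPIDENSE matrix, otherwise a MATSEQDENSE one.

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);   (honors -mat_type dense)
     ierr = MatSetUp(A);CHKERRQ(ierr);
*/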
1455 
1456 /*@C
1457    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1458 
1459    Not Collective
1460 
1461    Input Parameters:
1462 .  B - the matrix
1463 -  data - optional location of matrix data.  Set data=NULL for PETSc
1464    to control all matrix memory allocation.
1465 
1466    Notes:
1467    The dense format is fully compatible with standard Fortran 77
1468    storage by columns.
1469 
1470    The data input variable is intended primarily for Fortran programmers
1471    who wish to allocate their own matrix memory space.  Most users should
1472    set data=NULL.
1473 
1474    Level: intermediate
1475 
1476 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1477 @*/
1478 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1479 {
1480   PetscErrorCode ierr;
1481 
1482   PetscFunctionBegin;
1483   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1484   PetscFunctionReturn(0);
1485 }
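/*
   Editor's usage sketch (illustrative): building a MATMPIDENSE matrix by hand instead of
   through MatCreateDense(). The local sizes m and n are placeholders; passing data=NULL
   lets PETSc allocate the storage.

     Mat            B;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
*/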
1486 
1487 /*@
1488    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1489    array provided by the user. This is useful to avoid copying an array
1490    into a matrix.
1491 
1492    Not Collective
1493 
1494    Input Parameters:
1495 +  mat - the matrix
1496 -  array - the array in column major order
1497 
1498    Notes:
1499    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1500    freed when the matrix is destroyed.
1501 
1502    Level: developer
1503 
1504 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1505 
1506 @*/
1507 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1508 {
1509   PetscErrorCode ierr;
1510   PetscFunctionBegin;
1511   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1512   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 /*@
1517    MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray()
1518 
1519    Not Collective
1520 
1521    Input Parameter:
1522 .  mat - the matrix
1523 
1524    Notes:
1525    You can only call this after a call to MatDensePlaceArray()
1526 
1527    Level: developer
1528 
1529 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1530 
1531 @*/
1532 PetscErrorCode  MatDenseResetArray(Mat mat)
1533 {
1534   PetscErrorCode ierr;
1535   PetscFunctionBegin;
1536   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1537   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
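/*
   Editor's usage sketch (illustrative) pairing MatDensePlaceArray() with MatDenseResetArray().
   Here m is the number of local rows and N the number of global columns of the dense matrix A
   (both placeholders); the array must be in column major order.

     PetscScalar *myarray;
     ierr = PetscMalloc1(m*N,&myarray);CHKERRQ(ierr);
     ierr = MatDensePlaceArray(A,myarray);CHKERRQ(ierr);
     ... operations on A now read and write myarray ...
     ierr = MatDenseResetArray(A);CHKERRQ(ierr);
     ierr = PetscFree(myarray);CHKERRQ(ierr);
*/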
1540 
1541 /*@C
1542    MatCreateDense - Creates a parallel matrix in dense format.
1543 
1544    Collective
1545 
1546    Input Parameters:
1547 +  comm - MPI communicator
1548 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1549 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1550 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1551 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1552 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1553    to control all matrix memory allocation.
1554 
1555    Output Parameter:
1556 .  A - the matrix
1557 
1558    Notes:
1559    The dense format is fully compatible with standard Fortran 77
1560    storage by columns.
1561 
1562    The data input variable is intended primarily for Fortran programmers
1563    who wish to allocate their own matrix memory space.  Most users should
1564    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1565 
1566    The user MUST specify either the local or global matrix dimensions
1567    (possibly both).
1568 
1569    Level: intermediate
1570 
1571 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1572 @*/
1573 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1574 {
1575   PetscErrorCode ierr;
1576   PetscMPIInt    size;
1577 
1578   PetscFunctionBegin;
1579   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1580   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1581   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1582   if (size > 1) {
1583     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1584     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1585     if (data) {  /* user provided data array, so no need to assemble */
1586       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1587       (*A)->assembled = PETSC_TRUE;
1588     }
1589   } else {
1590     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1591     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1592   }
1593   PetscFunctionReturn(0);
1594 }
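/*
   Editor's usage sketch (illustrative): the common MatCreateDense() call, letting PETSc
   choose the row/column distribution and own the storage (data=NULL). M and N are
   placeholders.

     Mat A;
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
     ... set entries with MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/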
1595 
1596 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1597 {
1598   Mat            mat;
1599   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1600   PetscErrorCode ierr;
1601 
1602   PetscFunctionBegin;
1603   *newmat = 0;
1604   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1605   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1606   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1607   a       = (Mat_MPIDense*)mat->data;
1608 
1609   mat->factortype   = A->factortype;
1610   mat->assembled    = PETSC_TRUE;
1611   mat->preallocated = PETSC_TRUE;
1612 
1613   a->size         = oldmat->size;
1614   a->rank         = oldmat->rank;
1615   mat->insertmode = NOT_SET_VALUES;
1616   a->nvec         = oldmat->nvec;
1617   a->donotstash   = oldmat->donotstash;
1618 
1619   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1620   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1621 
1622   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1623   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1624   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1625 
1626   *newmat = mat;
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1631 {
1632   PetscErrorCode ierr;
1633   PetscMPIInt    rank,size;
1634   const PetscInt *rowners;
1635   PetscInt       i,m,n,nz,j,mMax;
1636   PetscScalar    *array,*vals,*vals_ptr;
1637   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1638 
1639   PetscFunctionBegin;
1640   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1641   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1642 
1643   /* determine ownership of rows and columns */
1644   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1645   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1646 
1647   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1648   if (!a->A) {
1649     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1650   }
1651   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1652   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1653   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1654   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1655   if (!rank) {
1656     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1657 
1658     /* read in my part of the matrix numerical values  */
1659     ierr = PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1660 
1661     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1662     vals_ptr = vals;
1663     for (i=0; i<m; i++) {
1664       for (j=0; j<N; j++) {
1665         array[i + j*m] = *vals_ptr++;
1666       }
1667     }
1668 
1669     /* read in the other processes' parts and ship them out */
1670     for (i=1; i<size; i++) {
1671       nz   = (rowners[i+1] - rowners[i])*N;
1672       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1673       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1674     }
1675   } else {
1676     /* receive numeric values */
1677     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1678 
1679     /* receive message of values */
1680     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1681 
1682     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1683     vals_ptr = vals;
1684     for (i=0; i<m; i++) {
1685       for (j=0; j<N; j++) {
1686         array[i + j*m] = *vals_ptr++;
1687       }
1688     }
1689   }
1690   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1691   ierr = PetscFree(vals);CHKERRQ(ierr);
1692   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 static PetscErrorCode MatLoad_MPIDense_Binary(Mat newmat,PetscViewer viewer)
1698 {
1699   Mat_MPIDense   *a;
1700   PetscScalar    *vals,*svals;
1701   MPI_Comm       comm;
1702   MPI_Status     status;
1703   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1704   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1705   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1706   PetscInt       i,nz,j,rstart,rend;
1707   int            fd;
1708   PetscErrorCode ierr;
1709 
1710   PetscFunctionBegin;
1711   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1712   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1713   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1714   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1715   if (!rank) {
1716     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
1717     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1718   }
1719   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1720   M    = header[1]; N = header[2]; nz = header[3];
1721 
1722   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1723   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1724   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1725 
1726   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1727   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1728 
1729   /*
1730        Handle case where matrix is stored on disk as a dense matrix
1731   */
1732   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1733     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1734     PetscFunctionReturn(0);
1735   }
1736 
1737   /* determine ownership of all rows */
1738   if (newmat->rmap->n < 0) {
1739     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1740   } else {
1741     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1742   }
1743   if (newmat->cmap->n < 0) {
1744     n = PETSC_DECIDE;
1745   } else {
1746     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1747   }
1748 
1749   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1750   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1751   rowners[0] = 0;
1752   for (i=2; i<=size; i++) {
1753     rowners[i] += rowners[i-1];
1754   }
1755   rstart = rowners[rank];
1756   rend   = rowners[rank+1];
1757 
1758   /* distribute row lengths to all processors */
1759   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1760   if (!rank) {
1761     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1762     ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
1763     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1764     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1765     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1766     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1767   } else {
1768     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1769   }
1770 
1771   if (!rank) {
1772     /* calculate the number of nonzeros on each processor */
1773     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1774     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1775     for (i=0; i<size; i++) {
1776       for (j=rowners[i]; j< rowners[i+1]; j++) {
1777         procsnz[i] += rowlengths[j];
1778       }
1779     }
1780     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1781 
1782     /* determine max buffer needed and allocate it */
1783     maxnz = 0;
1784     for (i=0; i<size; i++) {
1785       maxnz = PetscMax(maxnz,procsnz[i]);
1786     }
1787     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1788 
1789     /* read in my part of the matrix column indices  */
1790     nz   = procsnz[0];
1791     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1792     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
1793 
1794     /* read in everyone else's parts and ship them off */
1795     for (i=1; i<size; i++) {
1796       nz   = procsnz[i];
1797       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
1798       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1799     }
1800     ierr = PetscFree(cols);CHKERRQ(ierr);
1801   } else {
1802     /* determine buffer space needed for message */
1803     nz = 0;
1804     for (i=0; i<m; i++) {
1805       nz += ourlens[i];
1806     }
1807     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1808 
1809     /* receive message of column indices */
1810     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1811     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1812     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1813   }
1814 
1815   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1816   a = (Mat_MPIDense*)newmat->data;
1817   if (!a->A) {
1818     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1819   }
1820 
1821   if (!rank) {
1822     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1823 
1824     /* read in my part of the matrix numerical values  */
1825     nz   = procsnz[0];
1826     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1827 
1828     /* insert into matrix */
1829     jj      = rstart;
1830     smycols = mycols;
1831     svals   = vals;
1832     for (i=0; i<m; i++) {
1833       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1834       smycols += ourlens[i];
1835       svals   += ourlens[i];
1836       jj++;
1837     }
1838 
1839     /* read in the other processes' parts and ship them out */
1840     for (i=1; i<size; i++) {
1841       nz   = procsnz[i];
1842       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1843       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1844     }
1845     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1846   } else {
1847     /* receive numeric values */
1848     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1849 
1850     /* receive message of values */
1851     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1852     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1853     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1854 
1855     /* insert into matrix */
1856     jj      = rstart;
1857     smycols = mycols;
1858     svals   = vals;
1859     for (i=0; i<m; i++) {
1860       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1861       smycols += ourlens[i];
1862       svals   += ourlens[i];
1863       jj++;
1864     }
1865   }
1866   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1867   ierr = PetscFree(vals);CHKERRQ(ierr);
1868   ierr = PetscFree(mycols);CHKERRQ(ierr);
1869   ierr = PetscFree(rowners);CHKERRQ(ierr);
1870 
1871   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1872   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1877 {
1878   PetscErrorCode ierr;
1879   PetscBool      isbinary;
1880 #if defined(PETSC_HAVE_HDF5)
1881   PetscBool      ishdf5;
1882 #endif
1883 
1884   PetscFunctionBegin;
1885   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
1886   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
1887   /* force binary viewer to load .info file if it has not yet done so */
1888   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1889   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1890 #if defined(PETSC_HAVE_HDF5)
1891   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
1892 #endif
1893   if (isbinary) {
1894     ierr = MatLoad_MPIDense_Binary(newMat,viewer);CHKERRQ(ierr);
1895 #if defined(PETSC_HAVE_HDF5)
1896   } else if (ishdf5) {
1897     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
1898 #endif
1899   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
1900   PetscFunctionReturn(0);
1901 }
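/*
   Editor's usage sketch (illustrative): loading a dense matrix from a PETSc binary file
   through the dispatcher above. The file name "matrix.dat" is a placeholder.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/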
1902 
1903 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1904 {
1905   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1906   Mat            a,b;
1907   PetscBool      flg;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   a    = matA->A;
1912   b    = matB->A;
1913   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1914   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1919 {
1920   PetscErrorCode        ierr;
1921   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1922   Mat_TransMatMultDense *atb = a->atbdense;
1923 
1924   PetscFunctionBegin;
1925   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1926   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1927   ierr = PetscFree(atb);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1932 {
1933   PetscErrorCode        ierr;
1934   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1935   Mat_MatTransMultDense *abt = a->abtdense;
1936 
1937   PetscFunctionBegin;
1938   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1939   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
1940   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1941   ierr = PetscFree(abt);CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1946 {
1947   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1948   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1949   Mat_TransMatMultDense *atb = c->atbdense;
1950   PetscErrorCode ierr;
1951   MPI_Comm       comm;
1952   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1953   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1954   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1955   PetscScalar    _DOne=1.0,_DZero=0.0;
1956   PetscBLASInt   am,an,bn,aN;
1957   const PetscInt *ranges;
1958 
1959   PetscFunctionBegin;
1960   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1961   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1962   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1963 
1964   /* compute atbarray = aseq^T * bseq */
1965   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1966   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1967   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1968   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1969   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1970 
1971   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1972   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1973 
1974   /* arrange atbarray into sendbuf */
1975   k = 0;
1976   for (proc=0; proc<size; proc++) {
1977     for (j=0; j<cN; j++) {
1978       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1979     }
1980   }
1981   /* sum the atbarray contributions into the local values of C */
1982   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1983   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1984   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1985   PetscFunctionReturn(0);
1986 }
1987 
1988 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1989 {
1990   PetscErrorCode        ierr;
1991   Mat                   Cdense;
1992   MPI_Comm              comm;
1993   PetscMPIInt           size;
1994   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1995   Mat_MPIDense          *c;
1996   Mat_TransMatMultDense *atb;
1997 
1998   PetscFunctionBegin;
1999   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2000   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2001     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2002   }
2003 
2004   /* create matrix product Cdense */
2005   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
2006   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2007   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
2008   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
2009   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2010   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2011   *C   = Cdense;
2012 
2013   /* create data structure for reusing Cdense */
2014   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2015   ierr = PetscNew(&atb);CHKERRQ(ierr);
2016   cM = Cdense->rmap->N;
2017   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
2018 
2019   c                    = (Mat_MPIDense*)Cdense->data;
2020   c->atbdense          = atb;
2021   atb->destroy         = Cdense->ops->destroy;
2022   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2027 {
2028   PetscErrorCode ierr;
2029 
2030   PetscFunctionBegin;
2031   if (scall == MAT_INITIAL_MATRIX) {
2032     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2033   }
2034   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2035   PetscFunctionReturn(0);
2036 }
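/*
   Editor's usage sketch (illustrative): computing C = A^T * B for two MATMPIDENSE matrices
   with matching row layouts. The first call (MAT_INITIAL_MATRIX) performs the symbolic setup
   and the numeric product; later calls can reuse C.

     Mat C;
     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ... modify entries of A or B ...
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/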
2037 
2038 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
2039 {
2040   PetscErrorCode        ierr;
2041   Mat                   Cdense;
2042   MPI_Comm              comm;
2043   PetscMPIInt           i, size;
2044   PetscInt              maxRows, bufsiz;
2045   Mat_MPIDense          *c;
2046   PetscMPIInt           tag;
2047   const char            *algTypes[2] = {"allgatherv","cyclic"};
2048   PetscInt              alg, nalg = 2;
2049   Mat_MatTransMultDense *abt;
2050 
2051   PetscFunctionBegin;
2052   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2053   alg = 0; /* default is allgatherv */
2054   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2055   ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
2056   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2057   if (A->cmap->N != B->cmap->N) {
2058     SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
2059   }
2060 
2061   /* create matrix product Cdense */
2062   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
2063   ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
2064   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
2065   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
2066   ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
2067   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2068   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   *C   = Cdense;
2070 
2071   /* create data structure for reusing Cdense */
2072   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2073   ierr = PetscNew(&abt);CHKERRQ(ierr);
2074   abt->tag = tag;
2075   abt->alg = alg;
2076   switch (alg) {
2077   case 1:
2078     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2079     bufsiz = A->cmap->N * maxRows;
2080     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2081     break;
2082   default:
2083     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
2084     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
2085     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2086     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2087     break;
2088   }
2089 
2090   c                    = (Mat_MPIDense*)Cdense->data;
2091   c->abtdense          = abt;
2092   abt->destroy         = Cdense->ops->destroy;
2093   Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2098 {
2099   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2100   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2101   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2102   Mat_MatTransMultDense *abt = c->abtdense;
2103   PetscErrorCode ierr;
2104   MPI_Comm       comm;
2105   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2106   PetscScalar    *sendbuf, *recvbuf=0, *carray;
2107   PetscInt       i,cK=A->cmap->N,k,j,bn;
2108   PetscScalar    _DOne=1.0,_DZero=0.0;
2109   PetscBLASInt   cm, cn, ck;
2110   MPI_Request    reqs[2];
2111   const PetscInt *ranges;
2112 
2113   PetscFunctionBegin;
2114   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2115   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2116   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2117 
2118   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2119   bn = B->rmap->n;
2120   if (bseq->lda == bn) {
2121     sendbuf = bseq->v;
2122   } else {
2123     sendbuf = abt->buf[0];
2124     for (k = 0, i = 0; i < cK; i++) {
2125       for (j = 0; j < bn; j++, k++) {
2126         sendbuf[k] = bseq->v[i * bseq->lda + j];
2127       }
2128     }
2129   }
2130   if (size > 1) {
2131     sendto = (rank + size - 1) % size;
2132     recvfrom = (rank + size + 1) % size;
2133   } else {
2134     sendto = recvfrom = 0;
2135   }
2136   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2137   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2138   recvisfrom = rank;
2139   for (i = 0; i < size; i++) {
2140     /* we have finished receiving and sending; the buffers can be read/modified */
2141     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* the process the next recvbuf will originate from */
2142     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2143 
2144     if (nextrecvisfrom != rank) {
2145       /* start the cyclic sends from sendbuf to recvbuf (which will become the next sendbuf) */
2146       sendsiz = cK * bn;
2147       recvsiz = cK * nextbn;
2148       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2149       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
2150       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
2151     }
2152 
2153     /* local aseq * sendbuf^T */
2154     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
2155     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
2156     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
2157 
2158     if (nextrecvisfrom != rank) {
2159       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2160       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2161     }
2162     bn = nextbn;
2163     recvisfrom = nextrecvisfrom;
2164     sendbuf = recvbuf;
2165   }
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2170 {
2171   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2172   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2173   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2174   Mat_MatTransMultDense *abt = c->abtdense;
2175   PetscErrorCode ierr;
2176   MPI_Comm       comm;
2177   PetscMPIInt    rank,size;
2178   PetscScalar    *sendbuf, *recvbuf;
2179   PetscInt       i,cK=A->cmap->N,k,j,bn;
2180   PetscScalar    _DOne=1.0,_DZero=0.0;
2181   PetscBLASInt   cm, cn, ck;
2182 
2183   PetscFunctionBegin;
2184   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2185   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2186   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2187 
2188   /* copy transpose of B into buf[0] */
2189   bn      = B->rmap->n;
2190   sendbuf = abt->buf[0];
2191   recvbuf = abt->buf[1];
2192   for (k = 0, j = 0; j < bn; j++) {
2193     for (i = 0; i < cK; i++, k++) {
2194       sendbuf[k] = bseq->v[i * bseq->lda + j];
2195     }
2196   }
2197   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2198   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2199   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2200   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2201   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2206 {
2207   Mat_MPIDense   *c=(Mat_MPIDense*)C->data;
2208   Mat_MatTransMultDense *abt = c->abtdense;
2209   PetscErrorCode ierr;
2210 
2211   PetscFunctionBegin;
2212   switch (abt->alg) {
2213   case 1:
2214     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2215     break;
2216   default:
2217     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2218     break;
2219   }
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
2224 {
2225   PetscErrorCode ierr;
2226 
2227   PetscFunctionBegin;
2228   if (scall == MAT_INITIAL_MATRIX) {
2229     ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2230   }
2231   ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2232   PetscFunctionReturn(0);
2233 }
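/*
   Editor's usage sketch (illustrative): computing C = A * B^T. The algorithm is chosen with
   the option -matmattransmult_mpidense_mpidense_via allgatherv|cyclic (allgatherv is the
   default, as set in the symbolic routine above).

     Mat C;
     ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/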
2234 
2235 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2236 {
2237   PetscErrorCode   ierr;
2238   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2239   Mat_MatMultDense *ab = a->abdense;
2240 
2241   PetscFunctionBegin;
2242   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2243   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2244   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2245 
2246   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2247   ierr = PetscFree(ab);CHKERRQ(ierr);
2248   PetscFunctionReturn(0);
2249 }
2250 
2251 #if defined(PETSC_HAVE_ELEMENTAL)
2252 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2253 {
2254   PetscErrorCode   ierr;
2255   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2256   Mat_MatMultDense *ab=c->abdense;
2257 
2258   PetscFunctionBegin;
2259   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2260   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2261   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2262   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2267 {
2268   PetscErrorCode   ierr;
2269   Mat              Ae,Be,Ce;
2270   Mat_MPIDense     *c;
2271   Mat_MatMultDense *ab;
2272 
2273   PetscFunctionBegin;
2274   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2275     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2276   }
2277 
2278   /* convert A and B to Elemental matrices Ae and Be */
2279   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2280   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2281 
2282   /* Ce = Ae*Be */
2283   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2284   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2285 
2286   /* convert Ce to C */
2287   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2288 
2289   /* create data structure for reusing Cdense */
2290   ierr = PetscNew(&ab);CHKERRQ(ierr);
2291   c                  = (Mat_MPIDense*)(*C)->data;
2292   c->abdense         = ab;
2293 
2294   ab->Ae             = Ae;
2295   ab->Be             = Be;
2296   ab->Ce             = Ce;
2297   ab->destroy        = (*C)->ops->destroy;
2298   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2299   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2304 {
2305   PetscErrorCode ierr;
2306 
2307   PetscFunctionBegin;
2308   if (scall == MAT_INITIAL_MATRIX) { /* the symbolic product includes the numeric product */
2309     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2310     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2311     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2312   } else {
2313     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2314     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2315     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2316   }
2317   PetscFunctionReturn(0);
2318 }
2319 #endif
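/*
   Editor's usage sketch (illustrative): with Elemental enabled, C = A * B for two MATMPIDENSE
   matrices goes through the routines above, round-tripping through MATELEMENTAL.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/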
2320 
2321