xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 6947451f174971b5cbba4c8ab49dd392302c23da)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
32   if (flg) *B = mat->A;
33   else *B = A;
34   PetscFunctionReturn(0);
35 }
36 
37 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
38 {
39   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
40   PetscErrorCode ierr;
41   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
42 
43   PetscFunctionBegin;
44   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
45   lrow = row - rstart;
46   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
51 {
52   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
53   PetscErrorCode ierr;
54   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
55 
56   PetscFunctionBegin;
57   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
58   lrow = row - rstart;
59   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
60   PetscFunctionReturn(0);
61 }
62 
/*
   Returns in *a a sequential dense matrix that aliases (shares the storage of) the
   diagonal block of the parallel matrix; no values are copied.  The block is built
   once and cached on A as the composed object "DiagonalBlock".
*/
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      cong;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if (!cong) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    /* Create an m x m matrix of the same (sequential) type as the local matrix whose
       data pointer starts at local column rstart of the local m x N array.
       NOTE(review): the offset array+m*rstart assumes the local leading dimension
       equals m (the default preallocation); confirm for a non-default LDA. */
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* Composing B on A takes a reference, so the MatDestroy() below only releases
       the creation reference: the cached block lives as long as A keeps it. */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
93 
/*
   Inserts/adds a logically dense block of values.  Locally owned rows go straight
   into the sequential local matrix; off-process rows are placed in the stash and
   communicated during MatAssemblyBegin/End (unless stashing is disabled).
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the block occupies v[i*n .. i*n+n-1] */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is column-major: entry (i,j) of the block is v[i+j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* Off-process row: queue it for communication at assembly time and mark the
         matrix as no longer assembled. */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
127 
/*
   Retrieves a block of values (row-major into v).  Only locally owned rows are
   supported; requesting an off-process row is an error rather than a communication.
*/
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
149 
150 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
151 {
152   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
153   PetscErrorCode ierr;
154 
155   PetscFunctionBegin;
156   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
157   PetscFunctionReturn(0);
158 }
159 
160 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
161 {
162   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
163   PetscErrorCode ierr;
164 
165   PetscFunctionBegin;
166   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
167   PetscFunctionReturn(0);
168 }
169 
170 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
171 {
172   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
181 {
182   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
183   PetscErrorCode ierr;
184 
185   PetscFunctionBegin;
186   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
190 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
191 {
192   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
193   PetscErrorCode ierr;
194 
195   PetscFunctionBegin;
196   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
197   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
198   PetscFunctionReturn(0);
199 }
200 
201 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
202 {
203   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
204   PetscErrorCode ierr;
205 
206   PetscFunctionBegin;
207   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
208   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
/*
   Extracts a submatrix.  The column index set is gathered onto every process (each
   process stores all Ncols selected columns of its owned rows, as MPIDENSE stores
   the full m x N local block), while the row partitioning of the result must match
   that of the original matrix: no parallel redistribution is performed.
*/
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process needs all selected column indices */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time.
     NOTE(review): bv is advanced contiguously, which assumes the destination's
     local leading dimension equals nrows - confirm for MAT_REUSE_MATRIX inputs. */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    /* av points to the start of source column icol[i] (column-major, stride lda) */
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
281 
282 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
283 {
284   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
285   PetscErrorCode ierr;
286 
287   PetscFunctionBegin;
288   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
293 {
294   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
295   PetscErrorCode ierr;
296 
297   PetscFunctionBegin;
298   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
302 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
303 {
304   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
305   PetscErrorCode ierr;
306 
307   PetscFunctionBegin;
308   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
309   PetscFunctionReturn(0);
310 }
311 
312 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
313 {
314   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
315   PetscErrorCode ierr;
316   PetscInt       nstash,reallocs;
317 
318   PetscFunctionBegin;
319   if (mdn->donotstash || mat->nooffprocentries) return(0);
320 
321   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
322   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
323   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
/*
   Completes assembly: drains the stash messages, inserting received values through
   MatSetValues_MPIDense() (entries of the same row are grouped into one call), then
   assembles the local sequential matrix and, on the first final assembly, builds the
   communication structures used by the mult routines.
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: set up lvec/Mvctx used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
365 
366 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
367 {
368   PetscErrorCode ierr;
369   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
370 
371   PetscFunctionBegin;
372   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
373   PetscFunctionReturn(0);
374 }
375 
/*
   Zeros the locally owned subset of the given global rows.  When x and b are
   supplied, b is fixed up so b[r] = diag*x[r] for each zeroed local row r.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows in the local matrix with a zero diagonal; diag is applied below */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    /* NOTE(review): MatDiagonalSet with a constant vector inserts diag on EVERY
       diagonal entry, not only the zeroed rows - confirm this is intended. */
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
408 
409 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
410 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
411 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
412 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
413 
/*
   yy = mat*xx.  Gathers the needed entries of the distributed input vector into the
   local work vector lvec via the Mvctx star forest (broadcast from the owners), then
   applies the local sequential mult to produce the owned part of yy.
*/
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
431 
/*
   zz = yy + mat*xx.  Same communication pattern as MatMult_MPIDense (gather xx into
   lvec through the star forest) followed by the local sequential multadd.
*/
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
449 
/*
   yy = A^T*xx.  The local transpose product is formed into lvec; the contributions
   for each global entry are then summed onto the owners by a star-forest reduction.
   yy is zeroed first because the reduction adds into it.
*/
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
468 
/*
   zz = yy + A^T*xx.  Same pattern as MatMultTranspose_MPIDense, except zz is first
   initialized with a copy of yy so that the star-forest reduction adds onto it.
*/
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
487 
/*
   Extracts the diagonal into the locally owned part of v; local row i corresponds
   to global column rstart+i (congruent layouts).

   NOTE(review): radd = rstart*m uses the local row count m where the column offset
   in the indexing below uses lda; the two agree only when lda == m (the default
   preallocation) - confirm for matrices with a modified LDA or placed array.
*/
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          lda,len,i,n,m = A->rmap->n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  for (i=0; i<len; i++) {
    /* element (local row i, global column rstart+i) in column-major storage */
    x[i] = av[radd + i*lda + i];
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
512 
/*
   Destroys the parallel dense matrix: frees the stash, the local sequential matrix
   and the communication objects, then removes every method composed on the object
   at creation time by resetting each composed function pointer to NULL.
*/
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* detach every composed method so dangling pointers cannot be invoked */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
562 
563 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
564 
565 #include <petscdraw.h>
/*
   Views the matrix on ASCII, draw, or socket viewers.  The INFO/INFO_DETAIL ASCII
   formats print per-process statistics only; all other formats assemble a temporary
   copy of the entire matrix on the first process and view its local (sequential)
   block through a sub-viewer.
*/
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    /* rank 0 owns every row of the temporary matrix; all others own none */
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    /* every process stashes its owned rows; assembly ships them to rank 0 */
    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
647 
648 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
649 {
650   PetscErrorCode ierr;
651   PetscBool      iascii,isbinary,isdraw,issocket;
652 
653   PetscFunctionBegin;
654   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
655   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
656   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
657   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
658 
659   if (iascii || issocket || isdraw) {
660     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
661   } else if (isbinary) {
662     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
663   }
664   PetscFunctionReturn(0);
665 }
666 
/*
   Collects matrix statistics.  The local statistics always come from the inner
   sequential matrix; for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM the five counters are
   reduced across the communicator with MPI_MAX / MPI_SUM respectively.
*/
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  /* pack the five reducible counters for the collective cases */
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
709 
/*
   Applies a matrix option.  Preallocation-related options are forwarded to the
   local sequential matrix; options without meaning for dense storage are logged and
   ignored; anything unknown is an error.
*/
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* affects how MatSetValues interprets its value array; propagate to both levels */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* disables stashing of off-process values in MatSetValues */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
750 
/*
   A = diag(ll) * A * diag(rr).  Left scaling uses the locally owned rows directly;
   right scaling first gathers the needed entries of rr into the local work vector
   lvec through the star forest, since every local row spans all N global columns.
   Either vector may be NULL to skip that side.
*/
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    /* scale row i: walk across the row in column-major storage with stride lda */
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    /* gather the globally distributed rr into lvec so every needed entry is local */
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
    /* scale column i: contiguous run of m entries starting at i*lda */
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
796 
797 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
798 {
799   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
800   PetscErrorCode    ierr;
801   PetscInt          i,j;
802   PetscReal         sum = 0.0;
803   const PetscScalar *av,*v;
804 
805   PetscFunctionBegin;
806   ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
807   v    = av;
808   if (mdn->size == 1) {
809     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
810   } else {
811     if (type == NORM_FROBENIUS) {
812       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
813         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
814       }
815       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816       *nrm = PetscSqrtReal(*nrm);
817       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
818     } else if (type == NORM_1) {
819       PetscReal *tmp,*tmp2;
820       ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
821       *nrm = 0.0;
822       v    = av;
823       for (j=0; j<mdn->A->cmap->n; j++) {
824         for (i=0; i<mdn->A->rmap->n; i++) {
825           tmp[j] += PetscAbsScalar(*v);  v++;
826         }
827       }
828       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
829       for (j=0; j<A->cmap->N; j++) {
830         if (tmp2[j] > *nrm) *nrm = tmp2[j];
831       }
832       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
833       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
834     } else if (type == NORM_INFINITY) { /* max row norm */
835       PetscReal ntemp;
836       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
837       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
838     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
839   }
840   ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
841   PetscFunctionReturn(0);
842 }
843 
844 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
845 {
846   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
847   Mat            B;
848   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
849   PetscErrorCode ierr;
850   PetscInt       j,i,lda;
851   PetscScalar    *v;
852 
853   PetscFunctionBegin;
854   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
855     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
856     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
857     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
858     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
859   } else B = *matout;
860 
861   m    = a->A->rmap->n; n = a->A->cmap->n;
862   ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
863   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
864   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
865   for (i=0; i<m; i++) rwork[i] = rstart + i;
866   for (j=0; j<n; j++) {
867     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
868     v   += lda;
869   }
870   ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
871   ierr = PetscFree(rwork);CHKERRQ(ierr);
872   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
873   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
874   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
875     *matout = B;
876   } else {
877     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
878   }
879   PetscFunctionReturn(0);
880 }
881 
882 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
883 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
884 
885 PetscErrorCode MatSetUp_MPIDense(Mat A)
886 {
887   PetscErrorCode ierr;
888 
889   PetscFunctionBegin;
890   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
891   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
892   if (!A->preallocated) {
893     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
894   }
895   PetscFunctionReturn(0);
896 }
897 
898 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
899 {
900   PetscErrorCode ierr;
901   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
902 
903   PetscFunctionBegin;
904   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
905   PetscFunctionReturn(0);
906 }
907 
908 PetscErrorCode MatConjugate_MPIDense(Mat mat)
909 {
910   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
911   PetscErrorCode ierr;
912 
913   PetscFunctionBegin;
914   ierr = MatConjugate(a->A);CHKERRQ(ierr);
915   PetscFunctionReturn(0);
916 }
917 
918 PetscErrorCode MatRealPart_MPIDense(Mat A)
919 {
920   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
921   PetscErrorCode ierr;
922 
923   PetscFunctionBegin;
924   ierr = MatRealPart(a->A);CHKERRQ(ierr);
925   PetscFunctionReturn(0);
926 }
927 
928 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
929 {
930   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
931   PetscErrorCode ierr;
932 
933   PetscFunctionBegin;
934   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
935   PetscFunctionReturn(0);
936 }
937 
938 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
939 {
940   PetscErrorCode ierr;
941   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
942 
943   PetscFunctionBegin;
944   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
945   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
946   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
947   PetscFunctionReturn(0);
948 }
949 
950 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
951 
952 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
953 {
954   PetscErrorCode ierr;
955   PetscInt       i,n;
956   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
957   PetscReal      *work;
958 
959   PetscFunctionBegin;
960   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
961   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
962   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
963   if (type == NORM_2) {
964     for (i=0; i<n; i++) work[i] *= work[i];
965   }
966   if (type == NORM_INFINITY) {
967     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
968   } else {
969     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
970   }
971   ierr = PetscFree(work);CHKERRQ(ierr);
972   if (type == NORM_2) {
973     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
974   }
975   PetscFunctionReturn(0);
976 }
977 
978 #if defined(PETSC_HAVE_CUDA)
/*
   Wraps column col of the local dense block in the cached MPI CUDA vector
   a->cvec (read/write access).  Must be paired with
   MatDenseRestoreColumnVec; only one column may be checked out at a time,
   tracked via a->vecinuse.
*/
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) { /* lazily create the wrapper vector; the array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* 0 means "not in use", hence the +1 */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at offset col*lda in column-major storage */
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
997 
/*
   Returns the column vector obtained with MatDenseGetColumnVec_MPIDenseCUDA:
   clears the in-use flag, releases the device array, and detaches the placed
   array from the cached vector.  The caller's reference is nulled.
*/
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1012 
/*
   Read-only variant of MatDenseGetColumnVec_MPIDenseCUDA: acquires the array
   through the read accessor and additionally read-locks the returned vector
   so callers cannot modify it.
*/
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) { /* lazily create the wrapper vector; the array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* 0 means "not in use", hence the +1 */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1032 
/*
   Undoes MatDenseGetColumnVecRead_MPIDenseCUDA: releases the read-only device
   array, pops the read lock, and detaches the placed array — in the reverse
   order of acquisition.
*/
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1048 
/*
   Write-only variant of MatDenseGetColumnVec_MPIDenseCUDA: acquires the array
   through the write accessor (previous contents of the column need not be
   valid on the device).
*/
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) { /* lazily create the wrapper vector; the array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* 0 means "not in use", hence the +1 */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1067 
/*
   Undoes MatDenseGetColumnVecWrite_MPIDenseCUDA: clears the in-use flag,
   releases the write-only device array, and detaches the placed array from
   the cached vector.
*/
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1082 
1083 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1084 {
1085   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1086   PetscErrorCode ierr;
1087 
1088   PetscFunctionBegin;
1089   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1090   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1095 {
1096   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1101   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1106 {
1107   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1116 {
1117   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1118   PetscErrorCode ierr;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1126 {
1127   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1128   PetscErrorCode ierr;
1129 
1130   PetscFunctionBegin;
1131   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1136 {
1137   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1138   PetscErrorCode ierr;
1139 
1140   PetscFunctionBegin;
1141   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1146 {
1147   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1156 {
1157   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1158   PetscErrorCode ierr;
1159 
1160   PetscFunctionBegin;
1161   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1166 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1167 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1168 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1169 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1170 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1171 
/*
   Switches a MATMPIDENSECUDA matrix between host (bind=PETSC_TRUE) and device
   execution: the local block is (re)bound and the column-vector accessor
   callbacks are swapped between the CUDA and host implementations.  When
   unbinding, a cached column vector of the wrong type is destroyed so it is
   recreated as a VECMPICUDA on next use.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* cannot rebind while a column vector wrapping our storage is checked out */
  if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop a host-typed cached column vector; a CUDA one will be created lazily */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1206 
/*
   Preallocates the local SEQDENSECUDA block of a MATMPIDENSECUDA matrix,
   optionally using the caller-supplied device array d_data (ownership per
   MatSeqDenseCUDASetPreallocation).  Silently does nothing for other matrix
   types so it can be called unconditionally.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* local block: locally owned rows, all N global columns */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1228 #endif
1229 
1230 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1231 {
1232   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1233   PetscErrorCode ierr;
1234 
1235   PetscFunctionBegin;
1236   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1241 
/* A dense matrix stores every entry, so a diagonal entry can never be
   missing; the optional index output d is deliberately left untouched
   (it is only meaningful when *missing is PETSC_TRUE). */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1248 
1249 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1250 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1251 
1252 /* -------------------------------------------------------------------*/
/*
   Function table for MATMPIDENSE.  The entries are positional: the slot index
   (noted in the interspersed comments) must line up with the MatOperation
   ordering of struct _MatOps; a 0 entry means the operation is not
   implemented by this type.  Do NOT reorder entries — add new ones only in
   the slot the interface defines.
*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};
1402 
1403 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1404 {
1405   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1406   PetscBool      iscuda;
1407   PetscErrorCode ierr;
1408 
1409   PetscFunctionBegin;
1410   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1411   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1412   if (!a->A) {
1413     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1414     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1415     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1416   }
1417   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1418   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1419   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1420   mat->preallocated = PETSC_TRUE;
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #if defined(PETSC_HAVE_ELEMENTAL)
/*
   Converts a MATMPIDENSE matrix to MATELEMENTAL by inserting each process's
   local rows (all N columns) into a freshly created (or reused) Elemental
   matrix and assembling.  MAT_INPLACE_MATRIX replaces A's header with the
   converted matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    /* entries are added below, so the reused matrix must start at zero */
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* the local dense storage is column-major */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* global indices: all columns, and the locally owned row range */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1463 #endif
1464 
1465 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1466 {
1467   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1468   PetscErrorCode ierr;
1469 
1470   PetscFunctionBegin;
1471   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1476 {
1477   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1478   PetscErrorCode ierr;
1479 
1480   PetscFunctionBegin;
1481   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1486 {
1487   PetscErrorCode ierr;
1488   Mat_MPIDense   *mat;
1489   PetscInt       m,nloc,N;
1490 
1491   PetscFunctionBegin;
1492   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1493   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1494   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1495     PetscInt sum;
1496 
1497     if (n == PETSC_DECIDE) {
1498       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1499     }
1500     /* Check sum(n) = N */
1501     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1502     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1503 
1504     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1505   }
1506 
1507   /* numeric phase */
1508   mat = (Mat_MPIDense*)(*outmat)->data;
1509   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1510   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1511   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 #if defined(PETSC_HAVE_CUDA)
/* Convert a MATMPIDENSECUDA matrix to host-only MATMPIDENSE: move the data to
   the CPU, strip the CUDA-specific composed methods, switch the default vector
   type back to VECSTANDARD, and convert the inner local block to MATSEQDENSE. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  /* NOTE(review): for MAT_INPLACE_MATRIX neither branch above runs, so *newmat
     is presumably M itself per the MatConvert() calling convention — confirm */
  B    = *newmat;
  /* ensure valid data resides on the CPU before dismantling the CUDA pieces */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* composing NULL removes the CUDA-specific methods from the object */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) { /* the inner local block may not exist yet (before preallocation) */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1554 
/* Convert a host MATMPIDENSE matrix to MATMPIDENSECUDA: install the
   CUDA-specific composed methods, switch the default vector type to VECCUDA,
   and convert the inner local block to MATSEQDENSECUDA. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  /* NOTE(review): for MAT_INPLACE_MATRIX neither branch above runs, so *newmat
     is presumably M itself per the MatConvert() calling convention — confirm */
  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* install the CUDA-specific method implementations on the object */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",            MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                        MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                    MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                   MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                    MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",               MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                      MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                      MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) { /* the inner local block may not exist yet (before preallocation) */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1596 #endif
1597 
/* Expose column col of the parallel dense matrix as a distributed Vec.
   The Vec wraps the column's storage directly (no copy); it must be returned
   with MatDenseRestoreColumnVec_MPIDense() before another column is requested. */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* only one column vector may be outstanding at a time */
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) { /* lazily create the wrapper vector once; it is reused for every column */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* store col+1 so that 0 can mean "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at offset col*lda in the column-major local array */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1616 
/* Return the column Vec obtained with MatDenseGetColumnVec_MPIDense() and
   release (read/write) access to the underlying local array. */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0; /* mark that no column is outstanding */
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr); /* detach matrix storage from the wrapper vector */
  *v   = NULL;
  PetscFunctionReturn(0);
}
1631 
/* Read-only variant of MatDenseGetColumnVec_MPIDense(): the returned Vec wraps
   column col's storage and is locked against modification until restored. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* only one column vector may be outstanding at a time */
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) { /* lazily create the wrapper vector once; it is reused for every column */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* store col+1 so that 0 can mean "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at offset col*lda in the column-major local array */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); /* forbid writes through the wrapper */
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1651 
/* Return the column Vec obtained with MatDenseGetColumnVecRead_MPIDense():
   release read access, pop the read lock, and detach the wrapper vector. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0; /* mark that no column is outstanding */
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); /* undo the lock pushed in the Get call */
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);  /* detach matrix storage from the wrapper vector */
  *v   = NULL;
  PetscFunctionReturn(0);
}
1667 
/* Write-only variant of MatDenseGetColumnVec_MPIDense(): the returned Vec wraps
   column col's storage; existing values are not guaranteed to be current. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* only one column vector may be outstanding at a time */
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) { /* lazily create the wrapper vector once; it is reused for every column */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* store col+1 so that 0 can mean "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at offset col*lda in the column-major local array */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1686 
/* Return the column Vec obtained with MatDenseGetColumnVecWrite_MPIDense()
   and release write access to the underlying local array. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0; /* mark that no column is outstanding */
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr); /* detach matrix storage from the wrapper vector */
  *v   = NULL;
  PetscFunctionReturn(0);
}
1701 
/* Type constructor for MATMPIDENSE: allocates the per-matrix data structure,
   installs the operations table, creates the stash for off-process entries,
   and composes the type-specific method implementations on the object. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply (created lazily in MatSetUpMultiply_MPIDense) */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* compose the dense-specific methods dispatched by the generic MatDense* API */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1762 
1763 /*MC
1764    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1765 
1766    Options Database Keys:
1767 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1768 
1769   Level: beginner
1770 
.seealso: MATDENSE, MATDENSECUDA, MATMPIDENSE, MATSEQDENSECUDA
1772 
1773 M*/
1774 #if defined(PETSC_HAVE_CUDA)
/* Type constructor for MATMPIDENSECUDA: build a plain MPI dense matrix first,
   then convert it in place to its CUDA counterpart. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1784 #endif
1785 
1786 /*MC
1787    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1788 
1789    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1790    and MATMPIDENSE otherwise.
1791 
1792    Options Database Keys:
1793 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1794 
1795   Level: beginner
1796 
1797 
1798 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1799 M*/
1800 
1801 /*MC
1802    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1803 
1804    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1805    and MATMPIDENSECUDA otherwise.
1806 
1807    Options Database Keys:
1808 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1809 
1810   Level: beginner
1811 
1812 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1813 M*/
1814 
1815 /*@C
1816    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1817 
1818    Not collective
1819 
1820    Input Parameters:
1821 .  B - the matrix
1822 -  data - optional location of matrix data.  Set data=NULL for PETSc
1823    to control all matrix memory allocation.
1824 
1825    Notes:
1826    The dense format is fully compatible with standard Fortran 77
1827    storage by columns.
1828 
1829    The data input variable is intended primarily for Fortran programmers
1830    who wish to allocate their own matrix memory space.  Most users should
1831    set data=NULL.
1832 
1833    Level: intermediate
1834 
1835 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1836 @*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PetscTryMethod dispatches to the implementation composed on the matrix
     type; it is silently a no-op if B is not a MATMPIDENSE-like type */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1845 
1846 /*@
1847    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1848    array provided by the user. This is useful to avoid copying an array
1849    into a matrix
1850 
1851    Not Collective
1852 
1853    Input Parameters:
1854 +  mat - the matrix
1855 -  array - the array in column major order
1856 
1857    Notes:
1858    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1859    freed when the matrix is destroyed.
1860 
1861    Level: developer
1862 
1863 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1864 
1865 @*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); /* the matrix contents have (potentially) changed */
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the placed array is host memory */
#endif
  PetscFunctionReturn(0);
}
1878 
1879 /*@
1880    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
1881 
1882    Not Collective
1883 
1884    Input Parameters:
1885 .  mat - the matrix
1886 
1887    Notes:
1888    You can only call this after a call to MatDensePlaceArray()
1889 
1890    Level: developer
1891 
1892 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1893 
1894 @*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PetscUseMethod errors if the type does not compose this method */
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); /* contents revert to the pre-place array */
  PetscFunctionReturn(0);
}
1904 
1905 #if defined(PETSC_HAVE_CUDA)
1906 /*@C
1907    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
1908    array provided by the user. This is useful to avoid copying an array
1909    into a matrix
1910 
1911    Not Collective
1912 
1913    Input Parameters:
1914 +  mat - the matrix
1915 -  array - the array in column major order
1916 
1917    Notes:
1918    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
1919    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
1920 
1921    Level: developer
1922 
1923 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
1924 @*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); /* the matrix contents have (potentially) changed */
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* the placed array is device memory */
  PetscFunctionReturn(0);
}
1935 
1936 /*@C
1937    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
1938 
1939    Not Collective
1940 
1941    Input Parameters:
1942 .  mat - the matrix
1943 
1944    Notes:
1945    You can only call this after a call to MatDenseCUDAPlaceArray()
1946 
1947    Level: developer
1948 
1949 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
1950 
1951 @*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PetscUseMethod errors if the type does not compose this method */
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); /* contents revert to the pre-place array */
  PetscFunctionReturn(0);
}
1961 
1962 /*@C
1963    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
1964 
1965    Not Collective
1966 
1967    Input Parameters:
1968 .  A - the matrix
1969 
1970    Output Parameters
1971 .  array - the GPU array in column major order
1972 
1973    Notes:
1974    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
1975 
1976    Level: developer
1977 
1978 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
1979 @*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); /* the caller is expected to overwrite the contents */
  PetscFunctionReturn(0);
}
1989 
1990 /*@C
1991    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
1992 
1993    Not Collective
1994 
1995    Input Parameters:
1996 +  A - the matrix
1997 -  array - the GPU array in column major order
1998 
1999    Notes:
2000 
2001    Level: developer
2002 
2003 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2004 @*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU; /* the GPU now holds the authoritative data */
  PetscFunctionReturn(0);
}
2015 
2016 /*@C
2017    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2018 
2019    Not Collective
2020 
2021    Input Parameters:
2022 .  A - the matrix
2023 
2024    Output Parameters
2025 .  array - the GPU array in column major order
2026 
2027    Notes:
2028    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2029 
2030    Level: developer
2031 
2032 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2033 @*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* read-only access: no PetscObjectStateIncrease() since the contents do not change */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2042 
2043 /*@C
2044    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2045 
2046    Not Collective
2047 
2048    Input Parameters:
2049 +  A - the matrix
2050 -  array - the GPU array in column major order
2051 
   Notes:
   Since access was read-only, the data is not marked as changed; the matrix offload state is left as it was.
2054 
2055    Level: developer
2056 
2057 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2058 @*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* read-only access: no state increase and no offload-mask change */
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2067 
2068 /*@C
2069    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2070 
2071    Not Collective
2072 
2073    Input Parameters:
2074 .  A - the matrix
2075 
2076    Output Parameters
2077 .  array - the GPU array in column major order
2078 
2079    Notes:
2080    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2081 
2082    Level: developer
2083 
2084 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2085 @*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); /* read-write access: the caller may modify the contents */
  PetscFunctionReturn(0);
}
2095 
2096 /*@C
2097    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2098 
2099    Not Collective
2100 
2101    Input Parameters:
2102 +  A - the matrix
2103 -  array - the GPU array in column major order
2104 
2105    Notes:
2106 
2107    Level: developer
2108 
2109 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2110 @*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU; /* the GPU now holds the authoritative data */
  PetscFunctionReturn(0);
}
2121 #endif
2122 
2123 /*@C
2124    MatCreateDense - Creates a matrix in dense format.
2125 
2126    Collective
2127 
2128    Input Parameters:
2129 +  comm - MPI communicator
2130 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2131 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2132 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2133 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2134 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2135    to control all matrix memory allocation.
2136 
2137    Output Parameter:
2138 .  A - the matrix
2139 
2140    Notes:
2141    The dense format is fully compatible with standard Fortran 77
2142    storage by columns.
2143 
2144    The data input variable is intended primarily for Fortran programmers
2145    who wish to allocate their own matrix memory space.  Most users should
2146    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2147 
2148    The user MUST specify either the local or global matrix dimensions
2149    (possibly both).
2150 
2151    Level: intermediate
2152 
2153 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2154 @*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* all ranks must agree on whether a user data array is supplied */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) { /* parallel communicator: MPI dense */
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else { /* single process: sequential dense */
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2178 
2179 #if defined(PETSC_HAVE_CUDA)
2180 /*@C
2181    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2182 
2183    Collective
2184 
2185    Input Parameters:
2186 +  comm - MPI communicator
2187 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2188 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2189 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2190 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2191 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2192    to control matrix memory allocation.
2193 
2194    Output Parameter:
2195 .  A - the matrix
2196 
2197    Notes:
2198 
2199    Level: intermediate
2200 
2201 .seealso: MatCreate(), MatCreateDense()
2202 @*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* all ranks must agree on whether a user data array is supplied */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) { /* parallel communicator: MPI dense CUDA */
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else { /* single process: sequential dense CUDA */
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2226 #endif
2227 
/* Duplicate a parallel dense matrix; cpvalues controls whether the numerical
   values of A are copied into the new matrix. */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  /* create an empty matrix with the same sizes and type as A */
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  /* a duplicated dense matrix is immediately usable: no assembly required */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) A's row/column layouts rather than rebuilding them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate the local sequential block, then set up the scatter machinery */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
2260 
2261 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2262 {
2263   PetscErrorCode ierr;
2264   PetscBool      isbinary;
2265 #if defined(PETSC_HAVE_HDF5)
2266   PetscBool      ishdf5;
2267 #endif
2268 
2269   PetscFunctionBegin;
2270   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2271   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2272   /* force binary viewer to load .info file if it has not yet done so */
2273   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2274   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2275 #if defined(PETSC_HAVE_HDF5)
2276   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2277 #endif
2278   if (isbinary) {
2279     ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
2280 #if defined(PETSC_HAVE_HDF5)
2281   } else if (ishdf5) {
2282     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
2283 #endif
2284   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2289 {
2290   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2291   Mat            a,b;
2292   PetscBool      flg;
2293   PetscErrorCode ierr;
2294 
2295   PetscFunctionBegin;
2296   a    = matA->A;
2297   b    = matB->A;
2298   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2299   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
2304 {
2305   PetscErrorCode        ierr;
2306   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
2307   Mat_TransMatMultDense *atb = a->atbdense;
2308 
2309   PetscFunctionBegin;
2310   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2311   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2312   ierr = (*atb->destroy)(A);CHKERRQ(ierr);
2313   ierr = PetscFree(atb);CHKERRQ(ierr);
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
2318 {
2319   PetscErrorCode        ierr;
2320   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
2321   Mat_MatTransMultDense *abt = a->abtdense;
2322 
2323   PetscFunctionBegin;
2324   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2325   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2326   ierr = (abt->destroy)(A);CHKERRQ(ierr);
2327   ierr = PetscFree(abt);CHKERRQ(ierr);
2328   PetscFunctionReturn(0);
2329 }
2330 
/* Numeric phase of C = A^T * B: every rank forms the full-size partial product
   (A_loc)^T * B_loc, and the per-rank row strips of C are summed onto their
   owners with MPI_Reduce_scatter. */
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*sendbuf=atb->sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  /* rank i receives its owned rows of C, all cN columns of them */
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf: contiguous blocks ordered by destination
     rank, column-major within each block (atbarray itself is column-major
     with leading dimension cM) */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2368 
/* Symbolic phase of C = A^T * B: shapes C, allocates the communication
   buffers, and hooks the context into C's destroy chain.
   Note: fill is unused for dense products. */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T*B requires A and B to have identical row layouts */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C: sendbuf holds the full cM x cN partial
     product on every rank for the Reduce_scatter in the numeric phase */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);

  /* chain the context destroy ahead of C's original destroy */
  c               = (Mat_MPIDense*)C->data;
  c->atbdense     = atb;
  atb->destroy    = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2403 
/* Symbolic phase of C = A * B^T: shapes C, picks the algorithm ("allgatherv"
   or "cyclic") from the product settings, allocates the corresponding
   communication buffers, and hooks the context into C's destroy chain.
   Note: fill is unused for dense products. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  /* alg 0 = "allgatherv", any other algorithm name maps to 1 = "cyclic" */
  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* a dedicated tag isolates the cyclic algorithm's ring messages */
  ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" -- two ping-pong buffers sized for the widest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" -- buf[0] holds local B^T, buf[1] the gathered global B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  /* chain the context destroy ahead of C's original destroy */
  c                               = (Mat_MPIDense*)C->data;
  c->abtdense                     = abt;
  abt->destroy                    = C->ops->destroy;
  C->ops->destroy                 = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2459 
/* Numeric phase of C = A * B^T, "cyclic" algorithm: the packed transpose-ready
   block of B rotates around the ring of ranks; at each of the size steps every
   rank multiplies A_loc against the block it currently holds and fills the
   corresponding column strip of C_loc, overlapping the next transfer with the
   GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* B's local block is contiguous: send it in place */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* compact B's local block (skip leading-dimension padding) into buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring: send to the left neighbor, receive from the right */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2540 
2541 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2542 {
2543   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2544   Mat_MatTransMultDense *abt = c->abtdense;
2545   PetscErrorCode        ierr;
2546   MPI_Comm              comm;
2547   PetscMPIInt           size;
2548   PetscScalar           *cv, *sendbuf, *recvbuf;
2549   const PetscScalar     *av,*bv;
2550   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2551   PetscScalar           _DOne=1.0,_DZero=0.0;
2552   PetscBLASInt          cm, cn, ck, alda, clda;
2553 
2554   PetscFunctionBegin;
2555   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2556   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2557   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2558   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2559   ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
2560   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2561   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2562   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2563   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2564   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2565   /* copy transpose of B into buf[0] */
2566   bn      = B->rmap->n;
2567   sendbuf = abt->buf[0];
2568   recvbuf = abt->buf[1];
2569   for (k = 0, j = 0; j < bn; j++) {
2570     for (i = 0; i < cK; i++, k++) {
2571       sendbuf[k] = bv[i * blda + j];
2572     }
2573   }
2574   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2575   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2576   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2577   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2578   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2579   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2580   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2581   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2582   ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2587 {
2588   Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
2589   Mat_MatTransMultDense *abt = c->abtdense;
2590   PetscErrorCode        ierr;
2591 
2592   PetscFunctionBegin;
2593   switch (abt->alg) {
2594   case 1:
2595     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2596     break;
2597   default:
2598     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2599     break;
2600   }
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2605 {
2606   PetscErrorCode   ierr;
2607   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2608   Mat_MatMultDense *ab = a->abdense;
2609 
2610   PetscFunctionBegin;
2611   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2612   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2613   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2614 
2615   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2616   ierr = PetscFree(ab);CHKERRQ(ierr);
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 #if defined(PETSC_HAVE_ELEMENTAL)
2621 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2622 {
2623   PetscErrorCode   ierr;
2624   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2625   Mat_MatMultDense *ab=c->abdense;
2626 
2627   PetscFunctionBegin;
2628   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2629   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2630   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2631   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2636 {
2637   PetscErrorCode   ierr;
2638   Mat              Ae,Be,Ce;
2639   Mat_MPIDense     *c;
2640   Mat_MatMultDense *ab;
2641 
2642   PetscFunctionBegin;
2643   /* check local size of A and B */
2644   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2645     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2646   }
2647 
2648   /* create elemental matrices Ae and Be */
2649   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2650   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2651   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2652   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2653   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2654 
2655   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2656   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2657   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2658   ierr = MatSetUp(Be);CHKERRQ(ierr);
2659   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2660 
2661   /* compute symbolic Ce = Ae*Be */
2662   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2663   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2664 
2665   /* setup C */
2666   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2667   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2668   ierr = MatSetUp(C);CHKERRQ(ierr);
2669 
2670   /* create data structure for reuse Cdense */
2671   ierr = PetscNew(&ab);CHKERRQ(ierr);
2672   c                  = (Mat_MPIDense*)C->data;
2673   c->abdense         = ab;
2674 
2675   ab->Ae             = Ae;
2676   ab->Be             = Be;
2677   ab->Ce             = Ce;
2678   ab->destroy        = C->ops->destroy;
2679   C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2680   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2681   C->ops->productnumeric = MatProductNumeric_AB;
2682   PetscFunctionReturn(0);
2683 }
2684 #endif
2685 /* ----------------------------------------------- */
2686 #if defined(PETSC_HAVE_ELEMENTAL)
2687 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2688 {
2689   PetscFunctionBegin;
2690   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2691   C->ops->productsymbolic = MatProductSymbolic_AB;
2692   C->ops->productnumeric  = MatProductNumeric_AB;
2693   PetscFunctionReturn(0);
2694 }
2695 #endif
2696 
2697 static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
2698 {
2699   PetscErrorCode ierr;
2700   Mat_Product    *product = C->product;
2701 
2702   PetscFunctionBegin;
2703   ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr);
2704   C->ops->productnumeric          = MatProductNumeric_AtB;
2705   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
2706   PetscFunctionReturn(0);
2707 }
2708 
2709 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2710 {
2711   Mat_Product *product = C->product;
2712   Mat         A = product->A,B=product->B;
2713 
2714   PetscFunctionBegin;
2715   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2716     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2717 
2718   C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
2719   PetscFunctionReturn(0);
2720 }
2721 
/* Configure the product C = A*B^T: choose the algorithm ("allgatherv" by
   default, or "cyclic") from the options database and install the symbolic
   and numeric callbacks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; the option name depends on which API the user called */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) { /* user selected an algorithm on the command line */
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  C->ops->productnumeric           = MatProductNumeric_ABt;
  PetscFunctionReturn(0);
}
2757 
2758 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2759 {
2760   PetscErrorCode ierr;
2761   Mat_Product    *product = C->product;
2762 
2763   PetscFunctionBegin;
2764   switch (product->type) {
2765 #if defined(PETSC_HAVE_ELEMENTAL)
2766   case MATPRODUCT_AB:
2767     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
2768     break;
2769 #endif
2770   case MATPRODUCT_AtB:
2771     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
2772     break;
2773   case MATPRODUCT_ABt:
2774     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
2775     break;
2776   default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]);
2777   }
2778   PetscFunctionReturn(0);
2779 }
2780