xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision d24d420451aab1d024a728b511d7b61b40627436)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else *B = A;
36   PetscFunctionReturn(0);
37 }
38 
39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
40 {
41   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
44 
45   PetscFunctionBegin;
46   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
47   lrow = row - rstart;
48   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscErrorCode ierr;
56   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
57 
58   PetscFunctionBegin;
59   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
60   lrow = row - rstart;
61   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
62   PetscFunctionReturn(0);
63 }
64 
/* Returns (and caches on A) the square on-diagonal block of the parallel dense matrix,
   as a sequential dense matrix sharing storage with A's local array. */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  /* A diagonal block only makes sense when the row and column layouts agree */
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  /* The block is cached on A so repeated calls hand back the same object */
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B aliases A's storage: point it at the start of the diagonal block inside the
       local array (column-major; assumes lda == m here — TODO confirm for padded lda) */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    /* PetscObjectCompose() took a reference; drop ours so the cache owns B */
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
98 
/* Inserts/adds values into the parallel dense matrix. Locally owned rows are
   forwarded to the sequential local matrix; off-process rows are stashed for
   communication during assembly (unless stashing is disabled). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* Locally owned row: insert directly into the sequential block */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is laid out row by row; pass the whole row in one call */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* Column-oriented v: entry (i,j) lives at v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* Off-process entry: queue it in the stash; assembly will ship it to the owner */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
132 
133 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
134 {
135   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
136   PetscErrorCode ierr;
137   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
142     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       for (j=0; j<n; j++) {
146         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
147         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
148         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
149       }
150     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
156 {
157   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
158   PetscErrorCode ierr;
159 
160   PetscFunctionBegin;
161   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
162   PetscFunctionReturn(0);
163 }
164 
165 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
166 {
167   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
168   PetscErrorCode ierr;
169 
170   PetscFunctionBegin;
171   ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
172   PetscFunctionReturn(0);
173 }
174 
175 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
176 {
177   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
178   PetscErrorCode ierr;
179 
180   PetscFunctionBegin;
181   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
182   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
183   PetscFunctionReturn(0);
184 }
185 
186 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
187 {
188   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
189   PetscErrorCode ierr;
190 
191   PetscFunctionBegin;
192   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
193   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
194   PetscFunctionReturn(0);
195 }
196 
197 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
198 {
199   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
200   PetscErrorCode ierr;
201 
202   PetscFunctionBegin;
203   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
204   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
205   PetscFunctionReturn(0);
206 }
207 
208 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
209 {
210   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
211   PetscErrorCode ierr;
212 
213   PetscFunctionBegin;
214   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
215   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
216   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
217   PetscFunctionReturn(0);
218 }
219 
220 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
221 {
222   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
223   PetscErrorCode ierr;
224 
225   PetscFunctionBegin;
226   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
227   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
228   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
229   PetscFunctionReturn(0);
230 }
231 
232 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
233 {
234   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
235   PetscErrorCode ierr;
236 
237   PetscFunctionBegin;
238   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
239   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
240   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
241   PetscFunctionReturn(0);
242 }
243 
/* Extracts the submatrix A(isrow,iscol) into a new (or reused) parallel dense matrix.
   The column IS is gathered onto every process; the row partitioning of the result
   must match that of A (no redistribution is performed). */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* Every process needs all selected column indices since it stores full rows */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    /* av points at the start of source column icol[i] (column-major storage) */
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      /* irow[] holds global row indices; shift by rstart to index the local array.
         NOTE(review): the destination is written assuming its lda equals nrows — confirm. */
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
313 
314 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
315 {
316   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
317   PetscErrorCode ierr;
318 
319   PetscFunctionBegin;
320   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
325 {
326   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
327   PetscErrorCode ierr;
328 
329   PetscFunctionBegin;
330   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
331   PetscFunctionReturn(0);
332 }
333 
334 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
335 {
336   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
337   PetscErrorCode ierr;
338 
339   PetscFunctionBegin;
340   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
341   PetscFunctionReturn(0);
342 }
343 
344 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
345 {
346   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
347   PetscErrorCode ierr;
348   PetscInt       nstash,reallocs;
349 
350   PetscFunctionBegin;
351   if (mdn->donotstash || mat->nooffprocentries) return(0);
352 
353   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
354   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
355   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
356   PetscFunctionReturn(0);
357 }
358 
/* Finishes assembly: drains incoming stash messages, inserting the received values
   row by row, then assembles the local block and (on first final assembly) builds
   the communication machinery needed by MatMult. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg == 0 once all pending stash messages have been consumed */
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* First final assembly: set up the scatter (SF) used by the mat-vec products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
397 
398 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
399 {
400   PetscErrorCode ierr;
401   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
402 
403   PetscFunctionBegin;
404   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
/* Zeroes the given (global) rows, optionally placing `diag` on the diagonal and
   fixing the right-hand side b so that b[row] = diag * x[row] for each zeroed row. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* Zero the rows in the local block (diagonal handled separately below) */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* NOTE: this sets the whole diagonal, not just the zeroed rows; that is fine
       for rows that are being zeroed but overwrites the diagonal of all rows */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
440 
441 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
442 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
443 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
444 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
445 
/* yy = mat * xx: broadcast the (distributed) input vector into the local work
   vector lvec via the star forest, then apply the local dense block. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* Gather all of xx into lvec (each process needs the full vector,
     since its local block spans all global columns) */
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  /* Local dense mat-vec produces the owned part of yy directly */
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
463 
/* zz = yy + mat * xx: same communication pattern as MatMult_MPIDense,
   followed by the local mult-add. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* Gather all of xx into the local work vector */
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
481 
/* yy = A^T * xx: apply the local transpose product into the work vector,
   then sum the per-process partial results into yy via an SF reduce. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* yy accumulates contributions from every process; start from zero */
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
500 
/* zz = yy + A^T * xx: seed zz with yy, then reduce the per-process
   local transpose products into it. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* Start from yy so the reduce below adds A^T*xx on top of it */
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
519 
520 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
521 {
522   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
523   PetscErrorCode    ierr;
524   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
525   PetscScalar       *x,zero = 0.0;
526   const PetscScalar *av;
527 
528   PetscFunctionBegin;
529   ierr = VecSet(v,zero);CHKERRQ(ierr);
530   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
531   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
532   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
533   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
534   radd = A->rmap->rstart*m;
535   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
536   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
537   for (i=0; i<len; i++) {
538     x[i] = av[radd + i*lda + i];
539   }
540   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
541   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
542   PetscFunctionReturn(0);
543 }
544 
/* Destroys the parallel dense matrix: releases the stash, local block, work
   vectors, SF, and cached column views, then clears all composed methods so
   the Mat shell can be retyped or freed cleanly. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* Refuse to destroy while borrowed column-vector / submatrix views are outstanding */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* Unregister every composed method installed at creation time */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
602 
603 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
604 
605 #include <petscdraw.h>
/* Views the parallel dense matrix through an ASCII, draw, or socket viewer.
   INFO formats print per-process summaries; otherwise the whole matrix is
   gathered onto rank 0 and viewed there as a sequential dense matrix. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* Per-process storage summary only; no matrix entries */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* Nothing extra to print for the plain INFO format */
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    /* A null draw context means drawing is disabled; skip the gather below */
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* Rank 0 owns all rows of the gathered copy; other ranks own none */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* Only rank 0 actually views; the sub-viewer call is collective */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
688 
689 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
690 {
691   PetscErrorCode ierr;
692   PetscBool      iascii,isbinary,isdraw,issocket;
693 
694   PetscFunctionBegin;
695   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
696   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
697   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
698   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
699 
700   if (iascii || issocket || isdraw) {
701     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
702   } else if (isbinary) {
703     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
704   }
705   PetscFunctionReturn(0);
706 }
707 
708 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
709 {
710   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
711   Mat            mdn  = mat->A;
712   PetscErrorCode ierr;
713   PetscLogDouble isend[5],irecv[5];
714 
715   PetscFunctionBegin;
716   info->block_size = 1.0;
717 
718   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
719 
720   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
721   isend[3] = info->memory;  isend[4] = info->mallocs;
722   if (flag == MAT_LOCAL) {
723     info->nz_used      = isend[0];
724     info->nz_allocated = isend[1];
725     info->nz_unneeded  = isend[2];
726     info->memory       = isend[3];
727     info->mallocs      = isend[4];
728   } else if (flag == MAT_GLOBAL_MAX) {
729     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
730 
731     info->nz_used      = irecv[0];
732     info->nz_allocated = irecv[1];
733     info->nz_unneeded  = irecv[2];
734     info->memory       = irecv[3];
735     info->mallocs      = irecv[4];
736   } else if (flag == MAT_GLOBAL_SUM) {
737     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
738 
739     info->nz_used      = irecv[0];
740     info->nz_allocated = irecv[1];
741     info->nz_unneeded  = irecv[2];
742     info->memory       = irecv[3];
743     info->mallocs      = irecv[4];
744   }
745   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
746   info->fill_ratio_needed = 0;
747   info->factor_mallocs    = 0;
748   PetscFunctionReturn(0);
749 }
750 
751 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
752 {
753   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
754   PetscErrorCode ierr;
755 
756   PetscFunctionBegin;
757   switch (op) {
758   case MAT_NEW_NONZERO_LOCATIONS:
759   case MAT_NEW_NONZERO_LOCATION_ERR:
760   case MAT_NEW_NONZERO_ALLOCATION_ERR:
761     MatCheckPreallocated(A,1);
762     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
763     break;
764   case MAT_ROW_ORIENTED:
765     MatCheckPreallocated(A,1);
766     a->roworiented = flg;
767     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
768     break;
769   case MAT_NEW_DIAGONALS:
770   case MAT_KEEP_NONZERO_PATTERN:
771   case MAT_USE_HASH_TABLE:
772   case MAT_SORTED_FULL:
773     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
774     break;
775   case MAT_IGNORE_OFF_PROC_ENTRIES:
776     a->donotstash = flg;
777     break;
778   case MAT_SYMMETRIC:
779   case MAT_STRUCTURALLY_SYMMETRIC:
780   case MAT_HERMITIAN:
781   case MAT_SYMMETRY_ETERNAL:
782   case MAT_IGNORE_LOWER_TRIANGULAR:
783   case MAT_IGNORE_ZERO_ENTRIES:
784     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
785     break;
786   default:
787     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
788   }
789   PetscFunctionReturn(0);
790 }
791 
792 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
793 {
794   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
795   const PetscScalar *l;
796   PetscScalar       x,*v,*vv,*r;
797   PetscErrorCode    ierr;
798   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
799 
800   PetscFunctionBegin;
801   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
802   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
803   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
804   if (ll) {
805     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
806     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
807     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
808     for (i=0; i<m; i++) {
809       x = l[i];
810       v = vv + i;
811       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
812     }
813     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
814     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
815   }
816   if (rr) {
817     const PetscScalar *ar;
818 
819     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
820     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
821     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
822     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
823     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
824     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
825     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
826     for (i=0; i<n; i++) {
827       x = r[i];
828       v = vv + i*lda;
829       for (j=0; j<m; j++) (*v++) *= x;
830     }
831     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
832     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
833   }
834   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
/*
   MatNorm_MPIDense - Computes a norm of the parallel dense matrix.

   Uniprocess case delegates entirely to the sequential block.  Otherwise:
   NORM_FROBENIUS sums |a_ij|^2 locally and allreduces the sum; NORM_1
   (max column sum) accumulates per-column absolute sums locally, allreduces
   them, and takes the max; NORM_INFINITY (max row sum) takes the max over
   the local row norms.  The 2-norm is not supported in parallel.

   NOTE(review): the Frobenius and 1-norm loops walk the local array as a
   contiguous m*n block, i.e. they assume the leading dimension equals the
   local row count (no lda padding) -- TODO confirm for matrices created
   with a larger leading dimension.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squared moduli of all local entries */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* tmp[j] = local absolute column sums; tmp2 = global sums */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      /* 1-norm is the maximum of the global column sums */
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      /* rows are not split across processes, so the local inf-norm is exact
         for the local rows; the global norm is the max over processes */
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
886 
/*
   MatTranspose_MPIDense - Forms the transpose of a parallel dense matrix.

   For MAT_INITIAL_MATRIX/MAT_INPLACE_MATRIX a new matrix with swapped
   layouts is created; for MAT_REUSE_MATRIX *matout is filled in.  Each
   process inserts its local columns as rows of B via MatSetValues (the
   communication happens during assembly).  For the in-place case the new
   matrix is merged back into A with MatHeaderMerge.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* B gets A's column layout as rows and vice versa */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  /* rwork holds the global row indices of this process's rows of A,
     which become the column indices in B */
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of A (contiguous in the column-major local array) becomes
       global row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's contents by B, destroying the temporary header */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
924 
925 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
926 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
927 
928 PetscErrorCode MatSetUp_MPIDense(Mat A)
929 {
930   PetscErrorCode ierr;
931 
932   PetscFunctionBegin;
933   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
934   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
935   if (!A->preallocated) {
936     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
937   }
938   PetscFunctionReturn(0);
939 }
940 
941 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
942 {
943   PetscErrorCode ierr;
944   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
945 
946   PetscFunctionBegin;
947   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
948   PetscFunctionReturn(0);
949 }
950 
951 PetscErrorCode MatConjugate_MPIDense(Mat mat)
952 {
953   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
954   PetscErrorCode ierr;
955 
956   PetscFunctionBegin;
957   ierr = MatConjugate(a->A);CHKERRQ(ierr);
958   PetscFunctionReturn(0);
959 }
960 
961 PetscErrorCode MatRealPart_MPIDense(Mat A)
962 {
963   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
964   PetscErrorCode ierr;
965 
966   PetscFunctionBegin;
967   ierr = MatRealPart(a->A);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
972 {
973   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
982 {
983   PetscErrorCode ierr;
984   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
985 
986   PetscFunctionBegin;
987   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
988   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
989   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
994 
995 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
996 {
997   PetscErrorCode ierr;
998   PetscInt       i,n;
999   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1000   PetscReal      *work;
1001 
1002   PetscFunctionBegin;
1003   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1004   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1005   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1006   if (type == NORM_2) {
1007     for (i=0; i<n; i++) work[i] *= work[i];
1008   }
1009   if (type == NORM_INFINITY) {
1010     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1011   } else {
1012     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(work);CHKERRQ(ierr);
1015   if (type == NORM_2) {
1016     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1017   }
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #if defined(PETSC_HAVE_CUDA)
/* Exposes column 'col' of the parallel dense CUDA matrix as an MPI CUDA
   vector without copying: the vector aliases the device storage of the
   local block.  Must be paired with MatDenseRestoreColumnVec. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* only one column vector / submatrix alias may be outstanding at a time */
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector; its array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* store col+1 so 0 can mean "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at device offset col*lda in column-major storage */
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1042 
/* Releases the column vector obtained with MatDenseGetColumnVec: returns
   the device array to the local block, detaches it from the wrapper vector,
   and clears the in-use flag.  *v is nulled so callers cannot keep using it. */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1057 
/* Read-only variant of MatDenseGetColumnVec: same aliasing scheme, but the
   device array is acquired for reading and the wrapper vector is read-locked
   so accidental writes error out. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* only one column vector / submatrix alias may be outstanding at a time */
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector; its array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* store col+1 so 0 can mean "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  /* lock last, after the array is in place */
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1079 
/* Releases a read-only column vector: restores the read-acquired device
   array, pops the read lock, detaches the array, and clears the flag. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  /* unlock before detaching the array, mirroring the get-side ordering */
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1095 
/* Write-only variant of MatDenseGetColumnVec: the device array is acquired
   for writing (previous contents need not be synchronized to the device). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* only one column vector / submatrix alias may be outstanding at a time */
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector; its array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* store col+1 so 0 can mean "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at device offset col*lda in column-major storage */
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1116 
/* Releases a write-only column vector: restores the write-acquired device
   array, detaches it from the wrapper vector, and clears the flag. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1131 
1132 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1133 {
1134   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1139   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1140   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1145 {
1146   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1147   PetscErrorCode ierr;
1148 
1149   PetscFunctionBegin;
1150   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1151   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1152   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1157 {
1158   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1159   PetscErrorCode ierr;
1160 
1161   PetscFunctionBegin;
1162   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1163   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1164   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1169 {
1170   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1171   PetscErrorCode ierr;
1172 
1173   PetscFunctionBegin;
1174   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1179 {
1180   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1181   PetscErrorCode ierr;
1182 
1183   PetscFunctionBegin;
1184   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1189 {
1190   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1191   PetscErrorCode ierr;
1192 
1193   PetscFunctionBegin;
1194   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1199 {
1200   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1209 {
1210   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1219 {
1220   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1229 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1230 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1231 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1232 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1233 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1234 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1235 
/* Binds (or unbinds) the matrix to the CPU.  The local block is rebound,
   stale cached helper objects of the wrong memory type are destroyed, and
   the column-vector accessor methods are swapped between the CUDA and the
   host implementations accordingly. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* cannot rebind while a column vector or submatrix alias is outstanding */
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* unbinding: drop cached helpers that are not CUDA-typed so they get
       recreated with the right type on next use */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    /* install the CUDA column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* binding to CPU: install the host column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    /* keep the cached conversion matrix's binding in sync */
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1278 
/* Allocates (or adopts) device storage for a MATMPIDENSECUDA matrix.
   d_data, if non-NULL, is a user-provided device array the local block will
   use; otherwise the block allocates its own.  A no-op for non-CUDA types. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  /* layouts must be final before sizing the local block */
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* local block: this process's rows by ALL global columns */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1301 #endif
1302 
1303 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1304 {
1305   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1306   PetscErrorCode ierr;
1307 
1308   PetscFunctionBegin;
1309   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
/* A dense matrix stores every entry, so no diagonal entry can be missing;
   the diagonal-index output d is intentionally left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1319 
1320 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1321 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1322 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1323 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1324 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1325 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1326 
1327 /* -------------------------------------------------------------------*/
/* Dispatch table for MATMPIDENSE.  Entries are strictly positional (the
   slot-number comments are reading aids only); a 0 marks an operation that
   is not implemented for this matrix type.  Do NOT reorder entries. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};
1477 
1478 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1479 {
1480   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1481   PetscBool      iscuda;
1482   PetscErrorCode ierr;
1483 
1484   PetscFunctionBegin;
1485   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1486   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1487   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1488   if (!a->A) {
1489     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1490     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1491     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1492   }
1493   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1494   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1495   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1496   mat->preallocated = PETSC_TRUE;
1497   PetscFunctionReturn(0);
1498 }
1499 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert an MPI dense matrix to Elemental format: each rank scatters its locally
   owned rows (all global columns) into the Elemental matrix via MatSetValues(). */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_elemental;
  PetscErrorCode    ierr;
  const PetscScalar *v;
  PetscInt          m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* the local dense array is in column-major order */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  /* A is only read here, so use read-only array access: this does not mark A as
     modified and avoids unnecessary state changes for GPU-backed matrices */
  ierr = MatDenseGetArrayRead(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1540 
1541 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1542 {
1543   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1544   PetscErrorCode ierr;
1545 
1546   PetscFunctionBegin;
1547   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1552 {
1553   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1554   PetscErrorCode ierr;
1555 
1556   PetscFunctionBegin;
1557   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 
/* Stack the per-rank sequential dense matrices inmat into one parallel dense matrix:
   each rank contributes its inmat as its local row block. n is the number of local
   columns of the result (PETSC_DECIDE lets PETSc split N). Collective on comm. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  /* NOTE(review): nloc is fetched but never used below — candidate for removal */
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* local rows = global rows of inmat; global rows are determined by the sum over ranks */
    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1590 
1591 #if defined(PETSC_HAVE_CUDA)
/* Convert a MATMPIDENSECUDA matrix (back) to plain MATMPIDENSE: bind it to the CPU,
   restore the host vector type, unregister all CUDA-specific methods, and convert the
   inner sequential block to MATSEQDENSE. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: *newmat aliases M and is converted in place */

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* remove the CUDA-only composed methods so queries fall back to the host implementations */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential block and rebuild the mat-vec scatter machinery */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1633 
/* Convert a MATMPIDENSE matrix to MATMPIDENSECUDA: set the CUDA vector type,
   register the CUDA-specific methods, and convert the inner sequential block to
   MATSEQDENSECUDA. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: *newmat aliases M and is converted in place */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* register the CUDA implementations for type-specific queries */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential block and rebuild the mat-vec scatter machinery */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1678 #endif
1679 
1680 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1681 {
1682   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1683   PetscErrorCode ierr;
1684   PetscInt       lda;
1685 
1686   PetscFunctionBegin;
1687   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1688   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1689   if (!a->cvec) {
1690     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1691   }
1692   a->vecinuse = col + 1;
1693   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1694   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1695   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1696   *v   = a->cvec;
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1701 {
1702   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1703   PetscErrorCode ierr;
1704 
1705   PetscFunctionBegin;
1706   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1707   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1708   a->vecinuse = 0;
1709   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1710   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1711   *v   = NULL;
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1716 {
1717   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1718   PetscErrorCode ierr;
1719   PetscInt       lda;
1720 
1721   PetscFunctionBegin;
1722   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1723   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1724   if (!a->cvec) {
1725     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1726   }
1727   a->vecinuse = col + 1;
1728   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1729   ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1730   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1731   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1732   *v   = a->cvec;
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1737 {
1738   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1743   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1744   a->vecinuse = 0;
1745   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1746   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1747   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1748   *v   = NULL;
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1753 {
1754   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1755   PetscErrorCode ierr;
1756   PetscInt       lda;
1757 
1758   PetscFunctionBegin;
1759   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1760   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1761   if (!a->cvec) {
1762     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1763   }
1764   a->vecinuse = col + 1;
1765   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1766   ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1767   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1768   *v   = a->cvec;
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1773 {
1774   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1775   PetscErrorCode ierr;
1776 
1777   PetscFunctionBegin;
1778   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1779   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1780   a->vecinuse = 0;
1781   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1782   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1783   *v   = NULL;
1784   PetscFunctionReturn(0);
1785 }
1786 
/* Expose columns [cbegin,cend) of A as a (borrowed) parallel dense matrix that shares
   A's storage. The view matrix a->cmat is cached and reused across calls; it must be
   returned with MatDenseRestoreSubMatrix() before any other access to A's data. */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first use: create the cached view matrix; rows mirror A's row layout,
       columns are sized to the requested range */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* cached view has a different column count: rebuild only its column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* borrow the column range of the inner sequential block as the view's inner block */
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* 0 means "not in use", hence the +1 */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1826 
1827 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1828 {
1829   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1830   Mat_MPIDense   *c;
1831   PetscErrorCode ierr;
1832 
1833   PetscFunctionBegin;
1834   if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1835   if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1836   if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1837   a->matinuse = 0;
1838   c    = (Mat_MPIDense*)a->cmat->data;
1839   ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
1840   *v   = NULL;
1841   PetscFunctionReturn(0);
1842 }
1843 
/* Type constructor for MATMPIDENSE: allocates the type-specific data structure,
   installs the function table, creates the stash for off-process entries, and
   registers all composed (queryable) dense-matrix methods. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* register the dense-specific methods reachable via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1907 
1908 /*MC
1909    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1910 
1911    Options Database Keys:
1912 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1913 
1914   Level: beginner
1915 
.seealso: MATDENSE, MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA
1917 
1918 M*/
#if defined(PETSC_HAVE_CUDA)
/* Type constructor for MATMPIDENSECUDA: build a plain MPI dense matrix, then
   convert it in place to the CUDA type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
1930 
1931 /*MC
1932    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1933 
1934    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1935    and MATMPIDENSE otherwise.
1936 
1937    Options Database Keys:
1938 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1939 
1940   Level: beginner
1941 
1942 
1943 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1944 M*/
1945 
1946 /*MC
1947    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1948 
1949    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1950    and MATMPIDENSECUDA otherwise.
1951 
1952    Options Database Keys:
1953 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1954 
1955   Level: beginner
1956 
1957 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1958 M*/
1959 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation, if any (no-op for other types) */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1991 
/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix data changed: bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the user-provided array is host memory, so the valid copy is now on the CPU */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2025 
/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  /* the matrix data changed: bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2052 
/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix data changed: bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the user-provided array is host memory, so the valid copy is now on the CPU */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2085 
2086 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix data changed: bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the user-provided array is device memory, so the valid copy is now on the GPU */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2117 
2118 /*@C
2119    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
2120 
2121    Not Collective
2122 
2123    Input Parameters:
2124 .  mat - the matrix
2125 
2126    Notes:
2127    You can only call this after a call to MatDenseCUDAPlaceArray()
2128 
2129    Level: developer
2130 
2131 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2132 
2133 @*/
2134 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2135 {
2136   PetscErrorCode ierr;
2137 
2138   PetscFunctionBegin;
2139   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2140   ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2141   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 /*@C
2146    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2147    array provided by the user. This is useful to avoid copying an array
2148    into a matrix
2149 
2150    Not Collective
2151 
2152    Input Parameters:
2153 +  mat - the matrix
2154 -  array - the array in column major order
2155 
2156    Notes:
2157    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2158    The memory passed in CANNOT be freed by the user. It will be freed
2159    when the matrix is destroyed. The array should respect the matrix leading dimension.
2160 
2161    Level: developer
2162 
2163 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2164 @*/
2165 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2166 {
2167   PetscErrorCode ierr;
2168 
2169   PetscFunctionBegin;
2170   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2171   ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2172   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2173   mat->offloadmask = PETSC_OFFLOAD_GPU;
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 /*@C
2178    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2179 
2180    Not Collective
2181 
2182    Input Parameters:
2183 .  A - the matrix
2184 
   Output Parameter:
2186 .  array - the GPU array in column major order
2187 
2188    Notes:
2189    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2190 
2191    Level: developer
2192 
2193 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2194 @*/
2195 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2196 {
2197   PetscErrorCode ierr;
2198 
2199   PetscFunctionBegin;
2200   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2201   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2202   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 /*@C
2207    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2208 
2209    Not Collective
2210 
2211    Input Parameters:
2212 +  A - the matrix
2213 -  array - the GPU array in column major order
2214 
2215    Notes:
2216 
2217    Level: developer
2218 
2219 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2220 @*/
2221 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2222 {
2223   PetscErrorCode ierr;
2224 
2225   PetscFunctionBegin;
2226   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2227   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2228   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2229   A->offloadmask = PETSC_OFFLOAD_GPU;
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 /*@C
2234    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2235 
2236    Not Collective
2237 
2238    Input Parameters:
2239 .  A - the matrix
2240 
   Output Parameter:
2242 .  array - the GPU array in column major order
2243 
2244    Notes:
2245    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2246 
2247    Level: developer
2248 
2249 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2250 @*/
2251 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2252 {
2253   PetscErrorCode ierr;
2254 
2255   PetscFunctionBegin;
2256   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2257   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 /*@C
2262    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2263 
2264    Not Collective
2265 
2266    Input Parameters:
2267 +  A - the matrix
2268 -  array - the GPU array in column major order
2269 
2270    Notes:
2271    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2272 
2273    Level: developer
2274 
2275 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2276 @*/
2277 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2278 {
2279   PetscErrorCode ierr;
2280 
2281   PetscFunctionBegin;
2282   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 /*@C
2287    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2288 
2289    Not Collective
2290 
2291    Input Parameters:
2292 .  A - the matrix
2293 
   Output Parameter:
2295 .  array - the GPU array in column major order
2296 
2297    Notes:
2298    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2299 
2300    Level: developer
2301 
2302 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2303 @*/
2304 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2305 {
2306   PetscErrorCode ierr;
2307 
2308   PetscFunctionBegin;
2309   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2310   ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2311   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2312   PetscFunctionReturn(0);
2313 }
2314 
2315 /*@C
2316    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2317 
2318    Not Collective
2319 
2320    Input Parameters:
2321 +  A - the matrix
2322 -  array - the GPU array in column major order
2323 
2324    Notes:
2325 
2326    Level: developer
2327 
2328 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2329 @*/
2330 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2331 {
2332   PetscErrorCode ierr;
2333 
2334   PetscFunctionBegin;
2335   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2336   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2337   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2338   A->offloadmask = PETSC_OFFLOAD_GPU;
2339   PetscFunctionReturn(0);
2340 }
2341 #endif
2342 
2343 /*@C
2344    MatCreateDense - Creates a matrix in dense format.
2345 
2346    Collective
2347 
2348    Input Parameters:
2349 +  comm - MPI communicator
2350 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2351 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2352 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2353 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2354 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2355    to control all matrix memory allocation.
2356 
2357    Output Parameter:
2358 .  A - the matrix
2359 
2360    Notes:
2361    The dense format is fully compatible with standard Fortran 77
2362    storage by columns.
2363 
2364    The data input variable is intended primarily for Fortran programmers
2365    who wish to allocate their own matrix memory space.  Most users should
2366    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2367 
2368    The user MUST specify either the local or global matrix dimensions
2369    (possibly both).
2370 
2371    Level: intermediate
2372 
2373 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2374 @*/
2375 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2376 {
2377   PetscErrorCode ierr;
2378   PetscMPIInt    size;
2379 
2380   PetscFunctionBegin;
2381   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2382   PetscValidLogicalCollectiveBool(*A,!!data,6);
2383   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2384   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2385   if (size > 1) {
2386     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2387     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2388     if (data) {  /* user provided data array, so no need to assemble */
2389       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2390       (*A)->assembled = PETSC_TRUE;
2391     }
2392   } else {
2393     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2394     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2395   }
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #if defined(PETSC_HAVE_CUDA)
2400 /*@C
2401    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2402 
2403    Collective
2404 
2405    Input Parameters:
2406 +  comm - MPI communicator
2407 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2408 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2409 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2410 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2411 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2412    to control matrix memory allocation.
2413 
2414    Output Parameter:
2415 .  A - the matrix
2416 
2417    Notes:
2418 
2419    Level: intermediate
2420 
2421 .seealso: MatCreate(), MatCreateDense()
2422 @*/
2423 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2424 {
2425   PetscErrorCode ierr;
2426   PetscMPIInt    size;
2427 
2428   PetscFunctionBegin;
2429   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2430   PetscValidLogicalCollectiveBool(*A,!!data,6);
2431   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2432   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2433   if (size > 1) {
2434     ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
2435     ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2436     if (data) {  /* user provided data array, so no need to assemble */
2437       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2438       (*A)->assembled = PETSC_TRUE;
2439     }
2440   } else {
2441     ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
2442     ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2443   }
2444   PetscFunctionReturn(0);
2445 }
2446 #endif
2447 
2448 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2449 {
2450   Mat            mat;
2451   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2452   PetscErrorCode ierr;
2453 
2454   PetscFunctionBegin;
2455   *newmat = 0;
2456   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2457   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2458   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2459   a       = (Mat_MPIDense*)mat->data;
2460 
2461   mat->factortype   = A->factortype;
2462   mat->assembled    = PETSC_TRUE;
2463   mat->preallocated = PETSC_TRUE;
2464 
2465   mat->insertmode = NOT_SET_VALUES;
2466   a->donotstash   = oldmat->donotstash;
2467 
2468   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2469   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2470 
2471   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2472   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2473   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2474 
2475   *newmat = mat;
2476   PetscFunctionReturn(0);
2477 }
2478 
/*
   MatLoad_MPIDense - Loads an MPIDENSE matrix from a viewer.

   Supports binary viewers always, and HDF5 viewers when PETSc was configured
   with HDF5; any other viewer type raises PETSC_ERR_SUP.
*/
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  /* Determine which loader to dispatch to from the viewer's type */
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2505 
2506 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2507 {
2508   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2509   Mat            a,b;
2510   PetscBool      flg;
2511   PetscErrorCode ierr;
2512 
2513   PetscFunctionBegin;
2514   a    = matA->A;
2515   b    = matB->A;
2516   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2517   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2522 {
2523   PetscErrorCode        ierr;
2524   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2525 
2526   PetscFunctionBegin;
2527   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2528   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2529   ierr = PetscFree(atb);CHKERRQ(ierr);
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2534 {
2535   PetscErrorCode        ierr;
2536   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2537 
2538   PetscFunctionBegin;
2539   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2540   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2541   ierr = PetscFree(abt);CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
/*
   MatTransposeMatMultNumeric_MPIDense_MPIDense - Numeric phase of C = A^T * B.

   Each rank computes the full (redundant) product of its local blocks,
   repacks it by destination rank, and uses MPI_Reduce_scatter to sum the
   contributions and deliver each rank its owned rows of C.
*/
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  /* Buffers were sized in the symbolic phase and are reused across numeric calls */
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf */
  /* atbarray is the cM x cN local product in column-major order; pack it so that
     the rows destined for each rank are contiguous, per-rank column-major */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2589 
2590 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2591 {
2592   PetscErrorCode        ierr;
2593   MPI_Comm              comm;
2594   PetscMPIInt           size;
2595   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2596   Mat_TransMatMultDense *atb;
2597   PetscBool             cisdense;
2598   PetscInt              i;
2599   const PetscInt        *ranges;
2600 
2601   PetscFunctionBegin;
2602   MatCheckProduct(C,3);
2603   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2604   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2605   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2606     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2607   }
2608 
2609   /* create matrix product C */
2610   ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
2611   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
2612   if (!cisdense) {
2613     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2614   }
2615   ierr = MatSetUp(C);CHKERRQ(ierr);
2616 
2617   /* create data structure for reuse C */
2618   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2619   ierr = PetscNew(&atb);CHKERRQ(ierr);
2620   cM   = C->rmap->N;
2621   ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
2622   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
2623   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2624 
2625   C->product->data    = atb;
2626   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2627   PetscFunctionReturn(0);
2628 }
2629 
/*
   MatMatTransposeMultSymbolic_MPIDense_MPIDense - Symbolic phase of C = A * B^T.

   Sizes C, selects the algorithm from the product's alg string
   ("allgatherv" -> alg 0, anything else -> alg 1 "cyclic"), and allocates
   the communication buffers reused by the numeric phase.
*/
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B: the product contracts over columns, which must match */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C: shape is (A rows) x (B rows) */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* the cyclic algorithm uses point-to-point messages, so reserve a tag on C */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two ping-pong buffers, each large enough for the biggest rank's block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local B^T, buf[1] the gathered global B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    /* A->cmap->N == B->cmap->N here (local columns were checked equal above) */
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2682 
/*
   MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic - Numeric phase of C = A * B^T
   using a ring pipeline: each rank's block of B is cycled around the communicator
   with nonblocking sends/receives, overlapped with the local GEMM against A.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  /* Leading dimensions of the three local blocks, as BLAS integers */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* sendbuf must hold the local bn x cK block of B column-major with ld == bn;
     when B's lda already equals bn the raw array can be sent as-is */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* Ring neighbors: send "down", receive from "up" */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* sends/receives from the previous iteration are complete: buffers can be reused */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T, written into the columns of C owned by recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2768 
2769 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2770 {
2771   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2772   Mat_MatTransMultDense *abt;
2773   PetscErrorCode        ierr;
2774   MPI_Comm              comm;
2775   PetscMPIInt           size;
2776   PetscScalar           *cv, *sendbuf, *recvbuf;
2777   const PetscScalar     *av,*bv;
2778   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2779   PetscScalar           _DOne=1.0,_DZero=0.0;
2780   PetscBLASInt          cm, cn, ck, alda, clda;
2781 
2782   PetscFunctionBegin;
2783   MatCheckProduct(C,3);
2784   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2785   abt  = (Mat_MatTransMultDense*)C->product->data;
2786   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2787   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2788   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2789   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2790   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2791   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2792   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2793   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2794   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2795   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2796   /* copy transpose of B into buf[0] */
2797   bn      = B->rmap->n;
2798   sendbuf = abt->buf[0];
2799   recvbuf = abt->buf[1];
2800   for (k = 0, j = 0; j < bn; j++) {
2801     for (i = 0; i < cK; i++, k++) {
2802       sendbuf[k] = bv[i * blda + j];
2803     }
2804   }
2805   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2806   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2807   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2808   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2809   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2810   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2811   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2812   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2813   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2814   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2815   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2816   PetscFunctionReturn(0);
2817 }
2818 
2819 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2820 {
2821   Mat_MatTransMultDense *abt;
2822   PetscErrorCode        ierr;
2823 
2824   PetscFunctionBegin;
2825   MatCheckProduct(C,3);
2826   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2827   abt = (Mat_MatTransMultDense*)C->product->data;
2828   switch (abt->alg) {
2829   case 1:
2830     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2831     break;
2832   default:
2833     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2834     break;
2835   }
2836   PetscFunctionReturn(0);
2837 }
2838 
2839 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2840 {
2841   PetscErrorCode   ierr;
2842   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2843 
2844   PetscFunctionBegin;
2845   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2846   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2847   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2848   ierr = PetscFree(ab);CHKERRQ(ierr);
2849   PetscFunctionReturn(0);
2850 }
2851 
2852 #if defined(PETSC_HAVE_ELEMENTAL)
2853 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2854 {
2855   PetscErrorCode   ierr;
2856   Mat_MatMultDense *ab;
2857 
2858   PetscFunctionBegin;
2859   MatCheckProduct(C,3);
2860   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2861   ab   = (Mat_MatMultDense*)C->product->data;
2862   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2863   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2864   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2865   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2866   PetscFunctionReturn(0);
2867 }
2868 
2869 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2870 {
2871   PetscErrorCode   ierr;
2872   Mat              Ae,Be,Ce;
2873   Mat_MatMultDense *ab;
2874 
2875   PetscFunctionBegin;
2876   MatCheckProduct(C,4);
2877   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2878   /* check local size of A and B */
2879   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2880     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2881   }
2882 
2883   /* create elemental matrices Ae and Be */
2884   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2885   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2886   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2887   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2888   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2889 
2890   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2891   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2892   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2893   ierr = MatSetUp(Be);CHKERRQ(ierr);
2894   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2895 
2896   /* compute symbolic Ce = Ae*Be */
2897   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2898   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2899 
2900   /* setup C */
2901   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2902   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2903   ierr = MatSetUp(C);CHKERRQ(ierr);
2904 
2905   /* create data structure for reuse Cdense */
2906   ierr = PetscNew(&ab);CHKERRQ(ierr);
2907   ab->Ae = Ae;
2908   ab->Be = Be;
2909   ab->Ce = Ce;
2910 
2911   C->product->data    = ab;
2912   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2913   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2914   PetscFunctionReturn(0);
2915 }
2916 #endif
2917 /* ----------------------------------------------- */
2918 #if defined(PETSC_HAVE_ELEMENTAL)
2919 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2920 {
2921   PetscFunctionBegin;
2922   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2923   C->ops->productsymbolic = MatProductSymbolic_AB;
2924   PetscFunctionReturn(0);
2925 }
2926 #endif
2927 
2928 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2929 {
2930   Mat_Product *product = C->product;
2931   Mat         A = product->A,B=product->B;
2932 
2933   PetscFunctionBegin;
2934   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2935     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2936   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2937   C->ops->productsymbolic = MatProductSymbolic_AtB;
2938   PetscFunctionReturn(0);
2939 }
2940 
/* Hook up the ABt (C = A*B^T) product kernels and choose the communication
   algorithm ("allgatherv" or "cyclic"), honoring the user's runtime option.
   The option name depends on whether the product was requested through the
   old MatMatTransposeMult() API or the newer MatProduct interface. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    /* record the concrete default so product->alg is never left at "default" */
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option */
  if (product->api_user) {
    /* legacy MatMatTransposeMult() entry point: old-style option name */
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    /* MatProduct interface: namespaced option name */
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    /* user supplied an explicit choice; store it on the product */
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
2975 
2976 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2977 {
2978   PetscErrorCode ierr;
2979   Mat_Product    *product = C->product;
2980 
2981   PetscFunctionBegin;
2982   switch (product->type) {
2983 #if defined(PETSC_HAVE_ELEMENTAL)
2984   case MATPRODUCT_AB:
2985     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
2986     break;
2987 #endif
2988   case MATPRODUCT_AtB:
2989     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
2990     break;
2991   case MATPRODUCT_ABt:
2992     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
2993     break;
2994   default:
2995     break;
2996   }
2997   PetscFunctionReturn(0);
2998 }
2999