xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision a873a8cd69acc6fd9b12ad3d6b30ee1bf0a81da9)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else {
36     ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
37     if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
46   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
47   PetscErrorCode ierr;
48 
49   PetscFunctionBegin;
50   ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
55 {
56   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
57   PetscErrorCode ierr;
58   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
59 
60   PetscFunctionBegin;
61   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
62   lrow = row - rstart;
63   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
68 {
69   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
70   PetscErrorCode ierr;
71   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
72 
73   PetscFunctionBegin;
74   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
75   lrow = row - rstart;
76   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
81 {
82   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
83   PetscErrorCode ierr;
84   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
85   PetscScalar    *array;
86   MPI_Comm       comm;
87   PetscBool      flg;
88   Mat            B;
89 
90   PetscFunctionBegin;
91   ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
92   if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
93   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
94   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
95 
96     ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
97     if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
98     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
99     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
100     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
101     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
102     ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
103     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
104     ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
105     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
106     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
107     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
108     *a   = B;
109     ierr = MatDestroy(&B);CHKERRQ(ierr);
110   } else *a = B;
111   PetscFunctionReturn(0);
112 }
113 
/*
   Inserts/adds a logically dense block of values: locally owned rows go
   directly into the sequential block; off-process rows are stashed and
   communicated later by MatAssemblyBegin/End.  v is row-major when
   roworiented, otherwise column-major with m rows.
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row index in the sequential block */
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: entry (i,j) lives at v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are silently ignored */
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash the whole row slice for later communication */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
147 
148 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
149 {
150   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
151   PetscErrorCode ierr;
152   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
153 
154   PetscFunctionBegin;
155   for (i=0; i<m; i++) {
156     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
157     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
158     if (idxm[i] >= rstart && idxm[i] < rend) {
159       row = idxm[i] - rstart;
160       for (j=0; j<n; j++) {
161         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
162         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
163         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
164       }
165     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
171 {
172   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
/*
   Sets the leading dimension to be used by the local sequential dense block.
   If the inner matrix has not been created yet (preallocation not done), it is
   created here, empty, so the lda can be recorded before the data is allocated.
*/
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    /* inner block is local rows by ALL global columns */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    /* match the inner type (host or CUDA) to the outer type */
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
200 
201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
202 {
203   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
204   PetscErrorCode ierr;
205 
206   PetscFunctionBegin;
207   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
213 {
214   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
219   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
224 {
225   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
230   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 
234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
235 {
236   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
237   PetscErrorCode ierr;
238 
239   PetscFunctionBegin;
240   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
241   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
242   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
247 {
248   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
249   PetscErrorCode ierr;
250 
251   PetscFunctionBegin;
252   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
253   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
254   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
259 {
260   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
261   PetscErrorCode ierr;
262 
263   PetscFunctionBegin;
264   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
265   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
266   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
/*
   Extracts the submatrix A[isrow,iscol].  The column IS is gathered onto every
   rank, so each rank copies its locally owned rows for all selected columns.
   NOTE(review): only supports the case where the submatrix keeps the same row
   partitioning as A (no redistribution); the copy loop also assumes the new
   inner matrix stores its columns contiguously (lda == nrows) since bv is
   advanced element by element -- TODO confirm with MatMPIDenseSetPreallocation.
*/
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every rank needs all selected column indices since it owns full rows */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    /* source column icol[i] starts lda entries apart in the column-major store */
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart]; /* irow holds global indices; shift to local */
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343   PetscErrorCode ierr;
344 
345   PetscFunctionBegin;
346   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
351 {
352   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
357   PetscFunctionReturn(0);
358 }
359 
360 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
361 {
362   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
363   PetscErrorCode ierr;
364 
365   PetscFunctionBegin;
366   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
371 {
372   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
373   PetscErrorCode ierr;
374   PetscInt       nstash,reallocs;
375 
376   PetscFunctionBegin;
377   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
378 
379   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
380   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
381   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
382   PetscFunctionReturn(0);
383 }
384 
/*
   Completes assembly: drains the stash messages started in MatAssemblyBegin(),
   inserts the received values, assembles the local block, and on the first
   final assembly builds the scatter used by the matrix-vector products.
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: set up lvec/Mvctx for MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
423 
424 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
425 {
426   PetscErrorCode ierr;
427   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
428 
429   PetscFunctionBegin;
430   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
/*
   Zeros the listed (global) rows, optionally placing diag on the diagonal of
   those rows and fixing the right-hand side b so that the solution keeps the
   values given in x for the zeroed rows.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    /* b_r = diag * x_r for every locally owned zeroed row r */
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows of the local block (diag handled separately below since the
     diagonal column may live on this rank's slice at a different local index) */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    /* NOTE(review): this sets the WHOLE diagonal, not just the zeroed rows --
       relies on MatZeroRows semantics where remaining diagonal entries are
       overwritten with themselves? TODO confirm intended behavior */
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
471 
/*
   y = A x: broadcast the global x into the local work vector lvec through the
   PetscSF (memtype-aware, so host and device arrays are both handled), then
   multiply with the local sequential block.
*/
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  /* gather the full input vector into lvec */
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  /* local (m x N) times gathered x gives this rank's piece of y */
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
490 
/*
   z = y + A x: same communication pattern as MatMult_MPIDense(), followed by
   the local multiply-add.
*/
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  /* gather the full input vector into lvec */
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
509 
/*
   y = A^T x: apply the local transpose into the work vector, then sum the
   per-rank partial results into y via a memtype-aware SF reduction.
*/
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* y must start at zero since the reduction adds into it */
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
529 
/*
   z = y + A^T x: copy y into z, then sum the per-rank partial transpose
   products into z via a memtype-aware SF reduction.
*/
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* z starts as y; the reduction then adds the transpose product */
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
551 {
552   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
553   PetscErrorCode    ierr;
554   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
555   PetscScalar       *x,zero = 0.0;
556   const PetscScalar *av;
557 
558   PetscFunctionBegin;
559   ierr = VecSet(v,zero);CHKERRQ(ierr);
560   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
561   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
562   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
563   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
564   radd = A->rmap->rstart*m;
565   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
566   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
567   for (i=0; i<len; i++) {
568     x[i] = av[radd + i*lda + i];
569   }
570   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
571   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
/*
   Destroys the parallel dense matrix: frees the stash, the local sequential
   block, the communication objects, and clears all composed method pointers.
*/
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* an outstanding column-vector or submatrix access is a caller ordering error */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* free the local block, work vector, SF, and cached column vec/submatrix */
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  /* clear every composed method so a later MatSetType() starts from a clean slate */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
634 
635 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
636 
637 #include <petscdraw.h>
638 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
639 {
640   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
641   PetscErrorCode    ierr;
642   PetscMPIInt       rank;
643   PetscViewerType   vtype;
644   PetscBool         iascii,isdraw;
645   PetscViewer       sviewer;
646   PetscViewerFormat format;
647 
648   PetscFunctionBegin;
649   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
650   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
651   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
652   if (iascii) {
653     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
654     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
655     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
656       MatInfo info;
657       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
658       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
659       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
660       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
661       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
662       ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
663       PetscFunctionReturn(0);
664     } else if (format == PETSC_VIEWER_ASCII_INFO) {
665       PetscFunctionReturn(0);
666     }
667   } else if (isdraw) {
668     PetscDraw draw;
669     PetscBool isnull;
670 
671     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
672     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
673     if (isnull) PetscFunctionReturn(0);
674   }
675 
676   {
677     /* assemble the entire matrix onto first processor. */
678     Mat         A;
679     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
680     PetscInt    *cols;
681     PetscScalar *vals;
682 
683     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
684     if (!rank) {
685       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
686     } else {
687       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
688     }
689     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
690     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
691     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
692     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
693 
694     /* Copy the matrix ... This isn't the most efficient means,
695        but it's quick for now */
696     A->insertmode = INSERT_VALUES;
697 
698     row = mat->rmap->rstart;
699     m   = mdn->A->rmap->n;
700     for (i=0; i<m; i++) {
701       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
702       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
703       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
704       row++;
705     }
706 
707     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
710     if (!rank) {
711       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
712       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
713     }
714     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
715     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
716     ierr = MatDestroy(&A);CHKERRQ(ierr);
717   }
718   PetscFunctionReturn(0);
719 }
720 
721 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
722 {
723   PetscErrorCode ierr;
724   PetscBool      iascii,isbinary,isdraw,issocket;
725 
726   PetscFunctionBegin;
727   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
728   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
729   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
730   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
731 
732   if (iascii || issocket || isdraw) {
733     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
734   } else if (isbinary) {
735     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
741 {
742   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
743   Mat            mdn  = mat->A;
744   PetscErrorCode ierr;
745   PetscLogDouble isend[5],irecv[5];
746 
747   PetscFunctionBegin;
748   info->block_size = 1.0;
749 
750   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
751 
752   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
753   isend[3] = info->memory;  isend[4] = info->mallocs;
754   if (flag == MAT_LOCAL) {
755     info->nz_used      = isend[0];
756     info->nz_allocated = isend[1];
757     info->nz_unneeded  = isend[2];
758     info->memory       = isend[3];
759     info->mallocs      = isend[4];
760   } else if (flag == MAT_GLOBAL_MAX) {
761     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
762 
763     info->nz_used      = irecv[0];
764     info->nz_allocated = irecv[1];
765     info->nz_unneeded  = irecv[2];
766     info->memory       = irecv[3];
767     info->mallocs      = irecv[4];
768   } else if (flag == MAT_GLOBAL_SUM) {
769     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
770 
771     info->nz_used      = irecv[0];
772     info->nz_allocated = irecv[1];
773     info->nz_unneeded  = irecv[2];
774     info->memory       = irecv[3];
775     info->mallocs      = irecv[4];
776   }
777   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
778   info->fill_ratio_needed = 0;
779   info->factor_mallocs    = 0;
780   PetscFunctionReturn(0);
781 }
782 
783 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
784 {
785   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
786   PetscErrorCode ierr;
787 
788   PetscFunctionBegin;
789   switch (op) {
790   case MAT_NEW_NONZERO_LOCATIONS:
791   case MAT_NEW_NONZERO_LOCATION_ERR:
792   case MAT_NEW_NONZERO_ALLOCATION_ERR:
793     MatCheckPreallocated(A,1);
794     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
795     break;
796   case MAT_ROW_ORIENTED:
797     MatCheckPreallocated(A,1);
798     a->roworiented = flg;
799     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
800     break;
801   case MAT_FORCE_DIAGONAL_ENTRIES:
802   case MAT_KEEP_NONZERO_PATTERN:
803   case MAT_USE_HASH_TABLE:
804   case MAT_SORTED_FULL:
805     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
806     break;
807   case MAT_IGNORE_OFF_PROC_ENTRIES:
808     a->donotstash = flg;
809     break;
810   case MAT_SYMMETRIC:
811   case MAT_STRUCTURALLY_SYMMETRIC:
812   case MAT_HERMITIAN:
813   case MAT_SYMMETRY_ETERNAL:
814   case MAT_IGNORE_LOWER_TRIANGULAR:
815   case MAT_IGNORE_ZERO_ENTRIES:
816     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
817     break;
818   default:
819     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
825 {
826   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
827   const PetscScalar *l;
828   PetscScalar       x,*v,*vv,*r;
829   PetscErrorCode    ierr;
830   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
831 
832   PetscFunctionBegin;
833   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
834   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
835   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
836   if (ll) {
837     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
838     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
839     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
840     for (i=0; i<m; i++) {
841       x = l[i];
842       v = vv + i;
843       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
844     }
845     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
846     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
847   }
848   if (rr) {
849     const PetscScalar *ar;
850 
851     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
852     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
853     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
854     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
855     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
856     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
857     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
858     for (i=0; i<n; i++) {
859       x = r[i];
860       v = vv + i*lda;
861       for (j=0; j<m; j++) (*v++) *= x;
862     }
863     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
864     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
865   }
866   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
870 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
871 {
872   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
873   PetscErrorCode    ierr;
874   PetscInt          i,j;
875   PetscMPIInt       size;
876   PetscReal         sum = 0.0;
877   const PetscScalar *av,*v;
878 
879   PetscFunctionBegin;
880   ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
881   v    = av;
882   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
883   if (size == 1) {
884     ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
885   } else {
886     if (type == NORM_FROBENIUS) {
887       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
888         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
889       }
890       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
891       *nrm = PetscSqrtReal(*nrm);
892       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
893     } else if (type == NORM_1) {
894       PetscReal *tmp,*tmp2;
895       ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
896       *nrm = 0.0;
897       v    = av;
898       for (j=0; j<mdn->A->cmap->n; j++) {
899         for (i=0; i<mdn->A->rmap->n; i++) {
900           tmp[j] += PetscAbsScalar(*v);  v++;
901         }
902       }
903       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
904       for (j=0; j<A->cmap->N; j++) {
905         if (tmp2[j] > *nrm) *nrm = tmp2[j];
906       }
907       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
908       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
909     } else if (type == NORM_INFINITY) { /* max row norm */
910       PetscReal ntemp;
911       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
912       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
913     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
914   }
915   ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
916   PetscFunctionReturn(0);
917 }
918 
/*
   MatTranspose_MPIDense - Forms the transpose of the parallel dense matrix A.

   Each process scatters its local rows into the columns of the result with
   MatSetValues(); the final assembly routes entries to their owning
   processes.  Because the local block stores all global columns, the local
   column index j is also the global row index of the transpose.

   Reuse semantics: MAT_INITIAL_MATRIX and MAT_INPLACE_MATRIX both build a
   fresh matrix B; in the in-place case B then replaces A's internals via
   MatHeaderMerge().  MAT_REUSE_MATRIX fills the caller-provided *matout.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* B gets A's layouts swapped: N x M globally, cmap->n x rmap->n locally */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  /* rwork[] holds the global indices of the rows owned by this process */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  /* column j of the local block (contiguous, leading dimension lda) becomes row j of B */
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's data with B's and destroy the shell of B */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
956 
957 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
958 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
959 
960 PetscErrorCode MatSetUp_MPIDense(Mat A)
961 {
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
966   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
967   if (!A->preallocated) {
968     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
969   }
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
974 {
975   PetscErrorCode ierr;
976   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
977 
978   PetscFunctionBegin;
979   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatConjugate_MPIDense(Mat mat)
984 {
985   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = MatConjugate(a->A);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 PetscErrorCode MatRealPart_MPIDense(Mat A)
994 {
995   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
996   PetscErrorCode ierr;
997 
998   PetscFunctionBegin;
999   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1004 {
1005   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1006   PetscErrorCode ierr;
1007 
1008   PetscFunctionBegin;
1009   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1014 {
1015   PetscErrorCode ierr;
1016   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1017 
1018   PetscFunctionBegin;
1019   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1020   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1021   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,ReductionType,PetscReal*);
1026 
1027 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,ReductionType type,PetscReal *reductions)
1028 {
1029   PetscErrorCode ierr;
1030   PetscInt       i,m,n;
1031   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1032   PetscReal      *work;
1033 
1034   PetscFunctionBegin;
1035   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
1036   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1037   if (type == REDUCTION_MEAN) {
1038     ierr = MatGetColumnReductions_SeqDense(a->A,REDUCTION_SUM,work);CHKERRQ(ierr);
1039   } else {
1040     ierr = MatGetColumnReductions_SeqDense(a->A,type,work);CHKERRQ(ierr);
1041   }
1042   if (type == REDUCTION_NORM_2) {
1043     for (i=0; i<n; i++) work[i] *= work[i];
1044   }
1045   if (type == REDUCTION_NORM_INFINITY) {
1046     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRMPI(ierr);
1047   } else {
1048     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRMPI(ierr);
1049   }
1050   ierr = PetscFree(work);CHKERRQ(ierr);
1051   if (type == REDUCTION_NORM_2) {
1052     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1053   } else if (type == REDUCTION_MEAN) {
1054     for (i=0; i<n; i++) reductions[i] /= m;
1055   }
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 #if defined(PETSC_HAVE_CUDA)
/*
   MatDenseGetColumnVec_MPIDenseCUDA - Returns a CUDA vector that aliases
   (read/write) column col of the matrix's device array.

   Only one column vector may be outstanding at a time; a->vecinuse stores
   col+1 (0 means not in use) so the matching Restore can be enforced.
   The wrapper vector a->cvec is created lazily with a NULL array and the
   device pointer is placed into it at the column offset col*lda.
*/
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; its array is supplied per column below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so column 0 is distinguishable from "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1080 
/*
   MatDenseRestoreColumnVec_MPIDenseCUDA - Returns the column vector obtained
   with MatDenseGetColumnVec_MPIDenseCUDA(), restoring the device array of
   the local matrix and detaching the placed pointer from the wrapper vector.

   NOTE(review): errors here are raised on PETSC_COMM_SELF while the Read
   variant below uses the matrix communicator — confirm which is intended.
*/
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;  /* mark the column vector as available again */
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1095 
/*
   MatDenseGetColumnVecRead_MPIDenseCUDA - Read-only variant of
   MatDenseGetColumnVec_MPIDenseCUDA(): returns a read-locked CUDA vector
   aliasing column col of the matrix's device array.
*/
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; its array is supplied per column below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so column 0 is distinguishable from "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  /* read-lock the vector so callers cannot write through the read-only view */
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1117 
/*
   MatDenseRestoreColumnVecRead_MPIDenseCUDA - Returns the read-only column
   vector obtained with MatDenseGetColumnVecRead_MPIDenseCUDA(), releasing
   the read lock and the device array before detaching the placed pointer.
*/
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;  /* mark the column vector as available again */
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1133 
/*
   MatDenseGetColumnVecWrite_MPIDenseCUDA - Write-only variant of
   MatDenseGetColumnVec_MPIDenseCUDA(): returns a CUDA vector aliasing
   column col of the matrix's device array, obtained for writing.
*/
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; its array is supplied per column below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so column 0 is distinguishable from "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1154 
/*
   MatDenseRestoreColumnVecWrite_MPIDenseCUDA - Returns the write-only column
   vector obtained with MatDenseGetColumnVecWrite_MPIDenseCUDA(), restoring
   the device array and detaching the placed pointer from the wrapper vector.
*/
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;  /* mark the column vector as available again */
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1169 
1170 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1171 {
1172   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1173   PetscErrorCode ierr;
1174 
1175   PetscFunctionBegin;
1176   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1177   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1178   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1183 {
1184   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1185   PetscErrorCode ierr;
1186 
1187   PetscFunctionBegin;
1188   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1189   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1190   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1195 {
1196   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1197   PetscErrorCode ierr;
1198 
1199   PetscFunctionBegin;
1200   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1201   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1202   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1207 {
1208   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1209   PetscErrorCode ierr;
1210 
1211   PetscFunctionBegin;
1212   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1217 {
1218   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1219   PetscErrorCode ierr;
1220 
1221   PetscFunctionBegin;
1222   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1227 {
1228   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1229   PetscErrorCode ierr;
1230 
1231   PetscFunctionBegin;
1232   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1237 {
1238   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1239   PetscErrorCode ierr;
1240 
1241   PetscFunctionBegin;
1242   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1247 {
1248   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1257 {
1258   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1259   PetscErrorCode ierr;
1260 
1261   PetscFunctionBegin;
1262   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1267 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1268 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1269 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1270 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1271 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1272 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1273 
/*
   MatBindToCPU_MPIDenseCUDA - Toggles whether operations on mat run on the
   CPU (bind = PETSC_TRUE) or may run on the GPU (bind = PETSC_FALSE).

   Forwards the binding to the local block, then swaps the composed
   column-vector access functions between the CUDA and host implementations.
   When unbinding, cached helper objects of the wrong (host) type are
   destroyed so they are recreated lazily with the CUDA type.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* cannot rebind while a column vector or submatrix view is outstanding */
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached helpers that are not CUDA-capable so they are recreated lazily */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    /* install the CUDA column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* install the host column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    /* propagate the binding to the cached submatrix as well */
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1316 
/*
   MatMPIDenseCUDASetPreallocation - Allocates (or adopts) the device storage
   for a MATMPIDENSECUDA matrix.

   Input Parameters:
+  A      - the matrix (silently ignored unless its type is MATMPIDENSECUDA)
-  d_data - optional device array to use for the local block; pass NULL to
            have PETSc allocate it

   The local sequential block d->A is created lazily, sized rmap->n x cmap->N
   (each process stores all global columns of its rows), typed as
   MATSEQDENSECUDA, and preallocated with d_data.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* lazily create the local block covering this process's rows and all columns */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1339 #endif
1340 
1341 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1342 {
1343   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1344   PetscErrorCode ierr;
1345 
1346   PetscFunctionBegin;
1347   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
/* A dense matrix stores every entry, so a diagonal entry can never be
   missing; *missing is always PETSC_FALSE and the location output d is
   left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1357 
1358 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1359 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1360 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1361 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1362 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1363 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1364 
1365 /* -------------------------------------------------------------------*/
1366 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1367                                         MatGetRow_MPIDense,
1368                                         MatRestoreRow_MPIDense,
1369                                         MatMult_MPIDense,
1370                                 /*  4*/ MatMultAdd_MPIDense,
1371                                         MatMultTranspose_MPIDense,
1372                                         MatMultTransposeAdd_MPIDense,
1373                                         NULL,
1374                                         NULL,
1375                                         NULL,
1376                                 /* 10*/ NULL,
1377                                         NULL,
1378                                         NULL,
1379                                         NULL,
1380                                         MatTranspose_MPIDense,
1381                                 /* 15*/ MatGetInfo_MPIDense,
1382                                         MatEqual_MPIDense,
1383                                         MatGetDiagonal_MPIDense,
1384                                         MatDiagonalScale_MPIDense,
1385                                         MatNorm_MPIDense,
1386                                 /* 20*/ MatAssemblyBegin_MPIDense,
1387                                         MatAssemblyEnd_MPIDense,
1388                                         MatSetOption_MPIDense,
1389                                         MatZeroEntries_MPIDense,
1390                                 /* 24*/ MatZeroRows_MPIDense,
1391                                         NULL,
1392                                         NULL,
1393                                         NULL,
1394                                         NULL,
1395                                 /* 29*/ MatSetUp_MPIDense,
1396                                         NULL,
1397                                         NULL,
1398                                         MatGetDiagonalBlock_MPIDense,
1399                                         NULL,
1400                                 /* 34*/ MatDuplicate_MPIDense,
1401                                         NULL,
1402                                         NULL,
1403                                         NULL,
1404                                         NULL,
1405                                 /* 39*/ MatAXPY_MPIDense,
1406                                         MatCreateSubMatrices_MPIDense,
1407                                         NULL,
1408                                         MatGetValues_MPIDense,
1409                                         MatCopy_MPIDense,
1410                                 /* 44*/ NULL,
1411                                         MatScale_MPIDense,
1412                                         MatShift_Basic,
1413                                         NULL,
1414                                         NULL,
1415                                 /* 49*/ MatSetRandom_MPIDense,
1416                                         NULL,
1417                                         NULL,
1418                                         NULL,
1419                                         NULL,
1420                                 /* 54*/ NULL,
1421                                         NULL,
1422                                         NULL,
1423                                         NULL,
1424                                         NULL,
1425                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1426                                         MatDestroy_MPIDense,
1427                                         MatView_MPIDense,
1428                                         NULL,
1429                                         NULL,
1430                                 /* 64*/ NULL,
1431                                         NULL,
1432                                         NULL,
1433                                         NULL,
1434                                         NULL,
1435                                 /* 69*/ NULL,
1436                                         NULL,
1437                                         NULL,
1438                                         NULL,
1439                                         NULL,
1440                                 /* 74*/ NULL,
1441                                         NULL,
1442                                         NULL,
1443                                         NULL,
1444                                         NULL,
1445                                 /* 79*/ NULL,
1446                                         NULL,
1447                                         NULL,
1448                                         NULL,
1449                                 /* 83*/ MatLoad_MPIDense,
1450                                         NULL,
1451                                         NULL,
1452                                         NULL,
1453                                         NULL,
1454                                         NULL,
1455                                 /* 89*/ NULL,
1456                                         NULL,
1457                                         NULL,
1458                                         NULL,
1459                                         NULL,
1460                                 /* 94*/ NULL,
1461                                         NULL,
1462                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1463                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1464                                         NULL,
1465                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1466                                         NULL,
1467                                         NULL,
1468                                         MatConjugate_MPIDense,
1469                                         NULL,
1470                                 /*104*/ NULL,
1471                                         MatRealPart_MPIDense,
1472                                         MatImaginaryPart_MPIDense,
1473                                         NULL,
1474                                         NULL,
1475                                 /*109*/ NULL,
1476                                         NULL,
1477                                         NULL,
1478                                         MatGetColumnVector_MPIDense,
1479                                         MatMissingDiagonal_MPIDense,
1480                                 /*114*/ NULL,
1481                                         NULL,
1482                                         NULL,
1483                                         NULL,
1484                                         NULL,
1485                                 /*119*/ NULL,
1486                                         NULL,
1487                                         NULL,
1488                                         NULL,
1489                                         NULL,
1490                                 /*124*/ NULL,
1491                                         MatGetColumnReductions_MPIDense,
1492                                         NULL,
1493                                         NULL,
1494                                         NULL,
1495                                 /*129*/ NULL,
1496                                         NULL,
1497                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1498                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1499                                         NULL,
1500                                 /*134*/ NULL,
1501                                         NULL,
1502                                         NULL,
1503                                         NULL,
1504                                         NULL,
1505                                 /*139*/ NULL,
1506                                         NULL,
1507                                         NULL,
1508                                         NULL,
1509                                         NULL,
1510                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1511                                 /*145*/ NULL,
1512                                         NULL,
1513                                         NULL
1514 };
1515 
1516 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1517 {
1518   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1519   PetscBool      iscuda;
1520   PetscErrorCode ierr;
1521 
1522   PetscFunctionBegin;
1523   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1524   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1525   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1526   if (!a->A) {
1527     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1528     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1529     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1530   }
1531   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1532   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1533   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1534   mat->preallocated = PETSC_TRUE;
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1539 {
1540   Mat            B,C;
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
1545   ierr = MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1546   ierr = MatDestroy(&C);CHKERRQ(ierr);
1547   if (reuse == MAT_REUSE_MATRIX) {
1548     C = *newmat;
1549   } else C = NULL;
1550   ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1551   ierr = MatDestroy(&B);CHKERRQ(ierr);
1552   if (reuse == MAT_INPLACE_MATRIX) {
1553     ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
1554   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1559 {
1560   Mat            B,C;
1561   PetscErrorCode ierr;
1562 
1563   PetscFunctionBegin;
1564   ierr = MatDenseGetLocalMatrix(A,&C);CHKERRQ(ierr);
1565   ierr = MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1566   if (reuse == MAT_REUSE_MATRIX) {
1567     C = *newmat;
1568   } else C = NULL;
1569   ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1570   ierr = MatDestroy(&B);CHKERRQ(ierr);
1571   if (reuse == MAT_INPLACE_MATRIX) {
1572     ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
1573   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 #if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a parallel dense matrix to MATELEMENTAL by inserting every locally owned
   row (all N global columns) into a new or reused Elemental matrix via MatSetValues().
   Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  /* m: local row count; N: global column count; rstart: first globally numbered local row */
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the previously created Elemental matrix; wipe old values since we ADD below */
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* dense local storage is column-major, so insert column-oriented */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* index arrays: all global columns, and this rank's contiguous row range */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1616 #endif
1617 
1618 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1619 {
1620   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1621   PetscErrorCode ierr;
1622 
1623   PetscFunctionBegin;
1624   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1625   PetscFunctionReturn(0);
1626 }
1627 
1628 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1629 {
1630   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1631   PetscErrorCode ierr;
1632 
1633   PetscFunctionBegin;
1634   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1639 {
1640   PetscErrorCode ierr;
1641   Mat_MPIDense   *mat;
1642   PetscInt       m,nloc,N;
1643 
1644   PetscFunctionBegin;
1645   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1646   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1647   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1648     PetscInt sum;
1649 
1650     if (n == PETSC_DECIDE) {
1651       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1652     }
1653     /* Check sum(n) = N */
1654     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
1655     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1656 
1657     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1658     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
1659   }
1660 
1661   /* numeric phase */
1662   mat = (Mat_MPIDense*)(*outmat)->data;
1663   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1664   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1665   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 #if defined(PETSC_HAVE_CUDA)
/* Convert MATMPIDENSECUDA back to plain MATMPIDENSE: bind to CPU, restore the host
   vector type, unregister the CUDA-only composed methods, and convert the inner
   sequential block to MATSEQDENSE. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* NOTE(review): for MAT_INPLACE_MATRIX neither branch runs, so *newmat is
     presumably M itself — confirm with callers */

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* composing NULL removes each CUDA-specific method registration */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* move the sequential block back to the host type and rebuild the multiply machinery */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1711 
/* Convert MATMPIDENSE to MATMPIDENSECUDA: switch the default vector type to CUDA,
   register the CUDA-specific composed methods, convert the inner sequential block
   to MATSEQDENSECUDA, and enable GPU/CPU binding. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* NOTE(review): for MAT_INPLACE_MATRIX, *newmat is presumably M itself — confirm with callers */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* register CUDA-specific implementations; mirrors the NULLing in MatConvert_MPIDenseCUDA_MPIDense() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B->data);
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    /* no local block yet; no data to be resident anywhere */
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1756 #endif
1757 
1758 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1759 {
1760   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1761   PetscErrorCode ierr;
1762   PetscInt       lda;
1763 
1764   PetscFunctionBegin;
1765   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1766   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1767   if (!a->cvec) {
1768     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1769   }
1770   a->vecinuse = col + 1;
1771   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1772   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1773   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1774   *v   = a->cvec;
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1779 {
1780   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1781   PetscErrorCode ierr;
1782 
1783   PetscFunctionBegin;
1784   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1785   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1786   a->vecinuse = 0;
1787   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1788   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1789   *v   = NULL;
1790   PetscFunctionReturn(0);
1791 }
1792 
/* Read-only variant of MatDenseGetColumnVec_MPIDense(): exposes column 'col' as a
   Vec sharing the matrix storage and locks it against modification. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* only one outstanding column vector or submatrix view at a time */
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* lazily create the shell vector; its array is attached below */
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* stored off-by-one so 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts lda entries after column col-1 in the column-major local array */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1813 
1814 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1815 {
1816   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1817   PetscErrorCode ierr;
1818 
1819   PetscFunctionBegin;
1820   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1821   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1822   a->vecinuse = 0;
1823   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1824   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1825   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1826   *v   = NULL;
1827   PetscFunctionReturn(0);
1828 }
1829 
/* Write-only variant of MatDenseGetColumnVec_MPIDense(): exposes column 'col' as a
   Vec sharing the matrix storage; existing values need not be valid on the device/host
   since the array is obtained with MatDenseGetArrayWrite(). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* only one outstanding column vector or submatrix view at a time */
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* lazily create the shell vector; its array is attached below */
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* stored off-by-one so 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts lda entries after column col-1 in the column-major local array */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1849 
1850 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1851 {
1852   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1853   PetscErrorCode ierr;
1854 
1855   PetscFunctionBegin;
1856   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1857   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1858   a->vecinuse = 0;
1859   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1860   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1861   *v   = NULL;
1862   PetscFunctionReturn(0);
1863 }
1864 
/* Return a parallel dense matrix a->cmat viewing columns [cbegin,cend) of A, sharing
   A's storage. The view is created lazily and reused across calls; its column layout
   is rebuilt when the requested width changes. Only one outstanding view at a time
   (tracked via a->matinuse). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE; /* true when the view's layouts were (re)built */

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first use: create the view with A's row layout and the requested column count */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* width changed since last use: rebuild only the column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  /* the view's inner sequential matrix must have been restored by a prior MatDenseRestoreSubMatrix() */
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  /* mark the view usable without going through MatAssembly */
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* stored off-by-one so 0 means "not in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1904 
1905 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1906 {
1907   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1908   Mat_MPIDense   *c;
1909   PetscErrorCode ierr;
1910 
1911   PetscFunctionBegin;
1912   if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1913   if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1914   if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1915   a->matinuse = 0;
1916   c    = (Mat_MPIDense*)a->cmat->data;
1917   ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
1918   *v   = NULL;
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 /*MC
1923    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1924 
1925    Options Database Keys:
1926 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1927 
1928   Level: beginner
1929 
1930 .seealso: MatCreateDense()
1931 
1932 M*/
/* Constructor for MATMPIDENSE: allocates the implementation struct, installs the
   function table, creates the value stash, and registers all composed methods
   (dense array access, column vectors, submatrix views, conversions, products). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* register the dense-specific methods queried by the MatDense* API */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* conversions to/from other parallel formats */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  /* matrix-product dispatch for AIJ/dense combinations */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1998 
1999 /*MC
2000    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
2001 
2002    Options Database Keys:
2003 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
2004 
2005   Level: beginner
2006 
.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA
2008 
2009 M*/
2010 #if defined(PETSC_HAVE_CUDA)
/* Constructor for MATMPIDENSECUDA: builds a host MATMPIDENSE first and then converts
   it in place to the CUDA type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* fail early if CUDA cannot be initialized */
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2021 #endif
2022 
2023 /*MC
2024    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2025 
2026    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
2027    and MATMPIDENSE otherwise.
2028 
2029    Options Database Keys:
2030 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
2031 
2032   Level: beginner
2033 
2034 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
2035 M*/
2036 
2037 /*MC
2038    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2039 
2040    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
2041    and MATMPIDENSECUDA otherwise.
2042 
2043    Options Database Keys:
2044 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
2045 
2046   Level: beginner
2047 
2048 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
2049 M*/
2050 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation if one is registered; silently a no-op otherwise */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2082 
/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix values may change when the array is swapped, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the user-supplied array lives in host memory: mark the CPU copy as the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2116 
/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  /* restoring the original array changes the visible values, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2143 
/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* unlike MatDensePlaceArray(), the matrix takes ownership of the new array permanently */
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array is host memory: the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2176 
2177 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the user-supplied array is device memory: the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2208 
/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  /* restoring the original device array changes the visible values, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2235 
/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* unlike MatDenseCUDAPlaceArray(), ownership of the device array transfers to the matrix */
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the replacement array is device memory: the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2267 
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* the caller is expected to write through the returned pointer, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2296 
/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   After this call the matrix values are considered up to date on the GPU.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the GPU now holds the authoritative copy of the values */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2323 
/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read-only access: the object state is deliberately NOT increased here */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2351 
2352 /*@C
2353    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2354 
2355    Not Collective
2356 
2357    Input Parameters:
2358 +  A - the matrix
2359 -  array - the GPU array in column major order
2360 
2361    Notes:
2362    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2363 
2364    Level: developer
2365 
2366 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2367 @*/
2368 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2369 {
2370   PetscErrorCode ierr;
2371 
2372   PetscFunctionBegin;
2373   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2374   PetscFunctionReturn(0);
2375 }
2376 
/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* read-write access: the caller may modify the values, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2405 
/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   After this call the matrix values are considered up to date on the GPU.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the GPU now holds the authoritative copy of the values */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2432 #endif
2433 
/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   Note that, although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* treat the matrix as having user data if ANY rank supplied an array
       (a rank with zero local rows may legitimately pass data=NULL) */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2493 
2494 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Notes:
   All ranks must either pass a data array or set data=NULL; this is checked
   (in debug mode) collectively.

   Output Parameter:
.  A - the matrix

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* all ranks must agree on whether a data array was supplied */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2541 #endif
2542 
/* Duplicate an MPIDENSE matrix: creates a new matrix of the same (possibly derived) type,
   shares the row/column layouts by reference, and duplicates the local dense block. */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  /* use the runtime type name so subclasses (e.g. CUDA) are duplicated as themselves */
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* layouts are shared by reference counting, not copied */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate the local sequential block; cpvalues controls whether values are copied */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
2573 
/* Load an MPIDENSE matrix from a viewer; supports binary and (when built) HDF5 viewers. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2600 
2601 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2602 {
2603   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2604   Mat            a,b;
2605   PetscBool      flg;
2606   PetscErrorCode ierr;
2607 
2608   PetscFunctionBegin;
2609   a    = matA->A;
2610   b    = matB->A;
2611   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2612   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2613   PetscFunctionReturn(0);
2614 }
2615 
/* Product-data destructor for C = A^T*B (dense-dense): frees the communication buffers
   and the cached intermediate matrix created by the symbolic phase. */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2627 
/* Product-data destructor for C = A*B^T (dense-dense): frees the double buffers used by
   the cyclic algorithm and the counts/displacements used by the allgatherv algorithm. */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2639 
/* Numeric phase of C = A^T*B for MPIDENSE matrices.
   Each rank computes the full-sized local product aseq^T * bseq, then the global sum is
   formed and distributed by rows with a single MPI_Reduce_scatter. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: for each destination rank, its row block is packed
     contiguously in column-major order, as MPI_Reduce_scatter requires */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2684 
/* Symbolic phase of C = A^T*B for MPIDENSE matrices: checks compatibility, sizes C,
   and allocates the reusable send buffer and reduce-scatter counts. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A and B must share the same row distribution for the local products to line up */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* sendbuf holds the full global product on every rank; size_t cast avoids PetscInt overflow */
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  /* each rank receives its own row block (all cN columns) from the reduce-scatter */
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2724 
/* Symbolic phase of C = A*B^T for MPIDENSE matrices: sizes C and allocates the buffers
   needed by whichever numeric algorithm was selected ("allgatherv" or "cyclic"). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  /* alg 0 = "allgatherv", anything else = "cyclic" */
  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* a dedicated tag keeps the cyclic point-to-point traffic from colliding with other messages */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" -- two buffers sized for the largest row block of B, ping-ponged around the ring */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" -- buf[0] holds the packed local block, buf[1] the gathered global B */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2777 
/* Numeric phase of C = A*B^T using the "cyclic" algorithm: the row blocks of B travel
   around the ring of ranks; in each of the `size` steps every rank multiplies its local
   A block against the B block it currently holds, overlapping communication (nonblocking
   send/recv into the idle buffer) with the local BLAS GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  /* convert the three leading dimensions to BLAS integer width */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* if the local B block is already contiguous (lda == row count) send it in place;
     otherwise pack it densely into the first buffer */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring neighbors: send "downstream" to rank-1, receive from rank+1 */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;  /* the block currently held originated on this rank */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];  /* alternate buffers each step */
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2863 
2864 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2865 {
2866   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2867   Mat_MatTransMultDense *abt;
2868   PetscErrorCode        ierr;
2869   MPI_Comm              comm;
2870   PetscMPIInt           size;
2871   PetscScalar           *cv, *sendbuf, *recvbuf;
2872   const PetscScalar     *av,*bv;
2873   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2874   PetscScalar           _DOne=1.0,_DZero=0.0;
2875   PetscBLASInt          cm, cn, ck, alda, clda;
2876 
2877   PetscFunctionBegin;
2878   MatCheckProduct(C,3);
2879   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2880   abt  = (Mat_MatTransMultDense*)C->product->data;
2881   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2882   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2883   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2884   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2885   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2886   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2887   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2888   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2889   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2890   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2891   /* copy transpose of B into buf[0] */
2892   bn      = B->rmap->n;
2893   sendbuf = abt->buf[0];
2894   recvbuf = abt->buf[1];
2895   for (k = 0, j = 0; j < bn; j++) {
2896     for (i = 0; i < cK; i++, k++) {
2897       sendbuf[k] = bv[i * blda + j];
2898     }
2899   }
2900   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2901   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr);
2902   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2903   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2904   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2905   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2906   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2907   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2908   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2909   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2910   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2911   PetscFunctionReturn(0);
2912 }
2913 
2914 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2915 {
2916   Mat_MatTransMultDense *abt;
2917   PetscErrorCode        ierr;
2918 
2919   PetscFunctionBegin;
2920   MatCheckProduct(C,3);
2921   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2922   abt = (Mat_MatTransMultDense*)C->product->data;
2923   switch (abt->alg) {
2924   case 1:
2925     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2926     break;
2927   default:
2928     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2929     break;
2930   }
2931   PetscFunctionReturn(0);
2932 }
2933 
2934 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2935 {
2936   PetscErrorCode   ierr;
2937   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2938 
2939   PetscFunctionBegin;
2940   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2941   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2942   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2943   ierr = PetscFree(ab);CHKERRQ(ierr);
2944   PetscFunctionReturn(0);
2945 }
2946 
2947 #if defined(PETSC_HAVE_ELEMENTAL)
2948 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2949 {
2950   PetscErrorCode   ierr;
2951   Mat_MatMultDense *ab;
2952 
2953   PetscFunctionBegin;
2954   MatCheckProduct(C,3);
2955   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2956   ab   = (Mat_MatMultDense*)C->product->data;
2957   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2958   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2959   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2960   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2961   PetscFunctionReturn(0);
2962 }
2963 
2964 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2965 {
2966   PetscErrorCode   ierr;
2967   Mat              Ae,Be,Ce;
2968   Mat_MatMultDense *ab;
2969 
2970   PetscFunctionBegin;
2971   MatCheckProduct(C,4);
2972   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2973   /* check local size of A and B */
2974   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2975     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2976   }
2977 
2978   /* create elemental matrices Ae and Be */
2979   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2980   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2981   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2982   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2983   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2984 
2985   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2986   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2987   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2988   ierr = MatSetUp(Be);CHKERRQ(ierr);
2989   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2990 
2991   /* compute symbolic Ce = Ae*Be */
2992   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2993   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2994 
2995   /* setup C */
2996   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2997   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2998   ierr = MatSetUp(C);CHKERRQ(ierr);
2999 
3000   /* create data structure for reuse Cdense */
3001   ierr = PetscNew(&ab);CHKERRQ(ierr);
3002   ab->Ae = Ae;
3003   ab->Be = Be;
3004   ab->Ce = Ce;
3005 
3006   C->product->data    = ab;
3007   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
3008   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
3009   PetscFunctionReturn(0);
3010 }
3011 #endif
3012 /* ----------------------------------------------- */
3013 #if defined(PETSC_HAVE_ELEMENTAL)
3014 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
3015 {
3016   PetscFunctionBegin;
3017   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
3018   C->ops->productsymbolic = MatProductSymbolic_AB;
3019   PetscFunctionReturn(0);
3020 }
3021 #endif
3022 
3023 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
3024 {
3025   Mat_Product *product = C->product;
3026   Mat         A = product->A,B=product->B;
3027 
3028   PetscFunctionBegin;
3029   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
3030     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
3031   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
3032   C->ops->productsymbolic = MatProductSymbolic_AtB;
3033   PetscFunctionReturn(0);
3034 }
3035 
3036 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
3037 {
3038   PetscErrorCode ierr;
3039   Mat_Product    *product = C->product;
3040   const char     *algTypes[2] = {"allgatherv","cyclic"};
3041   PetscInt       alg,nalg = 2;
3042   PetscBool      flg = PETSC_FALSE;
3043 
3044   PetscFunctionBegin;
3045   /* Set default algorithm */
3046   alg = 0; /* default is allgatherv */
3047   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
3048   if (flg) {
3049     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
3050   }
3051 
3052   /* Get runtime option */
3053   if (product->api_user) {
3054     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
3055     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
3056     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3057   } else {
3058     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
3059     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
3060     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3061   }
3062   if (flg) {
3063     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
3064   }
3065 
3066   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3067   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3072 {
3073   PetscErrorCode ierr;
3074   Mat_Product    *product = C->product;
3075 
3076   PetscFunctionBegin;
3077   switch (product->type) {
3078 #if defined(PETSC_HAVE_ELEMENTAL)
3079   case MATPRODUCT_AB:
3080     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
3081     break;
3082 #endif
3083   case MATPRODUCT_AtB:
3084     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
3085     break;
3086   case MATPRODUCT_ABt:
3087     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
3088     break;
3089   default:
3090     break;
3091   }
3092   PetscFunctionReturn(0);
3093 }
3094