xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 93c87e653a96ce16476a56a0af60371aaa497750)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else *B = A;
36   PetscFunctionReturn(0);
37 }
38 
39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
40 {
41   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
44 
45   PetscFunctionBegin;
46   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
47   lrow = row - rstart;
48   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscErrorCode ierr;
56   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
57 
58   PetscFunctionBegin;
59   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
60   lrow = row - rstart;
61   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
62   PetscFunctionReturn(0);
63 }
64 
/* Return the square diagonal block of A owned by this rank as a sequential dense
   matrix.  The block is created lazily on first call, shares the storage of the
   local dense array (no copy), and is cached on A under the name "DiagonalBlock". */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use a new function, e.g. MatDenseGetSubMatrix (not create) */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* The diagonal block starts at column rstart of the local array; the
       preallocation aliases that storage rather than copying it.
       NOTE(review): offset m*rstart presumably assumes lda == m — confirm */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* Compose takes its own reference; the MatDestroy below only drops ours,
       so *a remains a valid borrowed reference owned by A. */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
98 
/* Insert/add a logically dense subblock of values.  Locally owned rows go
   straight into the sequential block; off-process rows are stashed for the
   communication performed during assembly (unless stashing is disabled). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row: convert to a local index and insert directly */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i occupies v[i*n .. i*n+n-1] */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is column-major: element (i,j) is at v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are silently skipped */
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash; mark unassembled so assembly will run the scatter */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
132 
133 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
134 {
135   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
136   PetscErrorCode ierr;
137   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
142     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       for (j=0; j<n; j++) {
146         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
147         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
148         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
149       }
150     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
156 {
157   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
158   PetscErrorCode ierr;
159 
160   PetscFunctionBegin;
161   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
162   PetscFunctionReturn(0);
163 }
164 
165 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
166 {
167   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
168   PetscErrorCode ierr;
169 
170   PetscFunctionBegin;
171   ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
172   PetscFunctionReturn(0);
173 }
174 
175 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
176 {
177   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
178   PetscErrorCode ierr;
179 
180   PetscFunctionBegin;
181   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
182   PetscFunctionReturn(0);
183 }
184 
185 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
186 {
187   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
188   PetscErrorCode ierr;
189 
190   PetscFunctionBegin;
191   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
192   PetscFunctionReturn(0);
193 }
194 
195 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
196 {
197   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
198   PetscErrorCode ierr;
199 
200   PetscFunctionBegin;
201   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
202   PetscFunctionReturn(0);
203 }
204 
205 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
206 {
207   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
208   PetscErrorCode ierr;
209 
210   PetscFunctionBegin;
211   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
212   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
213   PetscFunctionReturn(0);
214 }
215 
216 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
217 {
218   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
219   PetscErrorCode ierr;
220 
221   PetscFunctionBegin;
222   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
223   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
227 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
228 {
229   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
230   PetscErrorCode ierr;
231 
232   PetscFunctionBegin;
233   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
234   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
235   PetscFunctionReturn(0);
236 }
237 
/* Extract the submatrix A[isrow, iscol].  The column index set is gathered onto
   every rank (each rank stores full rows of the result), while rows keep the
   caller's distribution — no parallel redistribution is performed. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* Every rank needs all selected column indices since it holds entire rows */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time.
     The destination is written contiguously (bv++), i.e. its lda equals nrows;
     the source column i starts at v + lda*icol[i] and is indexed by local row. */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
307 
308 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
309 {
310   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
311   PetscErrorCode ierr;
312 
313   PetscFunctionBegin;
314   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
319 {
320   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
321   PetscErrorCode ierr;
322 
323   PetscFunctionBegin;
324   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
329 {
330   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
331   PetscErrorCode ierr;
332 
333   PetscFunctionBegin;
334   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
335   PetscFunctionReturn(0);
336 }
337 
338 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
339 {
340   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
341   PetscErrorCode ierr;
342   PetscInt       nstash,reallocs;
343 
344   PetscFunctionBegin;
345   if (mdn->donotstash || mat->nooffprocentries) return(0);
346 
347   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
348   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
349   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
/* Finish assembly: drain the stash messages started in MatAssemblyBegin,
   apply the received values, then assemble the local sequential block and,
   on first final assembly, build the communication infrastructure. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  /* Assemble the locally owned sequential block */
  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* First final assembly: set up the scatter used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
391 
392 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
393 {
394   PetscErrorCode ierr;
395   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
396 
397   PetscFunctionBegin;
398   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
399   PetscFunctionReturn(0);
400 }
401 
/* Zero the given (global) rows, optionally setting the diagonal to diag and
   fixing the right-hand side b so that the solution keeps x on those rows. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed: b[row] = diag * x[row] on zeroed rows */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the local rows with no diagonal; the diagonal is restored below */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this sets the whole diagonal to diag, not just the zeroed
       rows — dense rows that were zeroed have no other entries, but rows not
       in `rows` also get their diagonal overwritten; confirm this is intended */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
434 
435 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
436 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
437 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
438 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
439 
/* y = A*x.  The needed entries of the distributed x are first gathered into
   the local work vector lvec via the star-forest broadcast, then the local
   sequential block performs the multiply. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  /* communicate x into lvec; begin/end pair must bracket the same buffers */
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  /* local multiply with the gathered input */
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
457 
/* z = y + A*x.  Same communication pattern as MatMult_MPIDense: gather x into
   lvec via the star-forest broadcast, then a local multiply-add. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  /* communicate x into lvec */
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  /* local multiply-add with the gathered input */
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
475 
/* y = A^T*x.  The local transpose product is computed into lvec, then the
   partial results are summed across ranks into y via the star-forest
   reduction (y is zeroed first so the reduction accumulates correctly). */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  /* local contribution: lvec = (local block)^T * x */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  /* sum contributions from all ranks into the distributed result */
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
494 
/* z = y + A^T*x.  Like MatMultTranspose_MPIDense, but the reduction
   accumulates on top of a copy of y placed in z rather than on zeros. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  /* local contribution: lvec = (local block)^T * x */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
  /* sum contributions from all ranks on top of the copied y */
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
513 
/* Extract the diagonal of A into the (sequential, global-length) vector v.
   Each rank fills only the entries corresponding to its local diagonal block;
   the rest of v stays zero. */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          lda,len,i,n,m = A->rmap->n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  /* offset of the first local diagonal entry within the column-major array.
     NOTE(review): radd is computed with m while the per-entry stride below
     uses lda; these agree only when lda == m — confirm, or use lda here too */
  radd = A->rmap->rstart*m;
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
538 
/* Destroy an MPI dense matrix: free the local block, work vector, and
   star-forest, release the private data, and unregister every composed
   method so no dangling function pointers remain on the base object. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* release owned sub-objects */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* unregister all type-specific composed methods */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
590 
591 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
592 
593 #include <petscdraw.h>
/* View an MPI dense matrix via an ASCII, draw, or socket viewer.  For the
   INFO formats only summary data is printed; otherwise the whole matrix is
   gathered onto rank 0 as a temporary matrix and viewed there. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary only; no matrix entries are printed */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* nothing beyond the header the caller already printed */
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    /* rank 0 owns everything; the other ranks own nothing */
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* rank 0 views its (complete) local block through a sub-viewer */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
675 
676 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
677 {
678   PetscErrorCode ierr;
679   PetscBool      iascii,isbinary,isdraw,issocket;
680 
681   PetscFunctionBegin;
682   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
683   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
684   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
685   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
686 
687   if (iascii || issocket || isdraw) {
688     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
689   } else if (isbinary) {
690     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
691   }
692   PetscFunctionReturn(0);
693 }
694 
695 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
696 {
697   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
698   Mat            mdn  = mat->A;
699   PetscErrorCode ierr;
700   PetscLogDouble isend[5],irecv[5];
701 
702   PetscFunctionBegin;
703   info->block_size = 1.0;
704 
705   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
706 
707   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
708   isend[3] = info->memory;  isend[4] = info->mallocs;
709   if (flag == MAT_LOCAL) {
710     info->nz_used      = isend[0];
711     info->nz_allocated = isend[1];
712     info->nz_unneeded  = isend[2];
713     info->memory       = isend[3];
714     info->mallocs      = isend[4];
715   } else if (flag == MAT_GLOBAL_MAX) {
716     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
717 
718     info->nz_used      = irecv[0];
719     info->nz_allocated = irecv[1];
720     info->nz_unneeded  = irecv[2];
721     info->memory       = irecv[3];
722     info->mallocs      = irecv[4];
723   } else if (flag == MAT_GLOBAL_SUM) {
724     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
725 
726     info->nz_used      = irecv[0];
727     info->nz_allocated = irecv[1];
728     info->nz_unneeded  = irecv[2];
729     info->memory       = irecv[3];
730     info->mallocs      = irecv[4];
731   }
732   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
733   info->fill_ratio_needed = 0;
734   info->factor_mallocs    = 0;
735   PetscFunctionReturn(0);
736 }
737 
/* Set a matrix option.  Structure-related options are forwarded to the local
   sequential block; options meaningless for a dense format are logged and
   ignored; unknown options are an error. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded to the local block */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* tracked locally (affects MatSetValues layout) and forwarded */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    /* no meaning for a dense storage format */
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* disables stashing in MatSetValues_MPIDense */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    /* hints that this implementation does not exploit */
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
778 
779 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
780 {
781   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
782   const PetscScalar *l;
783   PetscScalar       x,*v,*vv,*r;
784   PetscErrorCode    ierr;
785   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
786 
787   PetscFunctionBegin;
788   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
789   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
790   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
791   if (ll) {
792     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
793     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
794     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
795     for (i=0; i<m; i++) {
796       x = l[i];
797       v = vv + i;
798       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
799     }
800     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
801     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
802   }
803   if (rr) {
804     const PetscScalar *ar;
805 
806     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
807     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
808     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
809     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
810     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
811     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
812     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
813     for (i=0; i<n; i++) {
814       x = r[i];
815       v = vv + i*lda;
816       for (j=0; j<m; j++) (*v++) *= x;
817     }
818     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
819     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
820   }
821   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
822   PetscFunctionReturn(0);
823 }
824 
/*
   MatNorm_MPIDense - Computes a norm of a parallel dense matrix.

   Supported: NORM_FROBENIUS, NORM_1 (max column absolute sum) and
   NORM_INFINITY (max row absolute sum).  NORM_2 is not supported.

   NOTE(review): the Frobenius and 1-norm branches walk cmap->n*rmap->n
   contiguous entries of the raw local array, i.e. they appear to assume
   the leading dimension equals the local row count -- confirm lda ==
   rmap->n is guaranteed for the local block here.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  if (mdn->size == 1) {
    /* single process: the local block is the whole matrix */
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over local entries, global sum, then sqrt */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* per-column absolute sums, summed over ranks, then max over columns */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across ranks, so reduce the local maxima with MPI_MAX */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
871 
872 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
873 {
874   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
875   Mat            B;
876   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
877   PetscErrorCode ierr;
878   PetscInt       j,i,lda;
879   PetscScalar    *v;
880 
881   PetscFunctionBegin;
882   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
883     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
884     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
885     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
886     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
887   } else B = *matout;
888 
889   m    = a->A->rmap->n; n = a->A->cmap->n;
890   ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
891   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
892   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
893   for (i=0; i<m; i++) rwork[i] = rstart + i;
894   for (j=0; j<n; j++) {
895     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
896     v   += lda;
897   }
898   ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
899   ierr = PetscFree(rwork);CHKERRQ(ierr);
900   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
901   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
902   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
903     *matout = B;
904   } else {
905     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
906   }
907   PetscFunctionReturn(0);
908 }
909 
910 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
911 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
912 
913 PetscErrorCode MatSetUp_MPIDense(Mat A)
914 {
915   PetscErrorCode ierr;
916 
917   PetscFunctionBegin;
918   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
919   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
920   if (!A->preallocated) {
921     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
922   }
923   PetscFunctionReturn(0);
924 }
925 
926 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
927 {
928   PetscErrorCode ierr;
929   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
930 
931   PetscFunctionBegin;
932   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
933   PetscFunctionReturn(0);
934 }
935 
936 PetscErrorCode MatConjugate_MPIDense(Mat mat)
937 {
938   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
939   PetscErrorCode ierr;
940 
941   PetscFunctionBegin;
942   ierr = MatConjugate(a->A);CHKERRQ(ierr);
943   PetscFunctionReturn(0);
944 }
945 
946 PetscErrorCode MatRealPart_MPIDense(Mat A)
947 {
948   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
949   PetscErrorCode ierr;
950 
951   PetscFunctionBegin;
952   ierr = MatRealPart(a->A);CHKERRQ(ierr);
953   PetscFunctionReturn(0);
954 }
955 
956 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
957 {
958   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
959   PetscErrorCode ierr;
960 
961   PetscFunctionBegin;
962   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
967 {
968   PetscErrorCode ierr;
969   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
970 
971   PetscFunctionBegin;
972   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
973   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
974   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
977 
978 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
979 
980 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
981 {
982   PetscErrorCode ierr;
983   PetscInt       i,n;
984   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
985   PetscReal      *work;
986 
987   PetscFunctionBegin;
988   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
989   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
990   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
991   if (type == NORM_2) {
992     for (i=0; i<n; i++) work[i] *= work[i];
993   }
994   if (type == NORM_INFINITY) {
995     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
996   } else {
997     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
998   }
999   ierr = PetscFree(work);CHKERRQ(ierr);
1000   if (type == NORM_2) {
1001     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1002   }
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #if defined(PETSC_HAVE_CUDA)
/*
   Exposes column col of the matrix as a parallel CUDA vector aliasing the
   device storage of the local block (read/write access).  Only one column
   vector may be outstanding at a time; a->vecinuse stores col+1 until the
   matching MatDenseRestoreColumnVec call.
*/
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) { /* lazily create the wrapper vector; its array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* point the wrapper at the start of column col (columns are lda apart) */
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1025 
/*
   Releases the column vector obtained from
   MatDenseGetColumnVec_MPIDenseCUDA: returns the device array to the
   local block and detaches it from the cached wrapper vector.
*/
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1040 
/*
   Read-only variant of MatDenseGetColumnVec_MPIDenseCUDA: the returned
   vector aliases column col of the device storage and is read-locked
   until the matching restore call.
*/
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) { /* lazily create the wrapper vector; its array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  /* forbid writes through the wrapper while it is on loan */
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1060 
/*
   Releases a column vector obtained from
   MatDenseGetColumnVecRead_MPIDenseCUDA: returns the (read-only) device
   array, pops the read lock and detaches the wrapper vector.
*/
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1076 
/*
   Write-only variant of MatDenseGetColumnVec_MPIDenseCUDA: the returned
   vector aliases column col of the device storage obtained with
   write-only access (previous contents need not be valid).
*/
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) { /* lazily create the wrapper vector; its array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1095 
/*
   Releases a column vector obtained from
   MatDenseGetColumnVecWrite_MPIDenseCUDA: returns the device array
   (write access) and detaches the wrapper vector.
*/
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1110 
1111 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1112 {
1113   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1114   PetscErrorCode ierr;
1115 
1116   PetscFunctionBegin;
1117   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1118   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1123 {
1124   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1129   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1134 {
1135   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1136   PetscErrorCode ierr;
1137 
1138   PetscFunctionBegin;
1139   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1140   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1145 {
1146   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1147   PetscErrorCode ierr;
1148 
1149   PetscFunctionBegin;
1150   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1155 {
1156   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1157   PetscErrorCode ierr;
1158 
1159   PetscFunctionBegin;
1160   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1165 {
1166   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1167   PetscErrorCode ierr;
1168 
1169   PetscFunctionBegin;
1170   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1175 {
1176   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1185 {
1186   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1187   PetscErrorCode ierr;
1188 
1189   PetscFunctionBegin;
1190   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1195 {
1196   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1197   PetscErrorCode ierr;
1198 
1199   PetscFunctionBegin;
1200   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1205 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1206 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1207 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1208 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1209 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1210 
/*
   MatBindToCPU_MPIDenseCUDA - Pins the matrix to the CPU (bind=PETSC_TRUE)
   or allows GPU execution (bind=PETSC_FALSE), and swaps the column-vector
   accessor callbacks between the host and CUDA implementations to match.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop a cached non-CUDA column vector so it is recreated as VECMPICUDA */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1245 
/*
   MatMPIDenseCUDASetPreallocation - Sets up the local SEQDENSECUDA block,
   optionally wrapping user-provided device memory d_data (pass NULL for
   library-managed storage).  Silently returns when A is not of type
   MATMPIDENSECUDA.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* local block holds all N columns of the rows owned by this process */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1268 #endif
1269 
1270 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1271 {
1272   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1281 
/* A dense matrix stores every entry, so no diagonal entry can be missing;
   the optional index output d is intentionally left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1288 
1289 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1290 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1291 
1292 /* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIDENSE.  Slot positions follow the
   0-based ordering of struct _MatOps (see the interleaved index comments);
   a 0 entry means the operation is not implemented for this type. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};
1442 
1443 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1444 {
1445   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1446   PetscBool      iscuda;
1447   PetscErrorCode ierr;
1448 
1449   PetscFunctionBegin;
1450   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1451   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1452   if (!a->A) {
1453     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1454     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1455     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1456   }
1457   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1458   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1459   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1460   mat->preallocated = PETSC_TRUE;
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #if defined(PETSC_HAVE_ELEMENTAL)
/*
   MatConvert_MPIDense_Elemental - Converts a parallel dense matrix into
   Elemental format by inserting each process's local rows (all N columns)
   into a new MATELEMENTAL matrix and letting assembly redistribute them.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* local dense storage is column-major */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* global indices: all N columns, and the m rows owned by this process */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1503 #endif
1504 
1505 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1506 {
1507   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1508   PetscErrorCode ierr;
1509 
1510   PetscFunctionBegin;
1511   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1516 {
1517   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1518   PetscErrorCode ierr;
1519 
1520   PetscFunctionBegin;
1521   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 
/*
   MatCreateMPIMatConcatenateSeqMat_MPIDense - Builds a parallel MATMPIDENSE
   matrix whose local row block on each rank is a copy of that rank's
   sequential dense matrix inmat.

   Collective on comm: every rank participates in the ownership split and the
   column-count check; the local column counts n must sum to the global column
   count of inmat.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  /* inmat is sequential, so its global sizes are also its local sizes */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); /* NOTE(review): nloc is not used below */
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase: copy the sequential matrix into this rank's local block */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1554 
1555 #if defined(PETSC_HAVE_CUDA)
/*
   MatConvert_MPIDenseCUDA_MPIDense - Converts a MATMPIDENSECUDA matrix back
   to plain MATMPIDENSE: binds operations to the CPU, restores the standard
   vector type, removes the CUDA-only composed methods, and converts the
   inner sequential block to MATSEQDENSE.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* for MAT_INPLACE_MATRIX there is nothing to copy: operate on *newmat directly */

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* remove the CUDA-specific composed methods by composing NULL */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential block and rebuild multiply scatter state */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1595 
/*
   MatConvert_MPIDense_MPIDenseCUDA - Converts a MATMPIDENSE matrix to
   MATMPIDENSECUDA: switches the default vector type to CUDA, composes the
   CUDA array accessors, converts the inner sequential block to
   MATSEQDENSECUDA, and unbinds operations from the CPU.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* for MAT_INPLACE_MATRIX there is nothing to copy: operate on *newmat directly */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* install the CUDA-specific method implementations */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",            MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                        MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                    MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                   MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                    MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",               MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                      MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                      MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                    MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential block and rebuild multiply scatter state */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1638 #endif
1639 
1640 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1641 {
1642   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1643   PetscErrorCode ierr;
1644   PetscInt       lda;
1645 
1646   PetscFunctionBegin;
1647   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1648   if (!a->cvec) {
1649     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1650   }
1651   a->vecinuse = col + 1;
1652   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1653   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1654   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1655   *v   = a->cvec;
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1660 {
1661   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1662   PetscErrorCode ierr;
1663 
1664   PetscFunctionBegin;
1665   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1666   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1667   a->vecinuse = 0;
1668   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1669   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1670   *v   = NULL;
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1675 {
1676   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1677   PetscErrorCode ierr;
1678   PetscInt       lda;
1679 
1680   PetscFunctionBegin;
1681   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1682   if (!a->cvec) {
1683     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1684   }
1685   a->vecinuse = col + 1;
1686   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1687   ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1688   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1689   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1690   *v   = a->cvec;
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1695 {
1696   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1697   PetscErrorCode ierr;
1698 
1699   PetscFunctionBegin;
1700   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1701   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1702   a->vecinuse = 0;
1703   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1704   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1705   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1706   *v   = NULL;
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1711 {
1712   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1713   PetscErrorCode ierr;
1714   PetscInt       lda;
1715 
1716   PetscFunctionBegin;
1717   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1718   if (!a->cvec) {
1719     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1720   }
1721   a->vecinuse = col + 1;
1722   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1723   ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1724   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1725   *v   = a->cvec;
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1730 {
1731   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1732   PetscErrorCode ierr;
1733 
1734   PetscFunctionBegin;
1735   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1736   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1737   a->vecinuse = 0;
1738   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1739   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1740   *v   = NULL;
1741   PetscFunctionReturn(0);
1742 }
1743 
/*
   MatCreate_MPIDense - Type constructor for MATMPIDENSE: allocates the
   per-matrix data structure, installs the operations table, creates the
   stash used for off-process MatSetValues(), and composes the dense-specific
   methods that the generic MatDense* front ends dispatch to.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* dense array/column accessors dispatched by the MatDense* front ends */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  /* preallocation and matrix-product kernels */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1806 
1807 /*MC
1808    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1809 
1810    Options Database Keys:
1811 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1812 
1813   Level: beginner
1814 
.seealso: MATDENSE, MATDENSECUDA, MATMPIDENSE, MATSEQDENSECUDA
1816 
1817 M*/
#if defined(PETSC_HAVE_CUDA)
/*
   MatCreate_MPIDenseCUDA - Type constructor for MATMPIDENSECUDA: builds a
   plain MATMPIDENSE and converts it in place to the CUDA type.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
1829 
1830 /*MC
1831    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1832 
1833    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1834    and MATMPIDENSE otherwise.
1835 
1836    Options Database Keys:
1837 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1838 
1839   Level: beginner
1840 
1841 
1842 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1843 M*/
1844 
1845 /*MC
1846    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1847 
1848    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1849    and MATMPIDENSECUDA otherwise.
1850 
1851    Options Database Keys:
1852 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1853 
1854   Level: beginner
1855 
1856 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1857 M*/
1858 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateDense(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation, if any (no-op otherwise) */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1890 
/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the user-provided host array is now the authoritative copy */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
1924 
/*@
   MatDenseResetArray - Resets the matrix array back to the array it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1951 
/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the new host array is now the authoritative copy */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
1984 
#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the user-provided device array is now the authoritative copy */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2016 
/*@C
   MatDenseCUDAResetArray - Resets the matrix array back to the array it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2043 
/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the new device array is now the authoritative copy */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2075 
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* the caller will modify the matrix, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2104 
/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   After this call the GPU copy of the matrix is considered the up-to-date one.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2131 
/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read-only access: the object state is deliberately not increased */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2159 
2160 /*@C
2161    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2162 
2163    Not Collective
2164 
2165    Input Parameters:
2166 +  A - the matrix
2167 -  array - the GPU array in column major order
2168 
2169    Notes:
2170    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2171 
2172    Level: developer
2173 
2174 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2175 @*/
2176 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2177 {
2178   PetscErrorCode ierr;
2179 
2180   PetscFunctionBegin;
2181   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 /*@C
2186    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2187 
2188    Not Collective
2189 
   Input Parameter:
2191 .  A - the matrix
2192 
   Output Parameter:
2194 .  array - the GPU array in column major order
2195 
2196    Notes:
2197    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2198 
2199    Level: developer
2200 
2201 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2202 @*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation (errors if none is registered) */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* read-write access: bump the object state so cached quantities are invalidated */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2213 
2214 /*@C
2215    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2216 
2217    Not Collective
2218 
2219    Input Parameters:
2220 +  A - the matrix
2221 -  array - the GPU array in column major order
2222 
2223    Notes:
2224 
2225    Level: developer
2226 
2227 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2228 @*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* the caller may have written to the device buffer: bump the object state and
     mark the GPU copy as the authoritative one */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2240 #endif
2241 
2242 /*@C
2243    MatCreateDense - Creates a matrix in dense format.
2244 
2245    Collective
2246 
2247    Input Parameters:
2248 +  comm - MPI communicator
2249 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2250 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2251 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2252 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2253 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2254    to control all matrix memory allocation.
2255 
2256    Output Parameter:
2257 .  A - the matrix
2258 
2259    Notes:
2260    The dense format is fully compatible with standard Fortran 77
2261    storage by columns.
2262 
2263    The data input variable is intended primarily for Fortran programmers
2264    who wish to allocate their own matrix memory space.  Most users should
2265    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2266 
2267    The user MUST specify either the local or global matrix dimensions
2268    (possibly both).
2269 
2270    Level: intermediate
2271 
2272 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2273 @*/
2274 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2275 {
2276   PetscErrorCode ierr;
2277   PetscMPIInt    size;
2278 
2279   PetscFunctionBegin;
2280   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2281   PetscValidLogicalCollectiveBool(*A,!!data,6);
2282   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2283   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2284   if (size > 1) {
2285     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2286     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2287     if (data) {  /* user provided data array, so no need to assemble */
2288       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2289       (*A)->assembled = PETSC_TRUE;
2290     }
2291   } else {
2292     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2293     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2294   }
2295   PetscFunctionReturn(0);
2296 }
2297 
2298 #if defined(PETSC_HAVE_CUDA)
2299 /*@C
2300    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2301 
2302    Collective
2303 
2304    Input Parameters:
2305 +  comm - MPI communicator
2306 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2307 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2308 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2309 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2310 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2311    to control matrix memory allocation.
2312 
2313    Output Parameter:
2314 .  A - the matrix
2315 
2316    Notes:
2317 
2318    Level: intermediate
2319 
2320 .seealso: MatCreate(), MatCreateDense()
2321 @*/
2322 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2323 {
2324   PetscErrorCode ierr;
2325   PetscMPIInt    size;
2326 
2327   PetscFunctionBegin;
2328   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2329   PetscValidLogicalCollectiveBool(*A,!!data,6);
2330   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2331   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2332   if (size > 1) {
2333     ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
2334     ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2335     if (data) {  /* user provided data array, so no need to assemble */
2336       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2337       (*A)->assembled = PETSC_TRUE;
2338     }
2339   } else {
2340     ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
2341     ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2342   }
2343   PetscFunctionReturn(0);
2344 }
2345 #endif
2346 
/* Duplicate an MPIDENSE matrix: create a new matrix of the same type/sizes,
   copy the bookkeeping fields, reference the layouts, and duplicate the local
   sequential block (copying values or not according to cpvalues). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  /* mark the duplicate ready for use without a further assembly pass */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) the row/column layouts rather than rebuilding them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate the local dense block, then set up the scatter used by MatMult */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
2379 
/* Load an MPIDENSE matrix from a viewer; binary viewers are always supported,
   HDF5 viewers only when PETSc was configured with HDF5. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2406 
2407 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2408 {
2409   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2410   Mat            a,b;
2411   PetscBool      flg;
2412   PetscErrorCode ierr;
2413 
2414   PetscFunctionBegin;
2415   a    = matA->A;
2416   b    = matB->A;
2417   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2418   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
2423 {
2424   PetscErrorCode        ierr;
2425   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
2426   Mat_TransMatMultDense *atb = a->atbdense;
2427 
2428   PetscFunctionBegin;
2429   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2430   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2431   ierr = (*atb->destroy)(A);CHKERRQ(ierr);
2432   ierr = PetscFree(atb);CHKERRQ(ierr);
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
2437 {
2438   PetscErrorCode        ierr;
2439   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
2440   Mat_MatTransMultDense *abt = a->abtdense;
2441 
2442   PetscFunctionBegin;
2443   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2444   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2445   ierr = (abt->destroy)(A);CHKERRQ(ierr);
2446   ierr = PetscFree(abt);CHKERRQ(ierr);
2447   PetscFunctionReturn(0);
2448 }
2449 
/* Numeric phase of C = A^T * B for MPIDENSE matrices.
   Each rank forms its local contribution aseq^T * bseq (a full cM x cN matrix),
   packs it by destination rank, and MPI_Reduce_scatter sums and distributes the
   row blocks of C. */
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*sendbuf=atb->sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq (reusing the intermediate after the first call) */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  /* each rank receives (its row count) * cN entries of C */
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf, grouped by destination rank; within each
     group the rank's row block is stored column by column (column-major) */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum the contributions from all ranks directly into the local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2487 
/* Symbolic phase of C = A^T * B for MPIDENSE matrices: size and assemble C,
   allocate the reduce-scatter buffers, and hook the destroy routine so the
   support data is freed with C. (fill is not used for dense products.) */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A and B must have identical row distributions for the local products to line up */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C: rows distributed like A's columns, columns like B's */
  ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C: a full-size send buffer plus per-rank counts */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);

  /* save the current destroy routine and interpose our own for cleanup */
  c               = (Mat_MPIDense*)C->data;
  c->atbdense     = atb;
  atb->destroy    = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2522 
/* Symbolic phase of C = A * B^T for MPIDENSE matrices: size and assemble C,
   select the algorithm ("allgatherv" or "cyclic") from the product options,
   allocate the corresponding communication buffers, and interpose the destroy
   routine so the support data is freed with C. (fill is unused for dense.) */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  PetscInt              alg;                      /* 0 = allgatherv, 1 = cyclic */
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  /* check local size of A and B: both contribute cols of length A->cmap->n */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C: rows distributed like A's rows, columns like B's rows */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — local transpose buffer plus the gathered global B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  /* save the current destroy routine and interpose our own for cleanup */
  c                               = (Mat_MPIDense*)C->data;
  c->abtdense                     = abt;
  abt->destroy                    = C->ops->destroy;
  C->ops->destroy                 = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2578 
/* Numeric phase of C = A * B^T, "cyclic" algorithm.
   Each rank's local rows of B are passed around the ring of processes; at every
   step each rank multiplies its local block of A against the row block of B it
   currently holds, filling the corresponding column block of its rows of C.
   Communication (nonblocking send/recv to the ring neighbors) is overlapped
   with the local GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
  /* leading dimensions of the local blocks, as BLAS integers */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* B's local block is contiguous: send it directly */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* compact B's local block (stripping the lda padding) into the first buffer */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring neighbors: send "left", receive from "right" */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* any previous send/receive has completed, so the buffers may be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T, written to the column block of C owned by recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2659 
2660 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2661 {
2662   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2663   Mat_MatTransMultDense *abt = c->abtdense;
2664   PetscErrorCode        ierr;
2665   MPI_Comm              comm;
2666   PetscMPIInt           size;
2667   PetscScalar           *cv, *sendbuf, *recvbuf;
2668   const PetscScalar     *av,*bv;
2669   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2670   PetscScalar           _DOne=1.0,_DZero=0.0;
2671   PetscBLASInt          cm, cn, ck, alda, clda;
2672 
2673   PetscFunctionBegin;
2674   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2675   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2676   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2677   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2678   ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
2679   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2680   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2681   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2682   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2683   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2684   /* copy transpose of B into buf[0] */
2685   bn      = B->rmap->n;
2686   sendbuf = abt->buf[0];
2687   recvbuf = abt->buf[1];
2688   for (k = 0, j = 0; j < bn; j++) {
2689     for (i = 0; i < cK; i++, k++) {
2690       sendbuf[k] = bv[i * blda + j];
2691     }
2692   }
2693   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2694   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2695   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2696   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2697   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2698   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2699   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2700   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2701   ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
2702   PetscFunctionReturn(0);
2703 }
2704 
2705 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2706 {
2707   Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
2708   Mat_MatTransMultDense *abt = c->abtdense;
2709   PetscErrorCode        ierr;
2710 
2711   PetscFunctionBegin;
2712   switch (abt->alg) {
2713   case 1:
2714     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2715     break;
2716   default:
2717     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2718     break;
2719   }
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2724 {
2725   PetscErrorCode   ierr;
2726   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2727   Mat_MatMultDense *ab = a->abdense;
2728 
2729   PetscFunctionBegin;
2730   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2731   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2732   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2733 
2734   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2735   ierr = PetscFree(ab);CHKERRQ(ierr);
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A * B via Elemental: refresh the Elemental copies of A
   and B, multiply in Elemental format, and convert the result back into C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  /* reuse the Elemental matrices created in the symbolic phase */
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  /* copy the Elemental product back into the MPIDENSE matrix C */
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2753 
2754 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2755 {
2756   PetscErrorCode   ierr;
2757   Mat              Ae,Be,Ce;
2758   Mat_MPIDense     *c;
2759   Mat_MatMultDense *ab;
2760 
2761   PetscFunctionBegin;
2762   /* check local size of A and B */
2763   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2764     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2765   }
2766 
2767   /* create elemental matrices Ae and Be */
2768   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2769   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2770   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2771   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2772   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2773 
2774   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2775   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2776   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2777   ierr = MatSetUp(Be);CHKERRQ(ierr);
2778   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2779 
2780   /* compute symbolic Ce = Ae*Be */
2781   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2782   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2783 
2784   /* setup C */
2785   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2786   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2787   ierr = MatSetUp(C);CHKERRQ(ierr);
2788 
2789   /* create data structure for reuse Cdense */
2790   ierr = PetscNew(&ab);CHKERRQ(ierr);
2791   c                  = (Mat_MPIDense*)C->data;
2792   c->abdense         = ab;
2793 
2794   ab->Ae             = Ae;
2795   ab->Be             = Be;
2796   ab->Ce             = Ce;
2797   ab->destroy        = C->ops->destroy;
2798   C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2799   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2800   C->ops->productnumeric = MatProductNumeric_AB;
2801   PetscFunctionReturn(0);
2802 }
2803 #endif
2804 /* ----------------------------------------------- */
2805 #if defined(PETSC_HAVE_ELEMENTAL)
/* Wire up the C = A*B product (available only with Elemental, per the guard). */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->productnumeric  = MatProductNumeric_AB;
  PetscFunctionReturn(0);
}
2814 #endif
2815 
/* Symbolic phase for C = A^T*B through the MatProduct interface: run the
   dense-dense symbolic routine, then install the numeric callbacks. */
static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr);
  C->ops->productnumeric          = MatProductNumeric_AtB;
  C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2827 
/* Validate and wire up the C = A^T*B product: A and B must share the same
   local row distribution before the symbolic callback is installed. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
  PetscFunctionReturn(0);
}
2840 
/* Wire up the C = A*B^t product: pick the algorithm (default "allgatherv",
   overridable from the options database) and install the symbolic/numeric
   callbacks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm: replace a "default" setting with "allgatherv" */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; the option name depends on which API created the product */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) { /* user selected an algorithm explicitly */
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  C->ops->productnumeric           = MatProductNumeric_ABt;
  PetscFunctionReturn(0);
}
2876 
/* Top-level MatProduct dispatch for MPIDENSE*MPIDENSE: route to the handler
   for the requested product type. AB requires Elemental; unsupported types
   error out. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]);
  }
  PetscFunctionReturn(0);
}
2899