xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision a3fa217bfefef3d54995bb82b2b5ea8a3c3e9019)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatDenseGetLocalMatrix"
12 /*@
13 
14       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
15               matrix that represents the operator. For sequential matrices it returns itself.
16 
17     Input Parameter:
18 .      A - the Seq or MPI dense matrix
19 
20     Output Parameter:
21 .      B - the inner matrix
22 
23     Level: intermediate
24 
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
27 {
28   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
29   PetscErrorCode ierr;
30   PetscBool      flg;
31 
32   PetscFunctionBegin;
33   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else *B = A;
36   PetscFunctionReturn(0);
37 }
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatGetRow_MPIDense"
41 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
42 {
43   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
44   PetscErrorCode ierr;
45   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
46 
47   PetscFunctionBegin;
48   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
49   lrow = row - rstart;
50   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "MatRestoreRow_MPIDense"
56 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
57 {
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
62   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
68 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
69 {
70   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
71   PetscErrorCode ierr;
72   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
73   PetscScalar    *array;
74   MPI_Comm       comm;
75   Mat            B;
76 
77   PetscFunctionBegin;
78   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
79 
80   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
81   if (!B) {
82     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
83     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
84     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
85     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
86     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
87     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
88     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
89     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
90     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
91     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
92     *a   = B;
93     ierr = MatDestroy(&B);CHKERRQ(ierr);
94   } else *a = B;
95   PetscFunctionReturn(0);
96 }
97 
98 #undef __FUNCT__
99 #define __FUNCT__ "MatSetValues_MPIDense"
100 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
101 {
102   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
103   PetscErrorCode ierr;
104   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
105   PetscBool      roworiented = A->roworiented;
106 
107   PetscFunctionBegin;
108   if (v) PetscValidScalarPointer(v,6);
109   for (i=0; i<m; i++) {
110     if (idxm[i] < 0) continue;
111     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
112     if (idxm[i] >= rstart && idxm[i] < rend) {
113       row = idxm[i] - rstart;
114       if (roworiented) {
115         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
116       } else {
117         for (j=0; j<n; j++) {
118           if (idxn[j] < 0) continue;
119           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
120           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
121         }
122       }
123     } else if (!A->donotstash) {
124       mat->assembled = PETSC_FALSE;
125       if (roworiented) {
126         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
127       } else {
128         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
129       }
130     }
131   }
132   PetscFunctionReturn(0);
133 }
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatGetValues_MPIDense"
137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
138 {
139   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
140   PetscErrorCode ierr;
141   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
142 
143   PetscFunctionBegin;
144   for (i=0; i<m; i++) {
145     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
146     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
147     if (idxm[i] >= rstart && idxm[i] < rend) {
148       row = idxm[i] - rstart;
149       for (j=0; j<n; j++) {
150         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
151         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
152         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
153       }
154     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
155   }
156   PetscFunctionReturn(0);
157 }
158 
159 #undef __FUNCT__
160 #define __FUNCT__ "MatDenseGetArray_MPIDense"
161 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
162 {
163   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
164   PetscErrorCode ierr;
165 
166   PetscFunctionBegin;
167   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
173 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
174 {
175   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
176   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
177   PetscErrorCode ierr;
178   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
179   const PetscInt *irow,*icol;
180   PetscScalar    *av,*bv,*v = lmat->v;
181   Mat            newmat;
182   IS             iscol_local;
183 
184   PetscFunctionBegin;
185   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
186   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
187   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
188   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
189   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
190   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
191 
192   /* No parallel redistribution currently supported! Should really check each index set
193      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
194      original matrix! */
195 
196   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
197   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
198 
199   /* Check submatrix call */
200   if (scall == MAT_REUSE_MATRIX) {
201     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
202     /* Really need to test rows and column sizes! */
203     newmat = *B;
204   } else {
205     /* Create and fill new matrix */
206     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
207     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
208     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
209     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
210   }
211 
212   /* Now extract the data pointers and do the copy, column at a time */
213   newmatd = (Mat_MPIDense*)newmat->data;
214   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
215 
216   for (i=0; i<Ncols; i++) {
217     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
218     for (j=0; j<nrows; j++) {
219       *bv++ = av[irow[j] - rstart];
220     }
221   }
222 
223   /* Assemble the matrices so that the correct flags are set */
224   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
225   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
226 
227   /* Free work space */
228   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
229   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
230   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
231   *B   = newmat;
232   PetscFunctionReturn(0);
233 }
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
237 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
238 {
239   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
240   PetscErrorCode ierr;
241 
242   PetscFunctionBegin;
243   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
250 {
251   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
252   MPI_Comm       comm;
253   PetscErrorCode ierr;
254   PetscInt       nstash,reallocs;
255   InsertMode     addv;
256 
257   PetscFunctionBegin;
258   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
259   /* make sure all processors are either in INSERTMODE or ADDMODE */
260   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
261   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262   mat->insertmode = addv; /* in case this processor had no cache */
263 
264   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
265   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
266   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
272 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
273 {
274   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
275   PetscErrorCode ierr;
276   PetscInt       i,*row,*col,flg,j,rstart,ncols;
277   PetscMPIInt    n;
278   PetscScalar    *val;
279   InsertMode     addv=mat->insertmode;
280 
281   PetscFunctionBegin;
282   /*  wait on receives */
283   while (1) {
284     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
285     if (!flg) break;
286 
287     for (i=0; i<n;) {
288       /* Now identify the consecutive vals belonging to the same row */
289       for (j=i,rstart=row[j]; j<n; j++) {
290         if (row[j] != rstart) break;
291       }
292       if (j < n) ncols = j-i;
293       else       ncols = n-i;
294       /* Now assemble all these values with a single function call */
295       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
296       i    = j;
297     }
298   }
299   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
300 
301   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
302   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
303 
304   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
305     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
306   }
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatZeroEntries_MPIDense"
312 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
313 {
314   PetscErrorCode ierr;
315   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
316 
317   PetscFunctionBegin;
318   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 /* the code does not do the diagonal entries correctly unless the
323    matrix is square and the column and row owerships are identical.
324    This is a BUG. The only way to fix it seems to be to access
325    mdn->A and mdn->B directly and not through the MatZeroRows()
326    routine.
327 */
328 #undef __FUNCT__
329 #define __FUNCT__ "MatZeroRows_MPIDense"
330 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
331 {
332   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
333   PetscErrorCode    ierr;
334   PetscInt          i,*owners = A->rmap->range;
335   PetscInt          *sizes,j,idx,nsends;
336   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
337   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
338   PetscInt          *lens,*lrows,*values;
339   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
340   MPI_Comm          comm;
341   MPI_Request       *send_waits,*recv_waits;
342   MPI_Status        recv_status,*send_status;
343   PetscBool         found;
344   const PetscScalar *xx;
345   PetscScalar       *bb;
346 
347   PetscFunctionBegin;
348   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
349   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
350   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
351   /*  first count number of contributors to each processor */
352   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
353   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
354   for (i=0; i<N; i++) {
355     idx   = rows[i];
356     found = PETSC_FALSE;
357     for (j=0; j<size; j++) {
358       if (idx >= owners[j] && idx < owners[j+1]) {
359         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
360       }
361     }
362     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
363   }
364   nsends = 0;
365   for (i=0; i<size; i++) nsends += sizes[2*i+1];
366 
367   /* inform other processors of number of messages and max length*/
368   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
369 
370   /* post receives:   */
371   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
372   ierr = PetscMalloc1((nrecvs+1),&recv_waits);CHKERRQ(ierr);
373   for (i=0; i<nrecvs; i++) {
374     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
375   }
376 
377   /* do sends:
378       1) starts[i] gives the starting index in svalues for stuff going to
379          the ith processor
380   */
381   ierr = PetscMalloc1((N+1),&svalues);CHKERRQ(ierr);
382   ierr = PetscMalloc1((nsends+1),&send_waits);CHKERRQ(ierr);
383   ierr = PetscMalloc1((size+1),&starts);CHKERRQ(ierr);
384 
385   starts[0] = 0;
386   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
387   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
388 
389   starts[0] = 0;
390   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
391   count = 0;
392   for (i=0; i<size; i++) {
393     if (sizes[2*i+1]) {
394       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
395     }
396   }
397   ierr = PetscFree(starts);CHKERRQ(ierr);
398 
399   base = owners[rank];
400 
401   /*  wait on receives */
402   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
403   count = nrecvs;
404   slen  = 0;
405   while (count) {
406     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
407     /* unpack receives into our local space */
408     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
409 
410     source[imdex] = recv_status.MPI_SOURCE;
411     lens[imdex]   = n;
412     slen += n;
413     count--;
414   }
415   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
416 
417   /* move the data into the send scatter */
418   ierr  = PetscMalloc1((slen+1),&lrows);CHKERRQ(ierr);
419   count = 0;
420   for (i=0; i<nrecvs; i++) {
421     values = rvalues + i*nmax;
422     for (j=0; j<lens[i]; j++) {
423       lrows[count++] = values[j] - base;
424     }
425   }
426   ierr = PetscFree(rvalues);CHKERRQ(ierr);
427   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
428   ierr = PetscFree(owner);CHKERRQ(ierr);
429   ierr = PetscFree(sizes);CHKERRQ(ierr);
430 
431   /* fix right hand side if needed */
432   if (x && b) {
433     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
434     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
435     for (i=0; i<slen; i++) {
436       bb[lrows[i]] = diag*xx[lrows[i]];
437     }
438     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
439     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
440   }
441 
442   /* actually zap the local rows */
443   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
444   if (diag != 0.0) {
445     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
446     PetscInt     m   = ll->lda, i;
447 
448     for (i=0; i<slen; i++) {
449       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
450     }
451   }
452   ierr = PetscFree(lrows);CHKERRQ(ierr);
453 
454   /* wait on sends */
455   if (nsends) {
456     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
457     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
458     ierr = PetscFree(send_status);CHKERRQ(ierr);
459   }
460   ierr = PetscFree(send_waits);CHKERRQ(ierr);
461   ierr = PetscFree(svalues);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
465 #undef __FUNCT__
466 #define __FUNCT__ "MatMult_MPIDense"
467 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
468 {
469   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
470   PetscErrorCode ierr;
471 
472   PetscFunctionBegin;
473   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
474   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
475   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatMultAdd_MPIDense"
481 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
482 {
483   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
484   PetscErrorCode ierr;
485 
486   PetscFunctionBegin;
487   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
488   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
489   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatMultTranspose_MPIDense"
495 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
496 {
497   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
498   PetscErrorCode ierr;
499   PetscScalar    zero = 0.0;
500 
501   PetscFunctionBegin;
502   ierr = VecSet(yy,zero);CHKERRQ(ierr);
503   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
504   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
505   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
506   PetscFunctionReturn(0);
507 }
508 
509 #undef __FUNCT__
510 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
511 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
512 {
513   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
514   PetscErrorCode ierr;
515 
516   PetscFunctionBegin;
517   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
518   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
519   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
520   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "MatGetDiagonal_MPIDense"
526 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
527 {
528   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
529   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
530   PetscErrorCode ierr;
531   PetscInt       len,i,n,m = A->rmap->n,radd;
532   PetscScalar    *x,zero = 0.0;
533 
534   PetscFunctionBegin;
535   ierr = VecSet(v,zero);CHKERRQ(ierr);
536   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
537   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
538   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
539   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
540   radd = A->rmap->rstart*m;
541   for (i=0; i<len; i++) {
542     x[i] = aloc->v[radd + i*m + i];
543   }
544   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatDestroy_MPIDense"
550 PetscErrorCode MatDestroy_MPIDense(Mat mat)
551 {
552   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556 #if defined(PETSC_USE_LOG)
557   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
558 #endif
559   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
560   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
561   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
562   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
563 
564   ierr = PetscFree(mat->data);CHKERRQ(ierr);
565   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
566   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
567   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
568   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
569   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
570   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatView_MPIDense_Binary"
576 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
577 {
578   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
579   PetscErrorCode    ierr;
580   PetscViewerFormat format;
581   int               fd;
582   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
583   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
584   PetscScalar       *work,*v,*vv;
585   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
586 
587   PetscFunctionBegin;
588   if (mdn->size == 1) {
589     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
590   } else {
591     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
592     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
593     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
594 
595     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
596     if (format == PETSC_VIEWER_NATIVE) {
597 
598       if (!rank) {
599         /* store the matrix as a dense matrix */
600         header[0] = MAT_FILE_CLASSID;
601         header[1] = mat->rmap->N;
602         header[2] = N;
603         header[3] = MATRIX_BINARY_FORMAT_DENSE;
604         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
605 
606         /* get largest work array needed for transposing array */
607         mmax = mat->rmap->n;
608         for (i=1; i<size; i++) {
609           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
610         }
611         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
612 
613         /* write out local array, by rows */
614         m = mat->rmap->n;
615         v = a->v;
616         for (j=0; j<N; j++) {
617           for (i=0; i<m; i++) {
618             work[j + i*N] = *v++;
619           }
620         }
621         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
622         /* get largest work array to receive messages from other processes, excludes process zero */
623         mmax = 0;
624         for (i=1; i<size; i++) {
625           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
626         }
627         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
628         for (k = 1; k < size; k++) {
629           v    = vv;
630           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
631           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
632 
633           for (j = 0; j < N; j++) {
634             for (i = 0; i < m; i++) {
635               work[j + i*N] = *v++;
636             }
637           }
638           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
639         }
640         ierr = PetscFree(work);CHKERRQ(ierr);
641         ierr = PetscFree(vv);CHKERRQ(ierr);
642       } else {
643         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
644       }
645     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
646   }
647   PetscFunctionReturn(0);
648 }
649 
650 #include <petscdraw.h>
651 #undef __FUNCT__
652 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
653 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
654 {
655   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
656   PetscErrorCode    ierr;
657   PetscMPIInt       size = mdn->size,rank = mdn->rank;
658   PetscViewerType   vtype;
659   PetscBool         iascii,isdraw;
660   PetscViewer       sviewer;
661   PetscViewerFormat format;
662 
663   PetscFunctionBegin;
664   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
665   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
666   if (iascii) {
667     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
668     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
669     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
670       MatInfo info;
671       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
672       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
673       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
674       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
675       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
676       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
677       PetscFunctionReturn(0);
678     } else if (format == PETSC_VIEWER_ASCII_INFO) {
679       PetscFunctionReturn(0);
680     }
681   } else if (isdraw) {
682     PetscDraw draw;
683     PetscBool isnull;
684 
685     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
686     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
687     if (isnull) PetscFunctionReturn(0);
688   }
689 
690   if (size == 1) {
691     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
692   } else {
693     /* assemble the entire matrix onto first processor. */
694     Mat         A;
695     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
696     PetscInt    *cols;
697     PetscScalar *vals;
698 
699     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
700     if (!rank) {
701       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
702     } else {
703       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
704     }
705     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
706     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
707     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
708     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
709 
710     /* Copy the matrix ... This isn't the most efficient means,
711        but it's quick for now */
712     A->insertmode = INSERT_VALUES;
713 
714     row = mat->rmap->rstart;
715     m   = mdn->A->rmap->n;
716     for (i=0; i<m; i++) {
717       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
718       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
719       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
720       row++;
721     }
722 
723     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
724     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
725     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
726     if (!rank) {
727       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
728       /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/
729       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
730       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
731     }
732     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
733     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
734     ierr = MatDestroy(&A);CHKERRQ(ierr);
735   }
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatView_MPIDense"
741 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
742 {
743   PetscErrorCode ierr;
744   PetscBool      iascii,isbinary,isdraw,issocket;
745 
746   PetscFunctionBegin;
747   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
748   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
749   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
750   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
751 
752   if (iascii || issocket || isdraw) {
753     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
754   } else if (isbinary) {
755     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
756   }
757   PetscFunctionReturn(0);
758 }
759 
760 #undef __FUNCT__
761 #define __FUNCT__ "MatGetInfo_MPIDense"
762 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
763 {
764   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
765   Mat            mdn  = mat->A;
766   PetscErrorCode ierr;
767   PetscReal      isend[5],irecv[5];
768 
769   PetscFunctionBegin;
770   info->block_size = 1.0;
771 
772   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
773 
774   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
775   isend[3] = info->memory;  isend[4] = info->mallocs;
776   if (flag == MAT_LOCAL) {
777     info->nz_used      = isend[0];
778     info->nz_allocated = isend[1];
779     info->nz_unneeded  = isend[2];
780     info->memory       = isend[3];
781     info->mallocs      = isend[4];
782   } else if (flag == MAT_GLOBAL_MAX) {
783     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
784 
785     info->nz_used      = irecv[0];
786     info->nz_allocated = irecv[1];
787     info->nz_unneeded  = irecv[2];
788     info->memory       = irecv[3];
789     info->mallocs      = irecv[4];
790   } else if (flag == MAT_GLOBAL_SUM) {
791     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
792 
793     info->nz_used      = irecv[0];
794     info->nz_allocated = irecv[1];
795     info->nz_unneeded  = irecv[2];
796     info->memory       = irecv[3];
797     info->mallocs      = irecv[4];
798   }
799   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
800   info->fill_ratio_needed = 0;
801   info->factor_mallocs    = 0;
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatSetOption_MPIDense"
807 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
808 {
809   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
810   PetscErrorCode ierr;
811 
812   PetscFunctionBegin;
813   switch (op) {
814   case MAT_NEW_NONZERO_LOCATIONS:
815   case MAT_NEW_NONZERO_LOCATION_ERR:
816   case MAT_NEW_NONZERO_ALLOCATION_ERR:
817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
818     break;
819   case MAT_ROW_ORIENTED:
820     a->roworiented = flg;
821 
822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
823     break;
824   case MAT_NEW_DIAGONALS:
825   case MAT_KEEP_NONZERO_PATTERN:
826   case MAT_USE_HASH_TABLE:
827     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
828     break;
829   case MAT_IGNORE_OFF_PROC_ENTRIES:
830     a->donotstash = flg;
831     break;
832   case MAT_SYMMETRIC:
833   case MAT_STRUCTURALLY_SYMMETRIC:
834   case MAT_HERMITIAN:
835   case MAT_SYMMETRY_ETERNAL:
836   case MAT_IGNORE_LOWER_TRIANGULAR:
837     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
838     break;
839   default:
840     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
841   }
842   PetscFunctionReturn(0);
843 }
844 
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatDiagonalScale_MPIDense"
848 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
849 {
850   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
851   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
852   PetscScalar    *l,*r,x,*v;
853   PetscErrorCode ierr;
854   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
855 
856   PetscFunctionBegin;
857   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
858   if (ll) {
859     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
860     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
861     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
862     for (i=0; i<m; i++) {
863       x = l[i];
864       v = mat->v + i;
865       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
866     }
867     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
868     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
869   }
870   if (rr) {
871     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
872     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
873     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
876     for (i=0; i<n; i++) {
877       x = r[i];
878       v = mat->v + i*m;
879       for (j=0; j<m; j++) (*v++) *= x;
880     }
881     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
882     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
883   }
884   PetscFunctionReturn(0);
885 }
886 
887 #undef __FUNCT__
888 #define __FUNCT__ "MatNorm_MPIDense"
889 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
890 {
891   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
892   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
893   PetscErrorCode ierr;
894   PetscInt       i,j;
895   PetscReal      sum = 0.0;
896   PetscScalar    *v  = mat->v;
897 
898   PetscFunctionBegin;
899   if (mdn->size == 1) {
900     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
901   } else {
902     if (type == NORM_FROBENIUS) {
903       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
904         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
905       }
906       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
907       *nrm = PetscSqrtReal(*nrm);
908       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
909     } else if (type == NORM_1) {
910       PetscReal *tmp,*tmp2;
911       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
912       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
913       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
914       *nrm = 0.0;
915       v    = mat->v;
916       for (j=0; j<mdn->A->cmap->n; j++) {
917         for (i=0; i<mdn->A->rmap->n; i++) {
918           tmp[j] += PetscAbsScalar(*v);  v++;
919         }
920       }
921       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
922       for (j=0; j<A->cmap->N; j++) {
923         if (tmp2[j] > *nrm) *nrm = tmp2[j];
924       }
925       ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr);
926       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
927     } else if (type == NORM_INFINITY) { /* max row norm */
928       PetscReal ntemp;
929       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
930       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
931     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 #undef __FUNCT__
937 #define __FUNCT__ "MatTranspose_MPIDense"
938 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
939 {
940   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
941   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
942   Mat            B;
943   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
944   PetscErrorCode ierr;
945   PetscInt       j,i;
946   PetscScalar    *v;
947 
948   PetscFunctionBegin;
949   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
950   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
951     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
952     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
953     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
954     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
955   } else {
956     B = *matout;
957   }
958 
959   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
960   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
961   for (i=0; i<m; i++) rwork[i] = rstart + i;
962   for (j=0; j<n; j++) {
963     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
964     v   += m;
965   }
966   ierr = PetscFree(rwork);CHKERRQ(ierr);
967   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
969   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
970     *matout = B;
971   } else {
972     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
973   }
974   PetscFunctionReturn(0);
975 }
976 
977 
978 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
979 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatSetUp_MPIDense"
983 PetscErrorCode MatSetUp_MPIDense(Mat A)
984 {
985   PetscErrorCode ierr;
986 
987   PetscFunctionBegin;
988   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatAXPY_MPIDense"
994 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
995 {
996   PetscErrorCode ierr;
997   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
998 
999   PetscFunctionBegin;
1000   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1001   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatConjugate_MPIDense"
1007 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1008 {
1009   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1010   PetscErrorCode ierr;
1011 
1012   PetscFunctionBegin;
1013   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatRealPart_MPIDense"
1019 PetscErrorCode MatRealPart_MPIDense(Mat A)
1020 {
1021   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1022   PetscErrorCode ierr;
1023 
1024   PetscFunctionBegin;
1025   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1031 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1032 {
1033   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1034   PetscErrorCode ierr;
1035 
1036   PetscFunctionBegin;
1037   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1044 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1045 {
1046   PetscErrorCode ierr;
1047   PetscInt       i,n;
1048   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1049   PetscReal      *work;
1050 
1051   PetscFunctionBegin;
1052   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1053   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1054   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1055   if (type == NORM_2) {
1056     for (i=0; i<n; i++) work[i] *= work[i];
1057   }
1058   if (type == NORM_INFINITY) {
1059     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1060   } else {
1061     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1062   }
1063   ierr = PetscFree(work);CHKERRQ(ierr);
1064   if (type == NORM_2) {
1065     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatSetRandom_MPIDense"
1072 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1073 {
1074   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1075   PetscErrorCode ierr;
1076   PetscScalar    *a;
1077   PetscInt       m,n,i;
1078 
1079   PetscFunctionBegin;
1080   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1081   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1082   for (i=0; i<m*n; i++) {
1083     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1084   }
1085   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 /* -------------------------------------------------------------------*/
1090 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1091                                         MatGetRow_MPIDense,
1092                                         MatRestoreRow_MPIDense,
1093                                         MatMult_MPIDense,
1094                                 /*  4*/ MatMultAdd_MPIDense,
1095                                         MatMultTranspose_MPIDense,
1096                                         MatMultTransposeAdd_MPIDense,
1097                                         0,
1098                                         0,
1099                                         0,
1100                                 /* 10*/ 0,
1101                                         0,
1102                                         0,
1103                                         0,
1104                                         MatTranspose_MPIDense,
1105                                 /* 15*/ MatGetInfo_MPIDense,
1106                                         MatEqual_MPIDense,
1107                                         MatGetDiagonal_MPIDense,
1108                                         MatDiagonalScale_MPIDense,
1109                                         MatNorm_MPIDense,
1110                                 /* 20*/ MatAssemblyBegin_MPIDense,
1111                                         MatAssemblyEnd_MPIDense,
1112                                         MatSetOption_MPIDense,
1113                                         MatZeroEntries_MPIDense,
1114                                 /* 24*/ MatZeroRows_MPIDense,
1115                                         0,
1116                                         0,
1117                                         0,
1118                                         0,
1119                                 /* 29*/ MatSetUp_MPIDense,
1120                                         0,
1121                                         0,
1122                                         0,
1123                                         0,
1124                                 /* 34*/ MatDuplicate_MPIDense,
1125                                         0,
1126                                         0,
1127                                         0,
1128                                         0,
1129                                 /* 39*/ MatAXPY_MPIDense,
1130                                         MatGetSubMatrices_MPIDense,
1131                                         0,
1132                                         MatGetValues_MPIDense,
1133                                         0,
1134                                 /* 44*/ 0,
1135                                         MatScale_MPIDense,
1136                                         0,
1137                                         0,
1138                                         0,
1139                                 /* 49*/ MatSetRandom_MPIDense,
1140                                         0,
1141                                         0,
1142                                         0,
1143                                         0,
1144                                 /* 54*/ 0,
1145                                         0,
1146                                         0,
1147                                         0,
1148                                         0,
1149                                 /* 59*/ MatGetSubMatrix_MPIDense,
1150                                         MatDestroy_MPIDense,
1151                                         MatView_MPIDense,
1152                                         0,
1153                                         0,
1154                                 /* 64*/ 0,
1155                                         0,
1156                                         0,
1157                                         0,
1158                                         0,
1159                                 /* 69*/ 0,
1160                                         0,
1161                                         0,
1162                                         0,
1163                                         0,
1164                                 /* 74*/ 0,
1165                                         0,
1166                                         0,
1167                                         0,
1168                                         0,
1169                                 /* 79*/ 0,
1170                                         0,
1171                                         0,
1172                                         0,
1173                                 /* 83*/ MatLoad_MPIDense,
1174                                         0,
1175                                         0,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                 /* 89*/
1180                                         0,
1181                                         0,
1182                                         0,
1183                                         0,
1184                                         0,
1185                                 /* 94*/ 0,
1186                                         0,
1187                                         0,
1188                                         0,
1189                                         0,
1190                                 /* 99*/ 0,
1191                                         0,
1192                                         0,
1193                                         MatConjugate_MPIDense,
1194                                         0,
1195                                 /*104*/ 0,
1196                                         MatRealPart_MPIDense,
1197                                         MatImaginaryPart_MPIDense,
1198                                         0,
1199                                         0,
1200                                 /*109*/ 0,
1201                                         0,
1202                                         0,
1203                                         0,
1204                                         0,
1205                                 /*114*/ 0,
1206                                         0,
1207                                         0,
1208                                         0,
1209                                         0,
1210                                 /*119*/ 0,
1211                                         0,
1212                                         0,
1213                                         0,
1214                                         0,
1215                                 /*124*/ 0,
1216                                         MatGetColumnNorms_MPIDense,
1217                                         0,
1218                                         0,
1219                                         0,
1220                                 /*129*/ 0,
1221                                         0,
1222                                         0,
1223                                         0,
1224                                         0,
1225                                 /*134*/ 0,
1226                                         0,
1227                                         0,
1228                                         0,
1229                                         0,
1230                                 /*139*/ 0,
1231                                         0,
1232                                         0
1233 };
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1237 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1238 {
1239   Mat_MPIDense   *a;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   mat->preallocated = PETSC_TRUE;
1244   /* Note:  For now, when data is specified above, this assumes the user correctly
1245    allocates the local dense storage space.  We should add error checking. */
1246 
1247   a       = (Mat_MPIDense*)mat->data;
1248   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1249   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1250   a->nvec = mat->cmap->n;
1251 
1252   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1253   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1254   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1255   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1256   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatCreate_MPIDense"
1262 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1263 {
1264   Mat_MPIDense   *a;
1265   PetscErrorCode ierr;
1266 
1267   PetscFunctionBegin;
1268   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1269   mat->data = (void*)a;
1270   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1271 
1272   mat->insertmode = NOT_SET_VALUES;
1273   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1274   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1275 
1276   /* build cache for off array entries formed */
1277   a->donotstash = PETSC_FALSE;
1278 
1279   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1280 
1281   /* stuff used for matrix vector multiply */
1282   a->lvec        = 0;
1283   a->Mvctx       = 0;
1284   a->roworiented = PETSC_TRUE;
1285 
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1288 
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 /*MC
1303    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1304 
1305    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1306    and MATMPIDENSE otherwise.
1307 
1308    Options Database Keys:
1309 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1310 
1311   Level: beginner
1312 
1313 
1314 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1315 M*/
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1319 /*@C
1320    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1321 
1322    Not collective
1323 
1324    Input Parameters:
1325 .  A - the matrix
1326 -  data - optional location of matrix data.  Set data=NULL for PETSc
1327    to control all matrix memory allocation.
1328 
1329    Notes:
1330    The dense format is fully compatible with standard Fortran 77
1331    storage by columns.
1332 
1333    The data input variable is intended primarily for Fortran programmers
1334    who wish to allocate their own matrix memory space.  Most users should
1335    set data=NULL.
1336 
1337    Level: intermediate
1338 
1339 .keywords: matrix,dense, parallel
1340 
1341 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1342 @*/
1343 PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1344 {
1345   PetscErrorCode ierr;
1346 
1347   PetscFunctionBegin;
1348   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr);
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatCreateDense"
1354 /*@C
1355    MatCreateDense - Creates a parallel matrix in dense format.
1356 
1357    Collective on MPI_Comm
1358 
1359    Input Parameters:
1360 +  comm - MPI communicator
1361 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1362 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1363 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1364 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1365 -  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
1366    to control all matrix memory allocation.
1367 
1368    Output Parameter:
1369 .  A - the matrix
1370 
1371    Notes:
1372    The dense format is fully compatible with standard Fortran 77
1373    storage by columns.
1374 
1375    The data input variable is intended primarily for Fortran programmers
1376    who wish to allocate their own matrix memory space.  Most users should
1377    set data=NULL (NULL_SCALAR for Fortran users).
1378 
1379    The user MUST specify either the local or global matrix dimensions
1380    (possibly both).
1381 
1382    Level: intermediate
1383 
1384 .keywords: matrix,dense, parallel
1385 
1386 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1387 @*/
1388 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1389 {
1390   PetscErrorCode ierr;
1391   PetscMPIInt    size;
1392 
1393   PetscFunctionBegin;
1394   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1395   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1396   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1397   if (size > 1) {
1398     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1399     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1400   } else {
1401     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1402     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatDuplicate_MPIDense"
1409 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1410 {
1411   Mat            mat;
1412   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416   *newmat = 0;
1417   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1418   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1419   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1420   a       = (Mat_MPIDense*)mat->data;
1421   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1422 
1423   mat->factortype   = A->factortype;
1424   mat->assembled    = PETSC_TRUE;
1425   mat->preallocated = PETSC_TRUE;
1426 
1427   a->size         = oldmat->size;
1428   a->rank         = oldmat->rank;
1429   mat->insertmode = NOT_SET_VALUES;
1430   a->nvec         = oldmat->nvec;
1431   a->donotstash   = oldmat->donotstash;
1432 
1433   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1434   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1435 
1436   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1437   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1438   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1439 
1440   *newmat = mat;
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1446 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1447 {
1448   PetscErrorCode ierr;
1449   PetscMPIInt    rank,size;
1450   PetscInt       *rowners,i,m,nz,j;
1451   PetscScalar    *array,*vals,*vals_ptr;
1452 
1453   PetscFunctionBegin;
1454   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1455   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1456 
1457   /* determine ownership of all rows */
1458   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1459   else m = newmat->rmap->n;
1460   ierr       = PetscMalloc1((size+2),&rowners);CHKERRQ(ierr);
1461   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1462   rowners[0] = 0;
1463   for (i=2; i<=size; i++) {
1464     rowners[i] += rowners[i-1];
1465   }
1466 
1467   if (!sizesset) {
1468     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1469   }
1470   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1471   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1472 
1473   if (!rank) {
1474     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1475 
1476     /* read in my part of the matrix numerical values  */
1477     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1478 
1479     /* insert into matrix-by row (this is why cannot directly read into array */
1480     vals_ptr = vals;
1481     for (i=0; i<m; i++) {
1482       for (j=0; j<N; j++) {
1483         array[i + j*m] = *vals_ptr++;
1484       }
1485     }
1486 
1487     /* read in other processors and ship out */
1488     for (i=1; i<size; i++) {
1489       nz   = (rowners[i+1] - rowners[i])*N;
1490       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1491       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1492     }
1493   } else {
1494     /* receive numeric values */
1495     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1496 
1497     /* receive message of values*/
1498     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1499 
1500     /* insert into matrix-by row (this is why cannot directly read into array */
1501     vals_ptr = vals;
1502     for (i=0; i<m; i++) {
1503       for (j=0; j<N; j++) {
1504         array[i + j*m] = *vals_ptr++;
1505       }
1506     }
1507   }
1508   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1509   ierr = PetscFree(rowners);CHKERRQ(ierr);
1510   ierr = PetscFree(vals);CHKERRQ(ierr);
1511   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1512   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatLoad_MPIDense"
1518 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1519 {
1520   PetscScalar    *vals,*svals;
1521   MPI_Comm       comm;
1522   MPI_Status     status;
1523   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1524   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1525   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1526   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1527   int            fd;
1528   PetscErrorCode ierr;
1529 
1530   PetscFunctionBegin;
1531   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1532   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1533   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1534   if (!rank) {
1535     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1536     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1537     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1538   }
1539   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1540 
1541   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1542   M    = header[1]; N = header[2]; nz = header[3];
1543 
1544   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1545   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1546   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1547 
1548   /* If global sizes are set, check if they are consistent with that given in the file */
1549   if (sizesset) {
1550     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1551   }
1552   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
1553   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
1554 
1555   /*
1556        Handle case where matrix is stored on disk as a dense matrix
1557   */
1558   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1559     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1560     PetscFunctionReturn(0);
1561   }
1562 
1563   /* determine ownership of all rows */
1564   if (newmat->rmap->n < 0) {
1565     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1566   } else {
1567     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1568   }
1569   ierr       = PetscMalloc1((size+2),&rowners);CHKERRQ(ierr);
1570   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1571   rowners[0] = 0;
1572   for (i=2; i<=size; i++) {
1573     rowners[i] += rowners[i-1];
1574   }
1575   rstart = rowners[rank];
1576   rend   = rowners[rank+1];
1577 
1578   /* distribute row lengths to all processors */
1579   ierr = PetscMalloc2(rend-rstart,&ourlens,rend-rstart,&offlens);CHKERRQ(ierr);
1580   if (!rank) {
1581     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1582     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1583     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1584     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1585     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1586     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1587   } else {
1588     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1589   }
1590 
1591   if (!rank) {
1592     /* calculate the number of nonzeros on each processor */
1593     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1594     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1595     for (i=0; i<size; i++) {
1596       for (j=rowners[i]; j< rowners[i+1]; j++) {
1597         procsnz[i] += rowlengths[j];
1598       }
1599     }
1600     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1601 
1602     /* determine max buffer needed and allocate it */
1603     maxnz = 0;
1604     for (i=0; i<size; i++) {
1605       maxnz = PetscMax(maxnz,procsnz[i]);
1606     }
1607     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1608 
1609     /* read in my part of the matrix column indices  */
1610     nz   = procsnz[0];
1611     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1612     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1613 
1614     /* read in every one elses and ship off */
1615     for (i=1; i<size; i++) {
1616       nz   = procsnz[i];
1617       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1618       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1619     }
1620     ierr = PetscFree(cols);CHKERRQ(ierr);
1621   } else {
1622     /* determine buffer space needed for message */
1623     nz = 0;
1624     for (i=0; i<m; i++) {
1625       nz += ourlens[i];
1626     }
1627     ierr = PetscMalloc1((nz+1),&mycols);CHKERRQ(ierr);
1628 
1629     /* receive message of column indices*/
1630     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1631     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1632     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1633   }
1634 
1635   /* loop over local rows, determining number of off diagonal entries */
1636   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1637   jj   = 0;
1638   for (i=0; i<m; i++) {
1639     for (j=0; j<ourlens[i]; j++) {
1640       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1641       jj++;
1642     }
1643   }
1644 
1645   /* create our matrix */
1646   for (i=0; i<m; i++) ourlens[i] -= offlens[i];
1647 
1648   if (!sizesset) {
1649     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1650   }
1651   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1652   for (i=0; i<m; i++) ourlens[i] += offlens[i];
1653 
1654   if (!rank) {
1655     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1656 
1657     /* read in my part of the matrix numerical values  */
1658     nz   = procsnz[0];
1659     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1660 
1661     /* insert into matrix */
1662     jj      = rstart;
1663     smycols = mycols;
1664     svals   = vals;
1665     for (i=0; i<m; i++) {
1666       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1667       smycols += ourlens[i];
1668       svals   += ourlens[i];
1669       jj++;
1670     }
1671 
1672     /* read in other processors and ship out */
1673     for (i=1; i<size; i++) {
1674       nz   = procsnz[i];
1675       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1676       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1677     }
1678     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1679   } else {
1680     /* receive numeric values */
1681     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
1682 
1683     /* receive message of values*/
1684     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1685     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1686     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1687 
1688     /* insert into matrix */
1689     jj      = rstart;
1690     smycols = mycols;
1691     svals   = vals;
1692     for (i=0; i<m; i++) {
1693       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1694       smycols += ourlens[i];
1695       svals   += ourlens[i];
1696       jj++;
1697     }
1698   }
1699   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
1700   ierr = PetscFree(vals);CHKERRQ(ierr);
1701   ierr = PetscFree(mycols);CHKERRQ(ierr);
1702   ierr = PetscFree(rowners);CHKERRQ(ierr);
1703 
1704   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1705   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatEqual_MPIDense"
1711 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1712 {
1713   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1714   Mat            a,b;
1715   PetscBool      flg;
1716   PetscErrorCode ierr;
1717 
1718   PetscFunctionBegin;
1719   a    = matA->A;
1720   b    = matB->A;
1721   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1722   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
1725 
1726