xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 5b2fa52080009d719f1c80545e2867c4a36b8e55)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpidense.c,v 1.120 1999/07/27 22:18:47 curfman Exp curfman $";
3 #endif
4 
5 /*
6    Basic functions for parallel dense matrices.
7 */
8 
9 #include "src/mat/impls/dense/mpi/mpidense.h"
10 #include "src/vec/vecimpl.h"
11 
12 extern int MatSetUpMultiply_MPIDense(Mat);
13 
14 #undef __FUNC__
15 #define __FUNC__ "MatSetValues_MPIDense"
16 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
17 {
18   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
19   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
20   int          roworiented = A->roworiented;
21 
22   PetscFunctionBegin;
23   for ( i=0; i<m; i++ ) {
24     if (idxm[i] < 0) continue;
25     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
26     if (idxm[i] >= rstart && idxm[i] < rend) {
27       row = idxm[i] - rstart;
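      /* When roworiented is set, v is an m x n block stored by rows, so row i
         starts at v+i*n; otherwise v is stored by columns (leading dimension m)
         and entry (i,j) is at v[i+j*m] */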
28       if (roworiented) {
29         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
30       } else {
31         for ( j=0; j<n; j++ ) {
32           if (idxn[j] < 0) continue;
33           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
34           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
35         }
36       }
37     } else {
38       if ( !A->donotstash) {
39         if (roworiented) {
40           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
41         } else {
42           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
43         }
44       }
45     }
46   }
47   PetscFunctionReturn(0);
48 }
49 
50 #undef __FUNC__
51 #define __FUNC__ "MatGetValues_MPIDense"
52 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
53 {
54   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
55   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
56 
57   PetscFunctionBegin;
58   for ( i=0; i<m; i++ ) {
59     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
60     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
61     if (idxm[i] >= rstart && idxm[i] < rend) {
62       row = idxm[i] - rstart;
63       for ( j=0; j<n; j++ ) {
64         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
65         if (idxn[j] >= mdn->N) {
66           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
67         }
68         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
69       }
70     } else {
71       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
72     }
73   }
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNC__
78 #define __FUNC__ "MatGetArray_MPIDense"
79 int MatGetArray_MPIDense(Mat A,Scalar **array)
80 {
81   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
82   int          ierr;
83 
84   PetscFunctionBegin;
85   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
86   PetscFunctionReturn(0);
87 }
88 
89 #undef __FUNC__
90 #define __FUNC__ "MatGetSubMatrix_MPIDense"
91 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
92 {
93   Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
94   Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
95   int          i, j, ierr, *irow, *icol, n_lrows, n_lcols;
96   int          rstart, rend, nrows, ncols, nlrows, nlcols, rank, ncols_all;
97   Scalar       *av, *bv, *v = lmat->v;
98   Mat          newmat;
99 
100   PetscFunctionBegin;
101   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
102   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
103   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
104   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
105   ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);
106 
107   /* No parallel redistribution currently supported! Should really check each index set
108      to confirm that it is OK.  Currently supports only submatrices with the same partitioning
109      as the original matrix! */
110 
111   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
112   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
113 
114   /* Check submatrix call */
115   if (scall == MAT_REUSE_MATRIX) {
116     /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
117     /* Really need to test rows and column sizes! */
118     newmat = *B;
119   } else {
120     /* Create and fill new matrix */
121     ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
122   }
123 
124   /* Now extract the data pointers and do the copy, column at a time */
125   newmatd = (Mat_MPIDense *) newmat->data;
126   bv = ((Mat_SeqDense *)newmatd->A->data)->v;
127 
128   for ( i=0; i<ncols; i++ ) {
129     av = v + nlrows*icol[i];
130     for (j=0; j<nrows; j++ ) {
131       *bv++ = av[irow[j] - rstart];
132     }
133   }
134 
135   /* Assemble the matrices so that the correct flags are set */
136   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
137   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
138 
139   /* Free work space */
140   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
141   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
142   *B = newmat;
143   PetscFunctionReturn(0);
144 }
145 
146 #undef __FUNC__
147 #define __FUNC__ "MatRestoreArray_MPIDense"
148 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
149 {
150   PetscFunctionBegin;
151   PetscFunctionReturn(0);
152 }
153 
154 #undef __FUNC__
155 #define __FUNC__ "MatAssemblyBegin_MPIDense"
156 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
157 {
158   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
159   MPI_Comm     comm = mat->comm;
160   int          ierr,nstash,reallocs;
161   InsertMode   addv;
162 
163   PetscFunctionBegin;
164   /* make sure all processors are either in INSERTMODE or ADDMODE */
165   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
166   if (addv == (ADD_VALUES|INSERT_VALUES)) {
167     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
168   }
169   mat->insertmode = addv; /* in case this processor had no cache */
170 
171   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
172   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
173   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
174            nstash,reallocs);
175   PetscFunctionReturn(0);
176 }
177 
178 #undef __FUNC__
179 #define __FUNC__ "MatAssemblyEnd_MPIDense"
180 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
181 {
182   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
183   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
184   Scalar       *val;
185   InsertMode   addv=mat->insertmode;
186 
187   PetscFunctionBegin;
188   /*  wait on receives */
189   while (1) {
190     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
191     if (!flg) break;
192 
193     for ( i=0; i<n; ) {
194       /* Now identify the consecutive vals belonging to the same row */
195       for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
196       if (j < n) ncols = j-i;
197       else       ncols = n-i;
198       /* Now assemble all these values with a single function call */
199       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
200       i = j;
201     }
202   }
203   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
204 
205   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
206   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
207 
208   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
209     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
210   }
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNC__
215 #define __FUNC__ "MatZeroEntries_MPIDense"
216 int MatZeroEntries_MPIDense(Mat A)
217 {
218   int          ierr;
219   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
220 
221   PetscFunctionBegin;
222   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
223   PetscFunctionReturn(0);
224 }
225 
226 #undef __FUNC__
227 #define __FUNC__ "MatGetBlockSize_MPIDense"
228 int MatGetBlockSize_MPIDense(Mat A,int *bs)
229 {
230   PetscFunctionBegin;
231   *bs = 1;
232   PetscFunctionReturn(0);
233 }
234 
235 /* the code does not handle the diagonal entries correctly unless the
236    matrix is square and the column and row ownerships are identical.
237    This is a BUG. The only way to fix it seems to be to access
238    mdn->A and mdn->B directly and not through the MatZeroRows()
239    routine.
240 */
241 #undef __FUNC__
242 #define __FUNC__ "MatZeroRows_MPIDense"
243 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
244 {
245   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
246   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
247   int            *procs,*nprocs,j,found,idx,nsends,*work;
248   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
249   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
250   int            *lens,imdex,*lrows,*values;
251   MPI_Comm       comm = A->comm;
252   MPI_Request    *send_waits,*recv_waits;
253   MPI_Status     recv_status,*send_status;
254   IS             istmp;
255 
256   PetscFunctionBegin;
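  /*
     Overview of the communication below: each process determines the owner of
     every row it wants zeroed, the processes exchange message counts with two
     MPI_Allreduce calls, the row indices are shipped to their owners with
     nonblocking sends/receives, and finally each process zeros the rows it
     owns by calling MatZeroRows() on its local sequential matrix.
  */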
257   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
258   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
259 
260   /*  first count number of contributors to each processor */
261   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
262   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
263   procs  = nprocs + size;
264   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
265   for ( i=0; i<N; i++ ) {
266     idx = rows[i];
267     found = 0;
268     for ( j=0; j<size; j++ ) {
269       if (idx >= owners[j] && idx < owners[j+1]) {
270         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
271       }
272     }
273     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
274   }
275   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
276 
277   /* inform other processors of number of messages and max length*/
278   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
279   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
280   nrecvs = work[rank];
281   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
282   nmax   = work[rank];
283   ierr = PetscFree(work);CHKERRQ(ierr);
284 
285   /* post receives:   */
286   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
287   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
288   for ( i=0; i<nrecvs; i++ ) {
289     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
290   }
291 
292   /* do sends:
293       1) starts[i] gives the starting index in svalues for stuff going to
294          the ith processor
295   */
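  /* For example, with size=3 and nprocs = {2,0,3}, the prefix sums give
     starts = {0,2,2}: the two rows owned by processor 0 are packed into
     svalues[0..1] and the three rows owned by processor 2 into svalues[2..4] */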
296   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
297   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
298   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
299   starts[0]  = 0;
300   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
301   for ( i=0; i<N; i++ ) {
302     svalues[starts[owner[i]]++] = rows[i];
303   }
304   ISRestoreIndices(is,&rows);
305 
306   starts[0] = 0;
307   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
308   count = 0;
309   for ( i=0; i<size; i++ ) {
310     if (procs[i]) {
311       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
312     }
313   }
314   ierr = PetscFree(starts);CHKERRQ(ierr);
315 
316   base = owners[rank];
317 
318   /*  wait on receives */
319   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
320   source = lens + nrecvs;
321   count  = nrecvs; slen = 0;
322   while (count) {
323     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
324     /* unpack receives into our local space */
325     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
326     source[imdex]  = recv_status.MPI_SOURCE;
327     lens[imdex]  = n;
328     slen += n;
329     count--;
330   }
331   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
332 
333   /* move the data into the send scatter */
334   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
335   count = 0;
336   for ( i=0; i<nrecvs; i++ ) {
337     values = rvalues + i*nmax;
338     for ( j=0; j<lens[i]; j++ ) {
339       lrows[count++] = values[j] - base;
340     }
341   }
342   ierr = PetscFree(rvalues);CHKERRQ(ierr);
343   ierr = PetscFree(lens);CHKERRQ(ierr);
344   ierr = PetscFree(owner);CHKERRQ(ierr);
345   ierr = PetscFree(nprocs);CHKERRQ(ierr);
346 
347   /* actually zap the local rows */
348   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
349   PLogObjectParent(A,istmp);
350   ierr = PetscFree(lrows);CHKERRQ(ierr);
351   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
352   ierr = ISDestroy(istmp);CHKERRQ(ierr);
353 
354   /* wait on sends */
355   if (nsends) {
356     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
357     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
358     ierr = PetscFree(send_status);CHKERRQ(ierr);
359   }
360   ierr = PetscFree(send_waits);CHKERRQ(ierr);
361   ierr = PetscFree(svalues);CHKERRQ(ierr);
362 
363   PetscFunctionReturn(0);
364 }
365 
366 #undef __FUNC__
367 #define __FUNC__ "MatMult_MPIDense"
368 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
369 {
370   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
371   int          ierr;
372 
373   PetscFunctionBegin;
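  /* Each process stores an m x N dense block, so the parallel multiply first
     gathers the complete input vector into the local work vector lvec and then
     applies the sequential dense kernel to produce the local part of yy */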
374   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
375   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
376   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
377   PetscFunctionReturn(0);
378 }
379 
380 #undef __FUNC__
381 #define __FUNC__ "MatMultAdd_MPIDense"
382 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
383 {
384   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
385   int          ierr;
386 
387   PetscFunctionBegin;
388   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
389   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
390   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
391   PetscFunctionReturn(0);
392 }
393 
394 #undef __FUNC__
395 #define __FUNC__ "MatMultTrans_MPIDense"
396 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
397 {
398   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
399   int          ierr;
400   Scalar       zero = 0.0;
401 
402   PetscFunctionBegin;
403   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
404   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
405   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
406   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 
410 #undef __FUNC__
411 #define __FUNC__ "MatMultTransAdd_MPIDense"
412 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
413 {
414   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
415   int          ierr;
416 
417   PetscFunctionBegin;
418   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
419   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
420   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
421   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNC__
426 #define __FUNC__ "MatGetDiagonal_MPIDense"
427 int MatGetDiagonal_MPIDense(Mat A,Vec v)
428 {
429   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
430   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
431   int          ierr, len, i, n, m = a->m, radd;
432   Scalar       *x, zero = 0.0;
433 
434   PetscFunctionBegin;
435   VecSet(&zero,v);
436   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
437   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
438   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
439   len = PetscMin(aloc->m,aloc->n);
440   radd = a->rstart*m;
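  /* The local array is column-major with leading dimension m, so the i-th local
     diagonal entry (global row and column rstart+i) lives at
     v[i + (rstart+i)*m] = v[radd + i*m + i] */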
441   for ( i=0; i<len; i++ ) {
442     x[i] = aloc->v[radd + i*m + i];
443   }
444   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNC__
449 #define __FUNC__ "MatDestroy_MPIDense"
450 int MatDestroy_MPIDense(Mat mat)
451 {
452   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
453   int          ierr;
454 
455   PetscFunctionBegin;
456 
457   if (mat->mapping) {
458     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
459   }
460   if (mat->bmapping) {
461     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
462   }
463 #if defined(PETSC_USE_LOG)
464   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
465 #endif
466   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
467   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
468   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
469   if (mdn->lvec)   VecDestroy(mdn->lvec);
470   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
471   if (mdn->factor) {
472     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
473     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
474     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
475     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
476   }
477   ierr = PetscFree(mdn);CHKERRQ(ierr);
478   if (mat->rmap) {
479     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
480   }
481   if (mat->cmap) {
482     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
483   }
484   PLogObjectDestroy(mat);
485   PetscHeaderDestroy(mat);
486   PetscFunctionReturn(0);
487 }
488 
489 #undef __FUNC__
490 #define __FUNC__ "MatView_MPIDense_Binary"
491 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
492 {
493   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
494   int          ierr;
495 
496   PetscFunctionBegin;
497   if (mdn->size == 1) {
498     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
499   }
500   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
501   PetscFunctionReturn(0);
502 }
503 
504 #undef __FUNC__
505 #define __FUNC__ "MatView_MPIDense_ASCII"
506 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
507 {
508   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
509   int          ierr, format, size = mdn->size, rank = mdn->rank;
510   FILE         *fd;
511   ViewerType   vtype;
512 
513   PetscFunctionBegin;
514   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
515   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
516   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
517   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
518     MatInfo info;
519     ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
520     PetscSequentialPhaseBegin(mat->comm,1);
521       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
522          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
523       fflush(fd);
524     PetscSequentialPhaseEnd(mat->comm,1);
525     ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
526     PetscFunctionReturn(0);
527   } else if (format == VIEWER_FORMAT_ASCII_INFO) {
528     PetscFunctionReturn(0);
529   }
530 
531   if (size == 1) {
532     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
533   } else {
534     /* assemble the entire matrix onto first processor. */
535     Mat          A;
536     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
537     Scalar       *vals;
538     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
539 
540     if (!rank) {
541       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
542     } else {
543       ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
544     }
545     PLogObjectParent(mat,A);
546 
547     /* Copy the matrix ... This isn't the most efficient means,
548        but it's quick for now */
549     row = mdn->rstart; m = Amdn->m;
550     for ( i=0; i<m; i++ ) {
551       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
552       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
553       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
554       row++;
555     }
556 
557     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
559     if (!rank) {
560       ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
561     }
562     ierr = MatDestroy(A);CHKERRQ(ierr);
563   }
564   PetscFunctionReturn(0);
565 }
566 
567 #undef __FUNC__
568 #define __FUNC__ "MatView_MPIDense"
569 int MatView_MPIDense(Mat mat,Viewer viewer)
570 {
571   int          ierr;
572   ViewerType   vtype;
573 
      PetscFunctionBegin;
574   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
575   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
576     ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
577   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
578     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
579   } else {
580     SETERRQ(1,1,"Viewer type not supported by PETSc object");
581   }
582   PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNC__
586 #define __FUNC__ "MatGetInfo_MPIDense"
587 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
588 {
589   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
590   Mat          mdn = mat->A;
591   int          ierr;
592   double       isend[5], irecv[5];
593 
594   PetscFunctionBegin;
595   info->rows_global    = (double)mat->M;
596   info->columns_global = (double)mat->N;
597   info->rows_local     = (double)mat->m;
598   info->columns_local  = (double)mat->N;
599   info->block_size     = 1.0;
600   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
601   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
602   isend[3] = info->memory;  isend[4] = info->mallocs;
603   if (flag == MAT_LOCAL) {
604     info->nz_used      = isend[0];
605     info->nz_allocated = isend[1];
606     info->nz_unneeded  = isend[2];
607     info->memory       = isend[3];
608     info->mallocs      = isend[4];
609   } else if (flag == MAT_GLOBAL_MAX) {
610     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
611     info->nz_used      = irecv[0];
612     info->nz_allocated = irecv[1];
613     info->nz_unneeded  = irecv[2];
614     info->memory       = irecv[3];
615     info->mallocs      = irecv[4];
616   } else if (flag == MAT_GLOBAL_SUM) {
617     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
618     info->nz_used      = irecv[0];
619     info->nz_allocated = irecv[1];
620     info->nz_unneeded  = irecv[2];
621     info->memory       = irecv[3];
622     info->mallocs      = irecv[4];
623   }
624   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
625   info->fill_ratio_needed = 0;
626   info->factor_mallocs    = 0;
627   PetscFunctionReturn(0);
628 }
629 
630 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
631    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
632    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
633    extern int MatSolve_MPIDense(Mat,Vec,Vec);
634    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
635    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
636    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
637 
638 #undef __FUNC__
639 #define __FUNC__ "MatSetOption_MPIDense"
640 int MatSetOption_MPIDense(Mat A,MatOption op)
641 {
642   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
643 
644   PetscFunctionBegin;
645   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
646       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
647       op == MAT_NEW_NONZERO_LOCATION_ERR ||
648       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
649       op == MAT_COLUMNS_SORTED ||
650       op == MAT_COLUMNS_UNSORTED) {
651         MatSetOption(a->A,op);
652   } else if (op == MAT_ROW_ORIENTED) {
653         a->roworiented = 1;
654         MatSetOption(a->A,op);
655   } else if (op == MAT_ROWS_SORTED ||
656              op == MAT_ROWS_UNSORTED ||
657              op == MAT_SYMMETRIC ||
658              op == MAT_STRUCTURALLY_SYMMETRIC ||
659              op == MAT_YES_NEW_DIAGONALS ||
660              op == MAT_USE_HASH_TABLE) {
661     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
662   } else if (op == MAT_COLUMN_ORIENTED) {
663     a->roworiented = 0; MatSetOption(a->A,op);
664   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
665     a->donotstash = 1;
666   } else if (op == MAT_NO_NEW_DIAGONALS) {
667     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
668   } else {
669     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
670   }
671   PetscFunctionReturn(0);
672 }
673 
674 #undef __FUNC__
675 #define __FUNC__ "MatGetSize_MPIDense"
676 int MatGetSize_MPIDense(Mat A,int *m,int *n)
677 {
678   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
679 
680   PetscFunctionBegin;
681   *m = mat->M; *n = mat->N;
682   PetscFunctionReturn(0);
683 }
684 
685 #undef __FUNC__
686 #define __FUNC__ "MatGetLocalSize_MPIDense"
687 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
688 {
689   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
690 
691   PetscFunctionBegin;
692   *m = mat->m; *n = mat->N;
693   PetscFunctionReturn(0);
694 }
695 
696 #undef __FUNC__
697 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
698 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
699 {
700   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
701 
702   PetscFunctionBegin;
703   *m = mat->rstart; *n = mat->rend;
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNC__
708 #define __FUNC__ "MatGetRow_MPIDense"
709 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
710 {
711   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
712   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
713 
714   PetscFunctionBegin;
715   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
716   lrow = row - rstart;
717   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNC__
722 #define __FUNC__ "MatRestoreRow_MPIDense"
723 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
724 {
725   int ierr;
726 
727   PetscFunctionBegin;
728   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
729   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
730   PetscFunctionReturn(0);
731 }
732 
733 #undef __FUNC__
734 #define __FUNC__ "MatDiagonalScale_MPIDense"
735 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
736 {
737   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
738   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
739   Scalar       *l,*r,x,*v;
740   int          ierr,i,j,m = mat->m, n = mat->n;
741 
742   PetscFunctionBegin;
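  /* The local array is column-major (leading dimension m): scaling row i by
     l[i] walks the array with stride m, while scaling column i by r[i] touches
     a contiguous block of m entries */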
743   if (ll) {
744     ierr = VecGetSize(ll,&m);CHKERRQ(ierr);
745     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
746     if (m != mat->m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vec wrong size");
747     for ( i=0; i<m; i++ ) {
748       x = l[i];
749       v = mat->v + i;
750       for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;}
751     }
752     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
753     PLogFlops(n*m);
754   }
755   if (rr) {
756     ierr = VecGetSize(rr,&n);CHKERRQ(ierr);
757     if (n != mat->n) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec wrong size");
758     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
759     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
760     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
761     for ( i=0; i<n; i++ ) {
762       x = r[i];
763       v = mat->v + i*m;
764       for ( j=0; j<m; j++ ) { (*v++) *= x;}
765     }
766     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
767     PLogFlops(n*m);
768   }
769   PetscFunctionReturn(0);
770 }
771 
772 #undef __FUNC__
773 #define __FUNC__ "MatNorm_MPIDense"
774 int MatNorm_MPIDense(Mat A,NormType type,double *norm)
775 {
776   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
777   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
778   int          ierr, i, j;
779   double       sum = 0.0;
780   Scalar       *v = mat->v;
781 
782   PetscFunctionBegin;
783   if (mdn->size == 1) {
784     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
785   } else {
786     if (type == NORM_FROBENIUS) {
787       for (i=0; i<mat->n*mat->m; i++ ) {
788 #if defined(PETSC_USE_COMPLEX)
789         sum += PetscReal(PetscConj(*v)*(*v)); v++;
790 #else
791         sum += (*v)*(*v); v++;
792 #endif
793       }
794       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
795       *norm = sqrt(*norm);
796       PLogFlops(2*mat->n*mat->m);
797     } else if (type == NORM_1) {
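      /* one norm = maximum column sum: accumulate the local column sums,
         add them across processes, then take the largest */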
798       double *tmp, *tmp2;
799       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
800       tmp2 = tmp + mdn->N;
801       ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
802       *norm = 0.0;
803       v = mat->v;
804       for ( j=0; j<mat->n; j++ ) {
805         for ( i=0; i<mat->m; i++ ) {
806           tmp[j] += PetscAbsScalar(*v);  v++;
807         }
808       }
809       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
810       for ( j=0; j<mdn->N; j++ ) {
811         if (tmp2[j] > *norm) *norm = tmp2[j];
812       }
813       ierr = PetscFree(tmp);CHKERRQ(ierr);
814       PLogFlops(mat->n*mat->m);
815     } else if (type == NORM_INFINITY) { /* max row norm */
816       double ntemp;
817       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
818       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
819     } else {
820       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
821     }
822   }
823   PetscFunctionReturn(0);
824 }
825 
826 #undef __FUNC__
827 #define __FUNC__ "MatTranspose_MPIDense"
828 int MatTranspose_MPIDense(Mat A,Mat *matout)
829 {
830   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
831   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
832   Mat          B;
833   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
834   int          j, i, ierr;
835   Scalar       *v;
836 
837   PetscFunctionBegin;
838   if (matout == PETSC_NULL && M != N) {
839     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
840   }
841   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
842 
843   m = Aloc->m; n = Aloc->n; v = Aloc->v;
844   rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork); /* one slot per local row of A */
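  /* Column j of the local column-major block of A (length m, starting at v)
     supplies the entries of global row j of B in columns rstart..rstart+m-1;
     rows of B owned by other processes are handled by the stash during assembly */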
845   for ( j=0; j<n; j++ ) {
846     for (i=0; i<m; i++) rwork[i] = rstart + i;
847     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
848     v   += m;
849   }
850   ierr = PetscFree(rwork);CHKERRQ(ierr);
851   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
852   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853   if (matout != PETSC_NULL) {
854     *matout = B;
855   } else {
856     PetscOps *Abops;
857     MatOps   Aops;
858 
859     /* This isn't really an in-place transpose; we just free the data structures from a */
860     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
861     ierr = MatDestroy(a->A);CHKERRQ(ierr);
862     if (a->lvec) VecDestroy(a->lvec);
863     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
864     ierr = PetscFree(a);CHKERRQ(ierr);
865 
866     /*
867          This is horrible, horrible code. We need to keep the
868       A pointers for the bops and ops but copy everything
869       else from B.
870     */
871     Abops   = A->bops;
872     Aops    = A->ops;
873     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
874     A->bops = Abops;
875     A->ops  = Aops;
876 
877     PetscHeaderDestroy(B);
878   }
879   PetscFunctionReturn(0);
880 }
881 
882 #include "pinclude/blaslapack.h"
883 #undef __FUNC__
884 #define __FUNC__ "MatScale_MPIDense"
885 int MatScale_MPIDense(Scalar *alpha,Mat inA)
886 {
887   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
888   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
889   int          one = 1, nz;
890 
891   PetscFunctionBegin;
892   nz = a->m*a->n;
893   BLscal_( &nz, alpha, a->v, &one );
894   PLogFlops(nz);
895   PetscFunctionReturn(0);
896 }
897 
898 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
899 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
900 
901 /* -------------------------------------------------------------------*/
902 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
903        MatGetRow_MPIDense,
904        MatRestoreRow_MPIDense,
905        MatMult_MPIDense,
906        MatMultAdd_MPIDense,
907        MatMultTrans_MPIDense,
908        MatMultTransAdd_MPIDense,
909        0,
910        0,
911        0,
912        0,
913        0,
914        0,
915        0,
916        MatTranspose_MPIDense,
917        MatGetInfo_MPIDense,0,
918        MatGetDiagonal_MPIDense,
919        MatDiagonalScale_MPIDense,
920        MatNorm_MPIDense,
921        MatAssemblyBegin_MPIDense,
922        MatAssemblyEnd_MPIDense,
923        0,
924        MatSetOption_MPIDense,
925        MatZeroEntries_MPIDense,
926        MatZeroRows_MPIDense,
927        0,
928        0,
929        0,
930        0,
931        MatGetSize_MPIDense,
932        MatGetLocalSize_MPIDense,
933        MatGetOwnershipRange_MPIDense,
934        0,
935        0,
936        MatGetArray_MPIDense,
937        MatRestoreArray_MPIDense,
938        MatDuplicate_MPIDense,
939        0,
940        0,
941        0,
942        0,
943        0,
944        MatGetSubMatrices_MPIDense,
945        0,
946        MatGetValues_MPIDense,
947        0,
948        0,
949        MatScale_MPIDense,
950        0,
951        0,
952        0,
953        MatGetBlockSize_MPIDense,
954        0,
955        0,
956        0,
957        0,
958        0,
959        0,
960        0,
961        0,
962        0,
963        MatGetSubMatrix_MPIDense,
964        0,
965        0,
966        MatGetMaps_Petsc};
967 
968 #undef __FUNC__
969 #define __FUNC__ "MatCreateMPIDense"
970 /*@C
971    MatCreateMPIDense - Creates a parallel matrix in dense format.
972 
973    Collective on MPI_Comm
974 
975    Input Parameters:
976 +  comm - MPI communicator
977 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
978 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
979 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
980 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
981 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
982    to control all matrix memory allocation.
983 
984    Output Parameter:
985 .  A - the matrix
986 
987    Notes:
988    The dense format is fully compatible with standard Fortran 77
989    storage by columns.
990 
991    The data input variable is intended primarily for Fortran programmers
992    who wish to allocate their own matrix memory space.  Most users should
993    set data=PETSC_NULL.
994 
995    The user MUST specify either the local or global matrix dimensions
996    (possibly both).
997 
998    Currently, the only parallel dense matrix decomposition is by rows,
999    so that n=N and each submatrix owns all of the global columns.
1000 
1001    Level: intermediate
1002 
1003 .keywords: matrix, dense, parallel
1004 
1005 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1006 @*/
1007 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1008 {
1009   Mat          mat;
1010   Mat_MPIDense *a;
1011   int          ierr, i,flg;
1012 
1013   PetscFunctionBegin;
1014   /* Note:  For now, when data is specified above, this assumes the user correctly
1015    allocates the local dense storage space.  We should add error checking. */
1016 
1017   *A = 0;
1018   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
1019   PLogObjectCreate(mat);
1020   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1021   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1022   mat->ops->destroy = MatDestroy_MPIDense;
1023   mat->ops->view    = MatView_MPIDense;
1024   mat->factor       = 0;
1025   mat->mapping      = 0;
1026 
1027   a->factor       = 0;
1028   mat->insertmode = NOT_SET_VALUES;
1029   ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
1030   ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);
1031 
1032   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1033 
1034   /*
1035      The computation of n below is wrong; n should represent the number of local
1036      rows in the right-hand (column) vector
1037   */
1038 
1039   /* each row stores all columns */
1040   if (N == PETSC_DECIDE) N = n;
1041   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1042   /*  if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
1043   a->N = mat->N = N;
1044   a->M = mat->M = M;
1045   a->m = mat->m = m;
1046   a->n = mat->n = n;
1047 
1048   /* the information in the maps duplicates the information computed below, eventually
1049      we should remove the duplicate information that is not contained in the maps */
1050   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
1051   ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);
1052 
1053   /* build local table of row and column ownerships */
1054   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1055   a->cowners = a->rowners + a->size + 1;
1056   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1057   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1058   a->rowners[0] = 0;
1059   for ( i=2; i<=a->size; i++ ) {
1060     a->rowners[i] += a->rowners[i-1];
1061   }
1062   a->rstart = a->rowners[a->rank];
1063   a->rend   = a->rowners[a->rank+1];
1064   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1065   a->cowners[0] = 0;
1066   for ( i=2; i<=a->size; i++ ) {
1067     a->cowners[i] += a->cowners[i-1];
1068   }
1069 
1070   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
1071   PLogObjectParent(mat,a->A);
1072 
1073   /* build cache for off-processor entries generated during MatSetValues */
1074   a->donotstash = 0;
1075   ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);
1076 
1077   /* stuff used for matrix vector multiply */
1078   a->lvec        = 0;
1079   a->Mvctx       = 0;
1080   a->roworiented = 1;
1081 
1082   *A = mat;
1083   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1084   if (flg) {
1085     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
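/*
   A minimal usage sketch for MatCreateMPIDense() (illustrative only; the matrix
   name C and the 10 x 10 dimensions below are made up for the example): let
   PETSc choose the local sizes and allocate the storage, insert an entry, and
   assemble before use.

      Mat    C;
      int    ierr, row = 0, col = 0;
      Scalar value = 1.0;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&C);CHKERRQ(ierr);
      ierr = MatSetValues(C,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);
*/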
1090 #undef __FUNC__
1091 #define __FUNC__ "MatDuplicate_MPIDense"
1092 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1093 {
1094   Mat          mat;
1095   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
1096   int          ierr;
1097   FactorCtx    *factor;
1098 
1099   PetscFunctionBegin;
1100   *newmat       = 0;
1101   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1102   PLogObjectCreate(mat);
1103   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1104   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1105   mat->ops->destroy = MatDestroy_MPIDense;
1106   mat->ops->view    = MatView_MPIDense;
1107   mat->factor       = A->factor;
1108   mat->assembled    = PETSC_TRUE;
1109 
1110   a->m = mat->m = oldmat->m;
1111   a->n = mat->n = oldmat->n;
1112   a->M = mat->M = oldmat->M;
1113   a->N = mat->N = oldmat->N;
1114   if (oldmat->factor) {
1115     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1116     /* copy factor contents ... add this code! */
1117   } else a->factor = 0;
1118 
1119   a->rstart       = oldmat->rstart;
1120   a->rend         = oldmat->rend;
1121   a->size         = oldmat->size;
1122   a->rank         = oldmat->rank;
1123   mat->insertmode = NOT_SET_VALUES;
1124   a->donotstash   = oldmat->donotstash;
1125   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1126   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1127   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1128   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1129 
1130   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1131   PLogObjectParent(mat,a->lvec);
1132   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1133   PLogObjectParent(mat,a->Mvctx);
1134   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1135   PLogObjectParent(mat,a->A);
1136   *newmat = mat;
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #include "sys.h"
1141 
1142 #undef __FUNC__
1143 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1144 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
1145 {
1146   int        *rowners, i,size,rank,m,ierr,nz,j;
1147   Scalar     *array,*vals,*vals_ptr;
1148   MPI_Status status;
1149 
1150   PetscFunctionBegin;
1151   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1152   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1153 
1154   /* determine ownership of all rows */
1155   m          = M/size + ((M % size) > rank);
1156   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1157   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1158   rowners[0] = 0;
1159   for ( i=2; i<=size; i++ ) {
1160     rowners[i] += rowners[i-1];
1161   }
1162 
1163   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1164   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1165 
1166   if (!rank) {
1167     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1168 
1169     /* read in my part of the matrix numerical values  */
1170     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1171 
1172     /* insert into the matrix by row; the file stores values row by row while the local array is column-major, which is why we cannot read directly into array */
1173     vals_ptr = vals;
1174     for ( i=0; i<m; i++ ) {
1175       for ( j=0; j<N; j++ ) {
1176         array[i + j*m] = *vals_ptr++;
1177       }
1178     }
1179 
1180     /* read in the other processors' parts and ship them out */
1181     for ( i=1; i<size; i++ ) {
1182       nz   = (rowners[i+1] - rowners[i])*N;
1183       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1184       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1185     }
1186   } else {
1187     /* receive numeric values */
1188     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
1189 
1190     /* receive message of values*/
1191     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1192 
1193     /* insert into the matrix by row; the file stores values row by row while the local array is column-major, which is why we cannot read directly into array */
1194     vals_ptr = vals;
1195     for ( i=0; i<m; i++ ) {
1196       for ( j=0; j<N; j++ ) {
1197         array[i + j*m] = *vals_ptr++;
1198       }
1199     }
1200   }
1201   ierr = PetscFree(rowners);CHKERRQ(ierr);
1202   ierr = PetscFree(vals);CHKERRQ(ierr);
1203   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1204   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 
1209 #undef __FUNC__
1210 #define __FUNC__ "MatLoad_MPIDense"
1211 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1212 {
1213   Mat          A;
1214   Scalar       *vals,*svals;
1215   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1216   MPI_Status   status;
1217   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1218   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1219   int          tag = ((PetscObject)viewer)->tag;
1220   int          i, nz, ierr, j,rstart, rend, fd;
1221 
1222   PetscFunctionBegin;
1223   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1224   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1225   if (!rank) {
1226     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1227     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1228     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1229   }
1230 
1231   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1232   M = header[1]; N = header[2]; nz = header[3];
1233 
1234   /*
1235        Handle case where matrix is stored on disk as a dense matrix
1236   */
1237   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1238     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1239     PetscFunctionReturn(0);
1240   }
1241 
1242   /* determine ownership of all rows */
1243   m          = M/size + ((M % size) > rank);
1244   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1245   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1246   rowners[0] = 0;
1247   for ( i=2; i<=size; i++ ) {
1248     rowners[i] += rowners[i-1];
1249   }
1250   rstart = rowners[rank];
1251   rend   = rowners[rank+1];
1252 
1253   /* distribute row lengths to all processors */
1254   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1255   offlens = ourlens + (rend-rstart);
1256   if (!rank) {
1257     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1258     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1259     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1260     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1261     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1262     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1263   } else {
1264     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1265   }
1266 
1267   if (!rank) {
1268     /* calculate the number of nonzeros on each processor */
1269     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1270     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1271     for ( i=0; i<size; i++ ) {
1272       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1273         procsnz[i] += rowlengths[j];
1274       }
1275     }
1276     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1277 
1278     /* determine max buffer needed and allocate it */
1279     maxnz = 0;
1280     for ( i=0; i<size; i++ ) {
1281       maxnz = PetscMax(maxnz,procsnz[i]);
1282     }
1283     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1284 
1285     /* read in my part of the matrix column indices  */
1286     nz = procsnz[0];
1287     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1288     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1289 
1290     /* read in everyone else's part and ship it off */
1291     for ( i=1; i<size; i++ ) {
1292       nz   = procsnz[i];
1293       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1294       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1295     }
1296     ierr = PetscFree(cols);CHKERRQ(ierr);
1297   } else {
1298     /* determine buffer space needed for message */
1299     nz = 0;
1300     for ( i=0; i<m; i++ ) {
1301       nz += ourlens[i];
1302     }
1303     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1304 
1305     /* receive message of column indices*/
1306     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1307     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1308     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1309   }
1310 
1311   /* loop over local rows, determining number of off diagonal entries */
1312   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1313   jj = 0;
1314   for ( i=0; i<m; i++ ) {
1315     for ( j=0; j<ourlens[i]; j++ ) {
1316       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1317       jj++;
1318     }
1319   }
1320 
1321   /* create our matrix */
1322   for ( i=0; i<m; i++ ) {
1323     ourlens[i] -= offlens[i];
1324   }
1325   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1326   A = *newmat;
1327   for ( i=0; i<m; i++ ) {
1328     ourlens[i] += offlens[i];
1329   }
1330 
1331   if (!rank) {
1332     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
1333 
1334     /* read in my part of the matrix numerical values  */
1335     nz = procsnz[0];
1336     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1337 
1338     /* insert into matrix */
1339     jj      = rstart;
1340     smycols = mycols;
1341     svals   = vals;
1342     for ( i=0; i<m; i++ ) {
1343       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1344       smycols += ourlens[i];
1345       svals   += ourlens[i];
1346       jj++;
1347     }
1348 
1349     /* read in the other processors' parts and ship them out */
1350     for ( i=1; i<size; i++ ) {
1351       nz   = procsnz[i];
1352       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1353       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1354     }
1355     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1356   } else {
1357     /* receive numeric values */
1358     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
1359 
1360     /* receive message of values*/
1361     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1362     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1363     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1364 
1365     /* insert into matrix */
1366     jj      = rstart;
1367     smycols = mycols;
1368     svals   = vals;
1369     for ( i=0; i<m; i++ ) {
1370       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1371       smycols += ourlens[i];
1372       svals   += ourlens[i];
1373       jj++;
1374     }
1375   }
1376   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1377   ierr = PetscFree(vals);CHKERRQ(ierr);
1378   ierr = PetscFree(mycols);CHKERRQ(ierr);
1379   ierr = PetscFree(rowners);CHKERRQ(ierr);
1380 
1381   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1382   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 
1387 
1388 
1389 
1390