1 /*$Id: mpidense.c,v 1.153 2001/03/23 23:21:49 balay Exp buschelm $*/
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNCT__
12 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16   int          m = A->m,rstart = mdn->rstart,ierr;
17   Scalar       *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
27   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
28   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
29   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
30   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32 
33   *iscopy = PETSC_TRUE;
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN int MatSetUpMultiply_MPIDense(Mat);
39 
40 #undef __FUNCT__
41 #define __FUNCT__ "MatSetValues_MPIDense"
42 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
43 {
44   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
45   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
46   PetscTruth   roworiented = A->roworiented;
47 
48   PetscFunctionBegin;
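  /* Rows owned by this process are inserted directly into the local dense block (A->A);
     rows owned by other processes are stashed and shipped to their owners during
     MatAssemblyBegin/End(), unless MAT_IGNORE_OFF_PROC_ENTRIES was set. */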
49   for (i=0; i<m; i++) {
50     if (idxm[i] < 0) continue;
51     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
52     if (idxm[i] >= rstart && idxm[i] < rend) {
53       row = idxm[i] - rstart;
54       if (roworiented) {
55         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
56       } else {
57         for (j=0; j<n; j++) {
58           if (idxn[j] < 0) continue;
59           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
60           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
61         }
62       }
63     } else {
64       if (!A->donotstash) {
65         if (roworiented) {
66           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
67         } else {
68           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
69         }
70       }
71     }
72   }
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetValues_MPIDense"
78 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
79 {
80   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
81   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
82 
83   PetscFunctionBegin;
84   for (i=0; i<m; i++) {
85     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
86     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
87     if (idxm[i] >= rstart && idxm[i] < rend) {
88       row = idxm[i] - rstart;
89       for (j=0; j<n; j++) {
90         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
91         if (idxn[j] >= mat->N) {
92           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
93         }
94         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
95       }
96     } else {
97       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
98     }
99   }
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatGetArray_MPIDense"
105 int MatGetArray_MPIDense(Mat A,Scalar **array)
106 {
107   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
108   int          ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
117 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
118 {
119   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
120   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
121   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
122   Scalar       *av,*bv,*v = lmat->v;
123   Mat          newmat;
124 
125   PetscFunctionBegin;
126   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
127   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
128   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
129   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
130 
131   /* No parallel redistribution is currently supported! Should really check each index
132      set to confirm that it is OK. Currently supports only submatrices with the same
133      row partitioning as the original matrix! */
134 
135   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
136   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
137 
138   /* Check submatrix call */
139   if (scall == MAT_REUSE_MATRIX) {
140     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
141     /* Really need to test the row and column sizes! */
142     newmat = *B;
143   } else {
144     /* Create and fill new matrix */
145     ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
146   }
147 
148   /* Now extract the data pointers and do the copy, column at a time */
149   newmatd = (Mat_MPIDense*)newmat->data;
150   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
151 
152   for (i=0; i<ncols; i++) {
153     av = v + nlrows*icol[i];
154     for (j=0; j<nrows; j++) {
155       *bv++ = av[irow[j] - rstart];
156     }
157   }
158 
159   /* Assemble the matrices so that the correct flags are set */
160   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
161   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
162 
163   /* Free work space */
164   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
165   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
166   *B = newmat;
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatRestoreArray_MPIDense"
172 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
173 {
174   PetscFunctionBegin;
175   PetscFunctionReturn(0);
176 }
177 
178 #undef __FUNCT__
179 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
180 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
181 {
182   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
183   MPI_Comm     comm = mat->comm;
184   int          ierr,nstash,reallocs;
185   InsertMode   addv;
186 
187   PetscFunctionBegin;
188   /* make sure all processes are in the same insert mode (INSERT_VALUES or ADD_VALUES) */
189   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
190   if (addv == (ADD_VALUES|INSERT_VALUES)) {
191     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
192   }
193   mat->insertmode = addv; /* in case this processor had no cache */
194 
195   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
196   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
197   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
198   PetscFunctionReturn(0);
199 }
200 
201 #undef __FUNCT__
202 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
203 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
204 {
205   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
206   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
207   Scalar       *val;
208   InsertMode   addv=mat->insertmode;
209 
210   PetscFunctionBegin;
211   /*  wait on receives */
212   while (1) {
213     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
214     if (!flg) break;
215 
216     for (i=0; i<n;) {
217       /* Now identify the consecutive vals belonging to the same row */
218       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
219       if (j < n) ncols = j-i;
220       else       ncols = n-i;
221       /* Now assemble all these values with a single function call */
222       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
223       i = j;
224     }
225   }
226   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
227 
228   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
229   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
230 
231   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
232     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
233   }
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "MatZeroEntries_MPIDense"
239 int MatZeroEntries_MPIDense(Mat A)
240 {
241   int          ierr;
242   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
243 
244   PetscFunctionBegin;
245   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatGetBlockSize_MPIDense"
251 int MatGetBlockSize_MPIDense(Mat A,int *bs)
252 {
253   PetscFunctionBegin;
254   *bs = 1;
255   PetscFunctionReturn(0);
256 }
257 
258 /* This code does not handle the diagonal entries correctly unless the
259    matrix is square and the row and column ownerships are identical.
260    This is a BUG. The only way to fix it seems to be to access the
261    local matrix (mdn->A) directly rather than going through the
262    MatZeroRows() routine.
263 */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatZeroRows_MPIDense"
266 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
267 {
268   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
269   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
270   int            *procs,*nprocs,j,idx,nsends,*work;
271   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
272   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
273   int            *lens,imdex,*lrows,*values;
274   MPI_Comm       comm = A->comm;
275   MPI_Request    *send_waits,*recv_waits;
276   MPI_Status     recv_status,*send_status;
277   IS             istmp;
278   PetscTruth     found;
279 
280   PetscFunctionBegin;
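  /* Each process determines the owner of every row index it was handed, ships those
     indices to the owning processes, and then zeros the rows it owns by calling
     MatZeroRows() on its local sequential dense block. */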
281   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
282   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
283 
284   /*  first count number of contributors to each processor */
285   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
286   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
287   procs = nprocs + size;
288   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
289   for (i=0; i<N; i++) {
290     idx = rows[i];
291     found = PETSC_FALSE;
292     for (j=0; j<size; j++) {
293       if (idx >= owners[j] && idx < owners[j+1]) {
294         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
295       }
296     }
297     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
298   }
299   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
300 
301   /* inform other processors of number of messages and max length*/
302   ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
303   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
304   nmax   = work[rank];
305   nrecvs = work[size+rank];
306   ierr   = PetscFree(work);CHKERRQ(ierr);
307 
308   /* post receives:   */
309   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
310   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
311   for (i=0; i<nrecvs; i++) {
312     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
313   }
314 
315   /* do sends:
316       1) starts[i] gives the starting index in svalues for stuff going to
317          the ith processor
318   */
319   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
320   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
321   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
322   starts[0]  = 0;
323   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
324   for (i=0; i<N; i++) {
325     svalues[starts[owner[i]]++] = rows[i];
326   }
327   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
328 
329   starts[0] = 0;
330   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
331   count = 0;
332   for (i=0; i<size; i++) {
333     if (procs[i]) {
334       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
335     }
336   }
337   ierr = PetscFree(starts);CHKERRQ(ierr);
338 
339   base = owners[rank];
340 
341   /*  wait on receives */
342   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
343   source = lens + nrecvs;
344   count  = nrecvs; slen = 0;
345   while (count) {
346     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
347     /* unpack receives into our local space */
348     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
349     source[imdex]  = recv_status.MPI_SOURCE;
350     lens[imdex]  = n;
351     slen += n;
352     count--;
353   }
354   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
355 
356   /* move the data into the send scatter */
357   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
358   count = 0;
359   for (i=0; i<nrecvs; i++) {
360     values = rvalues + i*nmax;
361     for (j=0; j<lens[i]; j++) {
362       lrows[count++] = values[j] - base;
363     }
364   }
365   ierr = PetscFree(rvalues);CHKERRQ(ierr);
366   ierr = PetscFree(lens);CHKERRQ(ierr);
367   ierr = PetscFree(owner);CHKERRQ(ierr);
368   ierr = PetscFree(nprocs);CHKERRQ(ierr);
369 
370   /* actually zap the local rows */
371   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
372   PetscLogObjectParent(A,istmp);
373   ierr = PetscFree(lrows);CHKERRQ(ierr);
374   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
375   ierr = ISDestroy(istmp);CHKERRQ(ierr);
376 
377   /* wait on sends */
378   if (nsends) {
379     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
380     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
381     ierr = PetscFree(send_status);CHKERRQ(ierr);
382   }
383   ierr = PetscFree(send_waits);CHKERRQ(ierr);
384   ierr = PetscFree(svalues);CHKERRQ(ierr);
385 
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatMult_MPIDense"
391 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
392 {
393   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
394   int          ierr;
395 
396   PetscFunctionBegin;
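  /* The local block mdn->A holds all N columns for this process's rows, so the whole
     input vector is first gathered into the work vector lvec and then multiplied by
     the sequential dense block. */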
397   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
398   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
399   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
400   PetscFunctionReturn(0);
401 }
402 
403 #undef __FUNCT__
404 #define __FUNCT__ "MatMultAdd_MPIDense"
405 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
406 {
407   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
408   int          ierr;
409 
410   PetscFunctionBegin;
411   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
412   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
413   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatMultTranspose_MPIDense"
419 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
420 {
421   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
422   int          ierr;
423   Scalar       zero = 0.0;
424 
425   PetscFunctionBegin;
426   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
427   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
428   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
429   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
435 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
436 {
437   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
438   int          ierr;
439 
440   PetscFunctionBegin;
441   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
442   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
443   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
444   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "MatGetDiagonal_MPIDense"
450 int MatGetDiagonal_MPIDense(Mat A,Vec v)
451 {
452   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
453   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
454   int          ierr,len,i,n,m = A->m,radd;
455   Scalar       *x,zero = 0.0;
456 
457   PetscFunctionBegin;
458   ierr = VecSet(&zero,v);CHKERRQ(ierr);
459   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
460   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
461   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
462   len  = PetscMin(a->A->m,a->A->n);
463   radd = a->rstart*m;
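  /* the local array aloc->v is stored column-major with leading dimension m, so the
     global diagonal entry (rstart+i,rstart+i) sits at local row i of global column
     rstart+i, i.e. at index rstart*m + i*m + i */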
464   for (i=0; i<len; i++) {
465     x[i] = aloc->v[radd + i*m + i];
466   }
467   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNCT__
472 #define __FUNCT__ "MatDestroy_MPIDense"
473 int MatDestroy_MPIDense(Mat mat)
474 {
475   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
476   int          ierr;
477 
478   PetscFunctionBegin;
479 
480 #if defined(PETSC_USE_LOG)
481   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
482 #endif
483   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
484   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
485   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
486   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
487   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
488   if (mdn->factor) {
489     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
490     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
491     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
492     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
493   }
494   ierr = PetscFree(mdn);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatView_MPIDense_Binary"
500 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
501 {
502   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
503   int          ierr;
504 
505   PetscFunctionBegin;
506   if (mdn->size == 1) {
507     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
508   }
509   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
515 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
516 {
517   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
518   int               ierr,size = mdn->size,rank = mdn->rank;
519   PetscViewerType   vtype;
520   PetscTruth        isascii,isdraw;
521   PetscViewer       sviewer;
522   PetscViewerFormat format;
523 
524   PetscFunctionBegin;
525   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
526   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
527   if (isascii) {
528     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
529     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
530     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
531       MatInfo info;
532       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
533       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
534                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
535       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
536       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
537       PetscFunctionReturn(0);
538     } else if (format == PETSC_VIEWER_ASCII_INFO) {
539       PetscFunctionReturn(0);
540     }
541   } else if (isdraw) {
542     PetscDraw       draw;
543     PetscTruth isnull;
544 
545     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
546     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
547     if (isnull) PetscFunctionReturn(0);
548   }
549 
550   if (size == 1) {
551     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
552   } else {
553     /* assemble the entire matrix onto first processor. */
554     Mat          A;
555     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
556     Scalar       *vals;
557 
558     if (!rank) {
559       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
560     } else {
561       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
562     }
563     PetscLogObjectParent(mat,A);
564 
565     /* Copy the matrix ... This isn't the most efficient means,
566        but it's quick for now */
567     row = mdn->rstart; m = mdn->A->m;
568     for (i=0; i<m; i++) {
569       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
570       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
571       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
572       row++;
573     }
574 
575     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
576     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
577     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
578     if (!rank) {
579       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
580     }
581     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
582     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
583     ierr = MatDestroy(A);CHKERRQ(ierr);
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatView_MPIDense"
590 int MatView_MPIDense(Mat mat,PetscViewer viewer)
591 {
592   int        ierr;
593   PetscTruth isascii,isbinary,isdraw,issocket;
594 
595   PetscFunctionBegin;
596 
597   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
598   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
600   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
601 
602   if (isascii || issocket || isdraw) {
603     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
604   } else if (isbinary) {
605     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
606   } else {
607     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatGetInfo_MPIDense"
614 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
615 {
616   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
617   Mat          mdn = mat->A;
618   int          ierr;
619   PetscReal    isend[5],irecv[5];
620 
621   PetscFunctionBegin;
622   info->rows_global    = (double)A->M;
623   info->columns_global = (double)A->N;
624   info->rows_local     = (double)A->m;
625   info->columns_local  = (double)A->N;
626   info->block_size     = 1.0;
627   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
628   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
629   isend[3] = info->memory;  isend[4] = info->mallocs;
630   if (flag == MAT_LOCAL) {
631     info->nz_used      = isend[0];
632     info->nz_allocated = isend[1];
633     info->nz_unneeded  = isend[2];
634     info->memory       = isend[3];
635     info->mallocs      = isend[4];
636   } else if (flag == MAT_GLOBAL_MAX) {
637     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
638     info->nz_used      = irecv[0];
639     info->nz_allocated = irecv[1];
640     info->nz_unneeded  = irecv[2];
641     info->memory       = irecv[3];
642     info->mallocs      = irecv[4];
643   } else if (flag == MAT_GLOBAL_SUM) {
644     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
645     info->nz_used      = irecv[0];
646     info->nz_allocated = irecv[1];
647     info->nz_unneeded  = irecv[2];
648     info->memory       = irecv[3];
649     info->mallocs      = irecv[4];
650   }
651   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
652   info->fill_ratio_needed = 0;
653   info->factor_mallocs    = 0;
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatSetOption_MPIDense"
659 int MatSetOption_MPIDense(Mat A,MatOption op)
660 {
661   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
662   int          ierr;
663 
664   PetscFunctionBegin;
665   switch (op) {
666   case MAT_NO_NEW_NONZERO_LOCATIONS:
667   case MAT_YES_NEW_NONZERO_LOCATIONS:
668   case MAT_NEW_NONZERO_LOCATION_ERR:
669   case MAT_NEW_NONZERO_ALLOCATION_ERR:
670   case MAT_COLUMNS_SORTED:
671   case MAT_COLUMNS_UNSORTED:
672     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
673     break;
674   case MAT_ROW_ORIENTED:
675     a->roworiented = PETSC_TRUE;
676     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
677     break;
678   case MAT_ROWS_SORTED:
679   case MAT_ROWS_UNSORTED:
680   case MAT_YES_NEW_DIAGONALS:
681   case MAT_USE_HASH_TABLE:
682     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
683     break;
684   case MAT_COLUMN_ORIENTED:
685     a->roworiented = PETSC_FALSE;
686     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
687     break;
688   case MAT_IGNORE_OFF_PROC_ENTRIES:
689     a->donotstash = PETSC_TRUE;
690     break;
691   case MAT_NO_NEW_DIAGONALS:
692     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
693     break;
694   default:
695     SETERRQ(PETSC_ERR_SUP,"unknown option");
696     break;
697   }
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatGetOwnershipRange_MPIDense"
703 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
704 {
705   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
706 
707   PetscFunctionBegin;
708   if (m) *m = mat->rstart;
709   if (n) *n = mat->rend;
710   PetscFunctionReturn(0);
711 }
712 
713 #undef __FUNCT__
714 #define __FUNCT__ "MatGetRow_MPIDense"
715 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
716 {
717   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
718   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
719 
720   PetscFunctionBegin;
721   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
722   lrow = row - rstart;
723   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatRestoreRow_MPIDense"
729 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
730 {
731   int ierr;
732 
733   PetscFunctionBegin;
734   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
735   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatDiagonalScale_MPIDense"
741 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
742 {
743   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
744   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
745   Scalar       *l,*r,x,*v;
746   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
747 
748   PetscFunctionBegin;
749   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
750   if (ll) {
751     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
752     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
753     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
754     for (i=0; i<m; i++) {
755       x = l[i];
756       v = mat->v + i;
757       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
758     }
759     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
760     PetscLogFlops(n*m);
761   }
762   if (rr) {
763     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
764     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
765     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
766     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
767     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
768     for (i=0; i<n; i++) {
769       x = r[i];
770       v = mat->v + i*m;
771       for (j=0; j<m; j++) { (*v++) *= x;}
772     }
773     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
774     PetscLogFlops(n*m);
775   }
776   PetscFunctionReturn(0);
777 }
778 
779 #undef __FUNCT__
780 #define __FUNCT__ "MatNorm_MPIDense"
781 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
782 {
783   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
784   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
785   int          ierr,i,j;
786   PetscReal    sum = 0.0;
787   Scalar       *v = mat->v;
788 
789   PetscFunctionBegin;
790   if (mdn->size == 1) {
791     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
792   } else {
793     if (type == NORM_FROBENIUS) {
794       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
795 #if defined(PETSC_USE_COMPLEX)
796         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
797 #else
798         sum += (*v)*(*v); v++;
799 #endif
800       }
801       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
802       *norm = sqrt(*norm);
803       PetscLogFlops(2*mdn->A->n*mdn->A->m);
804     } else if (type == NORM_1) {
805       PetscReal *tmp,*tmp2;
806       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
807       tmp2 = tmp + A->N;
808       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
809       *norm = 0.0;
810       v = mat->v;
811       for (j=0; j<mdn->A->n; j++) {
812         for (i=0; i<mdn->A->m; i++) {
813           tmp[j] += PetscAbsScalar(*v);  v++;
814         }
815       }
816       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
817       for (j=0; j<A->N; j++) {
818         if (tmp2[j] > *norm) *norm = tmp2[j];
819       }
820       ierr = PetscFree(tmp);CHKERRQ(ierr);
821       PetscLogFlops(A->n*A->m);
822     } else if (type == NORM_INFINITY) { /* max row norm */
823       PetscReal ntemp;
824       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
825       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
826     } else {
827       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
828     }
829   }
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatTranspose_MPIDense"
835 int MatTranspose_MPIDense(Mat A,Mat *matout)
836 {
837   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
838   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
839   Mat          B;
840   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
841   int          j,i,ierr;
842   Scalar       *v;
843 
844   PetscFunctionBegin;
845   if (!matout && M != N) {
846     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
847   }
848   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
849 
850   m = a->A->m; n = a->A->n; v = Aloc->v;
851   ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr); /* global row indices of this process's rows */
852   for (j=0; j<n; j++) {
853     for (i=0; i<m; i++) rwork[i] = rstart + i;
854     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
855     v   += m;
856   }
857   ierr = PetscFree(rwork);CHKERRQ(ierr);
858   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
859   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
860   if (matout) {
861     *matout = B;
862   } else {
863     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
864   }
865   PetscFunctionReturn(0);
866 }
867 
868 #include "petscblaslapack.h"
869 #undef __FUNCT__
870 #define __FUNCT__ "MatScale_MPIDense"
871 int MatScale_MPIDense(Scalar *alpha,Mat inA)
872 {
873   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
874   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
875   int          one = 1,nz;
876 
877   PetscFunctionBegin;
878   nz = inA->m*inA->N;
879   BLscal_(&nz,alpha,a->v,&one);
880   PetscLogFlops(nz);
881   PetscFunctionReturn(0);
882 }
883 
884 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
885 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
886 
887 #undef __FUNCT__
888 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
889 int MatSetUpPreallocation_MPIDense(Mat A)
890 {
891   int        ierr;
892 
893   PetscFunctionBegin;
894   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
895   PetscFunctionReturn(0);
896 }
897 
898 /* -------------------------------------------------------------------*/
899 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
900        MatGetRow_MPIDense,
901        MatRestoreRow_MPIDense,
902        MatMult_MPIDense,
903        MatMultAdd_MPIDense,
904        MatMultTranspose_MPIDense,
905        MatMultTransposeAdd_MPIDense,
906        0,
907        0,
908        0,
909        0,
910        0,
911        0,
912        0,
913        MatTranspose_MPIDense,
914        MatGetInfo_MPIDense,0,
915        MatGetDiagonal_MPIDense,
916        MatDiagonalScale_MPIDense,
917        MatNorm_MPIDense,
918        MatAssemblyBegin_MPIDense,
919        MatAssemblyEnd_MPIDense,
920        0,
921        MatSetOption_MPIDense,
922        MatZeroEntries_MPIDense,
923        MatZeroRows_MPIDense,
924        0,
925        0,
926        0,
927        0,
928        MatSetUpPreallocation_MPIDense,
929        0,
930        MatGetOwnershipRange_MPIDense,
931        0,
932        0,
933        MatGetArray_MPIDense,
934        MatRestoreArray_MPIDense,
935        MatDuplicate_MPIDense,
936        0,
937        0,
938        0,
939        0,
940        0,
941        MatGetSubMatrices_MPIDense,
942        0,
943        MatGetValues_MPIDense,
944        0,
945        0,
946        MatScale_MPIDense,
947        0,
948        0,
949        0,
950        MatGetBlockSize_MPIDense,
951        0,
952        0,
953        0,
954        0,
955        0,
956        0,
957        0,
958        0,
959        0,
960        MatGetSubMatrix_MPIDense,
961        MatDestroy_MPIDense,
962        MatView_MPIDense,
963        MatGetMaps_Petsc};
964 
965 EXTERN_C_BEGIN
966 #undef __FUNCT__
967 #define __FUNCT__ "MatCreate_MPIDense"
968 int MatCreate_MPIDense(Mat mat)
969 {
970   Mat_MPIDense *a;
971   int          ierr,i;
972 
973   PetscFunctionBegin;
974   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
975   mat->data         = (void*)a;
976   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
977   mat->factor       = 0;
978   mat->mapping      = 0;
979 
980   a->factor       = 0;
981   mat->insertmode = NOT_SET_VALUES;
982   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
983   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
984 
985   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
986   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
987   a->nvec = mat->n;
988 
989   /* the information in the maps duplicates the information computed below, eventually
990      we should remove the duplicate information that is not contained in the maps */
991   ierr = MapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
992   ierr = MapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
993 
994   /* build local table of row and column ownerships */
995   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
996   a->cowners = a->rowners + a->size + 1;
997   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
998   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
999   a->rowners[0] = 0;
1000   for (i=2; i<=a->size; i++) {
1001     a->rowners[i] += a->rowners[i-1];
1002   }
1003   a->rstart = a->rowners[a->rank];
1004   a->rend   = a->rowners[a->rank+1];
1005   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1006   a->cowners[0] = 0;
1007   for (i=2; i<=a->size; i++) {
1008     a->cowners[i] += a->cowners[i-1];
1009   }
1010 
1011   /* build cache (stash) for entries destined for other processes */
1012   a->donotstash = PETSC_FALSE;
1013   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1014 
1015   /* stuff used for matrix vector multiply */
1016   a->lvec        = 0;
1017   a->Mvctx       = 0;
1018   a->roworiented = PETSC_TRUE;
1019 
1020   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1021                                      "MatGetDiagonalBlock_MPIDense",
1022                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1023   PetscFunctionReturn(0);
1024 }
1025 EXTERN_C_END
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1029 /*@C
1030    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1031 
1032    Not collective
1033 
1034    Input Parameters:
1035 +  A - the matrix
1036 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1037    to control all matrix memory allocation.
1038 
1039    Notes:
1040    The dense format is fully compatible with standard Fortran 77
1041    storage by columns.
1042 
1043    The data input variable is intended primarily for Fortran programmers
1044    who wish to allocate their own matrix memory space.  Most users should
1045    set data=PETSC_NULL.
1046 
1047    Level: intermediate
1048 
1049 .keywords: matrix,dense, parallel
1050 
1051 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1052 @*/
1053 int MatMPIDenseSetPreallocation(Mat mat,Scalar *data)
1054 {
1055   Mat_MPIDense *a;
1056   int          ierr;
1057   PetscTruth   flg2;
1058 
1059   PetscFunctionBegin;
1060   ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
1061   if (!flg2) PetscFunctionReturn(0);
1062   mat->preallocated = PETSC_TRUE;
1063   /* Note:  For now, when data is specified above, this assumes the user correctly
1064    allocates the local dense storage space.  We should add error checking. */
1065 
1066   a    = (Mat_MPIDense*)mat->data;
1067   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
1068   PetscLogObjectParent(mat,a->A);
1069   PetscFunctionReturn(0);
1070 }
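
/*
   A minimal usage sketch (not taken from the PETSc examples; the 8x8 size and
   PETSC_DECIDE local sizes are illustrative only): create the matrix, set its
   type, and let PETSc allocate the local dense storage by passing PETSC_NULL.

      Mat A;
      int ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);

   A Fortran caller that manages its own column-major storage would instead pass
   the address of its array as data.
*/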
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatCreateMPIDense"
1074 /*@C
1075    MatCreateMPIDense - Creates a parallel matrix in dense format.
1076 
1077    Collective on MPI_Comm
1078 
1079    Input Parameters:
1080 +  comm - MPI communicator
1081 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1082 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1083 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1084 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1085 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1086    to control all matrix memory allocation.
1087 
1088    Output Parameter:
1089 .  A - the matrix
1090 
1091    Notes:
1092    The dense format is fully compatible with standard Fortran 77
1093    storage by columns.
1094 
1095    The data input variable is intended primarily for Fortran programmers
1096    who wish to allocate their own matrix memory space.  Most users should
1097    set data=PETSC_NULL.
1098 
1099    The user MUST specify either the local or global matrix dimensions
1100    (possibly both).
1101 
1102    Level: intermediate
1103 
1104 .keywords: matrix,dense, parallel
1105 
1106 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1107 @*/
1108 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1109 {
1110   int ierr,size;
1111 
1112   PetscFunctionBegin;
1113   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1114   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1115   if (size > 1) {
1116     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1117     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1118   } else {
1119     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1120     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1121   }
1122   PetscFunctionReturn(0);
1123 }
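
/*
   A minimal usage sketch (assumed sizes; not taken from the PETSc test suite):
   create a 10x10 parallel dense matrix, let each process fill the rows it owns,
   and assemble.

      Mat    A;
      int    ierr,i,rstart,rend;
      Scalar one = 1.0;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/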
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatDuplicate_MPIDense"
1127 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1128 {
1129   Mat          mat;
1130   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1131   int          ierr;
1132 
1133   PetscFunctionBegin;
1134   *newmat       = 0;
1135   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1136   ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
1137   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1138   mat->data         = (void*)a;
1139   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1140   mat->factor       = A->factor;
1141   mat->assembled    = PETSC_TRUE;
1142   mat->preallocated = PETSC_TRUE;
1143 
1144   a->rstart       = oldmat->rstart;
1145   a->rend         = oldmat->rend;
1146   a->size         = oldmat->size;
1147   a->rank         = oldmat->rank;
1148   mat->insertmode = NOT_SET_VALUES;
1149   a->nvec         = oldmat->nvec;
1150   a->donotstash   = oldmat->donotstash;
1151   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1152   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1153   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1154   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1155 
1156   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1157   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1158   PetscLogObjectParent(mat,a->A);
1159   *newmat = mat;
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 #include "petscsys.h"
1164 
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1167 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1168 {
1169   int        *rowners,i,size,rank,m,ierr,nz,j;
1170   Scalar     *array,*vals,*vals_ptr;
1171   MPI_Status status;
1172 
1173   PetscFunctionBegin;
1174   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1175   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1176 
1177   /* determine ownership of all rows */
1178   m          = M/size + ((M % size) > rank);
1179   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1180   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1181   rowners[0] = 0;
1182   for (i=2; i<=size; i++) {
1183     rowners[i] += rowners[i-1];
1184   }
1185 
1186   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1187   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1188 
1189   if (!rank) {
1190     ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr);
1191 
1192     /* read in my part of the matrix numerical values  */
1193     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1194 
1195     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1196     vals_ptr = vals;
1197     for (i=0; i<m; i++) {
1198       for (j=0; j<N; j++) {
1199         array[i + j*m] = *vals_ptr++;
1200       }
1201     }
1202 
1203     /* read in the other processes' parts and ship them out */
1204     for (i=1; i<size; i++) {
1205       nz   = (rowners[i+1] - rowners[i])*N;
1206       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1207       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1208     }
1209   } else {
1210     /* receive numeric values */
1211     ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr);
1212 
1213     /* receive message of values*/
1214     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1215 
1216     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1217     vals_ptr = vals;
1218     for (i=0; i<m; i++) {
1219       for (j=0; j<N; j++) {
1220         array[i + j*m] = *vals_ptr++;
1221       }
1222     }
1223   }
1224   ierr = PetscFree(rowners);CHKERRQ(ierr);
1225   ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr);
1226   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1227   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 EXTERN_C_BEGIN
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatLoad_MPIDense"
1234 int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1235 {
1236   Mat          A;
1237   Scalar       *vals,*svals;
1238   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1239   MPI_Status   status;
1240   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1241   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1242   int          tag = ((PetscObject)viewer)->tag;
1243   int          i,nz,ierr,j,rstart,rend,fd;
1244 
1245   PetscFunctionBegin;
1246   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1247   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1248   if (!rank) {
1249     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1250     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1251     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1252   }
1253 
1254   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1255   M = header[1]; N = header[2]; nz = header[3];
1256 
1257   /*
1258        Handle case where matrix is stored on disk as a dense matrix
1259   */
1260   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1261     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1262     PetscFunctionReturn(0);
1263   }
1264 
1265   /* determine ownership of all rows */
1266   m          = M/size + ((M % size) > rank);
1267   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1268   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1269   rowners[0] = 0;
1270   for (i=2; i<=size; i++) {
1271     rowners[i] += rowners[i-1];
1272   }
1273   rstart = rowners[rank];
1274   rend   = rowners[rank+1];
1275 
1276   /* distribute row lengths to all processors */
1277   ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
1278   offlens = ourlens + (rend-rstart);
1279   if (!rank) {
1280     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1281     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1282     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1283     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1284     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1285     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1286   } else {
1287     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1288   }
1289 
1290   if (!rank) {
1291     /* calculate the number of nonzeros on each processor */
1292     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1293     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1294     for (i=0; i<size; i++) {
1295       for (j=rowners[i]; j< rowners[i+1]; j++) {
1296         procsnz[i] += rowlengths[j];
1297       }
1298     }
1299     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1300 
1301     /* determine max buffer needed and allocate it */
1302     maxnz = 0;
1303     for (i=0; i<size; i++) {
1304       maxnz = PetscMax(maxnz,procsnz[i]);
1305     }
1306     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1307 
1308     /* read in my part of the matrix column indices  */
1309     nz = procsnz[0];
1310     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1311     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1312 
1313     /* read in everyone else's parts and ship them off */
1314     for (i=1; i<size; i++) {
1315       nz   = procsnz[i];
1316       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1317       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1318     }
1319     ierr = PetscFree(cols);CHKERRQ(ierr);
1320   } else {
1321     /* determine buffer space needed for message */
1322     nz = 0;
1323     for (i=0; i<m; i++) {
1324       nz += ourlens[i];
1325     }
1326     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1327 
1328     /* receive message of column indices*/
1329     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1330     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1331     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1332   }
1333 
1334   /* loop over local rows, determining the number of off-diagonal entries */
1335   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1336   jj = 0;
1337   for (i=0; i<m; i++) {
1338     for (j=0; j<ourlens[i]; j++) {
1339       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1340       jj++;
1341     }
1342   }
1343 
1344   /* create our matrix */
1345   for (i=0; i<m; i++) {
1346     ourlens[i] -= offlens[i];
1347   }
1348   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1349   A = *newmat;
1350   for (i=0; i<m; i++) {
1351     ourlens[i] += offlens[i];
1352   }
1353 
1354   if (!rank) {
1355     ierr = PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr);
1356 
1357     /* read in my part of the matrix numerical values  */
1358     nz = procsnz[0];
1359     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1360 
1361     /* insert into matrix */
1362     jj      = rstart;
1363     smycols = mycols;
1364     svals   = vals;
1365     for (i=0; i<m; i++) {
1366       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1367       smycols += ourlens[i];
1368       svals   += ourlens[i];
1369       jj++;
1370     }
1371 
1372     /* read in the other processes' parts and ship them out */
1373     for (i=1; i<size; i++) {
1374       nz   = procsnz[i];
1375       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1376       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1377     }
1378     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1379   } else {
1380     /* receive numeric values */
1381     ierr = PetscMalloc(nz*sizeof(Scalar),&vals);CHKERRQ(ierr);
1382 
1383     /* receive message of values*/
1384     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1385     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1386     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1387 
1388     /* insert into matrix */
1389     jj      = rstart;
1390     smycols = mycols;
1391     svals   = vals;
1392     for (i=0; i<m; i++) {
1393       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1394       smycols += ourlens[i];
1395       svals   += ourlens[i];
1396       jj++;
1397     }
1398   }
1399   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1400   ierr = PetscFree(vals);CHKERRQ(ierr);
1401   ierr = PetscFree(mycols);CHKERRQ(ierr);
1402   ierr = PetscFree(rowners);CHKERRQ(ierr);
1403 
1404   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1405   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 EXTERN_C_END
1409 
1410 
1411 
1412 
1413 
1414