xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 973046187a5e16e2b37c23287137a5e6ee1ae012)
1 /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNCT__
12 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16   int          m = A->m,rstart = mdn->rstart,ierr;
17   PetscScalar  *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
27   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
28   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
29   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
30   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32 
33   *iscopy = PETSC_TRUE;
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
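/*
   MatSetValues_MPIDense - Entries whose row lies in the local range [rstart,rend) are
   inserted directly into the local sequential block (honoring row or column orientation);
   all other entries are placed in the stash (unless MAT_IGNORE_OFF_PROC_ENTRIES was set)
   and are communicated to their owning processes during assembly.
*/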
38 #undef __FUNCT__
39 #define __FUNCT__ "MatSetValues_MPIDense"
40 int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
41 {
42   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
43   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
44   PetscTruth   roworiented = A->roworiented;
45 
46   PetscFunctionBegin;
47   for (i=0; i<m; i++) {
48     if (idxm[i] < 0) continue;
49     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
50     if (idxm[i] >= rstart && idxm[i] < rend) {
51       row = idxm[i] - rstart;
52       if (roworiented) {
53         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
54       } else {
55         for (j=0; j<n; j++) {
56           if (idxn[j] < 0) continue;
57           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
58           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
59         }
60       }
61     } else {
62       if (!A->donotstash) {
63         if (roworiented) {
64           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
65         } else {
66           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
67         }
68       }
69     }
70   }
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetValues_MPIDense"
76 int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
77 {
78   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
79   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
80 
81   PetscFunctionBegin;
82   for (i=0; i<m; i++) {
83     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
84     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
85     if (idxm[i] >= rstart && idxm[i] < rend) {
86       row = idxm[i] - rstart;
87       for (j=0; j<n; j++) {
88         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
89         if (idxn[j] >= mat->N) {
90           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
91         }
92         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
93       }
94     } else {
95       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
96     }
97   }
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatGetArray_MPIDense"
103 int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
104 {
105   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
106   int          ierr;
107 
108   PetscFunctionBegin;
109   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
110   PetscFunctionReturn(0);
111 }
112 
113 #undef __FUNCT__
114 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
115 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
116 {
117   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
118   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
119   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
120   PetscScalar  *av,*bv,*v = lmat->v;
121   Mat          newmat;
122 
123   PetscFunctionBegin;
124   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
125   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
126   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
127   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
128 
129   /* No parallel redistribution currently supported! Should really check each index set
130      to confirm that it is OK.  Currently the submatrix must have the same row partitioning
131      as the original matrix! */
132 
133   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
134   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
135 
136   /* Check submatrix call */
137   if (scall == MAT_REUSE_MATRIX) {
138     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
139     /* Really need to test rows and column sizes! */
140     newmat = *B;
141   } else {
142     /* Create and fill new matrix */
143     ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
144   }
145 
146   /* Now extract the data pointers and do the copy, column at a time */
147   newmatd = (Mat_MPIDense*)newmat->data;
148   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
149 
150   for (i=0; i<ncols; i++) {
151     av = v + nlrows*icol[i];
152     for (j=0; j<nrows; j++) {
153       *bv++ = av[irow[j] - rstart];
154     }
155   }
156 
157   /* Assemble the matrices so that the correct flags are set */
158   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
159   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
160 
161   /* Free work space */
162   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
163   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
164   *B = newmat;
165   PetscFunctionReturn(0);
166 }
167 
168 #undef __FUNCT__
169 #define __FUNCT__ "MatRestoreArray_MPIDense"
170 int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
171 {
172   PetscFunctionBegin;
173   PetscFunctionReturn(0);
174 }
175 
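/*
   Assembly for MPIDense proceeds in two phases: MatAssemblyBegin_MPIDense first checks
   (via an allreduce) that every process is using the same insert mode and then starts
   scattering the stashed off-process entries; MatAssemblyEnd_MPIDense receives those
   entries, applies them row by row through MatSetValues_MPIDense, assembles the local
   sequential block, and on the first final assembly builds the scatter context used by
   the matrix-vector products.
*/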
176 #undef __FUNCT__
177 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
178 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
179 {
180   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
181   MPI_Comm     comm = mat->comm;
182   int          ierr,nstash,reallocs;
183   InsertMode   addv;
184 
185   PetscFunctionBegin;
186   /* make sure all processors are either in INSERTMODE or ADDMODE */
187   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
188   if (addv == (ADD_VALUES|INSERT_VALUES)) {
189     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
190   }
191   mat->insertmode = addv; /* in case this processor had no cache */
192 
193   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
194   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
195   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
196   PetscFunctionReturn(0);
197 }
198 
199 #undef __FUNCT__
200 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
201 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
202 {
203   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
204   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
205   PetscScalar  *val;
206   InsertMode   addv=mat->insertmode;
207 
208   PetscFunctionBegin;
209   /*  wait on receives */
210   while (1) {
211     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
212     if (!flg) break;
213 
214     for (i=0; i<n;) {
215       /* Now identify the consecutive vals belonging to the same row */
216       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
217       if (j < n) ncols = j-i;
218       else       ncols = n-i;
219       /* Now assemble all these values with a single function call */
220       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
221       i = j;
222     }
223   }
224   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
225 
226   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
227   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
228 
229   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
230     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
231   }
232   PetscFunctionReturn(0);
233 }
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatZeroEntries_MPIDense"
237 int MatZeroEntries_MPIDense(Mat A)
238 {
239   int          ierr;
240   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
241 
242   PetscFunctionBegin;
243   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatGetBlockSize_MPIDense"
249 int MatGetBlockSize_MPIDense(Mat A,int *bs)
250 {
251   PetscFunctionBegin;
252   *bs = 1;
253   PetscFunctionReturn(0);
254 }
255 
256 /* the code does not do the diagonal entries correctly unless the
257    matrix is square and the column and row ownerships are identical.
258    This is a BUG. The only way to fix it seems to be to access
259    the local matrix mdn->A directly and not through the MatZeroRows()
260    routine.
261 */
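/*
   MatZeroRows_MPIDense routes each supplied global row index to its owning process with
   nonblocking sends/receives, collects the rows owned locally, and then zeros them (and
   optionally sets the diagonal) by calling MatZeroRows() on the local sequential block.
*/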
262 #undef __FUNCT__
263 #define __FUNCT__ "MatZeroRows_MPIDense"
264 int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
265 {
266   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
267   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
268   int            *nprocs,j,idx,nsends;
269   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
270   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
271   int            *lens,imdex,*lrows,*values;
272   MPI_Comm       comm = A->comm;
273   MPI_Request    *send_waits,*recv_waits;
274   MPI_Status     recv_status,*send_status;
275   IS             istmp;
276   PetscTruth     found;
277 
278   PetscFunctionBegin;
279   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
280   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
281 
282   /*  first count number of contributors to each processor */
283   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
284   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
285   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
286   for (i=0; i<N; i++) {
287     idx = rows[i];
288     found = PETSC_FALSE;
289     for (j=0; j<size; j++) {
290       if (idx >= owners[j] && idx < owners[j+1]) {
291         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
292       }
293     }
294     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
295   }
296   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
297 
298   /* inform other processors of number of messages and max length*/
299   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
300 
301   /* post receives:   */
302   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
303   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
304   for (i=0; i<nrecvs; i++) {
305     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
306   }
307 
308   /* do sends:
309       1) starts[i] gives the starting index in svalues for stuff going to
310          the ith processor
311   */
312   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
313   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
314   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
315   starts[0]  = 0;
316   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
317   for (i=0; i<N; i++) {
318     svalues[starts[owner[i]]++] = rows[i];
319   }
320   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
321 
322   starts[0] = 0;
323   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
324   count = 0;
325   for (i=0; i<size; i++) {
326     if (nprocs[2*i+1]) {
327       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
328     }
329   }
330   ierr = PetscFree(starts);CHKERRQ(ierr);
331 
332   base = owners[rank];
333 
334   /*  wait on receives */
335   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
336   source = lens + nrecvs;
337   count  = nrecvs; slen = 0;
338   while (count) {
339     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
340     /* unpack receives into our local space */
341     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
342     source[imdex]  = recv_status.MPI_SOURCE;
343     lens[imdex]  = n;
344     slen += n;
345     count--;
346   }
347   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
348 
349   /* move the data into the send scatter */
350   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
351   count = 0;
352   for (i=0; i<nrecvs; i++) {
353     values = rvalues + i*nmax;
354     for (j=0; j<lens[i]; j++) {
355       lrows[count++] = values[j] - base;
356     }
357   }
358   ierr = PetscFree(rvalues);CHKERRQ(ierr);
359   ierr = PetscFree(lens);CHKERRQ(ierr);
360   ierr = PetscFree(owner);CHKERRQ(ierr);
361   ierr = PetscFree(nprocs);CHKERRQ(ierr);
362 
363   /* actually zap the local rows */
364   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
365   PetscLogObjectParent(A,istmp);
366   ierr = PetscFree(lrows);CHKERRQ(ierr);
367   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
368   ierr = ISDestroy(istmp);CHKERRQ(ierr);
369 
370   /* wait on sends */
371   if (nsends) {
372     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
373     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
374     ierr = PetscFree(send_status);CHKERRQ(ierr);
375   }
376   ierr = PetscFree(send_waits);CHKERRQ(ierr);
377   ierr = PetscFree(svalues);CHKERRQ(ierr);
378 
379   PetscFunctionReturn(0);
380 }
381 
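/*
   MatMult_MPIDense computes y = A x by first scattering the distributed vector x into the
   full-length work vector lvec, so that every process holds all the entries its local row
   block needs, and then applying the sequential dense kernel to that block.
*/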
382 #undef __FUNCT__
383 #define __FUNCT__ "MatMult_MPIDense"
384 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
385 {
386   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
387   int          ierr;
388 
389   PetscFunctionBegin;
390   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
391   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
392   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
393   PetscFunctionReturn(0);
394 }
395 
396 #undef __FUNCT__
397 #define __FUNCT__ "MatMultAdd_MPIDense"
398 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
399 {
400   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
401   int          ierr;
402 
403   PetscFunctionBegin;
404   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
405   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
406   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 
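/*
   MatMultTranspose_MPIDense computes y = A^T x by applying the transpose of the local row
   block to the local part of x, which yields a full-length partial result in lvec; these
   partial results are then summed into the distributed y with a reverse scatter using
   ADD_VALUES.  MatMultTransposeAdd_MPIDense does the same but accumulates into a copy of yy.
*/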
410 #undef __FUNCT__
411 #define __FUNCT__ "MatMultTranspose_MPIDense"
412 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
413 {
414   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
415   int          ierr;
416   PetscScalar  zero = 0.0;
417 
418   PetscFunctionBegin;
419   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
420   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
421   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
422   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
428 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
429 {
430   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
431   int          ierr;
432 
433   PetscFunctionBegin;
434   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
435   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
436   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
437   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
438   PetscFunctionReturn(0);
439 }
440 
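/*
   MatGetDiagonal_MPIDense - The local block is stored column major with leading dimension
   m = A->m, so the global diagonal entry (rstart+i,rstart+i) is local entry (i,rstart+i),
   i.e. aloc->v[rstart*m + i*m + i].
*/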
441 #undef __FUNCT__
442 #define __FUNCT__ "MatGetDiagonal_MPIDense"
443 int MatGetDiagonal_MPIDense(Mat A,Vec v)
444 {
445   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
446   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
447   int          ierr,len,i,n,m = A->m,radd;
448   PetscScalar  *x,zero = 0.0;
449 
450   PetscFunctionBegin;
451   ierr = VecSet(&zero,v);CHKERRQ(ierr);
452   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
453   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
454   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
455   len  = PetscMin(a->A->m,a->A->n);
456   radd = a->rstart*m;
457   for (i=0; i<len; i++) {
458     x[i] = aloc->v[radd + i*m + i];
459   }
460   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatDestroy_MPIDense"
466 int MatDestroy_MPIDense(Mat mat)
467 {
468   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
469   int          ierr;
470 
471   PetscFunctionBegin;
472 
473 #if defined(PETSC_USE_LOG)
474   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
475 #endif
476   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
477   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
478   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
479   if (mdn->lvec)   VecDestroy(mdn->lvec);
480   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
481   if (mdn->factor) {
482     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
483     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
484     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
485     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
486   }
487   ierr = PetscFree(mdn);CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatView_MPIDense_Binary"
493 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
494 {
495   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
496   int          ierr;
497 
498   PetscFunctionBegin;
499   if (mdn->size == 1) {
500     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
501   }
502   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
508 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
509 {
510   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
511   int               ierr,size = mdn->size,rank = mdn->rank;
512   PetscViewerType   vtype;
513   PetscTruth        isascii,isdraw;
514   PetscViewer       sviewer;
515   PetscViewerFormat format;
516 
517   PetscFunctionBegin;
518   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
519   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
520   if (isascii) {
521     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
522     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
523     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
524       MatInfo info;
525       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
526       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
527                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
528       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
529       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
530       PetscFunctionReturn(0);
531     } else if (format == PETSC_VIEWER_ASCII_INFO) {
532       PetscFunctionReturn(0);
533     }
534   } else if (isdraw) {
535     PetscDraw       draw;
536     PetscTruth isnull;
537 
538     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
539     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
540     if (isnull) PetscFunctionReturn(0);
541   }
542 
543   if (size == 1) {
544     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
545   } else {
546     /* assemble the entire matrix onto first processor. */
547     Mat          A;
548     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
549     PetscScalar  *vals;
550 
551     if (!rank) {
552       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
553     } else {
554       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
555     }
556     PetscLogObjectParent(mat,A);
557 
558     /* Copy the matrix ... This isn't the most efficient means,
559        but it's quick for now */
560     row = mdn->rstart; m = mdn->A->m;
561     for (i=0; i<m; i++) {
562       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
563       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
564       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
565       row++;
566     }
567 
568     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
569     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
570     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
571     if (!rank) {
572       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
573     }
574     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
575     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
576     ierr = MatDestroy(A);CHKERRQ(ierr);
577   }
578   PetscFunctionReturn(0);
579 }
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatView_MPIDense"
583 int MatView_MPIDense(Mat mat,PetscViewer viewer)
584 {
585   int        ierr;
586   PetscTruth isascii,isbinary,isdraw,issocket;
587 
588   PetscFunctionBegin;
589 
590   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
591   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
592   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
593   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
594 
595   if (isascii || issocket || isdraw) {
596     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
597   } else if (isbinary) {
598     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
599   } else {
600     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
601   }
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatGetInfo_MPIDense"
607 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
608 {
609   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
610   Mat          mdn = mat->A;
611   int          ierr;
612   PetscReal    isend[5],irecv[5];
613 
614   PetscFunctionBegin;
615   info->rows_global    = (double)A->M;
616   info->columns_global = (double)A->N;
617   info->rows_local     = (double)A->m;
618   info->columns_local  = (double)A->N;
619   info->block_size     = 1.0;
620   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
621   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
622   isend[3] = info->memory;  isend[4] = info->mallocs;
623   if (flag == MAT_LOCAL) {
624     info->nz_used      = isend[0];
625     info->nz_allocated = isend[1];
626     info->nz_unneeded  = isend[2];
627     info->memory       = isend[3];
628     info->mallocs      = isend[4];
629   } else if (flag == MAT_GLOBAL_MAX) {
630     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
631     info->nz_used      = irecv[0];
632     info->nz_allocated = irecv[1];
633     info->nz_unneeded  = irecv[2];
634     info->memory       = irecv[3];
635     info->mallocs      = irecv[4];
636   } else if (flag == MAT_GLOBAL_SUM) {
637     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
638     info->nz_used      = irecv[0];
639     info->nz_allocated = irecv[1];
640     info->nz_unneeded  = irecv[2];
641     info->memory       = irecv[3];
642     info->mallocs      = irecv[4];
643   }
644   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
645   info->fill_ratio_needed = 0;
646   info->factor_mallocs    = 0;
647   PetscFunctionReturn(0);
648 }
649 
650 #undef __FUNCT__
651 #define __FUNCT__ "MatSetOption_MPIDense"
652 int MatSetOption_MPIDense(Mat A,MatOption op)
653 {
654   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
655   int          ierr;
656 
657   PetscFunctionBegin;
658   switch (op) {
659   case MAT_NO_NEW_NONZERO_LOCATIONS:
660   case MAT_YES_NEW_NONZERO_LOCATIONS:
661   case MAT_NEW_NONZERO_LOCATION_ERR:
662   case MAT_NEW_NONZERO_ALLOCATION_ERR:
663   case MAT_COLUMNS_SORTED:
664   case MAT_COLUMNS_UNSORTED:
665     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
666     break;
667   case MAT_ROW_ORIENTED:
668     a->roworiented = PETSC_TRUE;
669     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
670     break;
671   case MAT_ROWS_SORTED:
672   case MAT_ROWS_UNSORTED:
673   case MAT_YES_NEW_DIAGONALS:
674   case MAT_USE_HASH_TABLE:
675     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
676     break;
677   case MAT_COLUMN_ORIENTED:
678     a->roworiented = PETSC_FALSE;
679     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
680     break;
681   case MAT_IGNORE_OFF_PROC_ENTRIES:
682     a->donotstash = PETSC_TRUE;
683     break;
684   case MAT_NO_NEW_DIAGONALS:
685     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
686   default:
687     SETERRQ(PETSC_ERR_SUP,"unknown option");
688   }
689   PetscFunctionReturn(0);
690 }
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "MatGetRow_MPIDense"
694 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
695 {
696   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
697   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
698 
699   PetscFunctionBegin;
700   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
701   lrow = row - rstart;
702   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
703   PetscFunctionReturn(0);
704 }
705 
706 #undef __FUNCT__
707 #define __FUNCT__ "MatRestoreRow_MPIDense"
708 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
709 {
710   int ierr;
711 
712   PetscFunctionBegin;
713   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
714   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
715   PetscFunctionReturn(0);
716 }
717 
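/*
   MatDiagonalScale_MPIDense - Left scaling by ll multiplies each locally owned row i by
   ll[i] directly; right scaling by rr first scatters rr into the full-length work vector
   lvec (so every process sees all the column factors) and then scales each local column.
*/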
718 #undef __FUNCT__
719 #define __FUNCT__ "MatDiagonalScale_MPIDense"
720 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
721 {
722   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
723   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
724   PetscScalar  *l,*r,x,*v;
725   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
726 
727   PetscFunctionBegin;
728   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
729   if (ll) {
730     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
731     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
732     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
733     for (i=0; i<m; i++) {
734       x = l[i];
735       v = mat->v + i;
736       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
737     }
738     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
739     PetscLogFlops(n*m);
740   }
741   if (rr) {
742     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
743     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
744     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
745     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
746     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
747     for (i=0; i<n; i++) {
748       x = r[i];
749       v = mat->v + i*m;
750       for (j=0; j<m; j++) { (*v++) *= x;}
751     }
752     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
753     PetscLogFlops(n*m);
754   }
755   PetscFunctionReturn(0);
756 }
757 
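/*
   MatNorm_MPIDense - The Frobenius norm is formed from the local sums of squares combined
   with an MPI_SUM reduction; the 1-norm accumulates per-column absolute sums, reduces them
   with MPI_SUM, and takes the maximum; the infinity norm reduces the local row norms with
   MPI_MAX.  The 2-norm is not supported in parallel.
*/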
758 #undef __FUNCT__
759 #define __FUNCT__ "MatNorm_MPIDense"
760 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
761 {
762   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
763   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
764   int          ierr,i,j;
765   PetscReal    sum = 0.0;
766   PetscScalar  *v = mat->v;
767 
768   PetscFunctionBegin;
769   if (mdn->size == 1) {
770     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
771   } else {
772     if (type == NORM_FROBENIUS) {
773       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
774 #if defined(PETSC_USE_COMPLEX)
775         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
776 #else
777         sum += (*v)*(*v); v++;
778 #endif
779       }
780       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
781       *nrm = sqrt(*nrm);
782       PetscLogFlops(2*mdn->A->n*mdn->A->m);
783     } else if (type == NORM_1) {
784       PetscReal *tmp,*tmp2;
785       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
786       tmp2 = tmp + A->N;
787       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
788       *nrm = 0.0;
789       v = mat->v;
790       for (j=0; j<mdn->A->n; j++) {
791         for (i=0; i<mdn->A->m; i++) {
792           tmp[j] += PetscAbsScalar(*v);  v++;
793         }
794       }
795       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
796       for (j=0; j<A->N; j++) {
797         if (tmp2[j] > *nrm) *nrm = tmp2[j];
798       }
799       ierr = PetscFree(tmp);CHKERRQ(ierr);
800       PetscLogFlops(A->n*A->m);
801     } else if (type == NORM_INFINITY) { /* max row norm */
802       PetscReal ntemp;
803       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
804       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
805     } else {
806       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
807     }
808   }
809   PetscFunctionReturn(0);
810 }
811 
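/*
   MatTranspose_MPIDense builds B = A^T by inserting each local column of A as a row of B
   (the column indices of B being this process's global row range) and assembling; for the
   in-place case, which is supported only for square matrices, the result replaces A via
   MatHeaderCopy().
*/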
812 #undef __FUNCT__
813 #define __FUNCT__ "MatTranspose_MPIDense"
814 int MatTranspose_MPIDense(Mat A,Mat *matout)
815 {
816   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
817   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
818   Mat          B;
819   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
820   int          j,i,ierr;
821   PetscScalar  *v;
822 
823   PetscFunctionBegin;
824   if (!matout && M != N) {
825     SETERRQ(PETSC_ERR_SUP,"In-place transpose supported only for square matrices");
826   }
827   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
828 
829   m = a->A->m; n = a->A->n; v = Aloc->v;
830   ierr = PetscMalloc((m+1)*sizeof(int),&rwork);CHKERRQ(ierr);
831   for (i=0; i<m; i++) rwork[i] = rstart + i; /* columns of B corresponding to this process's rows of A */
832   for (j=0; j<n; j++) {
833     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
834     v   += m;
835   }
836   ierr = PetscFree(rwork);CHKERRQ(ierr);
837   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
838   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
839   if (matout) {
840     *matout = B;
841   } else {
842     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
843   }
844   PetscFunctionReturn(0);
845 }
846 
847 #include "petscblaslapack.h"
848 #undef __FUNCT__
849 #define __FUNCT__ "MatScale_MPIDense"
850 int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
851 {
852   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
853   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
854   int          one = 1,nz;
855 
856   PetscFunctionBegin;
857   nz = inA->m*inA->N;
858   BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
859   PetscLogFlops(nz);
860   PetscFunctionReturn(0);
861 }
862 
863 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
867 int MatSetUpPreallocation_MPIDense(Mat A)
868 {
869   int        ierr;
870 
871   PetscFunctionBegin;
872   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
873   PetscFunctionReturn(0);
874 }
875 
876 /* -------------------------------------------------------------------*/
877 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
878        MatGetRow_MPIDense,
879        MatRestoreRow_MPIDense,
880        MatMult_MPIDense,
881 /* 4*/ MatMultAdd_MPIDense,
882        MatMultTranspose_MPIDense,
883        MatMultTransposeAdd_MPIDense,
884        0,
885        0,
886        0,
887 /*10*/ 0,
888        0,
889        0,
890        0,
891        MatTranspose_MPIDense,
892 /*15*/ MatGetInfo_MPIDense,
893        0,
894        MatGetDiagonal_MPIDense,
895        MatDiagonalScale_MPIDense,
896        MatNorm_MPIDense,
897 /*20*/ MatAssemblyBegin_MPIDense,
898        MatAssemblyEnd_MPIDense,
899        0,
900        MatSetOption_MPIDense,
901        MatZeroEntries_MPIDense,
902 /*25*/ MatZeroRows_MPIDense,
903        0,
904        0,
905        0,
906        0,
907 /*30*/ MatSetUpPreallocation_MPIDense,
908        0,
909        0,
910        MatGetArray_MPIDense,
911        MatRestoreArray_MPIDense,
912 /*35*/ MatDuplicate_MPIDense,
913        0,
914        0,
915        0,
916        0,
917 /*40*/ 0,
918        MatGetSubMatrices_MPIDense,
919        0,
920        MatGetValues_MPIDense,
921        0,
922 /*45*/ 0,
923        MatScale_MPIDense,
924        0,
925        0,
926        0,
927 /*50*/ MatGetBlockSize_MPIDense,
928        0,
929        0,
930        0,
931        0,
932 /*55*/ 0,
933        0,
934        0,
935        0,
936        0,
937 /*60*/ MatGetSubMatrix_MPIDense,
938        MatDestroy_MPIDense,
939        MatView_MPIDense,
940        MatGetPetscMaps_Petsc,
941        0,
942 /*65*/ 0,
943        0,
944        0,
945        0,
946        0,
947 /*70*/ 0,
948        0,
949        0,
950        0,
951        0,
952 /*75*/ 0,
953        0,
954        0,
955        0,
956        0,
957 /*80*/ 0,
958        0,
959        0,
960        0,
961        0,
962 /*85*/ MatLoad_MPIDense};
963 
964 EXTERN_C_BEGIN
965 #undef __FUNCT__
966 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
967 int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
968 {
969   Mat_MPIDense *a;
970   int          ierr;
971 
972   PetscFunctionBegin;
973   mat->preallocated = PETSC_TRUE;
974   /* Note:  For now, when data is specified, this assumes the user correctly
975    allocates the local dense storage space.  We should add error checking. */
976 
977   a    = (Mat_MPIDense*)mat->data;
978   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
979   PetscLogObjectParent(mat,a->A);
980   PetscFunctionReturn(0);
981 }
982 EXTERN_C_END
983 
984 /*MC
985    MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
986 
987    Options Database Keys:
988 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
989 
990   Level: beginner
991 
992 .seealso: MatCreateMPIDense
993 M*/
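/*
   A minimal usage sketch (hypothetical global sizes 100 x 100; error checking of the
   surrounding code trimmed for brevity), following the creation sequence used elsewhere
   in this file:

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);      (run with -mat_type mpidense)
      ... calls to MatSetValues() ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/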
994 
995 EXTERN_C_BEGIN
996 #undef __FUNCT__
997 #define __FUNCT__ "MatCreate_MPIDense"
998 int MatCreate_MPIDense(Mat mat)
999 {
1000   Mat_MPIDense *a;
1001   int          ierr,i;
1002 
1003   PetscFunctionBegin;
1004   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1005   mat->data         = (void*)a;
1006   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1007   mat->factor       = 0;
1008   mat->mapping      = 0;
1009 
1010   a->factor       = 0;
1011   mat->insertmode = NOT_SET_VALUES;
1012   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1013   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1014 
1015   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1016   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1017   a->nvec = mat->n;
1018 
1019   /* the information in the maps duplicates the information computed below, eventually
1020      we should remove the duplicate information that is not contained in the maps */
1021   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1022   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1023 
1024   /* build local table of row and column ownerships */
1025   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1026   a->cowners = a->rowners + a->size + 1;
1027   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1028   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1029   a->rowners[0] = 0;
1030   for (i=2; i<=a->size; i++) {
1031     a->rowners[i] += a->rowners[i-1];
1032   }
1033   a->rstart = a->rowners[a->rank];
1034   a->rend   = a->rowners[a->rank+1];
1035   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1036   a->cowners[0] = 0;
1037   for (i=2; i<=a->size; i++) {
1038     a->cowners[i] += a->cowners[i-1];
1039   }
1040 
1041   /* build cache for off array entries formed */
1042   a->donotstash = PETSC_FALSE;
1043   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1044 
1045   /* stuff used for matrix vector multiply */
1046   a->lvec        = 0;
1047   a->Mvctx       = 0;
1048   a->roworiented = PETSC_TRUE;
1049 
1050   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1051                                      "MatGetDiagonalBlock_MPIDense",
1052                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1053   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1054                                      "MatMPIDenseSetPreallocation_MPIDense",
1055                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1056   PetscFunctionReturn(0);
1057 }
1058 EXTERN_C_END
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1062 /*@C
1063    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1064 
1065    Not collective
1066 
1067    Input Parameters:
1068 .  A - the matrix
1069 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1070    to control all matrix memory allocation.
1071 
1072    Notes:
1073    The dense format is fully compatible with standard Fortran 77
1074    storage by columns.
1075 
1076    The data input variable is intended primarily for Fortran programmers
1077    who wish to allocate their own matrix memory space.  Most users should
1078    set data=PETSC_NULL.
1079 
1080    Level: intermediate
1081 
1082 .keywords: matrix,dense, parallel
1083 
1084 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1085 @*/
1086 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1087 {
1088   int ierr,(*f)(Mat,PetscScalar *);
1089 
1090   PetscFunctionBegin;
1091   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1092   if (f) {
1093     ierr = (*f)(mat,data);CHKERRQ(ierr);
1094   }
1095   PetscFunctionReturn(0);
1096 }
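/*
   A minimal sketch of supplying user-allocated storage (most users should simply pass
   PETSC_NULL).  The array must hold the local block, i.e. (local rows) x (global columns)
   scalars in column-major (Fortran) order, since that is what the MatCreateSeqDense() call
   above wraps; the names m and N below are placeholders for those sizes.

      PetscScalar *data;
      ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);
*/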
1097 
1098 #undef __FUNCT__
1099 #define __FUNCT__ "MatCreateMPIDense"
1100 /*@C
1101    MatCreateMPIDense - Creates a parallel matrix in dense format.
1102 
1103    Collective on MPI_Comm
1104 
1105    Input Parameters:
1106 +  comm - MPI communicator
1107 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1108 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1109 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1110 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1111 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1112    to control all matrix memory allocation.
1113 
1114    Output Parameter:
1115 .  A - the matrix
1116 
1117    Notes:
1118    The dense format is fully compatible with standard Fortran 77
1119    storage by columns.
1120 
1121    The data input variable is intended primarily for Fortran programmers
1122    who wish to allocate their own matrix memory space.  Most users should
1123    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1124 
1125    The user MUST specify either the local or global matrix dimensions
1126    (possibly both).
1127 
1128    Level: intermediate
1129 
1130 .keywords: matrix,dense, parallel
1131 
1132 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1133 @*/
1134 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1135 {
1136   int ierr,size;
1137 
1138   PetscFunctionBegin;
1139   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1140   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1141   if (size > 1) {
1142     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1143     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1144   } else {
1145     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1146     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1147   }
1148   PetscFunctionReturn(0);
1149 }
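/*
   A minimal sketch (hypothetical global sizes; PETSc chooses the local decomposition):

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,PETSC_NULL,&A);CHKERRQ(ierr);

   On one process this yields a MATSEQDENSE matrix, on several a MATMPIDENSE one, matching
   the branch on the communicator size above.
*/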
1150 
1151 #undef __FUNCT__
1152 #define __FUNCT__ "MatDuplicate_MPIDense"
1153 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1154 {
1155   Mat          mat;
1156   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1157   int          ierr;
1158 
1159   PetscFunctionBegin;
1160   *newmat       = 0;
1161   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1162   ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
1163   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1164   mat->data         = (void*)a;
1165   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1166   mat->factor       = A->factor;
1167   mat->assembled    = PETSC_TRUE;
1168   mat->preallocated = PETSC_TRUE;
1169 
1170   a->rstart       = oldmat->rstart;
1171   a->rend         = oldmat->rend;
1172   a->size         = oldmat->size;
1173   a->rank         = oldmat->rank;
1174   mat->insertmode = NOT_SET_VALUES;
1175   a->nvec         = oldmat->nvec;
1176   a->donotstash   = oldmat->donotstash;
1177   ierr            = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1178   a->cowners      = a->rowners + a->size + 1;
1179   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1180   ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+1)*sizeof(int));CHKERRQ(ierr);
1180   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1181 
1182   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1183   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1184   PetscLogObjectParent(mat,a->A);
1185   *newmat = mat;
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #include "petscsys.h"
1190 
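/*
   MatLoad_MPIDense_DenseInFile - Handles binary files in which the matrix is stored as a
   dense array of values, row by row.  Process 0 reads each process's block of rows and
   ships it off; every process then copies its block into the column-major local array
   (array[i + j*m] = vals[i*N + j]), which is why the values cannot be read straight into
   the matrix storage.
*/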
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1193 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1194 {
1195   int          *rowners,i,size,rank,m,ierr,nz,j;
1196   PetscScalar  *array,*vals,*vals_ptr;
1197   MPI_Status   status;
1198 
1199   PetscFunctionBegin;
1200   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1201   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1202 
1203   /* determine ownership of all rows */
1204   m          = M/size + ((M % size) > rank);
1205   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1206   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1207   rowners[0] = 0;
1208   for (i=2; i<=size; i++) {
1209     rowners[i] += rowners[i-1];
1210   }
1211 
1212   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1213   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1214 
1215   if (!rank) {
1216     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1217 
1218     /* read in my part of the matrix numerical values  */
1219     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1220 
1221     /* insert into matrix by row (this is why we cannot read directly into the array) */
1222     vals_ptr = vals;
1223     for (i=0; i<m; i++) {
1224       for (j=0; j<N; j++) {
1225         array[i + j*m] = *vals_ptr++;
1226       }
1227     }
1228 
1229     /* read in the other processes' rows and ship them out */
1230     for (i=1; i<size; i++) {
1231       nz   = (rowners[i+1] - rowners[i])*N;
1232       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1233       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1234     }
1235   } else {
1236     /* receive numeric values */
1237     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1238 
1239     /* receive message of values*/
1240     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1241 
1242     /* insert into matrix by row (this is why we cannot read directly into the array) */
1243     vals_ptr = vals;
1244     for (i=0; i<m; i++) {
1245       for (j=0; j<N; j++) {
1246         array[i + j*m] = *vals_ptr++;
1247       }
1248     }
1249   }
1250   ierr = PetscFree(rowners);CHKERRQ(ierr);
1251   ierr = PetscFree(vals);CHKERRQ(ierr);
1252   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1253   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 EXTERN_C_BEGIN
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatLoad_MPIDense"
1260 int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1261 {
1262   Mat          A;
1263   PetscScalar  *vals,*svals;
1264   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1265   MPI_Status   status;
1266   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1267   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1268   int          tag = ((PetscObject)viewer)->tag;
1269   int          i,nz,ierr,j,rstart,rend,fd;
1270 
1271   PetscFunctionBegin;
1272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1273   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1274   if (!rank) {
1275     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1276     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1277     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1278   }
1279 
1280   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1281   M = header[1]; N = header[2]; nz = header[3];
1282 
1283   /*
1284        Handle case where matrix is stored on disk as a dense matrix
1285   */
1286   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1287     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1288     PetscFunctionReturn(0);
1289   }
1290 
1291   /* determine ownership of all rows */
1292   m          = M/size + ((M % size) > rank);
1293   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1294   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1295   rowners[0] = 0;
1296   for (i=2; i<=size; i++) {
1297     rowners[i] += rowners[i-1];
1298   }
1299   rstart = rowners[rank];
1300   rend   = rowners[rank+1];
1301 
1302   /* distribute row lengths to all processors */
1303   ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
1304   offlens = ourlens + (rend-rstart);
1305   if (!rank) {
1306     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1307     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1308     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1309     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1310     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1311     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1312   } else {
1313     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1314   }
1315 
1316   if (!rank) {
1317     /* calculate the number of nonzeros on each processor */
1318     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1319     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1320     for (i=0; i<size; i++) {
1321       for (j=rowners[i]; j< rowners[i+1]; j++) {
1322         procsnz[i] += rowlengths[j];
1323       }
1324     }
1325     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1326 
1327     /* determine max buffer needed and allocate it */
1328     maxnz = 0;
1329     for (i=0; i<size; i++) {
1330       maxnz = PetscMax(maxnz,procsnz[i]);
1331     }
1332     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1333 
1334     /* read in my part of the matrix column indices  */
1335     nz = procsnz[0];
1336     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1337     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1338 
1339     /* read in everyone else's column indices and ship them off */
1340     for (i=1; i<size; i++) {
1341       nz   = procsnz[i];
1342       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1343       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1344     }
1345     ierr = PetscFree(cols);CHKERRQ(ierr);
1346   } else {
1347     /* determine buffer space needed for message */
1348     nz = 0;
1349     for (i=0; i<m; i++) {
1350       nz += ourlens[i];
1351     }
1352     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1353 
1354     /* receive message of column indices*/
1355     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1356     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1357     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1358   }
1359 
1360   /* loop over local rows, determining number of off diagonal entries */
1361   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1362   jj = 0;
1363   for (i=0; i<m; i++) {
1364     for (j=0; j<ourlens[i]; j++) {
1365       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1366       jj++;
1367     }
1368   }
1369 
1370   /* create our matrix */
1371   for (i=0; i<m; i++) {
1372     ourlens[i] -= offlens[i];
1373   }
1374   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1375   A = *newmat;
1376   for (i=0; i<m; i++) {
1377     ourlens[i] += offlens[i];
1378   }
1379 
1380   if (!rank) {
1381     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1382 
1383     /* read in my part of the matrix numerical values  */
1384     nz = procsnz[0];
1385     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1386 
1387     /* insert into matrix */
1388     jj      = rstart;
1389     smycols = mycols;
1390     svals   = vals;
1391     for (i=0; i<m; i++) {
1392       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1393       smycols += ourlens[i];
1394       svals   += ourlens[i];
1395       jj++;
1396     }
1397 
1398     /* read in the other processes' values and ship them out */
1399     for (i=1; i<size; i++) {
1400       nz   = procsnz[i];
1401       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1402       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1403     }
1404     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1405   } else {
1406     /* receive numeric values */
1407     ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1408 
1409     /* receive message of values*/
1410     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1411     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1412     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1413 
1414     /* insert into matrix */
1415     jj      = rstart;
1416     smycols = mycols;
1417     svals   = vals;
1418     for (i=0; i<m; i++) {
1419       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1420       smycols += ourlens[i];
1421       svals   += ourlens[i];
1422       jj++;
1423     }
1424   }
1425   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1426   ierr = PetscFree(vals);CHKERRQ(ierr);
1427   ierr = PetscFree(mycols);CHKERRQ(ierr);
1428   ierr = PetscFree(rowners);CHKERRQ(ierr);
1429 
1430   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1431   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 EXTERN_C_END
1435 
1436 
1437 
1438 
1439 
1440