xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 329f5518e9d4bb7ce96c0c5576cc53785c973973)
1 /*$Id: mpidense.c,v 1.133 1999/11/24 21:53:46 bsmith Exp bsmith $*/
2 
3 /*
4    Basic functions for parallel (MPI) dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 #include "src/vec/vecimpl.h"
9 
10 EXTERN_C_BEGIN
11 #undef __FUNC__
12 #define __FUNC__ "MatGetDiagonalBlock_MPIDense"
13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14 {
15   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16   int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
17   Scalar       *array;
18   MPI_Comm     comm;
19 
20   PetscFunctionBegin;
21   if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");
22 
23   /* The reuse aspect is not implemented efficiently */
24   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
25 
26   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
27   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
28   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
29   ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
30   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
31   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
33 
34   *iscopy = PETSC_TRUE;
35   PetscFunctionReturn(0);
36 }
37 EXTERN_C_END
38 
39 extern int MatSetUpMultiply_MPIDense(Mat);
40 
41 #undef __FUNC__
42 #define __FUNC__ "MatSetValues_MPIDense"
43 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
44 {
45   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
46   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
47   int          roworiented = A->roworiented;
48 
49   PetscFunctionBegin;
50   for (i=0; i<m; i++) {
51     if (idxm[i] < 0) continue;
52     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
53     if (idxm[i] >= rstart && idxm[i] < rend) {
54       row = idxm[i] - rstart;
55       if (roworiented) {
56         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
57       } else {
58         for (j=0; j<n; j++) {
59           if (idxn[j] < 0) continue;
60           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
61           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
62         }
63       }
64     } else {
65       if (!A->donotstash) {
66         if (roworiented) {
67           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
68         } else {
69           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
70         }
71       }
72     }
73   }
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNC__
78 #define __FUNC__ "MatGetValues_MPIDense"
79 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
80 {
81   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
82   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
83 
84   PetscFunctionBegin;
85   for (i=0; i<m; i++) {
86     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
87     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
88     if (idxm[i] >= rstart && idxm[i] < rend) {
89       row = idxm[i] - rstart;
90       for (j=0; j<n; j++) {
91         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
92         if (idxn[j] >= mdn->N) {
93           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
94         }
95         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
96       }
97     } else {
98       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
99     }
100   }
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNC__
105 #define __FUNC__ "MatGetArray_MPIDense"
106 int MatGetArray_MPIDense(Mat A,Scalar **array)
107 {
108   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
109   int          ierr;
110 
111   PetscFunctionBegin;
112   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNC__
117 #define __FUNC__ "MatGetSubMatrix_MPIDense"
118 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
119 {
120   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
121   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
122   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols,rank;
123   Scalar       *av,*bv,*v = lmat->v;
124   Mat          newmat;
125 
126   PetscFunctionBegin;
127   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
128   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
129   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
130   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
131   ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);
132 
133   /* No parallel redistribution is currently supported! We should really check each index set
134      to confirm that it is OK.  ... Currently only submatrices with the same row partitioning
135      as the original matrix are supported! */
136 
137   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
138   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
139 
140   /* Check submatrix call */
141   if (scall == MAT_REUSE_MATRIX) {
142     /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
143     /* Really need to test the row and column sizes! */
144     newmat = *B;
145   } else {
146     /* Create and fill new matrix */
147     ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
148   }
149 
150   /* Now extract the data pointers and do the copy, column at a time */
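  /* The local dense array of the original matrix is stored column major with nlrows rows,
     so v + nlrows*icol[i] points to the start of global column icol[i]; within that column
     the entry for global row irow[j] sits at offset irow[j]-rstart. */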
151   newmatd = (Mat_MPIDense*)newmat->data;
152   bv = ((Mat_SeqDense *)newmatd->A->data)->v;
153 
154   for (i=0; i<ncols; i++) {
155     av = v + nlrows*icol[i];
156     for (j=0; j<nrows; j++) {
157       *bv++ = av[irow[j] - rstart];
158     }
159   }
160 
161   /* Assemble the matrices so that the correct flags are set */
162   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
163   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
164 
165   /* Free work space */
166   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
167   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
168   *B = newmat;
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNC__
173 #define __FUNC__ "MatRestoreArray_MPIDense"
174 int MatRestoreArray_MPIDense(Mat A,Scalar **array)
175 {
176   PetscFunctionBegin;
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNC__
181 #define __FUNC__ "MatAssemblyBegin_MPIDense"
182 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
183 {
184   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
185   MPI_Comm     comm = mat->comm;
186   int          ierr,nstash,reallocs;
187   InsertMode   addv;
188 
189   PetscFunctionBegin;
190   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES, not a mix */
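  /* The bitwise OR below equals (ADD_VALUES|INSERT_VALUES) only when at least one process
     used INSERT_VALUES while another used ADD_VALUES; this relies on INSERT_VALUES and
     ADD_VALUES occupying distinct bits and on NOT_SET_VALUES being zero, so processes that
     set no values do not perturb the result. */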
191   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
192   if (addv == (ADD_VALUES|INSERT_VALUES)) {
193     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
194   }
195   mat->insertmode = addv; /* in case this processor had no cache */
196 
197   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
198   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
199   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
200   PetscFunctionReturn(0);
201 }
202 
203 #undef __FUNC__
204 #define __FUNC__ "MatAssemblyEnd_MPIDense"
205 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
206 {
207   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
208   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
209   Scalar       *val;
210   InsertMode   addv=mat->insertmode;
211 
212   PetscFunctionBegin;
213   /*  wait on receives */
214   while (1) {
215     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
216     if (!flg) break;
217 
218     for (i=0; i<n;) {
219       /* Now identify the consecutive vals belonging to the same row */
220       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
221       if (j < n) ncols = j-i;
222       else       ncols = n-i;
223       /* Now assemble all these values with a single function call */
224       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
225       i = j;
226     }
227   }
228   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
229 
230   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
231   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
232 
233   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
234     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
235   }
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNC__
240 #define __FUNC__ "MatZeroEntries_MPIDense"
241 int MatZeroEntries_MPIDense(Mat A)
242 {
243   int          ierr;
244   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
245 
246   PetscFunctionBegin;
247   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNC__
252 #define __FUNC__ "MatGetBlockSize_MPIDense"
253 int MatGetBlockSize_MPIDense(Mat A,int *bs)
254 {
255   PetscFunctionBegin;
256   *bs = 1;
257   PetscFunctionReturn(0);
258 }
259 
260 /* the code does not do the diagonal entries correctly unless the
261    matrix is square and the column and row ownerships are identical.
262    This is a BUG. The only way to fix it seems to be to access
263    mdn->A and mdn->B directly and not through the MatZeroRows()
264    routine.
265 */
266 #undef __FUNC__
267 #define __FUNC__ "MatZeroRows_MPIDense"
268 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
269 {
270   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
271   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
272   int            *procs,*nprocs,j,found,idx,nsends,*work;
273   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
274   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
275   int            *lens,imdex,*lrows,*values;
276   MPI_Comm       comm = A->comm;
277   MPI_Request    *send_waits,*recv_waits;
278   MPI_Status     recv_status,*send_status;
279   IS             istmp;
280 
281   PetscFunctionBegin;
282   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
283   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
284 
285   /*  first count number of contributors to each processor */
286   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
287   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
288   procs  = nprocs + size;
289   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
290   for (i=0; i<N; i++) {
291     idx = rows[i];
292     found = 0;
293     for (j=0; j<size; j++) {
294       if (idx >= owners[j] && idx < owners[j+1]) {
295         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
296       }
297     }
298     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
299   }
300   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
301 
302   /* inform other processors of number of messages and max length*/
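  /* The custom PetscMaxSum_Op reduction is relied upon to return, in the first size entries
     of work[], the element-wise maximum of the per-destination counts and, in the last size
     entries, the element-wise sum of the 0/1 sender flags; thus work[rank] is the length of
     the longest single message this process will receive and work[size+rank] is the number
     of messages it will receive. */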
303   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
304   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
305   nmax   = work[rank];
306   nrecvs = work[size+rank];
307   ierr   = PetscFree(work);CHKERRQ(ierr);
308 
309   /* post receives:   */
310   rvalues    = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
311   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
312   for (i=0; i<nrecvs; i++) {
313     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
314   }
315 
316   /* do sends:
317       1) starts[i] gives the starting index in svalues for stuff going to
318          the ith processor
319   */
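  /* Illustration (values are made up): with three processes and nprocs = {2,0,3} on this
     process, starts = {0,2,2}, so the two rows destined for process 0 are packed into
     svalues[0] and svalues[1], and the three rows destined for process 2 into svalues[2]
     through svalues[4]. */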
320   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
321   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
322   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
323   starts[0]  = 0;
324   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
325   for (i=0; i<N; i++) {
326     svalues[starts[owner[i]]++] = rows[i];
327   }
328   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
329 
330   starts[0] = 0;
331   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
332   count = 0;
333   for (i=0; i<size; i++) {
334     if (procs[i]) {
335       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
336     }
337   }
338   ierr = PetscFree(starts);CHKERRQ(ierr);
339 
340   base = owners[rank];
341 
342   /*  wait on receives */
343   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
344   source = lens + nrecvs;
345   count  = nrecvs; slen = 0;
346   while (count) {
347     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
348     /* unpack receives into our local space */
349     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
350     source[imdex]  = recv_status.MPI_SOURCE;
351     lens[imdex]  = n;
352     slen += n;
353     count--;
354   }
355   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
356 
357   /* gather the received global row indices and convert them to local numbering */
358   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
359   count = 0;
360   for (i=0; i<nrecvs; i++) {
361     values = rvalues + i*nmax;
362     for (j=0; j<lens[i]; j++) {
363       lrows[count++] = values[j] - base;
364     }
365   }
366   ierr = PetscFree(rvalues);CHKERRQ(ierr);
367   ierr = PetscFree(lens);CHKERRQ(ierr);
368   ierr = PetscFree(owner);CHKERRQ(ierr);
369   ierr = PetscFree(nprocs);CHKERRQ(ierr);
370 
371   /* actually zap the local rows */
372   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
373   PLogObjectParent(A,istmp);
374   ierr = PetscFree(lrows);CHKERRQ(ierr);
375   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
376   ierr = ISDestroy(istmp);CHKERRQ(ierr);
377 
378   /* wait on sends */
379   if (nsends) {
380     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
381     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
382     ierr = PetscFree(send_status);CHKERRQ(ierr);
383   }
384   ierr = PetscFree(send_waits);CHKERRQ(ierr);
385   ierr = PetscFree(svalues);CHKERRQ(ierr);
386 
387   PetscFunctionReturn(0);
388 }
389 
390 #undef __FUNC__
391 #define __FUNC__ "MatMult_MPIDense"
392 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
393 {
394   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
395   int          ierr;
396 
397   PetscFunctionBegin;
398   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
399   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
400   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatMultAdd_MPIDense"
406 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
407 {
408   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
409   int          ierr;
410 
411   PetscFunctionBegin;
412   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
413   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
414   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNC__
419 #define __FUNC__ "MatMultTranspose_MPIDense"
420 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
421 {
422   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
423   int          ierr;
424   Scalar       zero = 0.0;
425 
426   PetscFunctionBegin;
427   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
428   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
429   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
430   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNC__
435 #define __FUNC__ "MatMultTransposeAdd_MPIDense"
436 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
437 {
438   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
439   int          ierr;
440 
441   PetscFunctionBegin;
442   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
443   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
444   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
445   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNC__
450 #define __FUNC__ "MatGetDiagonal_MPIDense"
451 int MatGetDiagonal_MPIDense(Mat A,Vec v)
452 {
453   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
454   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
455   int          ierr,len,i,n,m = a->m,radd;
456   Scalar       *x,zero = 0.0;
457 
458   PetscFunctionBegin;
459   ierr = VecSet(&zero,v);CHKERRQ(ierr);
460   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
461   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
462   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
463   len = PetscMin(aloc->m,aloc->n);
464   radd = a->rstart*m;
465   for (i=0; i<len; i++) {
466     x[i] = aloc->v[radd + i*m + i];
467   }
468   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNC__
473 #define __FUNC__ "MatDestroy_MPIDense"
474 int MatDestroy_MPIDense(Mat mat)
475 {
476   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
477   int          ierr;
478 
479   PetscFunctionBegin;
480 
481   if (mat->mapping) {
482     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
483   }
484   if (mat->bmapping) {
485     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
486   }
487 #if defined(PETSC_USE_LOG)
488   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
489 #endif
490   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
491   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
492   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
493   if (mdn->lvec)   VecDestroy(mdn->lvec);
494   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
495   if (mdn->factor) {
496     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
497     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
498     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
499     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
500   }
501   ierr = PetscFree(mdn);CHKERRQ(ierr);
502   if (mat->rmap) {
503     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
504   }
505   if (mat->cmap) {
506     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
507   }
508   PLogObjectDestroy(mat);
509   PetscHeaderDestroy(mat);
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNC__
514 #define __FUNC__ "MatView_MPIDense_Binary"
515 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
516 {
517   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
518   int          ierr;
519 
520   PetscFunctionBegin;
521   if (mdn->size == 1) {
522     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
523   }
524   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
525   PetscFunctionReturn(0);
526 }
527 
528 #undef __FUNC__
529 #define __FUNC__ "MatView_MPIDense_ASCIIorDraworSocket"
530 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,Viewer viewer)
531 {
532   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
533   int          ierr,format,size = mdn->size,rank = mdn->rank;
534   ViewerType   vtype;
535   PetscTruth   isascii,isdraw;
536 
537   PetscFunctionBegin;
538   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
539   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
540   if (isascii) {
541     ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
542     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
543     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
544       MatInfo info;
545       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
546       ierr = ViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
547                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
548       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
549       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
550       PetscFunctionReturn(0);
551     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
552       PetscFunctionReturn(0);
553     }
554   } else if (isdraw) {
555     Draw       draw;
556     PetscTruth isnull;
557 
558     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
559     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr);
560     if (isnull) PetscFunctionReturn(0);
561   }
562 
563   if (size == 1) {
564     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
565   } else {
566     /* assemble the entire matrix onto first processor. */
567     Mat          A;
568     int          M = mdn->M,N = mdn->N,m,row,i,nz,*cols;
569     Scalar       *vals;
570     Mat_SeqDense *Amdn = (Mat_SeqDense*)mdn->A->data;
571 
572     if (!rank) {
573       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
574     } else {
575       ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
576     }
577     PLogObjectParent(mat,A);
578 
579     /* Copy the matrix ... This isn't the most efficient means,
580        but it's quick for now */
581     row = mdn->rstart; m = Amdn->m;
582     for (i=0; i<m; i++) {
583       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
584       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
585       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
586       row++;
587     }
588 
589     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
590     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
591     if (!rank) {
592       Viewer sviewer;
593       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
594       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
595       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
596     }
597     ierr = ViewerFlush(viewer);CHKERRQ(ierr);
598     ierr = MatDestroy(A);CHKERRQ(ierr);
599   }
600   PetscFunctionReturn(0);
601 }
602 
603 #undef __FUNC__
604 #define __FUNC__ "MatView_MPIDense"
605 int MatView_MPIDense(Mat mat,Viewer viewer)
606 {
607   int        ierr;
608   PetscTruth isascii,isbinary,isdraw,issocket;
609 
610   PetscFunctionBegin;
611 
612   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
613   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
614   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
615   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
616 
617   if (isascii || issocket || isdraw) {
618     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
619   } else if (isbinary) {
620     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
621   } else {
622     SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 #undef __FUNC__
628 #define __FUNC__ "MatGetInfo_MPIDense"
629 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
630 {
631   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
632   Mat          mdn = mat->A;
633   int          ierr;
634   PetscReal    isend[5],irecv[5];
635 
636   PetscFunctionBegin;
637   info->rows_global    = (double)mat->M;
638   info->columns_global = (double)mat->N;
639   info->rows_local     = (double)mat->m;
640   info->columns_local  = (double)mat->N;
641   info->block_size     = 1.0;
642   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
643   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
644   isend[3] = info->memory;  isend[4] = info->mallocs;
645   if (flag == MAT_LOCAL) {
646     info->nz_used      = isend[0];
647     info->nz_allocated = isend[1];
648     info->nz_unneeded  = isend[2];
649     info->memory       = isend[3];
650     info->mallocs      = isend[4];
651   } else if (flag == MAT_GLOBAL_MAX) {
652     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
653     info->nz_used      = irecv[0];
654     info->nz_allocated = irecv[1];
655     info->nz_unneeded  = irecv[2];
656     info->memory       = irecv[3];
657     info->mallocs      = irecv[4];
658   } else if (flag == MAT_GLOBAL_SUM) {
659     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
660     info->nz_used      = irecv[0];
661     info->nz_allocated = irecv[1];
662     info->nz_unneeded  = irecv[2];
663     info->memory       = irecv[3];
664     info->mallocs      = irecv[4];
665   }
666   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
667   info->fill_ratio_needed = 0;
668   info->factor_mallocs    = 0;
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNC__
673 #define __FUNC__ "MatSetOption_MPIDense"
674 int MatSetOption_MPIDense(Mat A,MatOption op)
675 {
676   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
677 
678   PetscFunctionBegin;
679   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
680       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
681       op == MAT_NEW_NONZERO_LOCATION_ERR ||
682       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
683       op == MAT_COLUMNS_SORTED ||
684       op == MAT_COLUMNS_UNSORTED) {
685         MatSetOption(a->A,op);
686   } else if (op == MAT_ROW_ORIENTED) {
687         a->roworiented = 1;
688         MatSetOption(a->A,op);
689   } else if (op == MAT_ROWS_SORTED ||
690              op == MAT_ROWS_UNSORTED ||
691              op == MAT_SYMMETRIC ||
692              op == MAT_STRUCTURALLY_SYMMETRIC ||
693              op == MAT_YES_NEW_DIAGONALS ||
694              op == MAT_USE_HASH_TABLE) {
695     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
696   } else if (op == MAT_COLUMN_ORIENTED) {
697     a->roworiented = 0; MatSetOption(a->A,op);
698   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
699     a->donotstash = 1;
700   } else if (op == MAT_NO_NEW_DIAGONALS) {
701     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
702   } else {
703     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
704   }
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNC__
709 #define __FUNC__ "MatGetSize_MPIDense"
710 int MatGetSize_MPIDense(Mat A,int *m,int *n)
711 {
712   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
713 
714   PetscFunctionBegin;
715   if (m) *m = mat->M;
716   if (n) *n = mat->N;
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNC__
721 #define __FUNC__ "MatGetLocalSize_MPIDense"
722 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
723 {
724   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
725 
726   PetscFunctionBegin;
727   *m = mat->m; *n = mat->N;
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNC__
732 #define __FUNC__ "MatGetOwnershipRange_MPIDense"
733 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
734 {
735   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
736 
737   PetscFunctionBegin;
738   *m = mat->rstart; *n = mat->rend;
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNC__
743 #define __FUNC__ "MatGetRow_MPIDense"
744 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
745 {
746   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
747   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;
748 
749   PetscFunctionBegin;
750   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows")
751   lrow = row - rstart;
752   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
753   PetscFunctionReturn(0);
754 }
755 
756 #undef __FUNC__
757 #define __FUNC__ "MatRestoreRow_MPIDense"
758 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
759 {
760   int ierr;
761 
762   PetscFunctionBegin;
763   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
764   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNC__
769 #define __FUNC__ "MatDiagonalScale_MPIDense"
770 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
771 {
772   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
773   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
774   Scalar       *l,*r,x,*v;
775   int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;
776 
777   PetscFunctionBegin;
778   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
779   if (ll) {
780     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
781     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
782     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
783     for (i=0; i<m; i++) {
784       x = l[i];
785       v = mat->v + i;
786       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
787     }
788     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
789     PLogFlops(n*m);
790   }
791   if (rr) {
792     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
793     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
794     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
795     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
796     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
797     for (i=0; i<n; i++) {
798       x = r[i];
799       v = mat->v + i*m;
800       for (j=0; j<m; j++) { (*v++) *= x;}
801     }
802     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
803     PLogFlops(n*m);
804   }
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNC__
809 #define __FUNC__ "MatNorm_MPIDense"
810 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
811 {
812   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
813   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
814   int          ierr,i,j;
815   PetscReal    sum = 0.0;
816   Scalar       *v = mat->v;
817 
818   PetscFunctionBegin;
819   if (mdn->size == 1) {
820     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
821   } else {
822     if (type == NORM_FROBENIUS) {
823       for (i=0; i<mat->n*mat->m; i++) {
824 #if defined(PETSC_USE_COMPLEX)
825         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
826 #else
827         sum += (*v)*(*v); v++;
828 #endif
829       }
830       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
831       *norm = sqrt(*norm);
832       PLogFlops(2*mat->n*mat->m);
833     } else if (type == NORM_1) {
834       PetscReal *tmp,*tmp2;
835       tmp  = (PetscReal*)PetscMalloc(2*mdn->N*sizeof(PetscReal));CHKPTRQ(tmp);
836       tmp2 = tmp + mdn->N;
837       ierr = PetscMemzero(tmp,2*mdn->N*sizeof(PetscReal));CHKERRQ(ierr);
838       *norm = 0.0;
839       v = mat->v;
840       for (j=0; j<mat->n; j++) {
841         for (i=0; i<mat->m; i++) {
842           tmp[j] += PetscAbsScalar(*v);  v++;
843         }
844       }
845       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
846       for (j=0; j<mdn->N; j++) {
847         if (tmp2[j] > *norm) *norm = tmp2[j];
848       }
849       ierr = PetscFree(tmp);CHKERRQ(ierr);
850       PLogFlops(mat->n*mat->m);
851     } else if (type == NORM_INFINITY) { /* max row norm */
852       PetscReal ntemp;
853       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
854       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
855     } else {
856       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
857     }
858   }
859   PetscFunctionReturn(0);
860 }
861 
862 #undef __FUNC__
863 #define __FUNC__ "MatTranspose_MPIDense"
864 int MatTranspose_MPIDense(Mat A,Mat *matout)
865 {
866   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
867   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
868   Mat          B;
869   int          M = a->M,N = a->N,m,n,*rwork,rstart = a->rstart;
870   int          j,i,ierr;
871   Scalar       *v;
872 
873   PetscFunctionBegin;
874   if (!matout && M != N) {
875     SETERRQ(PETSC_ERR_SUP,0,"In-place transpose is supported only for square matrices");
876   }
877   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
878 
879   m = Aloc->m; n = Aloc->n; v = Aloc->v;
880   rwork = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(rwork); /* one global row index per local row of A */
881   for (j=0; j<n; j++) {
882     for (i=0; i<m; i++) rwork[i] = rstart + i;
883     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
884     v   += m;
885   }
886   ierr = PetscFree(rwork);CHKERRQ(ierr);
887   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
889   if (matout) {
890     *matout = B;
891   } else {
892     PetscOps *Abops;
893     MatOps   Aops;
894 
895     /* This isn't really an in-place transpose; instead we free the data structures of a and overwrite A with B below */
896     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
897     ierr = MatDestroy(a->A);CHKERRQ(ierr);
898     if (a->lvec) VecDestroy(a->lvec);
899     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
900     ierr = PetscFree(a);CHKERRQ(ierr);
901 
902     /*
903          This is horrible, horrible code. We need to keep the
904       A pointers for the bops and ops but copy everything
905       else from B.
906     */
907     Abops   = A->bops;
908     Aops    = A->ops;
909     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
910     A->bops = Abops;
911     A->ops  = Aops;
912 
913     PetscHeaderDestroy(B);
914   }
915   PetscFunctionReturn(0);
916 }
917 
918 #include "pinclude/blaslapack.h"
919 #undef __FUNC__
920 #define __FUNC__ "MatScale_MPIDense"
921 int MatScale_MPIDense(Scalar *alpha,Mat inA)
922 {
923   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
924   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
925   int          one = 1,nz;
926 
927   PetscFunctionBegin;
928   nz = a->m*a->n;
929   BLscal_(&nz,alpha,a->v,&one);
930   PLogFlops(nz);
931   PetscFunctionReturn(0);
932 }
933 
934 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
935 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
936 
937 /* -------------------------------------------------------------------*/
938 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
939        MatGetRow_MPIDense,
940        MatRestoreRow_MPIDense,
941        MatMult_MPIDense,
942        MatMultAdd_MPIDense,
943        MatMultTranspose_MPIDense,
944        MatMultTransposeAdd_MPIDense,
945        0,
946        0,
947        0,
948        0,
949        0,
950        0,
951        0,
952        MatTranspose_MPIDense,
953        MatGetInfo_MPIDense,0,
954        MatGetDiagonal_MPIDense,
955        MatDiagonalScale_MPIDense,
956        MatNorm_MPIDense,
957        MatAssemblyBegin_MPIDense,
958        MatAssemblyEnd_MPIDense,
959        0,
960        MatSetOption_MPIDense,
961        MatZeroEntries_MPIDense,
962        MatZeroRows_MPIDense,
963        0,
964        0,
965        0,
966        0,
967        MatGetSize_MPIDense,
968        MatGetLocalSize_MPIDense,
969        MatGetOwnershipRange_MPIDense,
970        0,
971        0,
972        MatGetArray_MPIDense,
973        MatRestoreArray_MPIDense,
974        MatDuplicate_MPIDense,
975        0,
976        0,
977        0,
978        0,
979        0,
980        MatGetSubMatrices_MPIDense,
981        0,
982        MatGetValues_MPIDense,
983        0,
984        0,
985        MatScale_MPIDense,
986        0,
987        0,
988        0,
989        MatGetBlockSize_MPIDense,
990        0,
991        0,
992        0,
993        0,
994        0,
995        0,
996        0,
997        0,
998        0,
999        MatGetSubMatrix_MPIDense,
1000        0,
1001        0,
1002        MatGetMaps_Petsc};
1003 
1004 #undef __FUNC__
1005 #define __FUNC__ "MatCreateMPIDense"
1006 /*@C
1007    MatCreateMPIDense - Creates a parallel matrix in dense format.
1008 
1009    Collective on MPI_Comm
1010 
1011    Input Parameters:
1012 +  comm - MPI communicator
1013 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1014 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1015 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1016 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1017 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1018    to control all matrix memory allocation.
1019 
1020    Output Parameter:
1021 .  A - the matrix
1022 
1023    Notes:
1024    The dense format is fully compatible with standard Fortran 77
1025    storage by columns.
1026 
1027    The data input variable is intended primarily for Fortran programmers
1028    who wish to allocate their own matrix memory space.  Most users should
1029    set data=PETSC_NULL.
1030 
1031    The user MUST specify either the local or global matrix dimensions
1032    (possibly both).
1033 
1034    Level: intermediate
1035 
1036 .keywords: matrix,dense, parallel
1037 
1038 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1039 @*/
1040 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1041 {
1042   Mat          mat;
1043   Mat_MPIDense *a;
1044   int          ierr,i;
1045   PetscTruth   flg;
1046 
1047   PetscFunctionBegin;
1048   /* Note:  For now, when data is specified above, this assumes the user correctly
1049    allocates the local dense storage space.  We should add error checking. */
1050 
1051   *A = 0;
1052   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
1053   PLogObjectCreate(mat);
1054   mat->data         = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1055   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1056   mat->ops->destroy = MatDestroy_MPIDense;
1057   mat->ops->view    = MatView_MPIDense;
1058   mat->factor       = 0;
1059   mat->mapping      = 0;
1060 
1061   a->factor       = 0;
1062   mat->insertmode = NOT_SET_VALUES;
1063   ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
1064   ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);
1065 
1066   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1067 
1068   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1069   a->nvec = n;
1070 
1071   /* each process stores all N columns for each of its local rows */
1072   a->N = mat->N = N;
1073   a->M = mat->M = M;
1074   a->m = mat->m = m;
1075   a->n = mat->n = N;   /* NOTE: n == N */
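  /* These local rows are stored densely in column-major (Fortran) order with leading
     dimension m, so local entry (i,j) lives at v[i + j*m], which is what
     MatGetDiagonal_MPIDense and MatDiagonalScale_MPIDense above rely on. */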
1076 
1077   /* the information in the maps duplicates the information computed below; eventually
1078      we should remove the duplicated information that is not contained in the maps */
1079   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
1080   ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);
1081 
1082   /* build local table of row and column ownerships */
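  /* Illustration: with three processes owning m = 2, 3 and 4 rows respectively, the
     Allgather and prefix sum below produce rowners = {0,2,5,9}, so process 1 obtains
     rstart = 2 and rend = 5; cowners is built the same way from the local column sizes. */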
1083   a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1084   a->cowners = a->rowners + a->size + 1;
1085   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1086   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1087   a->rowners[0] = 0;
1088   for (i=2; i<=a->size; i++) {
1089     a->rowners[i] += a->rowners[i-1];
1090   }
1091   a->rstart = a->rowners[a->rank];
1092   a->rend   = a->rowners[a->rank+1];
1093   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1094   a->cowners[0] = 0;
1095   for (i=2; i<=a->size; i++) {
1096     a->cowners[i] += a->cowners[i-1];
1097   }
1098 
1099   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
1100   PLogObjectParent(mat,a->A);
1101 
1102   /* build cache (stash) for entries destined for other processes */
1103   a->donotstash = 0;
1104   ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);
1105 
1106   /* stuff used for matrix vector multiply */
1107   a->lvec        = 0;
1108   a->Mvctx       = 0;
1109   a->roworiented = 1;
1110 
1111   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1112                                      "MatGetDiagonalBlock_MPIDense",
1113                                      (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1114 
1115   *A = mat;
1116   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1117   if (flg) {
1118     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1119   }
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNC__
1124 #define __FUNC__ "MatDuplicate_MPIDense"
1125 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1126 {
1127   Mat          mat;
1128   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1129   int          ierr;
1130   FactorCtx    *factor;
1131 
1132   PetscFunctionBegin;
1133   *newmat       = 0;
1134   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
1135   PLogObjectCreate(mat);
1136   mat->data         = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1137   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1138   mat->ops->destroy = MatDestroy_MPIDense;
1139   mat->ops->view    = MatView_MPIDense;
1140   mat->factor       = A->factor;
1141   mat->assembled    = PETSC_TRUE;
1142 
1143   a->m = mat->m = oldmat->m;
1144   a->n = mat->n = oldmat->n;
1145   a->M = mat->M = oldmat->M;
1146   a->N = mat->N = oldmat->N;
1147   if (oldmat->factor) {
1148     a->factor = (FactorCtx*)(factor = PetscNew(FactorCtx));CHKPTRQ(factor);
1149     /* copy factor contents ... add this code! */
1150   } else a->factor = 0;
1151 
1152   a->rstart       = oldmat->rstart;
1153   a->rend         = oldmat->rend;
1154   a->size         = oldmat->size;
1155   a->rank         = oldmat->rank;
1156   mat->insertmode = NOT_SET_VALUES;
1157   a->donotstash   = oldmat->donotstash;
1158   a->rowners = (int*)PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1159   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1160   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1161   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1162 
1163   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1164   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1165   PLogObjectParent(mat,a->A);
1166   *newmat = mat;
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #include "sys.h"
1171 
1172 #undef __FUNC__
1173 #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
1174 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1175 {
1176   int        *rowners,i,size,rank,m,ierr,nz,j;
1177   Scalar     *array,*vals,*vals_ptr;
1178   MPI_Status status;
1179 
1180   PetscFunctionBegin;
1181   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1182   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1183 
1184   /* determine ownership of all rows */
1185   m          = M/size + ((M % size) > rank);
1186   rowners    = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1187   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1188   rowners[0] = 0;
1189   for (i=2; i<=size; i++) {
1190     rowners[i] += rowners[i-1];
1191   }
1192 
1193   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1194   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1195 
1196   if (!rank) {
1197     vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);
1198 
1199     /* read in my part of the matrix numerical values  */
1200     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1201 
1202     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1203     vals_ptr = vals;
1204     for (i=0; i<m; i++) {
1205       for (j=0; j<N; j++) {
1206         array[i + j*m] = *vals_ptr++;
1207       }
1208     }
1209 
1210     /* read in the other processes' parts and ship them out */
1211     for (i=1; i<size; i++) {
1212       nz   = (rowners[i+1] - rowners[i])*N;
1213       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1214       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1215     }
1216   } else {
1217     /* receive numeric values */
1218     vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);
1219 
1220     /* receive message of values*/
1221     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1222 
1223     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1224     vals_ptr = vals;
1225     for (i=0; i<m; i++) {
1226       for (j=0; j<N; j++) {
1227         array[i + j*m] = *vals_ptr++;
1228       }
1229     }
1230   }
1231   ierr = PetscFree(rowners);CHKERRQ(ierr);
1232   ierr = PetscFree(vals);CHKERRQ(ierr);
1233   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1234   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 
1239 #undef __FUNC__
1240 #define __FUNC__ "MatLoad_MPIDense"
1241 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
1242 {
1243   Mat          A;
1244   Scalar       *vals,*svals;
1245   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1246   MPI_Status   status;
1247   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1248   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1249   int          tag = ((PetscObject)viewer)->tag;
1250   int          i,nz,ierr,j,rstart,rend,fd;
1251 
1252   PetscFunctionBegin;
1253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1254   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1255   if (!rank) {
1256     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1257     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1258     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1259   }
1260 
1261   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1262   M = header[1]; N = header[2]; nz = header[3];
1263 
1264   /*
1265        Handle case where matrix is stored on disk as a dense matrix
1266   */
1267   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1268     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1269     PetscFunctionReturn(0);
1270   }
1271 
1272   /* determine ownership of all rows */
1273   m          = M/size + ((M % size) > rank);
1274   rowners    = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1275   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1276   rowners[0] = 0;
1277   for (i=2; i<=size; i++) {
1278     rowners[i] += rowners[i-1];
1279   }
1280   rstart = rowners[rank];
1281   rend   = rowners[rank+1];
1282 
1283   /* distribute row lengths to all processors */
1284   ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens);
1285   offlens = ourlens + (rend-rstart);
1286   if (!rank) {
1287     rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
1288     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1289     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
1290     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1291     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1292     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1293   } else {
1294     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1295   }
1296 
1297   if (!rank) {
1298     /* calculate the number of nonzeros on each processor */
1299     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
1300     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1301     for (i=0; i<size; i++) {
1302       for (j=rowners[i]; j< rowners[i+1]; j++) {
1303         procsnz[i] += rowlengths[j];
1304       }
1305     }
1306     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1307 
1308     /* determine max buffer needed and allocate it */
1309     maxnz = 0;
1310     for (i=0; i<size; i++) {
1311       maxnz = PetscMax(maxnz,procsnz[i]);
1312     }
1313     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
1314 
1315     /* read in my part of the matrix column indices  */
1316     nz = procsnz[0];
1317     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
1318     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1319 
1320     /* read in everyone else's part and ship it off */
1321     for (i=1; i<size; i++) {
1322       nz   = procsnz[i];
1323       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1324       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1325     }
1326     ierr = PetscFree(cols);CHKERRQ(ierr);
1327   } else {
1328     /* determine buffer space needed for message */
1329     nz = 0;
1330     for (i=0; i<m; i++) {
1331       nz += ourlens[i];
1332     }
1333     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
1334 
1335     /* receive message of column indices*/
1336     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1337     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1338     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1339   }
1340 
1341   /* loop over local rows, determining the number of off-diagonal entries */
1342   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1343   jj = 0;
1344   for (i=0; i<m; i++) {
1345     for (j=0; j<ourlens[i]; j++) {
1346       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1347       jj++;
1348     }
1349   }
1350 
1351   /* create our matrix */
1352   for (i=0; i<m; i++) {
1353     ourlens[i] -= offlens[i];
1354   }
1355   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
1356   A = *newmat;
1357   for (i=0; i<m; i++) {
1358     ourlens[i] += offlens[i];
1359   }
1360 
1361   if (!rank) {
1362     vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);
1363 
1364     /* read in my part of the matrix numerical values  */
1365     nz = procsnz[0];
1366     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1367 
1368     /* insert into matrix */
1369     jj      = rstart;
1370     smycols = mycols;
1371     svals   = vals;
1372     for (i=0; i<m; i++) {
1373       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1374       smycols += ourlens[i];
1375       svals   += ourlens[i];
1376       jj++;
1377     }
1378 
1379     /* read in the other processes' parts and ship them out */
1380     for (i=1; i<size; i++) {
1381       nz   = procsnz[i];
1382       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1383       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1384     }
1385     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1386   } else {
1387     /* receive numeric values */
1388     vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals);
1389 
1390     /* receive message of values*/
1391     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1392     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1393     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1394 
1395     /* insert into matrix */
1396     jj      = rstart;
1397     smycols = mycols;
1398     svals   = vals;
1399     for (i=0; i<m; i++) {
1400       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1401       smycols += ourlens[i];
1402       svals   += ourlens[i];
1403       jj++;
1404     }
1405   }
1406   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1407   ierr = PetscFree(vals);CHKERRQ(ierr);
1408   ierr = PetscFree(mycols);CHKERRQ(ierr);
1409   ierr = PetscFree(rowners);CHKERRQ(ierr);
1410 
1411   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 
1417 
1418 
1419 
1420