/*
   Basic functions for parallel dense matrices.

   src/mat/impls/dense/mpi/mpidense.c (revision dfbe8321245cdf9338262824a41580dace4e4143)
*/

#include "src/mat/impls/dense/mpi/mpidense.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse case is not implemented efficiently; the old matrix is simply destroyed and rebuilt */
  if (reuse == MAT_REUSE_MATRIX) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,m,m,m,m,B);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
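
/*
   Usage sketch (illustrative; assumes the generic MatGetDiagonalBlock() wrapper,
   which dispatches here through the "MatGetDiagonalBlock_C" composed function
   registered in MatCreate_MPIDense() below):

     Mat        Adiag;
     PetscTruth iscopy;
     ierr = MatGetDiagonalBlock(A,&iscopy,MAT_INITIAL_MATRIX,&Adiag);CHKERRQ(ierr);
*/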

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  int          i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
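
/*
   Example (illustrative): rows owned locally are written straight into the
   local dense block above; rows owned by other processes are cached in
   mat->stash and communicated during MatAssemblyBegin/End().

     int         rows[2] = {0,1},cols[2] = {0,1};
     PetscScalar v[4] = {1.0,2.0,3.0,4.0};
     ierr = MatSetValues(A,2,rows,2,cols,v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/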

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  int          i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
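
/*
   Usage sketch (illustrative): the pointer returned above addresses the local
   m x N block, stored column-major (Fortran order), so local entry (i,j) is
   array[i + j*m].

     PetscScalar *array;
     ierr = MatGetArray(A,&array);CHKERRQ(ierr);
     array[0] = 42.0;                              local entry (0,0)
     ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
*/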

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK; currently the submatrix must have the same row
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* Really should verify that the reused matrix has the correct row and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  PetscErrorCode ierr;
  int          nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
PetscErrorCode MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* The code below does not handle the diagonal entries correctly unless the
   matrix is square and the row and column ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access the local
   matrix storage (l->A) directly and not go through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
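
/*
   Usage sketch (illustrative): zero global rows 0 and 5, putting 1.0 on the
   diagonal of each zeroed row; every process passes its own (possibly empty)
   list of rows.

     IS          is;
     int         rows[2] = {0,5};
     PetscScalar one = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/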

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
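
/*
   Note on the scatter above: each process stores complete rows of the matrix,
   so the local product needs all N entries of x; mdn->Mvctx gathers them into
   the sequential work vector mdn->lvec.  Usage sketch (illustrative):

     Vec x,y;
     ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&x);CHKERRQ(ierr);
     ierr = VecCreateMPI(mat->comm,mat->m,mat->M,&y);CHKERRQ(ierr);
     ierr = MatMult(mat,x,y);CHKERRQ(ierr);
*/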

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar  zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  int          len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  int               size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                   (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat               A;
    int               M = mat->M,N = mat->N,m,row,i,nz;
    const int         *cols;
    const PetscScalar *vals;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
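
/*
   Example (illustrative): MAT_COLUMN_ORIENTED changes how the v[] argument of
   MatSetValues() is interpreted (column-major instead of row-major), and
   MAT_IGNORE_OFF_PROC_ENTRIES disables the stash used for off-process values.

     ierr = MatSetOption(A,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);
*/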

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const int **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
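
/*
   Usage sketch (illustrative; grow stands for some locally owned global row
   index): MatGetRow()/MatRestoreRow() must be paired, and only locally owned
   rows may be requested.

     int         ncols,*cols;
     PetscScalar *vals;
     ierr = MatGetRow(A,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(A,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
*/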

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar  *l,*r,x,*v;
  PetscErrorCode ierr;
  int          i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  /* computes A = diag(ll)*A*diag(rr); the right scaling vector is gathered
     into the local work vector mdn->lvec since each process stores all N columns */
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

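/*
   Reduction pattern used below: NORM_FROBENIUS sums |a_ij|^2 over the local
   rows and combines the partial sums with MPI_Allreduce() before taking the
   square root; NORM_1 (max column sum) reduces the per-column absolute sums
   across the communicator; NORM_INFINITY (max row sum) is computed locally
   and reduced with MPI_MAX.
*/
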
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  int          i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"In-place transpose supported only for square matrices");
  }
  ierr = MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIDense};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: when data is specified, this assumes that the user has correctly
     allocated the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/
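
/*
   Example (illustrative): selecting this type at runtime from the options
   database.

     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);     run with -mat_type mpidense
*/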

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  PetscErrorCode ierr;
  int          i;

  PetscFunctionBegin;
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off-process entries formed during MatSetValues() */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  int            size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

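   Example usage (a minimal sketch; error checking omitted):
.vb
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MATMPIDENSE);
      MatMPIDenseSetPreallocation(A,PETSC_NULL);
.ve
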
   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

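   Example usage (a minimal sketch; error checking omitted):
.vb
      Mat A;
      MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
      ... set entries with MatSetValues(), then assemble ...
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
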
   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  int            size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,const MatType type,Mat *newmat)
{
  int          *rowners,i,size,rank,m,ierr,nz,j;
  PetscScalar  *array,*vals,*vals_ptr;
  MPI_Status   status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"number of column indices received does not match the file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"number of values received does not match the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
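
/*
   Usage sketch (illustrative; the binary-viewer file mode flag name is an
   assumption for this PETSc vintage): loading a matrix written with MatView()
   into an MPIDENSE matrix.

     PetscViewer viewer;
     Mat         B;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&B);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/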