xref: /petsc/src/mat/utils/axpy.c (revision 340700d4fca726647d48eef3b260395dce4f294f)
1 
2 #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/
3 
/*@
   MatAXPY - Computes Y = a*X + Y.

   Logically Collective on Mat

   Input Parameters:
+  a - the scalar multiplier
.  X - the first matrix
.  Y - the second matrix
-  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
         or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

   Level: intermediate

.keywords: matrix, add

.seealso: MatAYPX()
 @*/
PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr,(*f)(Mat,Mat*);
  PetscInt       M1,M2,N1,N2;
  PetscInt       m1,m2,n1,n2;
  MatType        t1,t2;
  Mat            T,F;
  PetscBool      sametype,transpose;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
  PetscValidLogicalCollectiveScalar(Y,a,2);
  PetscValidHeaderSpecific(X,MAT_CLASSID,3);
  PetscCheckSameComm(Y,1,X,3);
  /* X and Y must conform both globally and locally */
  ierr = MatGetSize(X,&M1,&N1);CHKERRQ(ierr);
  ierr = MatGetSize(Y,&M2,&N2);CHKERRQ(ierr);
  ierr = MatGetLocalSize(X,&m1,&n1);CHKERRQ(ierr);
  ierr = MatGetLocalSize(Y,&m2,&n2);CHKERRQ(ierr);
  if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,M2,N1,N2);
  if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,m2,n1,n2);

  /* Type-specific fast path is used only when X and Y have the same type */
  ierr = MatGetType(X,&t1);CHKERRQ(ierr);
  ierr = MatGetType(Y,&t2);CHKERRQ(ierr);
  ierr = PetscStrcmp(t1,t2,&sametype);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
  if (Y->ops->axpy && sametype) {
    ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* Implicit (Hermitian-)transpose operands are made explicit before recursing */
    ierr = PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);CHKERRQ(ierr);
    if (transpose) {
        /* "MatTransposeGetMat_C" is registered only for the plain-transpose variant;
           its absence means X wraps a Hermitian transpose */
        ierr = PetscObjectQueryFunction((PetscObject)X,"MatTransposeGetMat_C",&f);CHKERRQ(ierr);
        if (f) {
          ierr = PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT matrix to perform MatAXPY()\n");CHKERRQ(ierr);
          ierr = f(X,&T);CHKERRQ(ierr);                               /* T is borrowed from X; do not destroy */
          ierr = MatTranspose(T,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr); /* F is an explicit copy equivalent to X */
        } else {
          ierr = PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT matrix to perform MatAXPY()\n");CHKERRQ(ierr);
          ierr = MatHermitianTransposeGetMat(X,&T);CHKERRQ(ierr);
          ierr = MatHermitianTranspose(T,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
        }
        ierr = MatAXPY(Y,a,F,str);CHKERRQ(ierr);
        ierr = MatDestroy(&F);CHKERRQ(ierr);
    } else {
      ierr = PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);CHKERRQ(ierr);
      if (transpose) {
        ierr = PetscObjectQueryFunction((PetscObject)Y,"MatTransposeGetMat_C",&f);CHKERRQ(ierr);
        if (f) {
          ierr = PetscInfo(NULL,"Explicitly transposing Y of type MATTRANSPOSEMAT matrix to perform MatAXPY()\n");CHKERRQ(ierr);
          ierr = f(Y,&T);CHKERRQ(ierr);                               /* T is borrowed from Y; do not destroy */
          ierr = MatTranspose(T,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
        } else {
          ierr = PetscInfo(NULL,"Explicitly Hermitian transposing Y of type MATTRANSPOSEMAT matrix to perform MatAXPY()\n");CHKERRQ(ierr);
          ierr = MatHermitianTransposeGetMat(Y,&T);CHKERRQ(ierr);
          ierr = MatHermitianTranspose(T,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
        }
        /* NOTE(review): the sum is accumulated into the temporary F, which is
           destroyed immediately below without being copied back into Y, so Y
           appears to be left unchanged on this path -- confirm intended behavior */
        ierr = MatAXPY(F,a,X,str);CHKERRQ(ierr);
        ierr = MatDestroy(&F);CHKERRQ(ierr);
      } else {
        if (str != DIFFERENT_NONZERO_PATTERN) {
          ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
        } else {
          /* Unknown union pattern: preallocate a fresh matrix for Y+X, fill it,
             then transplant it into Y's header so callers' handles stay valid */
          Mat B;

          ierr = MatAXPY_Basic_Preallocate(Y,X,&B);CHKERRQ(ierr);
          ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
          ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
        }
      }
    }
  }
  ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  /* Host values changed: mark any device mirror of Y as stale */
  if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
  }
#endif
  PetscFunctionReturn(0);
}
100 
/* Creates B, with the same type and layout as Y, preallocated for the union of
   the nonzero patterns of Y and X (i.e. the pattern of the result of Y + a*X). */
PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
{
  PetscErrorCode ierr;
  PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;

  PetscFunctionBegin;
  /* look for any available faster alternative to the general preallocator */
  ierr = PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);CHKERRQ(ierr);
  if (preall) {
    ierr = (*preall)(Y,X,B);CHKERRQ(ierr);
  } else { /* Use MatPreallocator; assumes X and Y share the same row-col distribution */
    Mat      preallocator;
    PetscInt r,rstart,rend;
    PetscInt m,n,M,N;

    ierr = MatGetSize(Y,&M,&N);CHKERRQ(ierr);
    ierr = MatGetLocalSize(Y,&m,&n);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);CHKERRQ(ierr);
    ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
    ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetUp(preallocator);CHKERRQ(ierr);
    ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
    /* Record the sparsity of both operands; MATPREALLOCATOR counts locations
       only, so the inserted values themselves are irrelevant */
    for (r = rstart; r < rend; ++r) {
      PetscInt          ncols;
      const PetscInt    *row;
      const PetscScalar *vals;

      ierr = MatGetRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatGetRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    /* Create the output with Y's type/layout and let the preallocator size it;
       PETSC_FALSE: do not insert (zero) values, callers will fill B themselves */
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),B);CHKERRQ(ierr);
    ierr = MatSetType(*B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
    ierr = MatSetSizes(*B,m,n,M,N);CHKERRQ(ierr);
    ierr = MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);CHKERRQ(ierr);
    ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
146 
147 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
148 {
149   PetscInt          i,start,end,j,ncols,m,n;
150   PetscErrorCode    ierr;
151   const PetscInt    *row;
152   PetscScalar       *val;
153   const PetscScalar *vals;
154 
155   PetscFunctionBegin;
156   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
157   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
158   if (a == 1.0) {
159     for (i = start; i < end; i++) {
160       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
161       ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
162       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
163     }
164   } else {
165     PetscInt vs = 100;
166     /* realloc if needed, as this function may be used in parallel */
167     ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr);
168     for (i=start; i<end; i++) {
169       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
170       if (vs < ncols) {
171         vs   = PetscMin(2*ncols,n);
172         ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr);
173       }
174       for (j=0; j<ncols; j++) val[j] = a*vals[j];
175       ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
176       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
177     }
178     ierr = PetscFree(val);CHKERRQ(ierr);
179   }
180   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
181   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
182   PetscFunctionReturn(0);
183 }
184 
/* Computes B = Y + a*X row by row, where B is an empty matrix already
   preallocated (by MatAXPY_Basic_Preallocate()) for the union of the
   nonzero patterns of Y and X; B is assembled on return. */
PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt          i,start,end,j,ncols,m,n;
  PetscErrorCode    ierr;
  const PetscInt    *row;
  PetscScalar       *val;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
  if (a == 1.0) {
    /* a == 1: copy rows of both Y and X into B unscaled */
    for (i = start; i < end; i++) {
      ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);

      ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
    }
  } else {
    PetscInt vs = 100;
    /* realloc if needed, as this function may be used in parallel */
    ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr);
    for (i=start; i<end; i++) {
      /* Y's rows go in unscaled ... */
      ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);

      /* ... X's rows are scaled by a through the scratch buffer val[] */
      ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
      if (vs < ncols) {
        vs   = PetscMin(2*ncols,n); /* a row can never exceed the column count */
        ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr);
      }
      for (j=0; j<ncols; j++) val[j] = a*vals[j];
      ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
    }
    ierr = PetscFree(val);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
230 
231 /*@
232    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
233 
234    Neighbor-wise Collective on Mat
235 
236    Input Parameters:
237 +  Y - the matrices
238 -  a - the PetscScalar
239 
240    Level: intermediate
241 
242    Notes:
243     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
244    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
245    entry).
246 
247    To form Y = Y + diag(V) use MatDiagonalSet()
248 
249    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
250     preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
251 
252 .keywords: matrix, add, shift
253 
254 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
255  @*/
256 PetscErrorCode  MatShift(Mat Y,PetscScalar a)
257 {
258   PetscErrorCode ierr;
259 
260   PetscFunctionBegin;
261   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
262   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
263   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
264   MatCheckPreallocated(Y,1);
265 
266   if (Y->ops->shift) {
267     ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr);
268   } else {
269     ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
270   }
271 
272   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
273 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
274   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
275     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
276   }
277 #endif
278   PetscFunctionReturn(0);
279 }
280 
281 PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
282 {
283   PetscErrorCode ierr;
284   PetscInt       i,start,end;
285   PetscScalar    *v;
286 
287   PetscFunctionBegin;
288   ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr);
289   ierr = VecGetArray(D,&v);CHKERRQ(ierr);
290   for (i=start; i<end; i++) {
291     ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr);
292   }
293   ierr = VecRestoreArray(D,&v);CHKERRQ(ierr);
294   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
295   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
296   PetscFunctionReturn(0);
297 }
298 
/*@
   MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
   that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
   INSERT_VALUES.

   Input Parameters:
+  Y - the input matrix
.  D - the diagonal matrix, represented as a vector
-  is - INSERT_VALUES or ADD_VALUES

   Neighbor-wise Collective on Mat and Vec

   Notes:
    If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
   entry).

   Level: intermediate

.keywords: matrix, add, shift, diagonal

.seealso: MatShift(), MatScale(), MatDiagonalScale()
@*/
PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  PetscInt       matlocal,veclocal;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
  PetscValidHeaderSpecific(D,VEC_CLASSID,2);
  /* D must be distributed conformally with the rows of Y */
  ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr);
  ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr);
  if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
  /* Dispatch to the type-specific implementation when one is registered */
  if (Y->ops->diagonalset) {
    ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
341 
342 /*@
343    MatAYPX - Computes Y = a*Y + X.
344 
345    Logically on Mat
346 
347    Input Parameters:
348 +  a - the PetscScalar multiplier
349 .  Y - the first matrix
350 .  X - the second matrix
351 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
352 
353    Level: intermediate
354 
355 .keywords: matrix, add
356 
357 .seealso: MatAXPY()
358  @*/
359 PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
360 {
361   PetscScalar    one = 1.0;
362   PetscErrorCode ierr;
363   PetscInt       mX,mY,nX,nY;
364 
365   PetscFunctionBegin;
366   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
367   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
368   PetscValidLogicalCollectiveScalar(Y,a,2);
369   ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr);
370   ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr);
371   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
372 
373   ierr = MatScale(Y,a);CHKERRQ(ierr);
374   ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr);
375   PetscFunctionReturn(0);
376 }
377 
/*@
    MatComputeExplicitOperator - Computes the explicit matrix

    Collective on Mat

    Input Parameter:
.   inmat - the matrix

    Output Parameter:
.   mat - the explicit operator

    Notes:
    This computation is done by applying the operators to columns of the
    identity matrix.

    Currently, this routine uses a dense matrix format when 1 processor
    is used and a sparse format otherwise.  This routine is costly in general,
    and is recommended for use only with relatively small systems.

    Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
  PetscValidPointer(mat,2);

  ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* NOTE(review): MatConvert_Shell() is called unconditionally; presumably
     inmat is expected to be a MATSHELL (or at least support MatMult-driven
     conversion) -- confirm against callers */
  ierr = MatConvert_Shell(inmat,size == 1 ? MATSEQDENSE : MATAIJ,MAT_INITIAL_MATRIX,mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
416 
/*@
    MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
        a give matrix that can apply MatMultTranspose()

    Collective on Mat

    Input Parameter:
.   inmat - the matrix

    Output Parameter:
.   mat - the explict  operator transposed

    Notes:
    This computation is done by applying the transpose of the operator to columns of the
    identity matrix.

    Currently, this routine uses a dense matrix format when 1 processor
    is used and a sparse format otherwise.  This routine is costly in general,
    and is recommended for use only with relatively small systems.

    Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode  MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
{
  Vec            in,out;
  PetscErrorCode ierr;
  PetscInt       i,m,n,M,N,*rows,start,end;
  MPI_Comm       comm;
  PetscScalar    *array,zero = 0.0,one = 1.0;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
  PetscValidPointer(mat,2);

  ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr);
  ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr);
  /* NOTE(review): MatCreateVecs() returns `in` compatible with MatMult input
     and `out` with its output, yet they are passed to MatMultTranspose()
     below, whose spaces are the reverse; the loop also runs over N columns
     while rows[] spans m local rows -- this only lines up when inmat is
     square, confirm behavior for rectangular operators */
  ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr);
  ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr);
  /* Global row indices this rank will write into *mat */
  ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr);
  for (i=0; i<m; i++) rows[i] = start + i;

  /* Dense storage on one rank, AIJ otherwise (see Notes above) */
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr);
  }

  /* Column i of the result is inmat^T applied to the i-th identity column */
  for (i=0; i<N; i++) {

    ierr = VecSet(in,zero);CHKERRQ(ierr);
    ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecAssemblyBegin(in);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(in);CHKERRQ(ierr);

    ierr = MatMultTranspose(inmat,in,out);CHKERRQ(ierr);

    ierr = VecGetArray(out,&array);CHKERRQ(ierr);
    ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecRestoreArray(out,&array);CHKERRQ(ierr);

  }
  ierr = PetscFree(rows);CHKERRQ(ierr);
  ierr = VecDestroy(&out);CHKERRQ(ierr);
  ierr = VecDestroy(&in);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
496 
497 /*@
498   MatChop - Set all values in the matrix less than the tolerance to zero
499 
500   Input Parameters:
501 + A   - The matrix
502 - tol - The zero tolerance
503 
504   Output Parameters:
505 . A - The chopped matrix
506 
507   Level: intermediate
508 
509 .seealso: MatCreate(), MatZeroEntries()
510  @*/
511 PetscErrorCode MatChop(Mat A, PetscReal tol)
512 {
513   PetscScalar    *newVals;
514   PetscInt       *newCols;
515   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;
516   PetscErrorCode ierr;
517 
518   PetscFunctionBegin;
519   ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr);
520   for (r = rStart; r < rEnd; ++r) {
521     PetscInt ncols;
522 
523     ierr   = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
524     colMax = PetscMax(colMax, ncols);CHKERRQ(ierr);
525     ierr   = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
526   }
527   numRows = rEnd - rStart;
528   ierr    = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
529   ierr    = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr);
530   for (r = rStart; r < rStart+maxRows; ++r) {
531     const PetscScalar *vals;
532     const PetscInt    *cols;
533     PetscInt           ncols, newcols, c;
534 
535     if (r < rEnd) {
536       ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
537       for (c = 0; c < ncols; ++c) {
538         newCols[c] = cols[c];
539         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
540       }
541       newcols = ncols;
542       ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
543       ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr);
544     }
545     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
547   }
548   ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551