xref: /petsc/src/mat/utils/axpy.c (revision 89d6d5e90dc70017acb628f602ed2ebe21e29ab8)
1 
2 #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/
3 
4 static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5 {
6   PetscErrorCode ierr,(*f)(Mat,Mat*);
7   Mat            A,F;
8 
9   PetscFunctionBegin;
10   ierr = PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);CHKERRQ(ierr);
11   if (f) {
12     ierr = MatTransposeGetMat(T,&A);CHKERRQ(ierr);
13     if (T == X) {
14       ierr = PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");CHKERRQ(ierr);
15       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
16       A = Y;
17     } else {
18       ierr = PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");CHKERRQ(ierr);
19       ierr = MatTranspose(X,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
20     }
21   } else {
22     ierr = MatHermitianTransposeGetMat(T,&A);CHKERRQ(ierr);
23     if (T == X) {
24       ierr = PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");CHKERRQ(ierr);
25       ierr = MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
26       A = Y;
27     } else {
28       ierr = PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");CHKERRQ(ierr);
29       ierr = MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
30     }
31   }
32   ierr = MatAXPY(A,a,F,str);CHKERRQ(ierr);
33   ierr = MatDestroy(&F);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 
37 /*@
38    MatAXPY - Computes Y = a*X + Y.
39 
40    Logically  Collective on Mat
41 
42    Input Parameters:
43 +  a - the scalar multiplier
44 .  X - the first matrix
45 .  Y - the second matrix
46 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
47          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
48 
49    Level: intermediate
50 
51 .keywords: matrix, add
52 
53 .seealso: MatAYPX()
54  @*/
55 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
56 {
57   PetscErrorCode ierr;
58   PetscInt       M1,M2,N1,N2;
59   PetscInt       m1,m2,n1,n2;
60   MatType        t1,t2;
61   PetscBool      sametype,transpose;
62 
63   PetscFunctionBegin;
64   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
65   PetscValidLogicalCollectiveScalar(Y,a,2);
66   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
67   PetscCheckSameComm(Y,1,X,3);
68   ierr = MatGetSize(X,&M1,&N1);CHKERRQ(ierr);
69   ierr = MatGetSize(Y,&M2,&N2);CHKERRQ(ierr);
70   ierr = MatGetLocalSize(X,&m1,&n1);CHKERRQ(ierr);
71   ierr = MatGetLocalSize(Y,&m2,&n2);CHKERRQ(ierr);
72   if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,M2,N1,N2);
73   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,m2,n1,n2);
74 
75   ierr = MatGetType(X,&t1);CHKERRQ(ierr);
76   ierr = MatGetType(Y,&t2);CHKERRQ(ierr);
77   ierr = PetscStrcmp(t1,t2,&sametype);CHKERRQ(ierr);
78   ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
79   if (Y->ops->axpy && sametype) {
80     ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr);
81   } else {
82     ierr = PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);CHKERRQ(ierr);
83     if (transpose) {
84         ierr = MatTransposeAXPY_Private(Y,a,X,str,X);CHKERRQ(ierr);
85     } else {
86       ierr = PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);CHKERRQ(ierr);
87       if (transpose) {
88         ierr = MatTransposeAXPY_Private(Y,a,X,str,Y);CHKERRQ(ierr);
89       } else {
90         if (str != DIFFERENT_NONZERO_PATTERN) {
91           ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
92         } else {
93           Mat B;
94 
95           ierr = MatAXPY_Basic_Preallocate(Y,X,&B);CHKERRQ(ierr);
96           ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
97           ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
98         }
99       }
100     }
101   }
102   ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
103 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
104   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
105     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
106   }
107 #endif
108   PetscFunctionReturn(0);
109 }
110 
111 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
112 {
113   PetscErrorCode ierr;
114   PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
115 
116   PetscFunctionBegin;
117   /* look for any available faster alternative to the general preallocator */
118   ierr = PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);CHKERRQ(ierr);
119   if (preall) {
120     ierr = (*preall)(Y,X,B);CHKERRQ(ierr);
121   } else { /* Use MatPrellocator, assumes same row-col distribution */
122     Mat      preallocator;
123     PetscInt r,rstart,rend;
124     PetscInt m,n,M,N;
125 
126     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
127     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
128     ierr = MatGetSize(Y,&M,&N);CHKERRQ(ierr);
129     ierr = MatGetLocalSize(Y,&m,&n);CHKERRQ(ierr);
130     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);CHKERRQ(ierr);
131     ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
132     ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
133     ierr = MatSetUp(preallocator);CHKERRQ(ierr);
134     ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
135     for (r = rstart; r < rend; ++r) {
136       PetscInt          ncols;
137       const PetscInt    *row;
138       const PetscScalar *vals;
139 
140       ierr = MatGetRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr);
141       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
142       ierr = MatRestoreRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr);
143       ierr = MatGetRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr);
144       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
145       ierr = MatRestoreRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr);
146     }
147     ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
148     ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
149     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
150     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
151 
152     ierr = MatCreate(PetscObjectComm((PetscObject)Y),B);CHKERRQ(ierr);
153     ierr = MatSetType(*B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
154     ierr = MatSetSizes(*B,m,n,M,N);CHKERRQ(ierr);
155     ierr = MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);CHKERRQ(ierr);
156     ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
157   }
158   PetscFunctionReturn(0);
159 }
160 
161 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
162 {
163   PetscInt          i,start,end,j,ncols,m,n;
164   PetscErrorCode    ierr;
165   const PetscInt    *row;
166   PetscScalar       *val;
167   const PetscScalar *vals;
168 
169   PetscFunctionBegin;
170   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
171   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
172   ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
173   if (a == 1.0) {
174     for (i = start; i < end; i++) {
175       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
176       ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
177       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
178     }
179   } else {
180     PetscInt vs = 100;
181     /* realloc if needed, as this function may be used in parallel */
182     ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr);
183     for (i=start; i<end; i++) {
184       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
185       if (vs < ncols) {
186         vs   = PetscMin(2*ncols,n);
187         ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr);
188       }
189       for (j=0; j<ncols; j++) val[j] = a*vals[j];
190       ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
191       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
192     }
193     ierr = PetscFree(val);CHKERRQ(ierr);
194   }
195   ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
196   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
197   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
198   PetscFunctionReturn(0);
199 }
200 
201 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
202 {
203   PetscInt          i,start,end,j,ncols,m,n;
204   PetscErrorCode    ierr;
205   const PetscInt    *row;
206   PetscScalar       *val;
207   const PetscScalar *vals;
208 
209   PetscFunctionBegin;
210   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
211   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
212   ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
213   ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
214   if (a == 1.0) {
215     for (i = start; i < end; i++) {
216       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
217       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
218       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
219 
220       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
221       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
222       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
223     }
224   } else {
225     PetscInt vs = 100;
226     /* realloc if needed, as this function may be used in parallel */
227     ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr);
228     for (i=start; i<end; i++) {
229       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
230       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
231       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
232 
233       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
234       if (vs < ncols) {
235         vs   = PetscMin(2*ncols,n);
236         ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr);
237       }
238       for (j=0; j<ncols; j++) val[j] = a*vals[j];
239       ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
240       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
241     }
242     ierr = PetscFree(val);CHKERRQ(ierr);
243   }
244   ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
245   ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
246   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
247   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
248   PetscFunctionReturn(0);
249 }
250 
251 /*@
252    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
253 
254    Neighbor-wise Collective on Mat
255 
256    Input Parameters:
257 +  Y - the matrices
258 -  a - the PetscScalar
259 
260    Level: intermediate
261 
262    Notes:
263     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
264    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
265    entry).
266 
267    To form Y = Y + diag(V) use MatDiagonalSet()
268 
269    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
270     preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
271 
272 .keywords: matrix, add, shift
273 
274 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
275  @*/
276 PetscErrorCode  MatShift(Mat Y,PetscScalar a)
277 {
278   PetscErrorCode ierr;
279 
280   PetscFunctionBegin;
281   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
282   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
283   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
284   MatCheckPreallocated(Y,1);
285 
286   if (Y->ops->shift) {
287     ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr);
288   } else {
289     ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
290   }
291 
292   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
293 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
294   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
295     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
296   }
297 #endif
298   PetscFunctionReturn(0);
299 }
300 
301 PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
302 {
303   PetscErrorCode ierr;
304   PetscInt       i,start,end;
305   PetscScalar    *v;
306 
307   PetscFunctionBegin;
308   ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr);
309   ierr = VecGetArray(D,&v);CHKERRQ(ierr);
310   for (i=start; i<end; i++) {
311     ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr);
312   }
313   ierr = VecRestoreArray(D,&v);CHKERRQ(ierr);
314   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
315   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
316   PetscFunctionReturn(0);
317 }
318 
319 /*@
320    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
321    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
322    INSERT_VALUES.
323 
324    Input Parameters:
325 +  Y - the input matrix
326 .  D - the diagonal matrix, represented as a vector
327 -  i - INSERT_VALUES or ADD_VALUES
328 
329    Neighbor-wise Collective on Mat and Vec
330 
331    Notes:
332     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
333    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
334    entry).
335 
336    Level: intermediate
337 
338 .keywords: matrix, add, shift, diagonal
339 
340 .seealso: MatShift(), MatScale(), MatDiagonalScale()
341 @*/
342 PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
343 {
344   PetscErrorCode ierr;
345   PetscInt       matlocal,veclocal;
346 
347   PetscFunctionBegin;
348   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
349   PetscValidHeaderSpecific(D,VEC_CLASSID,2);
350   ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr);
351   ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr);
352   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
353   if (Y->ops->diagonalset) {
354     ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr);
355   } else {
356     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
357   }
358   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 /*@
363    MatAYPX - Computes Y = a*Y + X.
364 
365    Logically on Mat
366 
367    Input Parameters:
368 +  a - the PetscScalar multiplier
369 .  Y - the first matrix
370 .  X - the second matrix
371 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
372 
373    Level: intermediate
374 
375 .keywords: matrix, add
376 
377 .seealso: MatAXPY()
378  @*/
379 PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
380 {
381   PetscScalar    one = 1.0;
382   PetscErrorCode ierr;
383   PetscInt       mX,mY,nX,nY;
384 
385   PetscFunctionBegin;
386   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
387   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
388   PetscValidLogicalCollectiveScalar(Y,a,2);
389   ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr);
390   ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr);
391   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
392 
393   ierr = MatScale(Y,a);CHKERRQ(ierr);
394   ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*@
399     MatComputeExplicitOperator - Computes the explicit matrix
400 
401     Collective on Mat
402 
403     Input Parameter:
404 .   inmat - the matrix
405 
406     Output Parameter:
407 .   mat - the explict  operator
408 
409     Notes:
410     This computation is done by applying the operators to columns of the
411     identity matrix.
412 
413     Currently, this routine uses a dense matrix format when 1 processor
414     is used and a sparse format otherwise.  This routine is costly in general,
415     and is recommended for use only with relatively small systems.
416 
417     Level: advanced
418 
419 .keywords: Mat, compute, explicit, operator
420 @*/
421 PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
422 {
423   PetscErrorCode ierr;
424   MPI_Comm       comm;
425   PetscMPIInt    size;
426 
427   PetscFunctionBegin;
428   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
429   PetscValidPointer(mat,2);
430 
431   ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
432   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
433   ierr = MatConvert_Shell(inmat,size == 1 ? MATSEQDENSE : MATAIJ,MAT_INITIAL_MATRIX,mat);CHKERRQ(ierr);
434   PetscFunctionReturn(0);
435 }
436 
437 /*@
438     MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
439         a give matrix that can apply MatMultTranspose()
440 
441     Collective on Mat
442 
443     Input Parameter:
444 .   inmat - the matrix
445 
446     Output Parameter:
447 .   mat - the explict  operator transposed
448 
449     Notes:
450     This computation is done by applying the transpose of the operator to columns of the
451     identity matrix.
452 
453     Currently, this routine uses a dense matrix format when 1 processor
454     is used and a sparse format otherwise.  This routine is costly in general,
455     and is recommended for use only with relatively small systems.
456 
457     Level: advanced
458 
459 .keywords: Mat, compute, explicit, operator
460 @*/
461 PetscErrorCode  MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
462 {
463   Vec            in,out;
464   PetscErrorCode ierr;
465   PetscInt       i,m,n,M,N,*rows,start,end;
466   MPI_Comm       comm;
467   PetscScalar    *array,zero = 0.0,one = 1.0;
468   PetscMPIInt    size;
469 
470   PetscFunctionBegin;
471   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
472   PetscValidPointer(mat,2);
473 
474   ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
475   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
476 
477   ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr);
478   ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr);
479   ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr);
480   ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
481   ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr);
482   ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr);
483   for (i=0; i<m; i++) rows[i] = start + i;
484 
485   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
486   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
487   if (size == 1) {
488     ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr);
489     ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr);
490   } else {
491     ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
492     ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr);
493   }
494 
495   for (i=0; i<N; i++) {
496 
497     ierr = VecSet(in,zero);CHKERRQ(ierr);
498     ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
499     ierr = VecAssemblyBegin(in);CHKERRQ(ierr);
500     ierr = VecAssemblyEnd(in);CHKERRQ(ierr);
501 
502     ierr = MatMultTranspose(inmat,in,out);CHKERRQ(ierr);
503 
504     ierr = VecGetArray(out,&array);CHKERRQ(ierr);
505     ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
506     ierr = VecRestoreArray(out,&array);CHKERRQ(ierr);
507 
508   }
509   ierr = PetscFree(rows);CHKERRQ(ierr);
510   ierr = VecDestroy(&out);CHKERRQ(ierr);
511   ierr = VecDestroy(&in);CHKERRQ(ierr);
512   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
513   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
514   PetscFunctionReturn(0);
515 }
516 
517 /*@
518   MatChop - Set all values in the matrix less than the tolerance to zero
519 
520   Input Parameters:
521 + A   - The matrix
522 - tol - The zero tolerance
523 
524   Output Parameters:
525 . A - The chopped matrix
526 
527   Level: intermediate
528 
529 .seealso: MatCreate(), MatZeroEntries()
530  @*/
531 PetscErrorCode MatChop(Mat A, PetscReal tol)
532 {
533   PetscScalar    *newVals;
534   PetscInt       *newCols;
535   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;
536   PetscErrorCode ierr;
537 
538   PetscFunctionBegin;
539   ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr);
540   for (r = rStart; r < rEnd; ++r) {
541     PetscInt ncols;
542 
543     ierr   = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
544     colMax = PetscMax(colMax, ncols);CHKERRQ(ierr);
545     ierr   = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
546   }
547   numRows = rEnd - rStart;
548   ierr    = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
549   ierr    = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr);
550   for (r = rStart; r < rStart+maxRows; ++r) {
551     const PetscScalar *vals;
552     const PetscInt    *cols;
553     PetscInt           ncols, newcols, c;
554 
555     if (r < rEnd) {
556       ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
557       for (c = 0; c < ncols; ++c) {
558         newCols[c] = cols[c];
559         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
560       }
561       newcols = ncols;
562       ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
563       ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr);
564     }
565     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
566     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
567   }
568   ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr);
569   PetscFunctionReturn(0);
570 }
571