xref: /petsc/src/mat/utils/axpy.c (revision a683ea4ad565b772fff972484e3dcf1f62c45dc4)
1 
2 #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/
3 
4 /*@
5    MatAXPY - Computes Y = a*X + Y.
6 
7    Logically  Collective on Mat
8 
9    Input Parameters:
10 +  a - the scalar multiplier
11 .  X - the first matrix
12 .  Y - the second matrix
13 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
14          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
15 
16    Level: intermediate
17 
18 .keywords: matrix, add
19 
20 .seealso: MatAYPX()
21  @*/
22 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
23 {
24   PetscErrorCode ierr,(*f)(Mat,Mat*);
25   PetscInt       M1,M2,N1,N2;
26   PetscInt       m1,m2,n1,n2;
27   MatType        t1,t2;
28   Mat            T,F;
29   PetscBool      sametype,transpose;
30 
31   PetscFunctionBegin;
32   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
33   PetscValidLogicalCollectiveScalar(Y,a,2);
34   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
35   PetscCheckSameComm(Y,1,X,3);
36   ierr = MatGetSize(X,&M1,&N1);CHKERRQ(ierr);
37   ierr = MatGetSize(Y,&M2,&N2);CHKERRQ(ierr);
38   ierr = MatGetLocalSize(X,&m1,&n1);CHKERRQ(ierr);
39   ierr = MatGetLocalSize(Y,&m2,&n2);CHKERRQ(ierr);
40   if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,M2,N1,N2);
41   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,m2,n1,n2);
42 
43   ierr = MatGetType(X,&t1);CHKERRQ(ierr);
44   ierr = MatGetType(Y,&t2);CHKERRQ(ierr);
45   ierr = PetscStrcmp(t1,t2,&sametype);CHKERRQ(ierr);
46   ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
47   if (Y->ops->axpy && sametype) {
48     ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr);
49   } else {
50     ierr = PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);CHKERRQ(ierr);
51     if (transpose) {
52         PetscObjectQueryFunction((PetscObject)X,"MatTransposeGetMat_C",&f);
53         if (f) {
54           ierr = f(X,&T);CHKERRQ(ierr);
55           ierr = MatTranspose(T,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
56         } else {
57           ierr = MatHermitianTransposeGetMat(X,&T);CHKERRQ(ierr);
58           ierr = MatHermitianTranspose(T,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
59         }
60         ierr = MatAXPY(Y,a,F,str);CHKERRQ(ierr);
61         ierr = MatDestroy(&F);CHKERRQ(ierr);
62     } else {
63       ierr = PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);CHKERRQ(ierr);
64       if (transpose) {
65         PetscObjectQueryFunction((PetscObject)Y,"MatTransposeGetMat_C",&f);
66         if (f) {
67           ierr = f(Y,&T);CHKERRQ(ierr);
68           ierr = MatTranspose(T,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
69         } else {
70           ierr = MatHermitianTransposeGetMat(Y,&T);CHKERRQ(ierr);
71           ierr = MatHermitianTranspose(T,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
72         }
73         ierr = MatAXPY(F,a,X,str);CHKERRQ(ierr);
74         ierr = MatDestroy(&F);CHKERRQ(ierr);
75       } else {
76         if (str != DIFFERENT_NONZERO_PATTERN) {
77           ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
78         } else {
79           Mat B;
80 
81           ierr = MatAXPY_Basic_Preallocate(Y,X,&B);CHKERRQ(ierr);
82           ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
83           ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
84         }
85       }
86     }
87   }
88   ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
89 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
90   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
91     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
92   }
93 #endif
94   PetscFunctionReturn(0);
95 }
96 
97 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
98 {
99   PetscErrorCode ierr;
100   PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
101 
102   PetscFunctionBegin;
103   /* look for any available faster alternative to the general preallocator */
104   ierr = PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);CHKERRQ(ierr);
105   if (preall) {
106     ierr = (*preall)(Y,X,B);CHKERRQ(ierr);
107   } else { /* Use MatPrellocator, assumes same row-col distribution */
108     Mat      preallocator;
109     PetscInt r,rstart,rend;
110     PetscInt m,n,M,N;
111 
112     ierr = MatGetSize(Y,&M,&N);CHKERRQ(ierr);
113     ierr = MatGetLocalSize(Y,&m,&n);CHKERRQ(ierr);
114     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);CHKERRQ(ierr);
115     ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
116     ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
117     ierr = MatSetUp(preallocator);CHKERRQ(ierr);
118     ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
119     for (r = rstart; r < rend; ++r) {
120       PetscInt          ncols;
121       const PetscInt    *row;
122       const PetscScalar *vals;
123 
124       ierr = MatGetRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr);
125       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
126       ierr = MatRestoreRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr);
127       ierr = MatGetRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr);
128       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
129       ierr = MatRestoreRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr);
130     }
131     ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
132     ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
133 
134     ierr = MatCreate(PetscObjectComm((PetscObject)Y),B);CHKERRQ(ierr);
135     ierr = MatSetType(*B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
136     ierr = MatSetSizes(*B,m,n,M,N);CHKERRQ(ierr);
137     ierr = MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);CHKERRQ(ierr);
138     ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
139   }
140   PetscFunctionReturn(0);
141 }
142 
143 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
144 {
145   PetscInt          i,start,end,j,ncols,m,n;
146   PetscErrorCode    ierr;
147   const PetscInt    *row;
148   PetscScalar       *val;
149   const PetscScalar *vals;
150 
151   PetscFunctionBegin;
152   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
153   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
154   if (a == 1.0) {
155     for (i = start; i < end; i++) {
156       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
157       ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
158       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
159     }
160   } else {
161     PetscInt vs = 100;
162     /* realloc if needed, as this function may be used in parallel */
163     ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr);
164     for (i=start; i<end; i++) {
165       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
166       if (vs < ncols) {
167         vs   = PetscMin(2*ncols,n);
168         ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr);
169       }
170       for (j=0; j<ncols; j++) val[j] = a*vals[j];
171       ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
172       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
173     }
174     ierr = PetscFree(val);CHKERRQ(ierr);
175   }
176   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
177   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
182 {
183   PetscInt          i,start,end,j,ncols,m,n;
184   PetscErrorCode    ierr;
185   const PetscInt    *row;
186   PetscScalar       *val;
187   const PetscScalar *vals;
188 
189   PetscFunctionBegin;
190   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
191   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
192   if (a == 1.0) {
193     for (i = start; i < end; i++) {
194       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
195       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
196       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
197 
198       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
199       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
200       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
201     }
202   } else {
203     PetscInt vs = 100;
204     /* realloc if needed, as this function may be used in parallel */
205     ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr);
206     for (i=start; i<end; i++) {
207       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
208       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
209       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
210 
211       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
212       if (vs < ncols) {
213         vs   = PetscMin(2*ncols,n);
214         ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr);
215       }
216       for (j=0; j<ncols; j++) val[j] = a*vals[j];
217       ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
218       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
219     }
220     ierr = PetscFree(val);CHKERRQ(ierr);
221   }
222   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
223   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
227 /*@
228    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
229 
230    Neighbor-wise Collective on Mat
231 
232    Input Parameters:
233 +  Y - the matrices
234 -  a - the PetscScalar
235 
236    Level: intermediate
237 
238    Notes:
239     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
240    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
241    entry).
242 
243    To form Y = Y + diag(V) use MatDiagonalSet()
244 
245    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
246     preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
247 
248 .keywords: matrix, add, shift
249 
250 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
251  @*/
252 PetscErrorCode  MatShift(Mat Y,PetscScalar a)
253 {
254   PetscErrorCode ierr;
255 
256   PetscFunctionBegin;
257   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
258   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
259   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
260   MatCheckPreallocated(Y,1);
261 
262   if (Y->ops->shift) {
263     ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr);
264   } else {
265     ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
266   }
267 
268   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
269 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
270   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
271     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
272   }
273 #endif
274   PetscFunctionReturn(0);
275 }
276 
277 PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
278 {
279   PetscErrorCode ierr;
280   PetscInt       i,start,end;
281   PetscScalar    *v;
282 
283   PetscFunctionBegin;
284   ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr);
285   ierr = VecGetArray(D,&v);CHKERRQ(ierr);
286   for (i=start; i<end; i++) {
287     ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr);
288   }
289   ierr = VecRestoreArray(D,&v);CHKERRQ(ierr);
290   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
291   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
292   PetscFunctionReturn(0);
293 }
294 
295 /*@
296    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
297    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
298    INSERT_VALUES.
299 
300    Input Parameters:
301 +  Y - the input matrix
302 .  D - the diagonal matrix, represented as a vector
303 -  i - INSERT_VALUES or ADD_VALUES
304 
305    Neighbor-wise Collective on Mat and Vec
306 
307    Notes:
308     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
309    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
310    entry).
311 
312    Level: intermediate
313 
314 .keywords: matrix, add, shift, diagonal
315 
316 .seealso: MatShift(), MatScale(), MatDiagonalScale()
317 @*/
318 PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
319 {
320   PetscErrorCode ierr;
321   PetscInt       matlocal,veclocal;
322 
323   PetscFunctionBegin;
324   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
325   PetscValidHeaderSpecific(D,VEC_CLASSID,2);
326   ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr);
327   ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr);
328   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
329   if (Y->ops->diagonalset) {
330     ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr);
331   } else {
332     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
333   }
334   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
335   PetscFunctionReturn(0);
336 }
337 
338 /*@
339    MatAYPX - Computes Y = a*Y + X.
340 
341    Logically on Mat
342 
343    Input Parameters:
344 +  a - the PetscScalar multiplier
345 .  Y - the first matrix
346 .  X - the second matrix
347 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
348 
349    Level: intermediate
350 
351 .keywords: matrix, add
352 
353 .seealso: MatAXPY()
354  @*/
355 PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
356 {
357   PetscScalar    one = 1.0;
358   PetscErrorCode ierr;
359   PetscInt       mX,mY,nX,nY;
360 
361   PetscFunctionBegin;
362   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
363   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
364   PetscValidLogicalCollectiveScalar(Y,a,2);
365   ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr);
366   ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr);
367   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
368 
369   ierr = MatScale(Y,a);CHKERRQ(ierr);
370   ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr);
371   PetscFunctionReturn(0);
372 }
373 
374 /*@
375     MatComputeExplicitOperator - Computes the explicit matrix
376 
377     Collective on Mat
378 
379     Input Parameter:
380 .   inmat - the matrix
381 
382     Output Parameter:
383 .   mat - the explict  operator
384 
385     Notes:
386     This computation is done by applying the operators to columns of the
387     identity matrix.
388 
389     Currently, this routine uses a dense matrix format when 1 processor
390     is used and a sparse format otherwise.  This routine is costly in general,
391     and is recommended for use only with relatively small systems.
392 
393     Level: advanced
394 
395 .keywords: Mat, compute, explicit, operator
396 @*/
397 PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
398 {
399   PetscErrorCode ierr;
400   MPI_Comm       comm;
401   PetscMPIInt    size;
402 
403   PetscFunctionBegin;
404   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
405   PetscValidPointer(mat,2);
406 
407   ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
408   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
409   ierr = MatConvert_Shell(inmat,size == 1 ? MATSEQDENSE : MATAIJ,MAT_INITIAL_MATRIX,mat);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 /*@
414     MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
415         a give matrix that can apply MatMultTranspose()
416 
417     Collective on Mat
418 
419     Input Parameter:
420 .   inmat - the matrix
421 
422     Output Parameter:
423 .   mat - the explict  operator transposed
424 
425     Notes:
426     This computation is done by applying the transpose of the operator to columns of the
427     identity matrix.
428 
429     Currently, this routine uses a dense matrix format when 1 processor
430     is used and a sparse format otherwise.  This routine is costly in general,
431     and is recommended for use only with relatively small systems.
432 
433     Level: advanced
434 
435 .keywords: Mat, compute, explicit, operator
436 @*/
437 PetscErrorCode  MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
438 {
439   Vec            in,out;
440   PetscErrorCode ierr;
441   PetscInt       i,m,n,M,N,*rows,start,end;
442   MPI_Comm       comm;
443   PetscScalar    *array,zero = 0.0,one = 1.0;
444   PetscMPIInt    size;
445 
446   PetscFunctionBegin;
447   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
448   PetscValidPointer(mat,2);
449 
450   ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
451   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
452 
453   ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr);
454   ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr);
455   ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr);
456   ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
457   ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr);
458   ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr);
459   for (i=0; i<m; i++) rows[i] = start + i;
460 
461   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
462   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
463   if (size == 1) {
464     ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr);
465     ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr);
466   } else {
467     ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
468     ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr);
469   }
470 
471   for (i=0; i<N; i++) {
472 
473     ierr = VecSet(in,zero);CHKERRQ(ierr);
474     ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
475     ierr = VecAssemblyBegin(in);CHKERRQ(ierr);
476     ierr = VecAssemblyEnd(in);CHKERRQ(ierr);
477 
478     ierr = MatMultTranspose(inmat,in,out);CHKERRQ(ierr);
479 
480     ierr = VecGetArray(out,&array);CHKERRQ(ierr);
481     ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
482     ierr = VecRestoreArray(out,&array);CHKERRQ(ierr);
483 
484   }
485   ierr = PetscFree(rows);CHKERRQ(ierr);
486   ierr = VecDestroy(&out);CHKERRQ(ierr);
487   ierr = VecDestroy(&in);CHKERRQ(ierr);
488   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
489   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 /*@
494   MatChop - Set all values in the matrix less than the tolerance to zero
495 
496   Input Parameters:
497 + A   - The matrix
498 - tol - The zero tolerance
499 
500   Output Parameters:
501 . A - The chopped matrix
502 
503   Level: intermediate
504 
505 .seealso: MatCreate(), MatZeroEntries()
506  @*/
507 PetscErrorCode MatChop(Mat A, PetscReal tol)
508 {
509   PetscScalar    *newVals;
510   PetscInt       *newCols;
511   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;
512   PetscErrorCode ierr;
513 
514   PetscFunctionBegin;
515   ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr);
516   for (r = rStart; r < rEnd; ++r) {
517     PetscInt ncols;
518 
519     ierr   = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
520     colMax = PetscMax(colMax, ncols);CHKERRQ(ierr);
521     ierr   = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
522   }
523   numRows = rEnd - rStart;
524   ierr    = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
525   ierr    = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr);
526   for (r = rStart; r < rStart+maxRows; ++r) {
527     const PetscScalar *vals;
528     const PetscInt    *cols;
529     PetscInt           ncols, newcols, c;
530 
531     if (r < rEnd) {
532       ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
533       for (c = 0; c < ncols; ++c) {
534         newCols[c] = cols[c];
535         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
536       }
537       newcols = ncols;
538       ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
539       ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr);
540     }
541     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
542     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
543   }
544   ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr);
545   PetscFunctionReturn(0);
546 }
547