1 2 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 3 4 static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T) 5 { 6 PetscErrorCode ierr,(*f)(Mat,Mat*); 7 Mat A,F; 8 9 PetscFunctionBegin; 10 ierr = PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);CHKERRQ(ierr); 11 if (f) { 12 ierr = MatTransposeGetMat(T,&A);CHKERRQ(ierr); 13 if (T == X) { 14 ierr = PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");CHKERRQ(ierr); 15 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr); 16 A = Y; 17 } else { 18 ierr = PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");CHKERRQ(ierr); 19 ierr = MatTranspose(X,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr); 20 } 21 } else { 22 ierr = MatHermitianTransposeGetMat(T,&A);CHKERRQ(ierr); 23 if (T == X) { 24 ierr = PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");CHKERRQ(ierr); 25 ierr = MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr); 26 A = Y; 27 } else { 28 ierr = PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");CHKERRQ(ierr); 29 ierr = MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr); 30 } 31 } 32 ierr = MatAXPY(A,a,F,str);CHKERRQ(ierr); 33 ierr = MatDestroy(&F);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 37 /*@ 38 MatAXPY - Computes Y = a*X + Y. 39 40 Logically Collective on Mat 41 42 Input Parameters: 43 + a - the scalar multiplier 44 . X - the first matrix 45 . Y - the second matrix 46 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN 47 or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's) 48 49 Level: intermediate 50 51 .keywords: matrix, add 52 53 .seealso: MatAYPX() 54 @*/ 55 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str) 56 { 57 PetscErrorCode ierr; 58 PetscInt M1,M2,N1,N2; 59 PetscInt m1,m2,n1,n2; 60 MatType t1,t2; 61 PetscBool sametype,transpose; 62 63 PetscFunctionBegin; 64 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 65 PetscValidLogicalCollectiveScalar(Y,a,2); 66 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 67 PetscCheckSameComm(Y,1,X,3); 68 ierr = MatGetSize(X,&M1,&N1);CHKERRQ(ierr); 69 ierr = MatGetSize(Y,&M2,&N2);CHKERRQ(ierr); 70 ierr = MatGetLocalSize(X,&m1,&n1);CHKERRQ(ierr); 71 ierr = MatGetLocalSize(Y,&m2,&n2);CHKERRQ(ierr); 72 if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,M2,N1,N2); 73 if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,m2,n1,n2); 74 75 ierr = MatGetType(X,&t1);CHKERRQ(ierr); 76 ierr = MatGetType(Y,&t2);CHKERRQ(ierr); 77 ierr = PetscStrcmp(t1,t2,&sametype);CHKERRQ(ierr); 78 ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr); 79 if (Y->ops->axpy && sametype) { 80 ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr); 81 } else { 82 ierr = PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);CHKERRQ(ierr); 83 if (transpose) { 84 ierr = MatTransposeAXPY_Private(Y,a,X,str,X);CHKERRQ(ierr); 85 } else { 86 ierr = PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);CHKERRQ(ierr); 87 if (transpose) { 88 ierr = MatTransposeAXPY_Private(Y,a,X,str,Y);CHKERRQ(ierr); 89 } else { 90 if (str != DIFFERENT_NONZERO_PATTERN) { 91 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 92 } else { 93 Mat B; 94 95 ierr = MatAXPY_Basic_Preallocate(Y,X,&B);CHKERRQ(ierr); 96 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 97 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 98 } 99 } 100 } 101 } 102 ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr); 103 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 104 if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) { 105 Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 106 } 107 #endif 108 PetscFunctionReturn(0); 109 } 110 111 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B) 112 { 113 PetscErrorCode ierr; 114 PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL; 115 116 PetscFunctionBegin; 117 /* look for any available faster alternative to the general preallocator */ 118 ierr = PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);CHKERRQ(ierr); 119 if (preall) { 120 ierr = (*preall)(Y,X,B);CHKERRQ(ierr); 121 } else { /* Use MatPrellocator, assumes same row-col distribution */ 122 Mat preallocator; 123 PetscInt r,rstart,rend; 124 PetscInt m,n,M,N; 125 126 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 127 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 128 ierr = MatGetSize(Y,&M,&N);CHKERRQ(ierr); 129 ierr = MatGetLocalSize(Y,&m,&n);CHKERRQ(ierr); 130 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);CHKERRQ(ierr); 131 ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr); 132 ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr); 133 ierr = MatSetUp(preallocator);CHKERRQ(ierr); 134 ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr); 135 for (r = rstart; r < rend; ++r) { 136 PetscInt ncols; 137 const PetscInt *row; 138 const PetscScalar *vals; 139 140 ierr = MatGetRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr); 141 ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 142 ierr = MatRestoreRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr); 143 ierr = MatGetRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr); 144 ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 145 ierr = MatRestoreRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr); 146 } 147 ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 148 ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 149 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 150 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 151 152 ierr = MatCreate(PetscObjectComm((PetscObject)Y),B);CHKERRQ(ierr); 153 ierr = MatSetType(*B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 154 ierr = MatSetSizes(*B,m,n,M,N);CHKERRQ(ierr); 155 ierr = MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);CHKERRQ(ierr); 156 ierr = MatDestroy(&preallocator);CHKERRQ(ierr); 157 } 158 PetscFunctionReturn(0); 159 } 160 161 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str) 162 { 163 PetscInt i,start,end,j,ncols,m,n; 164 PetscErrorCode ierr; 165 const PetscInt *row; 166 PetscScalar *val; 167 const PetscScalar *vals; 168 169 PetscFunctionBegin; 170 ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr); 171 ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr); 172 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 173 if (a == 1.0) { 174 for (i = start; i < end; i++) { 175 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 176 ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 177 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 178 } 179 } else { 180 PetscInt vs = 100; 181 /* realloc if needed, as this function may be used in parallel */ 182 ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr); 183 for (i=start; i<end; i++) { 184 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 185 if (vs < ncols) { 186 vs = PetscMin(2*ncols,n); 187 ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr); 188 } 189 for (j=0; j<ncols; j++) val[j] = a*vals[j]; 190 ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr); 191 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 192 } 193 ierr = PetscFree(val);CHKERRQ(ierr); 194 } 195 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 196 ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 197 ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 198 PetscFunctionReturn(0); 199 } 200 201 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str) 202 { 203 PetscInt i,start,end,j,ncols,m,n; 204 PetscErrorCode ierr; 205 const PetscInt *row; 206 PetscScalar *val; 207 const PetscScalar *vals; 208 209 PetscFunctionBegin; 210 ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr); 211 ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr); 212 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 213 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 214 if (a == 1.0) { 215 for (i = start; i < end; i++) { 216 ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 217 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 218 ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 219 220 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 221 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 222 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 223 } 224 } else { 225 PetscInt vs = 100; 226 /* realloc if needed, as this function may be used in parallel */ 227 ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr); 228 for (i=start; i<end; i++) { 229 ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 230 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 231 ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 232 233 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 234 if (vs < ncols) { 235 vs = PetscMin(2*ncols,n); 236 ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr); 237 } 238 for (j=0; j<ncols; j++) val[j] = a*vals[j]; 239 ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr); 240 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 241 } 242 ierr = PetscFree(val);CHKERRQ(ierr); 243 } 244 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 245 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 246 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 248 PetscFunctionReturn(0); 249 } 250 251 /*@ 252 MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix. 253 254 Neighbor-wise Collective on Mat 255 256 Input Parameters: 257 + Y - the matrices 258 - a - the PetscScalar 259 260 Level: intermediate 261 262 Notes: 263 If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 264 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 265 entry). 266 267 To form Y = Y + diag(V) use MatDiagonalSet() 268 269 Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is 270 preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix. 271 272 .keywords: matrix, add, shift 273 274 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale() 275 @*/ 276 PetscErrorCode MatShift(Mat Y,PetscScalar a) 277 { 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 282 if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 283 if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 284 MatCheckPreallocated(Y,1); 285 286 if (Y->ops->shift) { 287 ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr); 288 } else { 289 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 290 } 291 292 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 293 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 294 if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) { 295 Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 296 } 297 #endif 298 PetscFunctionReturn(0); 299 } 300 301 PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is) 302 { 303 PetscErrorCode ierr; 304 PetscInt i,start,end; 305 PetscScalar *v; 306 307 PetscFunctionBegin; 308 ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr); 309 ierr = VecGetArray(D,&v);CHKERRQ(ierr); 310 for (i=start; i<end; i++) { 311 ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr); 312 } 313 ierr = VecRestoreArray(D,&v);CHKERRQ(ierr); 314 ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 315 ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 316 PetscFunctionReturn(0); 317 } 318 319 /*@ 320 MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix 321 that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is 322 INSERT_VALUES. 323 324 Input Parameters: 325 + Y - the input matrix 326 . D - the diagonal matrix, represented as a vector 327 - i - INSERT_VALUES or ADD_VALUES 328 329 Neighbor-wise Collective on Mat and Vec 330 331 Notes: 332 If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 333 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 334 entry). 335 336 Level: intermediate 337 338 .keywords: matrix, add, shift, diagonal 339 340 .seealso: MatShift(), MatScale(), MatDiagonalScale() 341 @*/ 342 PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is) 343 { 344 PetscErrorCode ierr; 345 PetscInt matlocal,veclocal; 346 347 PetscFunctionBegin; 348 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 349 PetscValidHeaderSpecific(D,VEC_CLASSID,2); 350 ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr); 351 ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr); 352 if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal); 353 if (Y->ops->diagonalset) { 354 ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr); 355 } else { 356 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 357 } 358 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 359 PetscFunctionReturn(0); 360 } 361 362 /*@ 363 MatAYPX - Computes Y = a*Y + X. 364 365 Logically on Mat 366 367 Input Parameters: 368 + a - the PetscScalar multiplier 369 . Y - the first matrix 370 . X - the second matrix 371 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN 372 373 Level: intermediate 374 375 .keywords: matrix, add 376 377 .seealso: MatAXPY() 378 @*/ 379 PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str) 380 { 381 PetscScalar one = 1.0; 382 PetscErrorCode ierr; 383 PetscInt mX,mY,nX,nY; 384 385 PetscFunctionBegin; 386 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 387 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 388 PetscValidLogicalCollectiveScalar(Y,a,2); 389 ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr); 390 ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr); 391 if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY); 392 393 ierr = MatScale(Y,a);CHKERRQ(ierr); 394 ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 /*@ 399 MatComputeExplicitOperator - Computes the explicit matrix 400 401 Collective on Mat 402 403 Input Parameter: 404 . inmat - the matrix 405 406 Output Parameter: 407 . mat - the explict operator 408 409 Notes: 410 This computation is done by applying the operators to columns of the 411 identity matrix. 412 413 Currently, this routine uses a dense matrix format when 1 processor 414 is used and a sparse format otherwise. This routine is costly in general, 415 and is recommended for use only with relatively small systems. 416 417 Level: advanced 418 419 .keywords: Mat, compute, explicit, operator 420 @*/ 421 PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat) 422 { 423 PetscErrorCode ierr; 424 MPI_Comm comm; 425 PetscMPIInt size; 426 427 PetscFunctionBegin; 428 PetscValidHeaderSpecific(inmat,MAT_CLASSID,1); 429 PetscValidPointer(mat,2); 430 431 ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr); 432 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 433 ierr = MatConvert_Shell(inmat,size == 1 ? MATSEQDENSE : MATAIJ,MAT_INITIAL_MATRIX,mat);CHKERRQ(ierr); 434 PetscFunctionReturn(0); 435 } 436 437 /*@ 438 MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of 439 a give matrix that can apply MatMultTranspose() 440 441 Collective on Mat 442 443 Input Parameter: 444 . inmat - the matrix 445 446 Output Parameter: 447 . mat - the explict operator transposed 448 449 Notes: 450 This computation is done by applying the transpose of the operator to columns of the 451 identity matrix. 452 453 Currently, this routine uses a dense matrix format when 1 processor 454 is used and a sparse format otherwise. This routine is costly in general, 455 and is recommended for use only with relatively small systems. 456 457 Level: advanced 458 459 .keywords: Mat, compute, explicit, operator 460 @*/ 461 PetscErrorCode MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat) 462 { 463 Vec in,out; 464 PetscErrorCode ierr; 465 PetscInt i,m,n,M,N,*rows,start,end; 466 MPI_Comm comm; 467 PetscScalar *array,zero = 0.0,one = 1.0; 468 PetscMPIInt size; 469 470 PetscFunctionBegin; 471 PetscValidHeaderSpecific(inmat,MAT_CLASSID,1); 472 PetscValidPointer(mat,2); 473 474 ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr); 475 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 476 477 ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr); 478 ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr); 479 ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr); 480 ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 481 ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr); 482 ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr); 483 for (i=0; i<m; i++) rows[i] = start + i; 484 485 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 486 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 487 if (size == 1) { 488 ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr); 489 ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr); 490 } else { 491 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 492 ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr); 493 } 494 495 for (i=0; i<N; i++) { 496 497 ierr = VecSet(in,zero);CHKERRQ(ierr); 498 ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); 499 ierr = VecAssemblyBegin(in);CHKERRQ(ierr); 500 ierr = VecAssemblyEnd(in);CHKERRQ(ierr); 501 502 ierr = MatMultTranspose(inmat,in,out);CHKERRQ(ierr); 503 504 ierr = VecGetArray(out,&array);CHKERRQ(ierr); 505 ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 506 ierr = VecRestoreArray(out,&array);CHKERRQ(ierr); 507 508 } 509 ierr = PetscFree(rows);CHKERRQ(ierr); 510 ierr = VecDestroy(&out);CHKERRQ(ierr); 511 ierr = VecDestroy(&in);CHKERRQ(ierr); 512 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 513 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 514 PetscFunctionReturn(0); 515 } 516 517 /*@ 518 MatChop - Set all values in the matrix less than the tolerance to zero 519 520 Input Parameters: 521 + A - The matrix 522 - tol - The zero tolerance 523 524 Output Parameters: 525 . A - The chopped matrix 526 527 Level: intermediate 528 529 .seealso: MatCreate(), MatZeroEntries() 530 @*/ 531 PetscErrorCode MatChop(Mat A, PetscReal tol) 532 { 533 PetscScalar *newVals; 534 PetscInt *newCols; 535 PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0; 536 PetscErrorCode ierr; 537 538 PetscFunctionBegin; 539 ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr); 540 for (r = rStart; r < rEnd; ++r) { 541 PetscInt ncols; 542 543 ierr = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr); 544 colMax = PetscMax(colMax, ncols);CHKERRQ(ierr); 545 ierr = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr); 546 } 547 numRows = rEnd - rStart; 548 ierr = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 549 ierr = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr); 550 for (r = rStart; r < rStart+maxRows; ++r) { 551 const PetscScalar *vals; 552 const PetscInt *cols; 553 PetscInt ncols, newcols, c; 554 555 if (r < rEnd) { 556 ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); 557 for (c = 0; c < ncols; ++c) { 558 newCols[c] = cols[c]; 559 newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c]; 560 } 561 newcols = ncols; 562 ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); 563 ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr); 564 } 565 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 566 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 567 } 568 ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571