#include <petsc/private/matimpl.h> /*I "petscmat.h" I*/

/*@
   MatAXPY - Computes Y = a*X + Y.

   Logically Collective on Mat

   Input Parameters:
+  a - the scalar multiplier
.  X - the first matrix
.  Y - the second matrix
-  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
         or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

   Level: intermediate

.keywords: matrix, add

.seealso: MatAYPX()
 @*/
PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       m1,m2,n1,n2;
  PetscBool      sametype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(X,MAT_CLASSID,3);
  PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
  PetscValidLogicalCollectiveScalar(Y,a,2);
  /* X and Y must have identical global dimensions */
  ierr = MatGetSize(X,&m1,&n1);CHKERRQ(ierr);
  ierr = MatGetSize(Y,&m2,&n2);CHKERRQ(ierr);
  if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);

  /* dispatch to the type-specific implementation only when X and Y have the same
     matrix type; otherwise fall back to the generic row-by-row implementation */
  ierr = PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
  if (Y->ops->axpy && sametype) {
    ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL)
  /* Y changed on the host; mark any allocated device copy as out of date */
  if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
  }
#elif defined(PETSC_HAVE_VECCUDA)
  if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_CUDA_CPU;
  }
#endif
  PetscFunctionReturn(0);
}

/* Generic fallback for MatAXPY(): adds a*X into Y one locally owned row at a time
   with MatSetValues(ADD_VALUES), then assembles Y.
   Note: str is not consulted by this implementation. */
PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt          i,start,end,j,ncols,m,n;
  PetscErrorCode    ierr;
  const PetscInt    *row;
  PetscScalar       *val;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
  if (a == 1.0) {
    /* no scaling needed: add the rows of X into Y directly */
    for (i = start; i < end; i++) {
      ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
    }
  } else {
    /* scratch buffer large enough for any row (n = global number of columns) */
    ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr);
    for (i=start; i<end; i++) {
      ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
      for (j=0; j<ncols; j++) {
        val[j] = a*vals[j];
      }
      ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
    }
    ierr = PetscFree(val);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Like MatAXPY_Basic() but accumulates the sum Y + a*X into a separate, already
   created matrix B, leaving Y unchanged.
   Note: str is not consulted by this implementation. */
PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt          i,start,end,j,ncols,m,n;
  PetscErrorCode    ierr;
  const PetscInt    *row;
  PetscScalar       *val;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
  if (a == 1.0) {
    for (i = start; i < end; i++) {
      /* copy the row of Y into B ... */
      ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);

      /* ... then add the corresponding row of X */
      ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
    }
  } else {
    /* scratch buffer large enough for any row (n = global number of columns) */
    ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr);
    for (i=start; i<end; i++) {
      ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);

      ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
      for (j=0; j<ncols; j++) {
        val[j] = a*vals[j];
      }
      ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
    }
    ierr = PetscFree(val);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.

   Neighbor-wise Collective on Mat

   Input Parameters:
+  Y - the matrix
-  a - the PetscScalar

   Level: intermediate

   Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
   entry).

   To form Y = Y + diag(V) use MatDiagonalSet()

   Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
    preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
151 152 .keywords: matrix, add, shift 153 154 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale() 155 @*/ 156 PetscErrorCode MatShift(Mat Y,PetscScalar a) 157 { 158 PetscErrorCode ierr; 159 160 PetscFunctionBegin; 161 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 162 if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 163 if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 164 MatCheckPreallocated(Y,1); 165 166 if (Y->ops->shift) { 167 ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr); 168 } else { 169 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 170 } 171 172 #if defined(PETSC_HAVE_VIENNACL) 173 if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) { 174 Y->valid_GPU_matrix = PETSC_VIENNACL_CPU; 175 } 176 #elif defined(PETSC_HAVE_VECCUDA) 177 if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) { 178 Y->valid_GPU_matrix = PETSC_CUDA_CPU; 179 } 180 #endif 181 PetscFunctionReturn(0); 182 } 183 184 PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is) 185 { 186 PetscErrorCode ierr; 187 PetscInt i,start,end; 188 PetscScalar *v; 189 190 PetscFunctionBegin; 191 ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr); 192 ierr = VecGetArray(D,&v);CHKERRQ(ierr); 193 for (i=start; i<end; i++) { 194 ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr); 195 } 196 ierr = VecRestoreArray(D,&v);CHKERRQ(ierr); 197 ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 198 ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 199 PetscFunctionReturn(0); 200 } 201 202 /*@ 203 MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix 204 that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is 205 INSERT_VALUES. 206 207 Input Parameters: 208 + Y - the input matrix 209 . 
D - the diagonal matrix, represented as a vector
-  is - INSERT_VALUES or ADD_VALUES

   Neighbor-wise Collective on Mat and Vec

   Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
   entry).

   Level: intermediate

.keywords: matrix, add, shift, diagonal

.seealso: MatShift(), MatScale(), MatDiagonalScale()
 @*/
PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  PetscInt       mrows,vrows;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
  PetscValidHeaderSpecific(D,VEC_CLASSID,2);
  /* D must supply exactly one value per locally owned row of Y */
  ierr = MatGetLocalSize(Y,&mrows,NULL);CHKERRQ(ierr);
  ierr = VecGetLocalSize(D,&vrows);CHKERRQ(ierr);
  if (mrows != vrows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",mrows,vrows);
  /* use the type-specific implementation when available, else the generic one */
  if (!Y->ops->diagonalset) {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  } else {
    ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@
   MatAYPX - Computes Y = a*Y + X.

   Logically Collective on Mat

   Input Parameters:
+  a - the PetscScalar multiplier
.  Y - the first matrix
.
X - the second matrix
-  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN

   Level: intermediate

.keywords: matrix, add

.seealso: MatAXPY()
 @*/
PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscScalar    one = 1.0;
  PetscErrorCode ierr;
  PetscInt       mX,mY,nX,nY;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(X,MAT_CLASSID,3);
  PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
  PetscValidLogicalCollectiveScalar(Y,a,2);
  /* X and Y must have identical global dimensions */
  ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr);
  ierr = MatGetSize(Y,&mY,&nY);CHKERRQ(ierr); /* bug fix: previously queried X a second time, so the check below could never fail */
  if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);

  /* Y = a*Y, then Y = Y + 1*X */
  ierr = MatScale(Y,a);CHKERRQ(ierr);
  ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatComputeExplicitOperator - Computes the explicit matrix

   Collective on Mat

   Input Parameter:
.  inmat - the matrix

   Output Parameter:
.  mat - the explicit operator

   Notes:
   This computation is done by applying the operators to columns of the
   identity matrix.

   Currently, this routine uses a dense matrix format when 1 processor
   is used and a sparse format otherwise.  This routine is costly in general,
   and is recommended for use only with relatively small systems.
   Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat)
{
  Vec            in,out;
  PetscErrorCode ierr;
  PetscInt       i,m,n,M,N,*rows,start,end;
  MPI_Comm       comm;
  PetscScalar    *array,zero = 0.0,one = 1.0;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
  PetscValidPointer(mat,2);

  ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr);
  ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr);
  ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr);
  /* every rank sets global entry i of 'in' below; sets for entries the rank
     does not own are silently dropped */
  ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr);
  /* global indices of the locally owned rows of the result */
  ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr);
  for (i=0; i<m; i++) rows[i] = start + i;

  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr);
  }

  /* column i of the result is inmat applied to the i-th column of the identity */
  for (i=0; i<N; i++) {

    ierr = VecSet(in,zero);CHKERRQ(ierr);
    ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecAssemblyBegin(in);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(in);CHKERRQ(ierr);

    ierr = MatMult(inmat,in,out);CHKERRQ(ierr);

    ierr = VecGetArray(out,&array);CHKERRQ(ierr);
    ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecRestoreArray(out,&array);CHKERRQ(ierr);

  }
  ierr = PetscFree(rows);CHKERRQ(ierr);
  ierr = VecDestroy(&out);CHKERRQ(ierr);
  ierr = VecDestroy(&in);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
        a give matrix that can apply MatMultTranspose()

   Collective on Mat

   Input Parameter:
.  inmat - the matrix

   Output Parameter:
.  mat - the explict operator transposed

   Notes:
   This computation is done by applying the transpose of the operator to columns of the
   identity matrix.

   Currently, this routine uses a dense matrix format when 1 processor
   is used and a sparse format otherwise.  This routine is costly in general,
   and is recommended for use only with relatively small systems.

   Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
{
  Vec            in,out;
  PetscErrorCode ierr;
  PetscInt       i,m,n,M,N,*rows,start,end;
  MPI_Comm       comm;
  PetscScalar    *array,zero = 0.0,one = 1.0;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
  PetscValidPointer(mat,2);

  ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr);
  ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr);
  /* NOTE(review): 'in' is a right-hand (column-space) vector and 'out' a left-hand
     (row-space) vector, yet they are passed to MatMultTranspose() below, which
     expects the opposite; this appears to assume inmat is square -- confirm for
     rectangular operators */
  ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr);
  /* every rank sets global entry i of 'in' below; sets for entries the rank
     does not own are silently dropped */
  ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr);
  /* global indices of the locally owned rows of the result */
  ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr);
  for (i=0; i<m; i++) rows[i] = start + i;

  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr);
  }

  /* column i of the result is inmat^T applied to the i-th column of the identity */
  for (i=0; i<N; i++) {

    ierr = VecSet(in,zero);CHKERRQ(ierr);
    ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecAssemblyBegin(in);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(in);CHKERRQ(ierr);

    ierr = MatMultTranspose(inmat,in,out);CHKERRQ(ierr);

    ierr = VecGetArray(out,&array);CHKERRQ(ierr);
    ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecRestoreArray(out,&array);CHKERRQ(ierr);

  }
  ierr = PetscFree(rows);CHKERRQ(ierr);
  ierr = VecDestroy(&out);CHKERRQ(ierr);
  ierr = VecDestroy(&in);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatChop - Set all values in the matrix less than the tolerance to zero

  Input Parameters:
+ A - The matrix
- tol - The zero tolerance

  Output Parameters:
.
A - The chopped matrix

  Level: intermediate

.seealso: MatCreate(), MatZeroEntries()
 @*/
PetscErrorCode MatChop(Mat A, PetscReal tol)
{
  PetscScalar    *newVals;
  PetscInt       *newCols;
  PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr);
  /* the widest local row determines the size of the scratch buffers */
  for (r = rStart; r < rEnd; ++r) {
    PetscInt ncols;

    ierr = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
    colMax = PetscMax(colMax, ncols); /* bug fix: removed stray CHKERRQ(ierr) that rechecked a stale error code */
    ierr = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
  }
  numRows = rEnd - rStart;
  /* every rank must execute the loop below the same number of times because
     MatAssemblyBegin/End are collective */
  ierr = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  ierr = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr);
  for (r = rStart; r < rStart+maxRows; ++r) {
    const PetscScalar *vals;
    const PetscInt    *cols;
    PetscInt           ncols, newcols, c;

    if (r < rEnd) {
      ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
      /* zero out the small entries; keep the column pattern unchanged */
      for (c = 0; c < ncols; ++c) {
        newCols[c] = cols[c];
        newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
      }
      newcols = ncols;
      ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
      ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}