1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameters: 76 + m - the number of rows in S 77 . n - the number of columns in S 78 - S - the S matrix, in form of a scalar array in column-major format 79 80 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 if (m) *m = b->p; 91 if (n) *n = b->q; 92 if (S) *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98 99 Not Collective; the entire S is stored and returned independently on all processes. 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameters: 105 + m - the number of rows in S 106 . n - the number of columns in S 107 - S - the S matrix, in form of a scalar array in column-major format 108 109 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 if (m) *m = b->p; 120 if (n) *n = b->q; 121 if (S) *S = b->S; 122 PetscFunctionReturn(0); 123 } 124 125 /*@C 126 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127 128 Not collective 129 130 Input Parameter: 131 . A - the KAIJ matrix 132 133 Output Parameter: 134 . S - location of pointer to array obtained with MatKAIJGetS() 135 136 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 138 139 Level: advanced 140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141 @*/ 142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143 { 144 PetscFunctionBegin; 145 if (S) *S = NULL; 146 PetscObjectStateIncrease((PetscObject)A); 147 PetscFunctionReturn(0); 148 } 149 150 /*@C 151 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 152 153 Not collective 154 155 Input Parameter: 156 . A - the KAIJ matrix 157 158 Output Parameter: 159 . S - location of pointer to array obtained with MatKAIJGetS() 160 161 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 162 If NULL is passed, it will not attempt to zero the array pointer. 163 164 Level: advanced 165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 166 @*/ 167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 168 { 169 PetscFunctionBegin; 170 if (S) *S = NULL; 171 PetscFunctionReturn(0); 172 } 173 174 /*@C 175 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 176 177 Not Collective; the entire T is stored and returned independently on all processes 178 179 Input Parameter: 180 . A - the KAIJ matrix 181 182 Output Parameter: 183 + m - the number of rows in T 184 . n - the number of columns in T 185 - T - the T matrix, in form of a scalar array in column-major format 186 187 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 188 189 Level: advanced 190 191 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 192 @*/ 193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 194 { 195 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 196 PetscFunctionBegin; 197 if (m) *m = b->p; 198 if (n) *n = b->q; 199 if (T) *T = b->T; 200 PetscFunctionReturn(0); 201 } 202 203 /*@C 204 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 205 206 Not Collective; the entire T is stored and returned independently on all processes 207 208 Input Parameter: 209 . A - the KAIJ matrix 210 211 Output Parameter: 212 + m - the number of rows in T 213 . n - the number of columns in T 214 - T - the T matrix, in form of a scalar array in column-major format 215 216 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 217 218 Level: advanced 219 220 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 221 @*/ 222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 223 { 224 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 225 PetscFunctionBegin; 226 if (m) *m = b->p; 227 if (n) *n = b->q; 228 if (T) *T = b->T; 229 PetscFunctionReturn(0); 230 } 231 232 /*@C 233 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 234 235 Not collective 236 237 Input Parameter: 238 . A - the KAIJ matrix 239 240 Output Parameter: 241 . T - location of pointer to array obtained with MatKAIJGetS() 242 243 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 244 If NULL is passed, it will not attempt to zero the array pointer. 245 246 Level: advanced 247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 248 @*/ 249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 250 { 251 PetscFunctionBegin; 252 if (T) *T = NULL; 253 PetscObjectStateIncrease((PetscObject)A); 254 PetscFunctionReturn(0); 255 } 256 257 /*@C 258 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 259 260 Not collective 261 262 Input Parameter: 263 . A - the KAIJ matrix 264 265 Output Parameter: 266 . T - location of pointer to array obtained with MatKAIJGetS() 267 268 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 269 If NULL is passed, it will not attempt to zero the array pointer. 270 271 Level: advanced 272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 273 @*/ 274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 275 { 276 PetscFunctionBegin; 277 if (T) *T = NULL; 278 PetscFunctionReturn(0); 279 } 280 281 /*@ 282 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 283 284 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 285 286 Input Parameters: 287 + A - the KAIJ matrix 288 - B - the AIJ matrix 289 290 Notes: 291 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 292 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 293 294 Level: advanced 295 296 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 297 @*/ 298 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 299 { 300 PetscErrorCode ierr; 301 PetscMPIInt size; 302 303 PetscFunctionBegin; 304 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 305 if (size == 1) { 306 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 307 a->AIJ = B; 308 } else { 309 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 310 a->A = B; 311 } 312 ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 /*@C 317 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 318 319 Logically Collective; the entire S is stored independently on all processes. 320 321 Input Parameters: 322 + A - the KAIJ matrix 323 . p - the number of rows in S 324 . q - the number of columns in S 325 - S - the S matrix, in form of a scalar array in column-major format 326 327 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 328 The S matrix is copied, so the user can destroy this array. 329 330 Level: Advanced 331 332 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 333 @*/ 334 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 335 { 336 PetscErrorCode ierr; 337 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 338 339 PetscFunctionBegin; 340 ierr = PetscFree(a->S);CHKERRQ(ierr); 341 if (S) { 342 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 343 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 344 } else a->S = NULL; 345 346 a->p = p; 347 a->q = q; 348 PetscFunctionReturn(0); 349 } 350 351 /*@C 352 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 353 354 Logically Collective; the entire T is stored independently on all processes. 355 356 Input Parameters: 357 + A - the KAIJ matrix 358 . p - the number of rows in S 359 . q - the number of columns in S 360 - T - the T matrix, in form of a scalar array in column-major format 361 362 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 363 The T matrix is copied, so the user can destroy this array. 364 365 Level: Advanced 366 367 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 368 @*/ 369 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 370 { 371 PetscErrorCode ierr; 372 PetscInt i,j; 373 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 374 PetscBool isTI = PETSC_FALSE; 375 376 PetscFunctionBegin; 377 /* check if T is an identity matrix */ 378 if (T && (p == q)) { 379 isTI = PETSC_TRUE; 380 for (i=0; i<p; i++) { 381 for (j=0; j<q; j++) { 382 if (i == j) { 383 /* diagonal term must be 1 */ 384 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 385 } else { 386 /* off-diagonal term must be 0 */ 387 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 388 } 389 } 390 } 391 } 392 a->isTI = isTI; 393 394 ierr = PetscFree(a->T);CHKERRQ(ierr); 395 if (T && (!isTI)) { 396 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 397 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 398 } else a->T = NULL; 399 400 a->p = p; 401 a->q = q; 402 PetscFunctionReturn(0); 403 } 404 405 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 406 { 407 PetscErrorCode ierr; 408 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 409 410 PetscFunctionBegin; 411 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 412 ierr = PetscFree(b->S);CHKERRQ(ierr); 413 ierr = PetscFree(b->T);CHKERRQ(ierr); 414 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 415 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 416 ierr = PetscFree(A->data);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 PetscErrorCode MatSetUp_KAIJ(Mat A) 421 { 422 PetscErrorCode ierr; 423 PetscInt n; 424 PetscMPIInt size; 425 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 426 427 PetscFunctionBegin; 428 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 429 if (size == 1) { 430 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 431 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 432 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 433 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 434 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 435 } else { 436 Mat_MPIKAIJ *a; 437 Mat_MPIAIJ *mpiaij; 438 IS from,to; 439 Vec gvec; 440 PetscScalar *T; 441 PetscInt i,j; 442 443 a = (Mat_MPIKAIJ*)A->data; 444 mpiaij = (Mat_MPIAIJ*)a->A->data; 445 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 446 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 447 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 448 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 449 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 450 451 if (a->isTI) { 452 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 453 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 454 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 455 * to pass in. */ 456 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 457 for (i=0; i<a->p; i++) { 458 for (j=0; j<a->q; j++) { 459 if (i==j) T[i+j*a->p] = 1.0; 460 else T[i+j*a->p] = 0.0; 461 } 462 } 463 } else T = a->T; 464 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 465 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 466 if (a->isTI) { 467 ierr = PetscFree(T);CHKERRQ(ierr); 468 } 469 470 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 471 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 472 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 473 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 474 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 475 476 /* create two temporary Index sets for build scatter gather */ 477 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 478 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 479 480 /* create temporary global vector to generate scatter context */ 481 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 482 483 /* generate the scatter context */ 484 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 485 486 ierr = ISDestroy(&from);CHKERRQ(ierr); 487 ierr = ISDestroy(&to);CHKERRQ(ierr); 488 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 489 } 490 491 A->assembled = PETSC_TRUE; 492 PetscFunctionReturn(0); 493 } 494 495 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer) 496 { 497 PetscViewerFormat format; 498 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 499 Mat B; 500 PetscInt i; 501 PetscErrorCode ierr; 502 PetscBool ismpikaij; 503 504 PetscFunctionBegin; 505 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 506 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 507 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 508 ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr); 509 510 /* Print appropriate details for S. */ 511 if (!a->S) { 512 ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n"); 513 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 514 ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr); 515 for (i=0; i<(a->p * a->q); i++) { 516 #if defined(PETSC_USE_COMPLEX) 517 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr); 518 #else 519 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr); 520 #endif 521 } 522 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 523 } 524 525 /* Print appropriate details for T. */ 526 if (a->isTI) { 527 ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n"); 528 } else if (!a->T) { 529 ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n"); 530 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 531 ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr); 532 for (i=0; i<(a->p * a->q); i++) { 533 #if defined(PETSC_USE_COMPLEX) 534 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr); 535 #else 536 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr); 537 #endif 538 } 539 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 540 } 541 542 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 543 ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr); 544 if (ismpikaij) { 545 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 546 ierr = MatView(b->A,viewer);CHKERRQ(ierr); 547 } else { 548 ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr); 549 } 550 551 } else { 552 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 553 if (ismpikaij) { 554 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 555 } else { 556 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 557 } 558 ierr = MatView(B,viewer);CHKERRQ(ierr); 559 ierr = MatDestroy(&B);CHKERRQ(ierr); 560 } 561 PetscFunctionReturn(0); 562 } 563 564 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 565 { 566 PetscErrorCode ierr; 567 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 568 569 PetscFunctionBegin; 570 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 571 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 572 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 573 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 574 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 575 ierr = PetscFree(b->S);CHKERRQ(ierr); 576 ierr = PetscFree(b->T);CHKERRQ(ierr); 577 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 578 ierr = PetscFree(A->data);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 /* --------------------------------------------------------------------------------------*/ 583 584 /* zz = yy + Axx */ 585 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 586 { 587 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 588 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 589 const PetscScalar *s = b->S, *t = b->T; 590 const PetscScalar *x,*v,*bx; 591 PetscScalar *y,*sums; 592 PetscErrorCode ierr; 593 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 594 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 595 596 PetscFunctionBegin; 597 if (!yy) { 598 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 599 } else { 600 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 601 } 602 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 603 604 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 605 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 606 idx = a->j; 607 v = a->a; 608 ii = a->i; 609 610 if (b->isTI) { 611 for (i=0; i<m; i++) { 612 jrow = ii[i]; 613 n = ii[i+1] - jrow; 614 sums = y + p*i; 615 for (j=0; j<n; j++) { 616 for (k=0; k<p; k++) { 617 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 618 } 619 } 620 } 621 } else if (t) { 622 for (i=0; i<m; i++) { 623 jrow = ii[i]; 624 n = ii[i+1] - jrow; 625 sums = y + p*i; 626 bx = x + q*i; 627 for (j=0; j<n; j++) { 628 for (k=0; k<p; k++) { 629 for (l=0; l<q; l++) { 630 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 631 } 632 } 633 } 634 } 635 } 636 if (s) { 637 for (i=0; i<m; i++) { 638 sums = y + p*i; 639 bx = x + q*i; 640 if (i < b->AIJ->cmap->n) { 641 for (j=0; j<q; j++) { 642 for (k=0; k<p; k++) { 643 sums[k] += s[k+j*p]*bx[j]; 644 } 645 } 646 } 647 } 648 } 649 650 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 651 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 652 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 653 PetscFunctionReturn(0); 654 } 655 656 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 657 { 658 PetscErrorCode ierr; 659 PetscFunctionBegin; 660 ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } 663 664 #include <petsc/private/kernels/blockinvert.h> 665 666 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 667 { 668 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 669 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 670 const PetscScalar *S = b->S; 671 const PetscScalar *T = b->T; 672 const PetscScalar *v = a->a; 673 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 674 PetscErrorCode ierr; 675 PetscInt i,j,*v_pivots,dof,dof2; 676 PetscScalar *diag,aval,*v_work; 677 678 PetscFunctionBegin; 679 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 680 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 681 682 dof = p; 683 dof2 = dof*dof; 684 685 if (b->ibdiagvalid) { 686 if (values) *values = b->ibdiag; 687 PetscFunctionReturn(0); 688 } 689 if (!b->ibdiag) { 690 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 691 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 692 } 693 if (values) *values = b->ibdiag; 694 diag = b->ibdiag; 695 696 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 697 for (i=0; i<m; i++) { 698 if (S) { 699 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 700 } else { 701 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 702 } 703 if (b->isTI) { 704 aval = 0; 705 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 706 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 707 } else if (T) { 708 aval = 0; 709 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 710 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 711 } 712 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 713 diag += dof2; 714 } 715 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 716 717 b->ibdiagvalid = PETSC_TRUE; 718 PetscFunctionReturn(0); 719 } 720 721 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 722 { 723 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 724 725 PetscFunctionBegin; 726 *B = kaij->AIJ; 727 PetscFunctionReturn(0); 728 } 729 730 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 731 { 732 PetscErrorCode ierr; 733 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 734 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 735 const PetscScalar *aa = a->a, *T = kaij->T, *v; 736 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 737 const PetscScalar *b, *xb, *idiag; 738 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 739 PetscInt i, j, k, i2, bs, bs2, nz; 740 741 PetscFunctionBegin; 742 its = its*lits; 743 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 744 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 745 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 746 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 747 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks"); 748 else {bs = p; bs2 = bs*bs; } 749 750 if (!m) PetscFunctionReturn(0); 751 752 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); } 753 idiag = kaij->ibdiag; 754 diag = a->diag; 755 756 if (!kaij->sor.setup) { 757 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 758 kaij->sor.setup = PETSC_TRUE; 759 } 760 y = kaij->sor.y; 761 w = kaij->sor.w; 762 work = kaij->sor.work; 763 t = kaij->sor.t; 764 arr = kaij->sor.arr; 765 766 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 767 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 768 769 if (flag & SOR_ZERO_INITIAL_GUESS) { 770 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 771 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 772 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 773 i2 = bs; 774 idiag += bs2; 775 for (i=1; i<m; i++) { 776 v = aa + ai[i]; 777 vi = aj + ai[i]; 778 nz = diag[i] - ai[i]; 779 780 if (T) { /* b - T (Arow * x) */ 781 ierr = PetscMemzero(w,bs*sizeof(PetscScalar)); 782 for (j=0; j<nz; j++) { 783 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 784 } 785 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 786 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 787 } else if (kaij->isTI) { 788 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 789 for (j=0; j<nz; j++) { 790 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 791 } 792 } else { 793 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 794 } 795 796 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 797 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 798 799 idiag += bs2; 800 i2 += bs; 801 } 802 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 803 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 804 xb = t; 805 } else xb = b; 806 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 807 idiag = kaij->ibdiag+bs2*(m-1); 808 i2 = bs * (m-1); 809 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 810 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 811 i2 -= bs; 812 idiag -= bs2; 813 for (i=m-2; i>=0; i--) { 814 v = aa + diag[i] + 1 ; 815 vi = aj + diag[i] + 1; 816 nz = ai[i+1] - diag[i] - 1; 817 818 if (T) { /* FIXME: This branch untested */ 819 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 820 /* copy all rows of x that are needed into contiguous space */ 821 workt = work; 822 for (j=0; j<nz; j++) { 823 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 824 workt += bs; 825 } 826 arrt = arr; 827 for (j=0; j<nz; j++) { 828 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 829 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 830 arrt += bs2; 831 } 832 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 833 } else if (kaij->isTI) { 834 ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 835 for (j=0; j<nz; j++) { 836 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 837 } 838 } 839 840 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 841 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 842 843 idiag -= bs2; 844 i2 -= bs; 845 } 846 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 847 } 848 its--; 849 } 850 while (its--) { /* FIXME: This branch not updated */ 851 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 852 i2 = 0; 853 idiag = kaij->ibdiag; 854 for (i=0; i<m; i++) { 855 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 856 857 v = aa + ai[i]; 858 vi = aj + ai[i]; 859 nz = diag[i] - ai[i]; 860 workt = work; 861 for (j=0; j<nz; j++) { 862 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 863 workt += bs; 864 } 865 arrt = arr; 866 if (T) { 867 for (j=0; j<nz; j++) { 868 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 869 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 870 arrt += bs2; 871 } 872 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 873 } else if (kaij->isTI) { 874 for (j=0; j<nz; j++) { 875 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 876 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 877 arrt += bs2; 878 } 879 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 880 } 881 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 882 883 v = aa + diag[i] + 1; 884 vi = aj + diag[i] + 1; 885 nz = ai[i+1] - diag[i] - 1; 886 workt = work; 887 for (j=0; j<nz; j++) { 888 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 889 workt += bs; 890 } 891 arrt = arr; 892 if (T) { 893 for (j=0; j<nz; j++) { 894 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 895 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 896 arrt += bs2; 897 } 898 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 899 } else if (kaij->isTI) { 900 for (j=0; j<nz; j++) { 901 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 902 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 903 arrt += bs2; 904 } 905 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 906 } 907 908 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 909 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 910 911 idiag += bs2; 912 i2 += bs; 913 } 914 xb = t; 915 } 916 else xb = b; 917 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 918 idiag = kaij->ibdiag+bs2*(m-1); 919 i2 = bs * (m-1); 920 if (xb == b) { 921 for (i=m-1; i>=0; i--) { 922 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 923 924 v = aa + ai[i]; 925 vi = aj + ai[i]; 926 nz = diag[i] - ai[i]; 927 workt = work; 928 for (j=0; j<nz; j++) { 929 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 930 workt += bs; 931 } 932 arrt = arr; 933 if (T) { 934 for (j=0; j<nz; j++) { 935 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 936 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 937 arrt += bs2; 938 } 939 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 940 } else if (kaij->isTI) { 941 for (j=0; j<nz; j++) { 942 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 943 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 944 arrt += bs2; 945 } 946 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 947 } 948 949 v = aa + diag[i] + 1; 950 vi = aj + diag[i] + 1; 951 nz = ai[i+1] - diag[i] - 1; 952 workt = work; 953 for (j=0; j<nz; j++) { 954 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 955 workt += bs; 956 } 957 arrt = arr; 958 if (T) { 959 for (j=0; j<nz; j++) { 960 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 961 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 962 arrt += bs2; 963 } 964 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 965 } else if (kaij->isTI) { 966 for (j=0; j<nz; j++) { 967 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 968 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 969 arrt += bs2; 970 } 971 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 972 } 973 974 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 975 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 976 } 977 } else { 978 for (i=m-1; i>=0; i--) { 979 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 980 v = aa + diag[i] + 1; 981 vi = aj + diag[i] + 1; 982 nz = ai[i+1] - diag[i] - 1; 983 workt = work; 984 for (j=0; j<nz; j++) { 985 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 986 workt += bs; 987 } 988 arrt = arr; 989 if (T) { 990 for (j=0; j<nz; j++) { 991 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 992 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 993 arrt += bs2; 994 } 995 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 996 } else if (kaij->isTI) { 997 for (j=0; j<nz; j++) { 998 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 999 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1000 arrt += bs2; 1001 } 1002 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1003 } 1004 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1005 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1006 } 1007 idiag -= bs2; 1008 i2 -= bs; 1009 } 1010 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1011 } 1012 } 1013 1014 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1015 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 /*===================================================================================*/ 1020 1021 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1022 { 1023 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1024 PetscErrorCode ierr; 1025 1026 PetscFunctionBegin; 1027 if (!yy) { 1028 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 1029 } else { 1030 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 1031 } 1032 /* start the scatter */ 1033 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 1037 PetscFunctionReturn(0); 1038 } 1039 1040 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 1041 { 1042 PetscErrorCode ierr; 1043 PetscFunctionBegin; 1044 ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 1045 PetscFunctionReturn(0); 1046 } 1047 1048 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 1049 { 1050 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1051 PetscErrorCode ierr; 1052 1053 PetscFunctionBegin; 1054 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 1058 /* ----------------------------------------------------------------*/ 1059 1060 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1061 { 1062 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1063 PetscErrorCode diag = PETSC_FALSE; 1064 PetscErrorCode ierr; 1065 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1066 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1067 1068 PetscFunctionBegin; 1069 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1070 b->getrowactive = PETSC_TRUE; 1071 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 1072 1073 if ((!S) && (!T) && (!b->isTI)) { 1074 if (ncols) *ncols = 0; 1075 if (cols) *cols = NULL; 1076 if (values) *values = NULL; 1077 PetscFunctionReturn(0); 1078 } 1079 1080 if (T || b->isTI) { 1081 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 1082 c = nzaij; 1083 for (i=0; i<nzaij; i++) { 1084 /* check if this row contains a diagonal entry */ 1085 if (colsaij[i] == r) { 1086 diag = PETSC_TRUE; 1087 c = i; 1088 } 1089 } 1090 } else nzaij = c = 0; 1091 1092 /* calculate size of row */ 1093 nz = 0; 1094 if (S) nz += q; 1095 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1096 1097 if (cols || values) { 1098 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1099 for (i=0; i<q; i++) { 1100 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1101 v[i] = 0.0; 1102 } 1103 if (b->isTI) { 1104 for (i=0; i<nzaij; i++) { 1105 for (j=0; j<q; j++) { 1106 idx[i*q+j] = colsaij[i]*q+j; 1107 v[i*q+j] = (j==s ? vaij[i] : 0); 1108 } 1109 } 1110 } else if (T) { 1111 for (i=0; i<nzaij; i++) { 1112 for (j=0; j<q; j++) { 1113 idx[i*q+j] = colsaij[i]*q+j; 1114 v[i*q+j] = vaij[i]*T[s+j*p]; 1115 } 1116 } 1117 } 1118 if (S) { 1119 for (j=0; j<q; j++) { 1120 idx[c*q+j] = r*q+j; 1121 v[c*q+j] += S[s+j*p]; 1122 } 1123 } 1124 } 1125 1126 if (ncols) *ncols = nz; 1127 if (cols) *cols = idx; 1128 if (values) *values = v; 1129 PetscFunctionReturn(0); 1130 } 1131 1132 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1133 { 1134 PetscErrorCode ierr; 1135 PetscFunctionBegin; 1136 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1137 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1138 PetscFunctionReturn(0); 1139 } 1140 1141 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1142 { 1143 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1144 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1145 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1146 Mat AIJ = b->A; 1147 PetscBool diag = PETSC_FALSE; 1148 PetscErrorCode ierr; 1149 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1150 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1151 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1152 1153 PetscFunctionBegin; 1154 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1155 b->getrowactive = PETSC_TRUE; 1156 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1157 lrow = row - rstart; 1158 1159 if ((!S) && (!T) && (!b->isTI)) { 1160 if (ncols) *ncols = 0; 1161 if (cols) *cols = NULL; 1162 if (values) *values = NULL; 1163 PetscFunctionReturn(0); 1164 } 1165 1166 r = lrow/p; 1167 s = lrow%p; 1168 1169 if (T || b->isTI) { 1170 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 1171 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 1172 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 1173 1174 c = ncolsaij + ncolsoaij; 1175 for (i=0; i<ncolsaij; i++) { 1176 /* check if this row contains a diagonal entry */ 1177 if (colsaij[i] == r) { 1178 diag = PETSC_TRUE; 1179 c = i; 1180 } 1181 } 1182 } else c = 0; 1183 1184 /* calculate size of row */ 1185 nz = 0; 1186 if (S) nz += q; 1187 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1188 1189 if (cols || values) { 1190 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1191 for (i=0; i<q; i++) { 1192 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1193 v[i] = 0.0; 1194 } 1195 if (b->isTI) { 1196 for (i=0; i<ncolsaij; i++) { 1197 for (j=0; j<q; j++) { 1198 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1199 v[i*q+j] = (j==s ? vals[i] : 0.0); 1200 } 1201 } 1202 for (i=0; i<ncolsoaij; i++) { 1203 for (j=0; j<q; j++) { 1204 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1205 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1206 } 1207 } 1208 } else if (T) { 1209 for (i=0; i<ncolsaij; i++) { 1210 for (j=0; j<q; j++) { 1211 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1212 v[i*q+j] = vals[i]*T[s+j*p]; 1213 } 1214 } 1215 for (i=0; i<ncolsoaij; i++) { 1216 for (j=0; j<q; j++) { 1217 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1218 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1219 } 1220 } 1221 } 1222 if (S) { 1223 for (j=0; j<q; j++) { 1224 idx[c*q+j] = (r+rstart/p)*q+j; 1225 v[c*q+j] += S[s+j*p]; 1226 } 1227 } 1228 } 1229 1230 if (ncols) *ncols = nz; 1231 if (cols) *cols = idx; 1232 if (values) *values = v; 1233 PetscFunctionReturn(0); 1234 } 1235 1236 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1237 { 1238 PetscErrorCode ierr; 1239 PetscFunctionBegin; 1240 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1241 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1242 PetscFunctionReturn(0); 1243 } 1244 1245 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1246 { 1247 PetscErrorCode ierr; 1248 Mat A; 1249 1250 PetscFunctionBegin; 1251 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1252 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1253 ierr = MatDestroy(&A);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 /* ---------------------------------------------------------------------------------- */ 1258 /*@C 1259 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1260 1261 [I \otimes S + A \otimes T] 1262 1263 where 1264 S is a dense (p \times q) matrix 1265 T is a dense (p \times q) matrix 1266 A is an AIJ (n \times n) matrix 1267 I is the identity matrix 1268 The resulting matrix is (np \times nq) 1269 1270 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1271 1272 Collective 1273 1274 Input Parameters: 1275 + A - the AIJ matrix 1276 . p - number of rows in S and T 1277 . q - number of columns in S and T 1278 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1279 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1280 1281 Output Parameter: 1282 . kaij - the new KAIJ matrix 1283 1284 Notes: 1285 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1286 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 1287 1288 Level: advanced 1289 1290 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1291 @*/ 1292 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1293 { 1294 PetscErrorCode ierr; 1295 PetscMPIInt size; 1296 1297 PetscFunctionBegin; 1298 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1299 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1300 if (size == 1) { 1301 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1302 } else { 1303 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1304 } 1305 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1306 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1307 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1308 ierr = MatSetUp(*kaij); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 /*MC 1313 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 1314 1315 [I \otimes S + A \otimes T] 1316 1317 where 1318 S is a dense (p \times q) matrix 1319 T is a dense (p \times q) matrix 1320 A is an AIJ (n \times n) matrix 1321 I is the identity matrix 1322 The resulting matrix is (np \times nq) 1323 1324 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1325 1326 Level: advanced 1327 1328 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1329 M*/ 1330 1331 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1332 { 1333 PetscErrorCode ierr; 1334 Mat_MPIKAIJ *b; 1335 PetscMPIInt size; 1336 1337 PetscFunctionBegin; 1338 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1339 A->data = (void*)b; 1340 1341 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1342 1343 A->ops->setup = MatSetUp_KAIJ; 1344 1345 b->w = 0; 1346 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1347 if (size == 1) { 1348 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1349 A->ops->setup = MatSetUp_KAIJ; 1350 A->ops->destroy = MatDestroy_SeqKAIJ; 1351 A->ops->view = MatView_KAIJ; 1352 A->ops->mult = MatMult_SeqKAIJ; 1353 A->ops->multadd = MatMultAdd_SeqKAIJ; 1354 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1355 A->ops->getrow = MatGetRow_SeqKAIJ; 1356 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1357 A->ops->sor = MatSOR_SeqKAIJ; 1358 } else { 1359 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1360 A->ops->setup = MatSetUp_KAIJ; 1361 A->ops->destroy = MatDestroy_MPIKAIJ; 1362 A->ops->view = MatView_KAIJ; 1363 A->ops->mult = MatMult_MPIKAIJ; 1364 A->ops->multadd = MatMultAdd_MPIKAIJ; 1365 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1366 A->ops->getrow = MatGetRow_MPIKAIJ; 1367 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1368 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1369 } 1370 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1371 PetscFunctionReturn(0); 1372 } 1373