1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameters: 76 + m - the number of rows in S 77 . n - the number of columns in S 78 - S - the S matrix, in form of a scalar array in column-major format 79 80 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 if (m) *m = b->p; 91 if (n) *n = b->q; 92 if (S) *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98 99 Not Collective; the entire S is stored and returned independently on all processes. 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameters: 105 + m - the number of rows in S 106 . n - the number of columns in S 107 - S - the S matrix, in form of a scalar array in column-major format 108 109 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 if (m) *m = b->p; 120 if (n) *n = b->q; 121 if (S) *S = b->S; 122 PetscFunctionReturn(0); 123 } 124 125 /*@C 126 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127 128 Not collective 129 130 Input Parameter: 131 . A - the KAIJ matrix 132 133 Output Parameter: 134 . S - location of pointer to array obtained with MatKAIJGetS() 135 136 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 138 139 Level: advanced 140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141 @*/ 142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143 { 144 PetscErrorCode ierr; 145 146 PetscFunctionBegin; 147 if (S) *S = NULL; 148 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 /*@C 153 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 154 155 Not collective 156 157 Input Parameter: 158 . A - the KAIJ matrix 159 160 Output Parameter: 161 . S - location of pointer to array obtained with MatKAIJGetS() 162 163 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 164 If NULL is passed, it will not attempt to zero the array pointer. 165 166 Level: advanced 167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 168 @*/ 169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 170 { 171 PetscFunctionBegin; 172 if (S) *S = NULL; 173 PetscFunctionReturn(0); 174 } 175 176 /*@C 177 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 178 179 Not Collective; the entire T is stored and returned independently on all processes 180 181 Input Parameter: 182 . A - the KAIJ matrix 183 184 Output Parameters: 185 + m - the number of rows in T 186 . n - the number of columns in T 187 - T - the T matrix, in form of a scalar array in column-major format 188 189 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 190 191 Level: advanced 192 193 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 194 @*/ 195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 196 { 197 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 198 PetscFunctionBegin; 199 if (m) *m = b->p; 200 if (n) *n = b->q; 201 if (T) *T = b->T; 202 PetscFunctionReturn(0); 203 } 204 205 /*@C 206 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 207 208 Not Collective; the entire T is stored and returned independently on all processes 209 210 Input Parameter: 211 . A - the KAIJ matrix 212 213 Output Parameters: 214 + m - the number of rows in T 215 . n - the number of columns in T 216 - T - the T matrix, in form of a scalar array in column-major format 217 218 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 219 220 Level: advanced 221 222 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 223 @*/ 224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 225 { 226 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 227 PetscFunctionBegin; 228 if (m) *m = b->p; 229 if (n) *n = b->q; 230 if (T) *T = b->T; 231 PetscFunctionReturn(0); 232 } 233 234 /*@C 235 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 236 237 Not collective 238 239 Input Parameter: 240 . A - the KAIJ matrix 241 242 Output Parameter: 243 . T - location of pointer to array obtained with MatKAIJGetS() 244 245 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 246 If NULL is passed, it will not attempt to zero the array pointer. 247 248 Level: advanced 249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 250 @*/ 251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 252 { 253 PetscErrorCode ierr; 254 255 PetscFunctionBegin; 256 if (T) *T = NULL; 257 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 258 PetscFunctionReturn(0); 259 } 260 261 /*@C 262 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 263 264 Not collective 265 266 Input Parameter: 267 . A - the KAIJ matrix 268 269 Output Parameter: 270 . T - location of pointer to array obtained with MatKAIJGetS() 271 272 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 273 If NULL is passed, it will not attempt to zero the array pointer. 274 275 Level: advanced 276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 277 @*/ 278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 279 { 280 PetscFunctionBegin; 281 if (T) *T = NULL; 282 PetscFunctionReturn(0); 283 } 284 285 /*@ 286 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 287 288 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 289 290 Input Parameters: 291 + A - the KAIJ matrix 292 - B - the AIJ matrix 293 294 Notes: 295 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 296 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 297 298 Level: advanced 299 300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 301 @*/ 302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 303 { 304 PetscErrorCode ierr; 305 PetscMPIInt size; 306 307 PetscFunctionBegin; 308 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 309 if (size == 1) { 310 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 311 a->AIJ = B; 312 } else { 313 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 314 a->A = B; 315 } 316 ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 317 PetscFunctionReturn(0); 318 } 319 320 /*@C 321 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 322 323 Logically Collective; the entire S is stored independently on all processes. 324 325 Input Parameters: 326 + A - the KAIJ matrix 327 . p - the number of rows in S 328 . q - the number of columns in S 329 - S - the S matrix, in form of a scalar array in column-major format 330 331 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 332 The S matrix is copied, so the user can destroy this array. 333 334 Level: Advanced 335 336 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 337 @*/ 338 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 339 { 340 PetscErrorCode ierr; 341 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 342 343 PetscFunctionBegin; 344 ierr = PetscFree(a->S);CHKERRQ(ierr); 345 if (S) { 346 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 347 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 348 } else a->S = NULL; 349 350 a->p = p; 351 a->q = q; 352 PetscFunctionReturn(0); 353 } 354 355 /*@C 356 MatKAIJGetScaledIdentity - Check if both S and T are scaled identities. 357 358 Logically Collective. 359 360 Input Parameter: 361 . A - the KAIJ matrix 362 363 Output Parameter: 364 . identity - the Boolean value 365 366 Level: Advanced 367 368 .seealso: MatKAIJGetS(), MatKAIJGetT() 369 @*/ 370 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity) 371 { 372 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 373 PetscInt i,j; 374 375 PetscFunctionBegin; 376 if (a->p != a->q) { 377 *identity = PETSC_FALSE; 378 PetscFunctionReturn(0); 379 } else *identity = PETSC_TRUE; 380 if (!a->isTI || a->S) { 381 for (i=0; i<a->p && *identity; i++) { 382 for (j=0; j<a->p && *identity; j++) { 383 if (i != j) { 384 if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 385 if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 386 } else { 387 if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 388 if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 389 } 390 } 391 } 392 } 393 PetscFunctionReturn(0); 394 } 395 396 /*@C 397 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 398 399 Logically Collective; the entire T is stored independently on all processes. 400 401 Input Parameters: 402 + A - the KAIJ matrix 403 . p - the number of rows in S 404 . q - the number of columns in S 405 - T - the T matrix, in form of a scalar array in column-major format 406 407 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 408 The T matrix is copied, so the user can destroy this array. 409 410 Level: Advanced 411 412 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 413 @*/ 414 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 415 { 416 PetscErrorCode ierr; 417 PetscInt i,j; 418 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 419 PetscBool isTI = PETSC_FALSE; 420 421 PetscFunctionBegin; 422 /* check if T is an identity matrix */ 423 if (T && (p == q)) { 424 isTI = PETSC_TRUE; 425 for (i=0; i<p; i++) { 426 for (j=0; j<q; j++) { 427 if (i == j) { 428 /* diagonal term must be 1 */ 429 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 430 } else { 431 /* off-diagonal term must be 0 */ 432 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 433 } 434 } 435 } 436 } 437 a->isTI = isTI; 438 439 ierr = PetscFree(a->T);CHKERRQ(ierr); 440 if (T && (!isTI)) { 441 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 442 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 443 } else a->T = NULL; 444 445 a->p = p; 446 a->q = q; 447 PetscFunctionReturn(0); 448 } 449 450 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 451 { 452 PetscErrorCode ierr; 453 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 454 455 PetscFunctionBegin; 456 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 457 ierr = PetscFree(b->S);CHKERRQ(ierr); 458 ierr = PetscFree(b->T);CHKERRQ(ierr); 459 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 460 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 461 ierr = PetscFree(A->data);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) 466 { 467 PetscErrorCode ierr; 468 Mat_MPIKAIJ *a; 469 Mat_MPIAIJ *mpiaij; 470 PetscScalar *T; 471 PetscInt i,j; 472 PetscObjectState state; 473 474 PetscFunctionBegin; 475 a = (Mat_MPIKAIJ*)A->data; 476 mpiaij = (Mat_MPIAIJ*)a->A->data; 477 478 ierr = PetscObjectStateGet((PetscObject)a->A,&state);CHKERRQ(ierr); 479 if (state == a->state) { 480 /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */ 481 PetscFunctionReturn(0); 482 } else { 483 ierr = MatDestroy(&a->AIJ);CHKERRQ(ierr); 484 ierr = MatDestroy(&a->OAIJ);CHKERRQ(ierr); 485 if (a->isTI) { 486 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 487 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 488 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 489 * to pass in. */ 490 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 491 for (i=0; i<a->p; i++) { 492 for (j=0; j<a->q; j++) { 493 if (i==j) T[i+j*a->p] = 1.0; 494 else T[i+j*a->p] = 0.0; 495 } 496 } 497 } else T = a->T; 498 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 499 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 500 if (a->isTI) { 501 ierr = PetscFree(T);CHKERRQ(ierr); 502 } 503 a->state = state; 504 } 505 506 PetscFunctionReturn(0); 507 } 508 509 PetscErrorCode MatSetUp_KAIJ(Mat A) 510 { 511 PetscErrorCode ierr; 512 PetscInt n; 513 PetscMPIInt size; 514 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 515 516 PetscFunctionBegin; 517 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 518 if (size == 1) { 519 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 520 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 521 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 522 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 523 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 524 } else { 525 Mat_MPIKAIJ *a; 526 Mat_MPIAIJ *mpiaij; 527 IS from,to; 528 Vec gvec; 529 PetscScalar *T; 530 PetscInt i,j; 531 532 a = (Mat_MPIKAIJ*)A->data; 533 mpiaij = (Mat_MPIAIJ*)a->A->data; 534 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 535 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 536 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 537 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 538 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 539 540 if (a->isTI) { 541 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 542 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 543 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 544 * to pass in. */ 545 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 546 for (i=0; i<a->p; i++) { 547 for (j=0; j<a->q; j++) { 548 if (i==j) T[i+j*a->p] = 1.0; 549 else T[i+j*a->p] = 0.0; 550 } 551 } 552 } else T = a->T; 553 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 554 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 555 if (a->isTI) { 556 ierr = PetscFree(T);CHKERRQ(ierr); 557 } 558 559 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 560 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 561 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 562 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 563 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 564 565 /* create two temporary Index sets for build scatter gather */ 566 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 567 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 568 569 /* create temporary global vector to generate scatter context */ 570 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 571 572 /* generate the scatter context */ 573 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 574 575 ierr = ISDestroy(&from);CHKERRQ(ierr); 576 ierr = ISDestroy(&to);CHKERRQ(ierr); 577 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 578 } 579 580 A->assembled = PETSC_TRUE; 581 PetscFunctionReturn(0); 582 } 583 584 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer) 585 { 586 PetscViewerFormat format; 587 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 588 Mat B; 589 PetscInt i; 590 PetscErrorCode ierr; 591 PetscBool ismpikaij; 592 593 PetscFunctionBegin; 594 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 595 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 596 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 597 ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr); 598 599 /* Print appropriate details for S. */ 600 if (!a->S) { 601 ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr); 602 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 603 ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr); 604 for (i=0; i<(a->p * a->q); i++) { 605 #if defined(PETSC_USE_COMPLEX) 606 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr); 607 #else 608 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr); 609 #endif 610 } 611 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 612 } 613 614 /* Print appropriate details for T. */ 615 if (a->isTI) { 616 ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr); 617 } else if (!a->T) { 618 ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr); 619 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 620 ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr); 621 for (i=0; i<(a->p * a->q); i++) { 622 #if defined(PETSC_USE_COMPLEX) 623 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr); 624 #else 625 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr); 626 #endif 627 } 628 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 629 } 630 631 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 632 ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr); 633 if (ismpikaij) { 634 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 635 ierr = MatView(b->A,viewer);CHKERRQ(ierr); 636 } else { 637 ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr); 638 } 639 640 } else { 641 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 642 if (ismpikaij) { 643 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 644 } else { 645 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 646 } 647 ierr = MatView(B,viewer);CHKERRQ(ierr); 648 ierr = MatDestroy(&B);CHKERRQ(ierr); 649 } 650 PetscFunctionReturn(0); 651 } 652 653 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 654 { 655 PetscErrorCode ierr; 656 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 657 658 PetscFunctionBegin; 659 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 660 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 661 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 662 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 663 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 664 ierr = PetscFree(b->S);CHKERRQ(ierr); 665 ierr = PetscFree(b->T);CHKERRQ(ierr); 666 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 667 ierr = PetscFree(A->data);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 /* --------------------------------------------------------------------------------------*/ 672 673 /* zz = yy + Axx */ 674 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 675 { 676 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 677 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 678 const PetscScalar *s = b->S, *t = b->T; 679 const PetscScalar *x,*v,*bx; 680 PetscScalar *y,*sums; 681 PetscErrorCode ierr; 682 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 683 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 684 685 PetscFunctionBegin; 686 if (!yy) { 687 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 688 } else { 689 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 690 } 691 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 692 693 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 694 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 695 idx = a->j; 696 v = a->a; 697 ii = a->i; 698 699 if (b->isTI) { 700 for (i=0; i<m; i++) { 701 jrow = ii[i]; 702 n = ii[i+1] - jrow; 703 sums = y + p*i; 704 for (j=0; j<n; j++) { 705 for (k=0; k<p; k++) { 706 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 707 } 708 } 709 } 710 ierr = PetscLogFlops(3.0*(a->nz)*p);CHKERRQ(ierr); 711 } else if (t) { 712 for (i=0; i<m; i++) { 713 jrow = ii[i]; 714 n = ii[i+1] - jrow; 715 sums = y + p*i; 716 for (j=0; j<n; j++) { 717 for (k=0; k<p; k++) { 718 for (l=0; l<q; l++) { 719 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 720 } 721 } 722 } 723 } 724 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 725 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 726 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 727 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 728 ierr = PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);CHKERRQ(ierr); 729 } 730 if (s) { 731 for (i=0; i<m; i++) { 732 sums = y + p*i; 733 bx = x + q*i; 734 if (i < b->AIJ->cmap->n) { 735 for (j=0; j<q; j++) { 736 for (k=0; k<p; k++) { 737 sums[k] += s[k+j*p]*bx[j]; 738 } 739 } 740 } 741 } 742 ierr = PetscLogFlops(2.0*m*p*q);CHKERRQ(ierr); 743 } 744 745 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 746 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 747 PetscFunctionReturn(0); 748 } 749 750 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 751 { 752 PetscErrorCode ierr; 753 PetscFunctionBegin; 754 ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #include <petsc/private/kernels/blockinvert.h> 759 760 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 761 { 762 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 763 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 764 const PetscScalar *S = b->S; 765 const PetscScalar *T = b->T; 766 const PetscScalar *v = a->a; 767 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 768 PetscErrorCode ierr; 769 PetscInt i,j,*v_pivots,dof,dof2; 770 PetscScalar *diag,aval,*v_work; 771 772 PetscFunctionBegin; 773 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 774 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 775 776 dof = p; 777 dof2 = dof*dof; 778 779 if (b->ibdiagvalid) { 780 if (values) *values = b->ibdiag; 781 PetscFunctionReturn(0); 782 } 783 if (!b->ibdiag) { 784 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 785 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 786 } 787 if (values) *values = b->ibdiag; 788 diag = b->ibdiag; 789 790 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 791 for (i=0; i<m; i++) { 792 if (S) { 793 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 794 } else { 795 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 796 } 797 if (b->isTI) { 798 aval = 0; 799 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 800 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 801 } else if (T) { 802 aval = 0; 803 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 804 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 805 } 806 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 807 diag += dof2; 808 } 809 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 810 811 b->ibdiagvalid = PETSC_TRUE; 812 PetscFunctionReturn(0); 813 } 814 815 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 816 { 817 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 818 819 PetscFunctionBegin; 820 *B = kaij->AIJ; 821 PetscFunctionReturn(0); 822 } 823 824 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 825 { 826 PetscErrorCode ierr; 827 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 828 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 829 const PetscScalar *aa = a->a, *T = kaij->T, *v; 830 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 831 const PetscScalar *b, *xb, *idiag; 832 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 833 PetscInt i, j, k, i2, bs, bs2, nz; 834 835 PetscFunctionBegin; 836 its = its*lits; 837 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 838 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 839 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 840 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 841 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks"); 842 else {bs = p; bs2 = bs*bs; } 843 844 if (!m) PetscFunctionReturn(0); 845 846 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); } 847 idiag = kaij->ibdiag; 848 diag = a->diag; 849 850 if (!kaij->sor.setup) { 851 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 852 kaij->sor.setup = PETSC_TRUE; 853 } 854 y = kaij->sor.y; 855 w = kaij->sor.w; 856 work = kaij->sor.work; 857 t = kaij->sor.t; 858 arr = kaij->sor.arr; 859 860 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 861 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 862 863 if (flag & SOR_ZERO_INITIAL_GUESS) { 864 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 865 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 866 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 867 i2 = bs; 868 idiag += bs2; 869 for (i=1; i<m; i++) { 870 v = aa + ai[i]; 871 vi = aj + ai[i]; 872 nz = diag[i] - ai[i]; 873 874 if (T) { /* b - T (Arow * x) */ 875 ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 876 for (j=0; j<nz; j++) { 877 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 878 } 879 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 880 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 881 } else if (kaij->isTI) { 882 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 883 for (j=0; j<nz; j++) { 884 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 885 } 886 } else { 887 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 888 } 889 890 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 891 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 892 893 idiag += bs2; 894 i2 += bs; 895 } 896 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 897 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 898 xb = t; 899 } else xb = b; 900 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 901 idiag = kaij->ibdiag+bs2*(m-1); 902 i2 = bs * (m-1); 903 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 904 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 905 i2 -= bs; 906 idiag -= bs2; 907 for (i=m-2; i>=0; i--) { 908 v = aa + diag[i] + 1 ; 909 vi = aj + diag[i] + 1; 910 nz = ai[i+1] - diag[i] - 1; 911 912 if (T) { /* FIXME: This branch untested */ 913 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 914 /* copy all rows of x that are needed into contiguous space */ 915 workt = work; 916 for (j=0; j<nz; j++) { 917 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 918 workt += bs; 919 } 920 arrt = arr; 921 for (j=0; j<nz; j++) { 922 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 923 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 924 arrt += bs2; 925 } 926 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 927 } else if (kaij->isTI) { 928 ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 929 for (j=0; j<nz; j++) { 930 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 931 } 932 } 933 934 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 935 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 936 937 idiag -= bs2; 938 i2 -= bs; 939 } 940 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 941 } 942 its--; 943 } 944 while (its--) { /* FIXME: This branch not updated */ 945 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 946 i2 = 0; 947 idiag = kaij->ibdiag; 948 for (i=0; i<m; i++) { 949 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 950 951 v = aa + ai[i]; 952 vi = aj + ai[i]; 953 nz = diag[i] - ai[i]; 954 workt = work; 955 for (j=0; j<nz; j++) { 956 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 957 workt += bs; 958 } 959 arrt = arr; 960 if (T) { 961 for (j=0; j<nz; j++) { 962 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 963 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 964 arrt += bs2; 965 } 966 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 967 } else if (kaij->isTI) { 968 for (j=0; j<nz; j++) { 969 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 970 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 971 arrt += bs2; 972 } 973 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 974 } 975 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 976 977 v = aa + diag[i] + 1; 978 vi = aj + diag[i] + 1; 979 nz = ai[i+1] - diag[i] - 1; 980 workt = work; 981 for (j=0; j<nz; j++) { 982 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 983 workt += bs; 984 } 985 arrt = arr; 986 if (T) { 987 for (j=0; j<nz; j++) { 988 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 989 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 990 arrt += bs2; 991 } 992 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 993 } else if (kaij->isTI) { 994 for (j=0; j<nz; j++) { 995 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 996 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 997 arrt += bs2; 998 } 999 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1000 } 1001 1002 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1003 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1004 1005 idiag += bs2; 1006 i2 += bs; 1007 } 1008 xb = t; 1009 } 1010 else xb = b; 1011 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1012 idiag = kaij->ibdiag+bs2*(m-1); 1013 i2 = bs * (m-1); 1014 if (xb == b) { 1015 for (i=m-1; i>=0; i--) { 1016 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1017 1018 v = aa + ai[i]; 1019 vi = aj + ai[i]; 1020 nz = diag[i] - ai[i]; 1021 workt = work; 1022 for (j=0; j<nz; j++) { 1023 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1024 workt += bs; 1025 } 1026 arrt = arr; 1027 if (T) { 1028 for (j=0; j<nz; j++) { 1029 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1030 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1031 arrt += bs2; 1032 } 1033 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1034 } else if (kaij->isTI) { 1035 for (j=0; j<nz; j++) { 1036 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1037 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1038 arrt += bs2; 1039 } 1040 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1041 } 1042 1043 v = aa + diag[i] + 1; 1044 vi = aj + diag[i] + 1; 1045 nz = ai[i+1] - diag[i] - 1; 1046 workt = work; 1047 for (j=0; j<nz; j++) { 1048 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1049 workt += bs; 1050 } 1051 arrt = arr; 1052 if (T) { 1053 for (j=0; j<nz; j++) { 1054 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1055 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1056 arrt += bs2; 1057 } 1058 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1059 } else if (kaij->isTI) { 1060 for (j=0; j<nz; j++) { 1061 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1062 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1063 arrt += bs2; 1064 } 1065 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1066 } 1067 1068 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1069 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1070 } 1071 } else { 1072 for (i=m-1; i>=0; i--) { 1073 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1074 v = aa + diag[i] + 1; 1075 vi = aj + diag[i] + 1; 1076 nz = ai[i+1] - diag[i] - 1; 1077 workt = work; 1078 for (j=0; j<nz; j++) { 1079 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1080 workt += bs; 1081 } 1082 arrt = arr; 1083 if (T) { 1084 for (j=0; j<nz; j++) { 1085 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1086 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1087 arrt += bs2; 1088 } 1089 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1090 } else if (kaij->isTI) { 1091 for (j=0; j<nz; j++) { 1092 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1093 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1094 arrt += bs2; 1095 } 1096 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1097 } 1098 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1099 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1100 } 1101 } 1102 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1103 } 1104 } 1105 1106 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1107 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1108 PetscFunctionReturn(0); 1109 } 1110 1111 /*===================================================================================*/ 1112 1113 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1114 { 1115 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1116 PetscErrorCode ierr; 1117 1118 PetscFunctionBegin; 1119 if (!yy) { 1120 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 1121 } else { 1122 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 1123 } 1124 /* start the scatter */ 1125 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1126 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 1127 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1128 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 1133 { 1134 PetscErrorCode ierr; 1135 PetscFunctionBegin; 1136 ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 1141 { 1142 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1143 PetscErrorCode ierr; 1144 1145 PetscFunctionBegin; 1146 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 /* ----------------------------------------------------------------*/ 1151 1152 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1153 { 1154 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1155 PetscErrorCode diag = PETSC_FALSE; 1156 PetscErrorCode ierr; 1157 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1158 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1159 1160 PetscFunctionBegin; 1161 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1162 b->getrowactive = PETSC_TRUE; 1163 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 1164 1165 if ((!S) && (!T) && (!b->isTI)) { 1166 if (ncols) *ncols = 0; 1167 if (cols) *cols = NULL; 1168 if (values) *values = NULL; 1169 PetscFunctionReturn(0); 1170 } 1171 1172 if (T || b->isTI) { 1173 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 1174 c = nzaij; 1175 for (i=0; i<nzaij; i++) { 1176 /* check if this row contains a diagonal entry */ 1177 if (colsaij[i] == r) { 1178 diag = PETSC_TRUE; 1179 c = i; 1180 } 1181 } 1182 } else nzaij = c = 0; 1183 1184 /* calculate size of row */ 1185 nz = 0; 1186 if (S) nz += q; 1187 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1188 1189 if (cols || values) { 1190 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1191 for (i=0; i<q; i++) { 1192 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1193 v[i] = 0.0; 1194 } 1195 if (b->isTI) { 1196 for (i=0; i<nzaij; i++) { 1197 for (j=0; j<q; j++) { 1198 idx[i*q+j] = colsaij[i]*q+j; 1199 v[i*q+j] = (j==s ? vaij[i] : 0); 1200 } 1201 } 1202 } else if (T) { 1203 for (i=0; i<nzaij; i++) { 1204 for (j=0; j<q; j++) { 1205 idx[i*q+j] = colsaij[i]*q+j; 1206 v[i*q+j] = vaij[i]*T[s+j*p]; 1207 } 1208 } 1209 } 1210 if (S) { 1211 for (j=0; j<q; j++) { 1212 idx[c*q+j] = r*q+j; 1213 v[c*q+j] += S[s+j*p]; 1214 } 1215 } 1216 } 1217 1218 if (ncols) *ncols = nz; 1219 if (cols) *cols = idx; 1220 if (values) *values = v; 1221 PetscFunctionReturn(0); 1222 } 1223 1224 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1225 { 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 if (nz) *nz = 0; 1230 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1231 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1232 PetscFunctionReturn(0); 1233 } 1234 1235 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1236 { 1237 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1238 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1239 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1240 Mat AIJ = b->A; 1241 PetscBool diag = PETSC_FALSE; 1242 PetscErrorCode ierr; 1243 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1244 PetscInt nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1245 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1246 1247 PetscFunctionBegin; 1248 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1249 b->getrowactive = PETSC_TRUE; 1250 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1251 lrow = row - rstart; 1252 1253 if ((!S) && (!T) && (!b->isTI)) { 1254 if (ncols) *ncols = 0; 1255 if (cols) *cols = NULL; 1256 if (values) *values = NULL; 1257 PetscFunctionReturn(0); 1258 } 1259 1260 r = lrow/p; 1261 s = lrow%p; 1262 1263 if (T || b->isTI) { 1264 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr); 1265 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 1266 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 1267 1268 c = ncolsaij + ncolsoaij; 1269 for (i=0; i<ncolsaij; i++) { 1270 /* check if this row contains a diagonal entry */ 1271 if (colsaij[i] == r) { 1272 diag = PETSC_TRUE; 1273 c = i; 1274 } 1275 } 1276 } else c = 0; 1277 1278 /* calculate size of row */ 1279 nz = 0; 1280 if (S) nz += q; 1281 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1282 1283 if (cols || values) { 1284 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1285 for (i=0; i<q; i++) { 1286 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1287 v[i] = 0.0; 1288 } 1289 if (b->isTI) { 1290 for (i=0; i<ncolsaij; i++) { 1291 for (j=0; j<q; j++) { 1292 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1293 v[i*q+j] = (j==s ? vals[i] : 0.0); 1294 } 1295 } 1296 for (i=0; i<ncolsoaij; i++) { 1297 for (j=0; j<q; j++) { 1298 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1299 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1300 } 1301 } 1302 } else if (T) { 1303 for (i=0; i<ncolsaij; i++) { 1304 for (j=0; j<q; j++) { 1305 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1306 v[i*q+j] = vals[i]*T[s+j*p]; 1307 } 1308 } 1309 for (i=0; i<ncolsoaij; i++) { 1310 for (j=0; j<q; j++) { 1311 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1312 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1313 } 1314 } 1315 } 1316 if (S) { 1317 for (j=0; j<q; j++) { 1318 idx[c*q+j] = (r+rstart/p)*q+j; 1319 v[c*q+j] += S[s+j*p]; 1320 } 1321 } 1322 } 1323 1324 if (ncols) *ncols = nz; 1325 if (cols) *cols = idx; 1326 if (values) *values = v; 1327 PetscFunctionReturn(0); 1328 } 1329 1330 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1331 { 1332 PetscErrorCode ierr; 1333 PetscFunctionBegin; 1334 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1335 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1336 PetscFunctionReturn(0); 1337 } 1338 1339 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1340 { 1341 PetscErrorCode ierr; 1342 Mat A; 1343 1344 PetscFunctionBegin; 1345 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1346 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1347 ierr = MatDestroy(&A);CHKERRQ(ierr); 1348 PetscFunctionReturn(0); 1349 } 1350 1351 /* ---------------------------------------------------------------------------------- */ 1352 /*@C 1353 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1354 1355 [I \otimes S + A \otimes T] 1356 1357 where 1358 S is a dense (p \times q) matrix 1359 T is a dense (p \times q) matrix 1360 A is an AIJ (n \times n) matrix 1361 I is the identity matrix 1362 The resulting matrix is (np \times nq) 1363 1364 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1365 1366 Collective 1367 1368 Input Parameters: 1369 + A - the AIJ matrix 1370 . p - number of rows in S and T 1371 . q - number of columns in S and T 1372 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1373 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1374 1375 Output Parameter: 1376 . kaij - the new KAIJ matrix 1377 1378 Notes: 1379 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1380 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 1381 1382 Level: advanced 1383 1384 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1385 @*/ 1386 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1387 { 1388 PetscErrorCode ierr; 1389 PetscMPIInt size; 1390 1391 PetscFunctionBegin; 1392 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1393 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 1394 if (size == 1) { 1395 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1396 } else { 1397 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1398 } 1399 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1400 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1401 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1402 ierr = MatSetUp(*kaij);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 1406 /*MC 1407 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 1408 [I \otimes S + A \otimes T], 1409 where 1410 S is a dense (p \times q) matrix, 1411 T is a dense (p \times q) matrix, 1412 A is an AIJ (n \times n) matrix, 1413 and I is the identity matrix. 1414 The resulting matrix is (np \times nq). 1415 1416 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1417 1418 Notes: 1419 A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 1420 where x and b are column vectors containing the row-major representations of X and B. 1421 1422 Level: advanced 1423 1424 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1425 M*/ 1426 1427 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1428 { 1429 PetscErrorCode ierr; 1430 Mat_MPIKAIJ *b; 1431 PetscMPIInt size; 1432 1433 PetscFunctionBegin; 1434 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1435 A->data = (void*)b; 1436 1437 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1438 1439 A->ops->setup = MatSetUp_KAIJ; 1440 1441 b->w = NULL; 1442 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 1443 if (size == 1) { 1444 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1445 A->ops->setup = MatSetUp_KAIJ; 1446 A->ops->destroy = MatDestroy_SeqKAIJ; 1447 A->ops->view = MatView_KAIJ; 1448 A->ops->mult = MatMult_SeqKAIJ; 1449 A->ops->multadd = MatMultAdd_SeqKAIJ; 1450 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1451 A->ops->getrow = MatGetRow_SeqKAIJ; 1452 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1453 A->ops->sor = MatSOR_SeqKAIJ; 1454 } else { 1455 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1456 A->ops->setup = MatSetUp_KAIJ; 1457 A->ops->destroy = MatDestroy_MPIKAIJ; 1458 A->ops->view = MatView_KAIJ; 1459 A->ops->mult = MatMult_MPIKAIJ; 1460 A->ops->multadd = MatMultAdd_MPIKAIJ; 1461 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1462 A->ops->getrow = MatGetRow_MPIKAIJ; 1463 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1464 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1465 } 1466 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1467 PetscFunctionReturn(0); 1468 } 1469