1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameters: 76 + m - the number of rows in S 77 . n - the number of columns in S 78 - S - the S matrix, in form of a scalar array in column-major format 79 80 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 if (m) *m = b->p; 91 if (n) *n = b->q; 92 if (S) *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98 99 Not Collective; the entire S is stored and returned independently on all processes. 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameters: 105 + m - the number of rows in S 106 . n - the number of columns in S 107 - S - the S matrix, in form of a scalar array in column-major format 108 109 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 if (m) *m = b->p; 120 if (n) *n = b->q; 121 if (S) *S = b->S; 122 PetscFunctionReturn(0); 123 } 124 125 /*@C 126 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127 128 Not collective 129 130 Input Parameter: 131 . A - the KAIJ matrix 132 133 Output Parameter: 134 . S - location of pointer to array obtained with MatKAIJGetS() 135 136 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 138 139 Level: advanced 140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141 @*/ 142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143 { 144 PetscErrorCode ierr; 145 146 PetscFunctionBegin; 147 if (S) *S = NULL; 148 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 /*@C 153 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 154 155 Not collective 156 157 Input Parameter: 158 . A - the KAIJ matrix 159 160 Output Parameter: 161 . S - location of pointer to array obtained with MatKAIJGetS() 162 163 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 164 If NULL is passed, it will not attempt to zero the array pointer. 165 166 Level: advanced 167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 168 @*/ 169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 170 { 171 PetscFunctionBegin; 172 if (S) *S = NULL; 173 PetscFunctionReturn(0); 174 } 175 176 /*@C 177 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 178 179 Not Collective; the entire T is stored and returned independently on all processes 180 181 Input Parameter: 182 . A - the KAIJ matrix 183 184 Output Parameters: 185 + m - the number of rows in T 186 . n - the number of columns in T 187 - T - the T matrix, in form of a scalar array in column-major format 188 189 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 190 191 Level: advanced 192 193 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 194 @*/ 195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 196 { 197 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 198 PetscFunctionBegin; 199 if (m) *m = b->p; 200 if (n) *n = b->q; 201 if (T) *T = b->T; 202 PetscFunctionReturn(0); 203 } 204 205 /*@C 206 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 207 208 Not Collective; the entire T is stored and returned independently on all processes 209 210 Input Parameter: 211 . A - the KAIJ matrix 212 213 Output Parameters: 214 + m - the number of rows in T 215 . n - the number of columns in T 216 - T - the T matrix, in form of a scalar array in column-major format 217 218 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 219 220 Level: advanced 221 222 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 223 @*/ 224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 225 { 226 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 227 PetscFunctionBegin; 228 if (m) *m = b->p; 229 if (n) *n = b->q; 230 if (T) *T = b->T; 231 PetscFunctionReturn(0); 232 } 233 234 /*@C 235 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 236 237 Not collective 238 239 Input Parameter: 240 . A - the KAIJ matrix 241 242 Output Parameter: 243 . T - location of pointer to array obtained with MatKAIJGetS() 244 245 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 246 If NULL is passed, it will not attempt to zero the array pointer. 247 248 Level: advanced 249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 250 @*/ 251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 252 { 253 PetscErrorCode ierr; 254 255 PetscFunctionBegin; 256 if (T) *T = NULL; 257 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 258 PetscFunctionReturn(0); 259 } 260 261 /*@C 262 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 263 264 Not collective 265 266 Input Parameter: 267 . A - the KAIJ matrix 268 269 Output Parameter: 270 . T - location of pointer to array obtained with MatKAIJGetS() 271 272 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 273 If NULL is passed, it will not attempt to zero the array pointer. 274 275 Level: advanced 276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 277 @*/ 278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 279 { 280 PetscFunctionBegin; 281 if (T) *T = NULL; 282 PetscFunctionReturn(0); 283 } 284 285 /*@ 286 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 287 288 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 289 290 Input Parameters: 291 + A - the KAIJ matrix 292 - B - the AIJ matrix 293 294 Notes: 295 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 296 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 297 298 Level: advanced 299 300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 301 @*/ 302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 303 { 304 PetscErrorCode ierr; 305 PetscMPIInt size; 306 PetscBool flg; 307 308 PetscFunctionBegin; 309 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 310 if (size == 1) { 311 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQAIJ,&flg);CHKERRQ(ierr); 312 if (!flg) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat",((PetscObject)B)->type_name); 313 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 314 a->AIJ = B; 315 } else { 316 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 317 a->A = B; 318 } 319 ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 320 PetscFunctionReturn(0); 321 } 322 323 /*@C 324 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 325 326 Logically Collective; the entire S is stored independently on all processes. 327 328 Input Parameters: 329 + A - the KAIJ matrix 330 . p - the number of rows in S 331 . q - the number of columns in S 332 - S - the S matrix, in form of a scalar array in column-major format 333 334 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 335 The S matrix is copied, so the user can destroy this array. 336 337 Level: Advanced 338 339 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 340 @*/ 341 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 342 { 343 PetscErrorCode ierr; 344 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 345 346 PetscFunctionBegin; 347 ierr = PetscFree(a->S);CHKERRQ(ierr); 348 if (S) { 349 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 350 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 351 } else a->S = NULL; 352 353 a->p = p; 354 a->q = q; 355 PetscFunctionReturn(0); 356 } 357 358 /*@C 359 MatKAIJGetScaledIdentity - Check if both S and T are scaled identities. 360 361 Logically Collective. 362 363 Input Parameter: 364 . A - the KAIJ matrix 365 366 Output Parameter: 367 . identity - the Boolean value 368 369 Level: Advanced 370 371 .seealso: MatKAIJGetS(), MatKAIJGetT() 372 @*/ 373 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity) 374 { 375 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 376 PetscInt i,j; 377 378 PetscFunctionBegin; 379 if (a->p != a->q) { 380 *identity = PETSC_FALSE; 381 PetscFunctionReturn(0); 382 } else *identity = PETSC_TRUE; 383 if (!a->isTI || a->S) { 384 for (i=0; i<a->p && *identity; i++) { 385 for (j=0; j<a->p && *identity; j++) { 386 if (i != j) { 387 if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 388 if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 389 } else { 390 if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 391 if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 392 } 393 } 394 } 395 } 396 PetscFunctionReturn(0); 397 } 398 399 /*@C 400 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 401 402 Logically Collective; the entire T is stored independently on all processes. 403 404 Input Parameters: 405 + A - the KAIJ matrix 406 . p - the number of rows in S 407 . q - the number of columns in S 408 - T - the T matrix, in form of a scalar array in column-major format 409 410 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 411 The T matrix is copied, so the user can destroy this array. 412 413 Level: Advanced 414 415 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 416 @*/ 417 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 418 { 419 PetscErrorCode ierr; 420 PetscInt i,j; 421 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 422 PetscBool isTI = PETSC_FALSE; 423 424 PetscFunctionBegin; 425 /* check if T is an identity matrix */ 426 if (T && (p == q)) { 427 isTI = PETSC_TRUE; 428 for (i=0; i<p; i++) { 429 for (j=0; j<q; j++) { 430 if (i == j) { 431 /* diagonal term must be 1 */ 432 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 433 } else { 434 /* off-diagonal term must be 0 */ 435 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 436 } 437 } 438 } 439 } 440 a->isTI = isTI; 441 442 ierr = PetscFree(a->T);CHKERRQ(ierr); 443 if (T && (!isTI)) { 444 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 445 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 446 } else a->T = NULL; 447 448 a->p = p; 449 a->q = q; 450 PetscFunctionReturn(0); 451 } 452 453 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 454 { 455 PetscErrorCode ierr; 456 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 457 458 PetscFunctionBegin; 459 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 460 ierr = PetscFree(b->S);CHKERRQ(ierr); 461 ierr = PetscFree(b->T);CHKERRQ(ierr); 462 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 463 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 464 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",NULL);CHKERRQ(ierr); 465 ierr = PetscFree(A->data);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) 470 { 471 PetscErrorCode ierr; 472 Mat_MPIKAIJ *a; 473 Mat_MPIAIJ *mpiaij; 474 PetscScalar *T; 475 PetscInt i,j; 476 PetscObjectState state; 477 478 PetscFunctionBegin; 479 a = (Mat_MPIKAIJ*)A->data; 480 mpiaij = (Mat_MPIAIJ*)a->A->data; 481 482 ierr = PetscObjectStateGet((PetscObject)a->A,&state);CHKERRQ(ierr); 483 if (state == a->state) { 484 /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */ 485 PetscFunctionReturn(0); 486 } else { 487 ierr = MatDestroy(&a->AIJ);CHKERRQ(ierr); 488 ierr = MatDestroy(&a->OAIJ);CHKERRQ(ierr); 489 if (a->isTI) { 490 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 491 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 492 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 493 * to pass in. */ 494 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 495 for (i=0; i<a->p; i++) { 496 for (j=0; j<a->q; j++) { 497 if (i==j) T[i+j*a->p] = 1.0; 498 else T[i+j*a->p] = 0.0; 499 } 500 } 501 } else T = a->T; 502 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 503 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 504 if (a->isTI) { 505 ierr = PetscFree(T);CHKERRQ(ierr); 506 } 507 a->state = state; 508 } 509 510 PetscFunctionReturn(0); 511 } 512 513 PetscErrorCode MatSetUp_KAIJ(Mat A) 514 { 515 PetscErrorCode ierr; 516 PetscInt n; 517 PetscMPIInt size; 518 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 519 520 PetscFunctionBegin; 521 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 522 if (size == 1) { 523 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 524 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 525 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 526 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 527 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 528 } else { 529 Mat_MPIKAIJ *a; 530 Mat_MPIAIJ *mpiaij; 531 IS from,to; 532 Vec gvec; 533 534 a = (Mat_MPIKAIJ*)A->data; 535 mpiaij = (Mat_MPIAIJ*)a->A->data; 536 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 537 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 538 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 539 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 540 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 541 542 ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); 543 544 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 545 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 546 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 547 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 548 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 549 550 /* create two temporary Index sets for build scatter gather */ 551 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 552 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 553 554 /* create temporary global vector to generate scatter context */ 555 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 556 557 /* generate the scatter context */ 558 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 559 560 ierr = ISDestroy(&from);CHKERRQ(ierr); 561 ierr = ISDestroy(&to);CHKERRQ(ierr); 562 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 563 } 564 565 A->assembled = PETSC_TRUE; 566 PetscFunctionReturn(0); 567 } 568 569 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer) 570 { 571 PetscViewerFormat format; 572 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 573 Mat B; 574 PetscInt i; 575 PetscErrorCode ierr; 576 PetscBool ismpikaij; 577 578 PetscFunctionBegin; 579 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 580 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 581 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 582 ierr = PetscViewerASCIIPrintf(viewer,"S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n",a->p,a->q);CHKERRQ(ierr); 583 584 /* Print appropriate details for S. */ 585 if (!a->S) { 586 ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr); 587 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 588 ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr); 589 for (i=0; i<(a->p * a->q); i++) { 590 #if defined(PETSC_USE_COMPLEX) 591 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr); 592 #else 593 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr); 594 #endif 595 } 596 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 597 } 598 599 /* Print appropriate details for T. */ 600 if (a->isTI) { 601 ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr); 602 } else if (!a->T) { 603 ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr); 604 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 605 ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr); 606 for (i=0; i<(a->p * a->q); i++) { 607 #if defined(PETSC_USE_COMPLEX) 608 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr); 609 #else 610 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr); 611 #endif 612 } 613 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 614 } 615 616 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 617 ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr); 618 if (ismpikaij) { 619 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 620 ierr = MatView(b->A,viewer);CHKERRQ(ierr); 621 } else { 622 ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr); 623 } 624 625 } else { 626 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 627 ierr = MatConvert(A,MATAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 628 ierr = MatView(B,viewer);CHKERRQ(ierr); 629 ierr = MatDestroy(&B);CHKERRQ(ierr); 630 } 631 PetscFunctionReturn(0); 632 } 633 634 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 635 { 636 PetscErrorCode ierr; 637 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 638 639 PetscFunctionBegin; 640 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 641 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 642 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 643 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 644 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 645 ierr = PetscFree(b->S);CHKERRQ(ierr); 646 ierr = PetscFree(b->T);CHKERRQ(ierr); 647 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 648 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 649 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",NULL);CHKERRQ(ierr); 650 ierr = PetscFree(A->data);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 654 /* --------------------------------------------------------------------------------------*/ 655 656 /* zz = yy + Axx */ 657 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 658 { 659 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 660 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 661 const PetscScalar *s = b->S, *t = b->T; 662 const PetscScalar *x,*v,*bx; 663 PetscScalar *y,*sums; 664 PetscErrorCode ierr; 665 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 666 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 667 668 PetscFunctionBegin; 669 if (!yy) { 670 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 671 } else { 672 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 673 } 674 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 675 676 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 677 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 678 idx = a->j; 679 v = a->a; 680 ii = a->i; 681 682 if (b->isTI) { 683 for (i=0; i<m; i++) { 684 jrow = ii[i]; 685 n = ii[i+1] - jrow; 686 sums = y + p*i; 687 for (j=0; j<n; j++) { 688 for (k=0; k<p; k++) { 689 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 690 } 691 } 692 } 693 ierr = PetscLogFlops(3.0*(a->nz)*p);CHKERRQ(ierr); 694 } else if (t) { 695 for (i=0; i<m; i++) { 696 jrow = ii[i]; 697 n = ii[i+1] - jrow; 698 sums = y + p*i; 699 for (j=0; j<n; j++) { 700 for (k=0; k<p; k++) { 701 for (l=0; l<q; l++) { 702 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 703 } 704 } 705 } 706 } 707 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 708 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 709 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 710 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 711 ierr = PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);CHKERRQ(ierr); 712 } 713 if (s) { 714 for (i=0; i<m; i++) { 715 sums = y + p*i; 716 bx = x + q*i; 717 if (i < b->AIJ->cmap->n) { 718 for (j=0; j<q; j++) { 719 for (k=0; k<p; k++) { 720 sums[k] += s[k+j*p]*bx[j]; 721 } 722 } 723 } 724 } 725 ierr = PetscLogFlops(2.0*m*p*q);CHKERRQ(ierr); 726 } 727 728 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 729 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 730 PetscFunctionReturn(0); 731 } 732 733 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 734 { 735 PetscErrorCode ierr; 736 PetscFunctionBegin; 737 ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 738 PetscFunctionReturn(0); 739 } 740 741 #include <petsc/private/kernels/blockinvert.h> 742 743 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 744 { 745 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 746 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 747 const PetscScalar *S = b->S; 748 const PetscScalar *T = b->T; 749 const PetscScalar *v = a->a; 750 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 751 PetscErrorCode ierr; 752 PetscInt i,j,*v_pivots,dof,dof2; 753 PetscScalar *diag,aval,*v_work; 754 755 PetscFunctionBegin; 756 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 757 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 758 759 dof = p; 760 dof2 = dof*dof; 761 762 if (b->ibdiagvalid) { 763 if (values) *values = b->ibdiag; 764 PetscFunctionReturn(0); 765 } 766 if (!b->ibdiag) { 767 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 768 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 769 } 770 if (values) *values = b->ibdiag; 771 diag = b->ibdiag; 772 773 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 774 for (i=0; i<m; i++) { 775 if (S) { 776 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 777 } else { 778 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 779 } 780 if (b->isTI) { 781 aval = 0; 782 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 783 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 784 } else if (T) { 785 aval = 0; 786 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 787 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 788 } 789 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 790 diag += dof2; 791 } 792 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 793 794 b->ibdiagvalid = PETSC_TRUE; 795 PetscFunctionReturn(0); 796 } 797 798 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 799 { 800 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 801 802 PetscFunctionBegin; 803 *B = kaij->AIJ; 804 PetscFunctionReturn(0); 805 } 806 807 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 808 { 809 PetscErrorCode ierr; 810 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 811 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 812 const PetscScalar *aa = a->a, *T = kaij->T, *v; 813 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 814 const PetscScalar *b, *xb, *idiag; 815 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 816 PetscInt i, j, k, i2, bs, bs2, nz; 817 818 PetscFunctionBegin; 819 its = its*lits; 820 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 821 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 822 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 823 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 824 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks"); 825 else {bs = p; bs2 = bs*bs; } 826 827 if (!m) PetscFunctionReturn(0); 828 829 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); } 830 idiag = kaij->ibdiag; 831 diag = a->diag; 832 833 if (!kaij->sor.setup) { 834 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 835 kaij->sor.setup = PETSC_TRUE; 836 } 837 y = kaij->sor.y; 838 w = kaij->sor.w; 839 work = kaij->sor.work; 840 t = kaij->sor.t; 841 arr = kaij->sor.arr; 842 843 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 844 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 845 846 if (flag & SOR_ZERO_INITIAL_GUESS) { 847 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 848 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 849 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 850 i2 = bs; 851 idiag += bs2; 852 for (i=1; i<m; i++) { 853 v = aa + ai[i]; 854 vi = aj + ai[i]; 855 nz = diag[i] - ai[i]; 856 857 if (T) { /* b - T (Arow * x) */ 858 ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 859 for (j=0; j<nz; j++) { 860 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 861 } 862 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 863 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 864 } else if (kaij->isTI) { 865 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 866 for (j=0; j<nz; j++) { 867 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 868 } 869 } else { 870 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 871 } 872 873 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 874 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 875 876 idiag += bs2; 877 i2 += bs; 878 } 879 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 880 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 881 xb = t; 882 } else xb = b; 883 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 884 idiag = kaij->ibdiag+bs2*(m-1); 885 i2 = bs * (m-1); 886 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 887 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 888 i2 -= bs; 889 idiag -= bs2; 890 for (i=m-2; i>=0; i--) { 891 v = aa + diag[i] + 1 ; 892 vi = aj + diag[i] + 1; 893 nz = ai[i+1] - diag[i] - 1; 894 895 if (T) { /* FIXME: This branch untested */ 896 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 897 /* copy all rows of x that are needed into contiguous space */ 898 workt = work; 899 for (j=0; j<nz; j++) { 900 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 901 workt += bs; 902 } 903 arrt = arr; 904 for (j=0; j<nz; j++) { 905 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 906 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 907 arrt += bs2; 908 } 909 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 910 } else if (kaij->isTI) { 911 ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 912 for (j=0; j<nz; j++) { 913 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 914 } 915 } 916 917 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 918 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 919 920 idiag -= bs2; 921 i2 -= bs; 922 } 923 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 924 } 925 its--; 926 } 927 while (its--) { /* FIXME: This branch not updated */ 928 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 929 i2 = 0; 930 idiag = kaij->ibdiag; 931 for (i=0; i<m; i++) { 932 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 933 934 v = aa + ai[i]; 935 vi = aj + ai[i]; 936 nz = diag[i] - ai[i]; 937 workt = work; 938 for (j=0; j<nz; j++) { 939 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 940 workt += bs; 941 } 942 arrt = arr; 943 if (T) { 944 for (j=0; j<nz; j++) { 945 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 946 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 947 arrt += bs2; 948 } 949 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 950 } else if (kaij->isTI) { 951 for (j=0; j<nz; j++) { 952 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 953 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 954 arrt += bs2; 955 } 956 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 957 } 958 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 959 960 v = aa + diag[i] + 1; 961 vi = aj + diag[i] + 1; 962 nz = ai[i+1] - diag[i] - 1; 963 workt = work; 964 for (j=0; j<nz; j++) { 965 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 966 workt += bs; 967 } 968 arrt = arr; 969 if (T) { 970 for (j=0; j<nz; j++) { 971 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 972 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 973 arrt += bs2; 974 } 975 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 976 } else if (kaij->isTI) { 977 for (j=0; j<nz; j++) { 978 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 979 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 980 arrt += bs2; 981 } 982 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 983 } 984 985 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 986 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 987 988 idiag += bs2; 989 i2 += bs; 990 } 991 xb = t; 992 } 993 else xb = b; 994 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 995 idiag = kaij->ibdiag+bs2*(m-1); 996 i2 = bs * (m-1); 997 if (xb == b) { 998 for (i=m-1; i>=0; i--) { 999 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1000 1001 v = aa + ai[i]; 1002 vi = aj + ai[i]; 1003 nz = diag[i] - ai[i]; 1004 workt = work; 1005 for (j=0; j<nz; j++) { 1006 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1007 workt += bs; 1008 } 1009 arrt = arr; 1010 if (T) { 1011 for (j=0; j<nz; j++) { 1012 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1013 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1014 arrt += bs2; 1015 } 1016 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1017 } else if (kaij->isTI) { 1018 for (j=0; j<nz; j++) { 1019 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1020 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1021 arrt += bs2; 1022 } 1023 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1024 } 1025 1026 v = aa + diag[i] + 1; 1027 vi = aj + diag[i] + 1; 1028 nz = ai[i+1] - diag[i] - 1; 1029 workt = work; 1030 for (j=0; j<nz; j++) { 1031 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1032 workt += bs; 1033 } 1034 arrt = arr; 1035 if (T) { 1036 for (j=0; j<nz; j++) { 1037 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1038 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1039 arrt += bs2; 1040 } 1041 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1042 } else if (kaij->isTI) { 1043 for (j=0; j<nz; j++) { 1044 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1045 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1046 arrt += bs2; 1047 } 1048 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1049 } 1050 1051 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1052 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1053 } 1054 } else { 1055 for (i=m-1; i>=0; i--) { 1056 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1057 v = aa + diag[i] + 1; 1058 vi = aj + diag[i] + 1; 1059 nz = ai[i+1] - diag[i] - 1; 1060 workt = work; 1061 for (j=0; j<nz; j++) { 1062 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1063 workt += bs; 1064 } 1065 arrt = arr; 1066 if (T) { 1067 for (j=0; j<nz; j++) { 1068 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1069 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1070 arrt += bs2; 1071 } 1072 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1073 } else if (kaij->isTI) { 1074 for (j=0; j<nz; j++) { 1075 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1076 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1077 arrt += bs2; 1078 } 1079 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1080 } 1081 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1082 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1083 } 1084 } 1085 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1086 } 1087 } 1088 1089 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1090 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 /*===================================================================================*/ 1095 1096 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1097 { 1098 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 if (!yy) { 1103 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 1104 } else { 1105 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 1106 } 1107 ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1108 /* start the scatter */ 1109 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 1111 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 1117 { 1118 PetscErrorCode ierr; 1119 PetscFunctionBegin; 1120 ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 1125 { 1126 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ is up to date. */ 1131 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 /* ----------------------------------------------------------------*/ 1136 1137 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1138 { 1139 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1140 PetscErrorCode diag = PETSC_FALSE; 1141 PetscErrorCode ierr; 1142 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1143 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1144 1145 PetscFunctionBegin; 1146 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1147 b->getrowactive = PETSC_TRUE; 1148 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row); 1149 1150 if ((!S) && (!T) && (!b->isTI)) { 1151 if (ncols) *ncols = 0; 1152 if (cols) *cols = NULL; 1153 if (values) *values = NULL; 1154 PetscFunctionReturn(0); 1155 } 1156 1157 if (T || b->isTI) { 1158 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 1159 c = nzaij; 1160 for (i=0; i<nzaij; i++) { 1161 /* check if this row contains a diagonal entry */ 1162 if (colsaij[i] == r) { 1163 diag = PETSC_TRUE; 1164 c = i; 1165 } 1166 } 1167 } else nzaij = c = 0; 1168 1169 /* calculate size of row */ 1170 nz = 0; 1171 if (S) nz += q; 1172 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1173 1174 if (cols || values) { 1175 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1176 for (i=0; i<q; i++) { 1177 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1178 v[i] = 0.0; 1179 } 1180 if (b->isTI) { 1181 for (i=0; i<nzaij; i++) { 1182 for (j=0; j<q; j++) { 1183 idx[i*q+j] = colsaij[i]*q+j; 1184 v[i*q+j] = (j==s ? vaij[i] : 0); 1185 } 1186 } 1187 } else if (T) { 1188 for (i=0; i<nzaij; i++) { 1189 for (j=0; j<q; j++) { 1190 idx[i*q+j] = colsaij[i]*q+j; 1191 v[i*q+j] = vaij[i]*T[s+j*p]; 1192 } 1193 } 1194 } 1195 if (S) { 1196 for (j=0; j<q; j++) { 1197 idx[c*q+j] = r*q+j; 1198 v[c*q+j] += S[s+j*p]; 1199 } 1200 } 1201 } 1202 1203 if (ncols) *ncols = nz; 1204 if (cols) *cols = idx; 1205 if (values) *values = v; 1206 PetscFunctionReturn(0); 1207 } 1208 1209 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1210 { 1211 PetscErrorCode ierr; 1212 1213 PetscFunctionBegin; 1214 if (nz) *nz = 0; 1215 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1216 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1217 PetscFunctionReturn(0); 1218 } 1219 1220 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1221 { 1222 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1223 Mat AIJ = b->A; 1224 PetscBool diag = PETSC_FALSE; 1225 Mat MatAIJ,MatOAIJ; 1226 PetscErrorCode ierr; 1227 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1228 PetscInt nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1229 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1230 1231 PetscFunctionBegin; 1232 ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1233 MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1234 MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1235 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1236 b->getrowactive = PETSC_TRUE; 1237 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1238 lrow = row - rstart; 1239 1240 if ((!S) && (!T) && (!b->isTI)) { 1241 if (ncols) *ncols = 0; 1242 if (cols) *cols = NULL; 1243 if (values) *values = NULL; 1244 PetscFunctionReturn(0); 1245 } 1246 1247 r = lrow/p; 1248 s = lrow%p; 1249 1250 if (T || b->isTI) { 1251 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr); 1252 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 1253 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 1254 1255 c = ncolsaij + ncolsoaij; 1256 for (i=0; i<ncolsaij; i++) { 1257 /* check if this row contains a diagonal entry */ 1258 if (colsaij[i] == r) { 1259 diag = PETSC_TRUE; 1260 c = i; 1261 } 1262 } 1263 } else c = 0; 1264 1265 /* calculate size of row */ 1266 nz = 0; 1267 if (S) nz += q; 1268 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1269 1270 if (cols || values) { 1271 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1272 for (i=0; i<q; i++) { 1273 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1274 v[i] = 0.0; 1275 } 1276 if (b->isTI) { 1277 for (i=0; i<ncolsaij; i++) { 1278 for (j=0; j<q; j++) { 1279 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1280 v[i*q+j] = (j==s ? vals[i] : 0.0); 1281 } 1282 } 1283 for (i=0; i<ncolsoaij; i++) { 1284 for (j=0; j<q; j++) { 1285 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1286 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1287 } 1288 } 1289 } else if (T) { 1290 for (i=0; i<ncolsaij; i++) { 1291 for (j=0; j<q; j++) { 1292 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1293 v[i*q+j] = vals[i]*T[s+j*p]; 1294 } 1295 } 1296 for (i=0; i<ncolsoaij; i++) { 1297 for (j=0; j<q; j++) { 1298 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1299 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1300 } 1301 } 1302 } 1303 if (S) { 1304 for (j=0; j<q; j++) { 1305 idx[c*q+j] = (r+rstart/p)*q+j; 1306 v[c*q+j] += S[s+j*p]; 1307 } 1308 } 1309 } 1310 1311 if (ncols) *ncols = nz; 1312 if (cols) *cols = idx; 1313 if (values) *values = v; 1314 PetscFunctionReturn(0); 1315 } 1316 1317 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1318 { 1319 PetscErrorCode ierr; 1320 PetscFunctionBegin; 1321 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1322 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1323 PetscFunctionReturn(0); 1324 } 1325 1326 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1327 { 1328 PetscErrorCode ierr; 1329 Mat A; 1330 1331 PetscFunctionBegin; 1332 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1333 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1334 ierr = MatDestroy(&A);CHKERRQ(ierr); 1335 PetscFunctionReturn(0); 1336 } 1337 1338 /* ---------------------------------------------------------------------------------- */ 1339 /*@C 1340 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1341 1342 [I \otimes S + A \otimes T] 1343 1344 where 1345 S is a dense (p \times q) matrix 1346 T is a dense (p \times q) matrix 1347 A is an AIJ (n \times n) matrix 1348 I is the identity matrix 1349 The resulting matrix is (np \times nq) 1350 1351 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1352 1353 Collective 1354 1355 Input Parameters: 1356 + A - the AIJ matrix 1357 . p - number of rows in S and T 1358 . q - number of columns in S and T 1359 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1360 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1361 1362 Output Parameter: 1363 . kaij - the new KAIJ matrix 1364 1365 Notes: 1366 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1367 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 1368 1369 Developer Notes: 1370 In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state 1371 of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily 1372 rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added, 1373 routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine). 1374 1375 Level: advanced 1376 1377 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1378 @*/ 1379 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1380 { 1381 PetscErrorCode ierr; 1382 1383 PetscFunctionBegin; 1384 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1385 ierr = MatSetType(*kaij,MATKAIJ);CHKERRQ(ierr); 1386 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1387 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1388 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1389 ierr = MatSetUp(*kaij);CHKERRQ(ierr); 1390 PetscFunctionReturn(0); 1391 } 1392 1393 /*MC 1394 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 1395 [I \otimes S + A \otimes T], 1396 where 1397 S is a dense (p \times q) matrix, 1398 T is a dense (p \times q) matrix, 1399 A is an AIJ (n \times n) matrix, 1400 and I is the identity matrix. 1401 The resulting matrix is (np \times nq). 1402 1403 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1404 1405 Notes: 1406 A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 1407 where x and b are column vectors containing the row-major representations of X and B. 1408 1409 Level: advanced 1410 1411 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1412 M*/ 1413 1414 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1415 { 1416 PetscErrorCode ierr; 1417 Mat_MPIKAIJ *b; 1418 PetscMPIInt size; 1419 1420 PetscFunctionBegin; 1421 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1422 A->data = (void*)b; 1423 1424 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1425 1426 b->w = NULL; 1427 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 1428 if (size == 1) { 1429 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1430 A->ops->destroy = MatDestroy_SeqKAIJ; 1431 A->ops->mult = MatMult_SeqKAIJ; 1432 A->ops->multadd = MatMultAdd_SeqKAIJ; 1433 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1434 A->ops->getrow = MatGetRow_SeqKAIJ; 1435 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1436 A->ops->sor = MatSOR_SeqKAIJ; 1437 } else { 1438 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1439 A->ops->destroy = MatDestroy_MPIKAIJ; 1440 A->ops->mult = MatMult_MPIKAIJ; 1441 A->ops->multadd = MatMultAdd_MPIKAIJ; 1442 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1443 A->ops->getrow = MatGetRow_MPIKAIJ; 1444 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1445 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1446 } 1447 A->ops->setup = MatSetUp_KAIJ; 1448 A->ops->view = MatView_KAIJ; 1449 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1450 PetscFunctionReturn(0); 1451 } 1452