1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameters: 76 + m - the number of rows in S 77 . n - the number of columns in S 78 - S - the S matrix, in form of a scalar array in column-major format 79 80 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 if (m) *m = b->p; 91 if (n) *n = b->q; 92 if (S) *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98 99 Not Collective; the entire S is stored and returned independently on all processes. 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameters: 105 + m - the number of rows in S 106 . n - the number of columns in S 107 - S - the S matrix, in form of a scalar array in column-major format 108 109 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 if (m) *m = b->p; 120 if (n) *n = b->q; 121 if (S) *S = b->S; 122 PetscFunctionReturn(0); 123 } 124 125 /*@C 126 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127 128 Not collective 129 130 Input Parameter: 131 . A - the KAIJ matrix 132 133 Output Parameter: 134 . S - location of pointer to array obtained with MatKAIJGetS() 135 136 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 138 139 Level: advanced 140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141 @*/ 142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143 { 144 PetscErrorCode ierr; 145 146 PetscFunctionBegin; 147 if (S) *S = NULL; 148 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 /*@C 153 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 154 155 Not collective 156 157 Input Parameter: 158 . A - the KAIJ matrix 159 160 Output Parameter: 161 . S - location of pointer to array obtained with MatKAIJGetS() 162 163 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 164 If NULL is passed, it will not attempt to zero the array pointer. 165 166 Level: advanced 167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 168 @*/ 169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 170 { 171 PetscFunctionBegin; 172 if (S) *S = NULL; 173 PetscFunctionReturn(0); 174 } 175 176 /*@C 177 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 178 179 Not Collective; the entire T is stored and returned independently on all processes 180 181 Input Parameter: 182 . A - the KAIJ matrix 183 184 Output Parameter: 185 + m - the number of rows in T 186 . n - the number of columns in T 187 - T - the T matrix, in form of a scalar array in column-major format 188 189 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 190 191 Level: advanced 192 193 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 194 @*/ 195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 196 { 197 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 198 PetscFunctionBegin; 199 if (m) *m = b->p; 200 if (n) *n = b->q; 201 if (T) *T = b->T; 202 PetscFunctionReturn(0); 203 } 204 205 /*@C 206 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 207 208 Not Collective; the entire T is stored and returned independently on all processes 209 210 Input Parameter: 211 . A - the KAIJ matrix 212 213 Output Parameter: 214 + m - the number of rows in T 215 . n - the number of columns in T 216 - T - the T matrix, in form of a scalar array in column-major format 217 218 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 219 220 Level: advanced 221 222 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 223 @*/ 224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 225 { 226 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 227 PetscFunctionBegin; 228 if (m) *m = b->p; 229 if (n) *n = b->q; 230 if (T) *T = b->T; 231 PetscFunctionReturn(0); 232 } 233 234 /*@C 235 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 236 237 Not collective 238 239 Input Parameter: 240 . A - the KAIJ matrix 241 242 Output Parameter: 243 . T - location of pointer to array obtained with MatKAIJGetS() 244 245 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 246 If NULL is passed, it will not attempt to zero the array pointer. 247 248 Level: advanced 249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 250 @*/ 251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 252 { 253 PetscErrorCode ierr; 254 255 PetscFunctionBegin; 256 if (T) *T = NULL; 257 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 258 PetscFunctionReturn(0); 259 } 260 261 /*@C 262 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 263 264 Not collective 265 266 Input Parameter: 267 . A - the KAIJ matrix 268 269 Output Parameter: 270 . T - location of pointer to array obtained with MatKAIJGetS() 271 272 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 273 If NULL is passed, it will not attempt to zero the array pointer. 274 275 Level: advanced 276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 277 @*/ 278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 279 { 280 PetscFunctionBegin; 281 if (T) *T = NULL; 282 PetscFunctionReturn(0); 283 } 284 285 /*@ 286 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 287 288 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 289 290 Input Parameters: 291 + A - the KAIJ matrix 292 - B - the AIJ matrix 293 294 Notes: 295 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 296 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 297 298 Level: advanced 299 300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 301 @*/ 302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 303 { 304 PetscErrorCode ierr; 305 PetscMPIInt size; 306 307 PetscFunctionBegin; 308 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 309 if (size == 1) { 310 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 311 a->AIJ = B; 312 } else { 313 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 314 a->A = B; 315 } 316 ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 317 PetscFunctionReturn(0); 318 } 319 320 /*@C 321 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 322 323 Logically Collective; the entire S is stored independently on all processes. 324 325 Input Parameters: 326 + A - the KAIJ matrix 327 . p - the number of rows in S 328 . q - the number of columns in S 329 - S - the S matrix, in form of a scalar array in column-major format 330 331 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 332 The S matrix is copied, so the user can destroy this array. 333 334 Level: Advanced 335 336 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 337 @*/ 338 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 339 { 340 PetscErrorCode ierr; 341 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 342 343 PetscFunctionBegin; 344 ierr = PetscFree(a->S);CHKERRQ(ierr); 345 if (S) { 346 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 347 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 348 } else a->S = NULL; 349 350 a->p = p; 351 a->q = q; 352 PetscFunctionReturn(0); 353 } 354 355 /*@C 356 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 357 358 Logically Collective; the entire T is stored independently on all processes. 359 360 Input Parameters: 361 + A - the KAIJ matrix 362 . p - the number of rows in S 363 . q - the number of columns in S 364 - T - the T matrix, in form of a scalar array in column-major format 365 366 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 367 The T matrix is copied, so the user can destroy this array. 368 369 Level: Advanced 370 371 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 372 @*/ 373 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 374 { 375 PetscErrorCode ierr; 376 PetscInt i,j; 377 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 378 PetscBool isTI = PETSC_FALSE; 379 380 PetscFunctionBegin; 381 /* check if T is an identity matrix */ 382 if (T && (p == q)) { 383 isTI = PETSC_TRUE; 384 for (i=0; i<p; i++) { 385 for (j=0; j<q; j++) { 386 if (i == j) { 387 /* diagonal term must be 1 */ 388 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 389 } else { 390 /* off-diagonal term must be 0 */ 391 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 392 } 393 } 394 } 395 } 396 a->isTI = isTI; 397 398 ierr = PetscFree(a->T);CHKERRQ(ierr); 399 if (T && (!isTI)) { 400 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 401 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 402 } else a->T = NULL; 403 404 a->p = p; 405 a->q = q; 406 PetscFunctionReturn(0); 407 } 408 409 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 410 { 411 PetscErrorCode ierr; 412 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 413 414 PetscFunctionBegin; 415 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 416 ierr = PetscFree(b->S);CHKERRQ(ierr); 417 ierr = PetscFree(b->T);CHKERRQ(ierr); 418 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 419 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 420 ierr = PetscFree(A->data);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 424 PetscErrorCode MatSetUp_KAIJ(Mat A) 425 { 426 PetscErrorCode ierr; 427 PetscInt n; 428 PetscMPIInt size; 429 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 430 431 PetscFunctionBegin; 432 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 433 if (size == 1) { 434 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 435 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 436 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 437 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 438 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 439 } else { 440 Mat_MPIKAIJ *a; 441 Mat_MPIAIJ *mpiaij; 442 IS from,to; 443 Vec gvec; 444 PetscScalar *T; 445 PetscInt i,j; 446 447 a = (Mat_MPIKAIJ*)A->data; 448 mpiaij = (Mat_MPIAIJ*)a->A->data; 449 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 450 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 451 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 452 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 453 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 454 455 if (a->isTI) { 456 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 457 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 458 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 459 * to pass in. */ 460 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 461 for (i=0; i<a->p; i++) { 462 for (j=0; j<a->q; j++) { 463 if (i==j) T[i+j*a->p] = 1.0; 464 else T[i+j*a->p] = 0.0; 465 } 466 } 467 } else T = a->T; 468 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 469 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 470 if (a->isTI) { 471 ierr = PetscFree(T);CHKERRQ(ierr); 472 } 473 474 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 475 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 476 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 477 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 478 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 479 480 /* create two temporary Index sets for build scatter gather */ 481 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 482 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 483 484 /* create temporary global vector to generate scatter context */ 485 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 486 487 /* generate the scatter context */ 488 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 489 490 ierr = ISDestroy(&from);CHKERRQ(ierr); 491 ierr = ISDestroy(&to);CHKERRQ(ierr); 492 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 493 } 494 495 A->assembled = PETSC_TRUE; 496 PetscFunctionReturn(0); 497 } 498 499 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer) 500 { 501 PetscViewerFormat format; 502 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 503 Mat B; 504 PetscInt i; 505 PetscErrorCode ierr; 506 PetscBool ismpikaij; 507 508 PetscFunctionBegin; 509 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 510 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 511 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 512 ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr); 513 514 /* Print appropriate details for S. */ 515 if (!a->S) { 516 ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n"); 517 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 518 ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr); 519 for (i=0; i<(a->p * a->q); i++) { 520 #if defined(PETSC_USE_COMPLEX) 521 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr); 522 #else 523 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr); 524 #endif 525 } 526 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 527 } 528 529 /* Print appropriate details for T. */ 530 if (a->isTI) { 531 ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n"); 532 } else if (!a->T) { 533 ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n"); 534 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 535 ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr); 536 for (i=0; i<(a->p * a->q); i++) { 537 #if defined(PETSC_USE_COMPLEX) 538 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr); 539 #else 540 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr); 541 #endif 542 } 543 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 544 } 545 546 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 547 ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr); 548 if (ismpikaij) { 549 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 550 ierr = MatView(b->A,viewer);CHKERRQ(ierr); 551 } else { 552 ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr); 553 } 554 555 } else { 556 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 557 if (ismpikaij) { 558 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 559 } else { 560 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 561 } 562 ierr = MatView(B,viewer);CHKERRQ(ierr); 563 ierr = MatDestroy(&B);CHKERRQ(ierr); 564 } 565 PetscFunctionReturn(0); 566 } 567 568 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 569 { 570 PetscErrorCode ierr; 571 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 572 573 PetscFunctionBegin; 574 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 575 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 576 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 577 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 578 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 579 ierr = PetscFree(b->S);CHKERRQ(ierr); 580 ierr = PetscFree(b->T);CHKERRQ(ierr); 581 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 582 ierr = PetscFree(A->data);CHKERRQ(ierr); 583 PetscFunctionReturn(0); 584 } 585 586 /* --------------------------------------------------------------------------------------*/ 587 588 /* zz = yy + Axx */ 589 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 590 { 591 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 592 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 593 const PetscScalar *s = b->S, *t = b->T; 594 const PetscScalar *x,*v,*bx; 595 PetscScalar *y,*sums; 596 PetscErrorCode ierr; 597 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 598 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 599 600 PetscFunctionBegin; 601 if (!yy) { 602 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 603 } else { 604 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 605 } 606 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 607 608 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 609 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 610 idx = a->j; 611 v = a->a; 612 ii = a->i; 613 614 if (b->isTI) { 615 for (i=0; i<m; i++) { 616 jrow = ii[i]; 617 n = ii[i+1] - jrow; 618 sums = y + p*i; 619 for (j=0; j<n; j++) { 620 for (k=0; k<p; k++) { 621 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 622 } 623 } 624 } 625 ierr = PetscLogFlops((a->nz)*3*p);CHKERRQ(ierr); 626 } else if (t) { 627 for (i=0; i<m; i++) { 628 jrow = ii[i]; 629 n = ii[i+1] - jrow; 630 sums = y + p*i; 631 bx = x + q*i; 632 for (j=0; j<n; j++) { 633 for (k=0; k<p; k++) { 634 for (l=0; l<q; l++) { 635 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 636 } 637 } 638 } 639 } 640 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 641 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 642 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 643 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 644 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 645 } 646 if (s) { 647 for (i=0; i<m; i++) { 648 sums = y + p*i; 649 bx = x + q*i; 650 if (i < b->AIJ->cmap->n) { 651 for (j=0; j<q; j++) { 652 for (k=0; k<p; k++) { 653 sums[k] += s[k+j*p]*bx[j]; 654 } 655 } 656 } 657 } 658 ierr = PetscLogFlops(m*2*p*q);CHKERRQ(ierr); 659 } 660 661 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 662 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 667 { 668 PetscErrorCode ierr; 669 PetscFunctionBegin; 670 ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 671 PetscFunctionReturn(0); 672 } 673 674 #include <petsc/private/kernels/blockinvert.h> 675 676 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 677 { 678 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 679 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 680 const PetscScalar *S = b->S; 681 const PetscScalar *T = b->T; 682 const PetscScalar *v = a->a; 683 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 684 PetscErrorCode ierr; 685 PetscInt i,j,*v_pivots,dof,dof2; 686 PetscScalar *diag,aval,*v_work; 687 688 PetscFunctionBegin; 689 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 690 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 691 692 dof = p; 693 dof2 = dof*dof; 694 695 if (b->ibdiagvalid) { 696 if (values) *values = b->ibdiag; 697 PetscFunctionReturn(0); 698 } 699 if (!b->ibdiag) { 700 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 701 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 702 } 703 if (values) *values = b->ibdiag; 704 diag = b->ibdiag; 705 706 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 707 for (i=0; i<m; i++) { 708 if (S) { 709 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 710 } else { 711 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 712 } 713 if (b->isTI) { 714 aval = 0; 715 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 716 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 717 } else if (T) { 718 aval = 0; 719 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 720 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 721 } 722 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 723 diag += dof2; 724 } 725 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 726 727 b->ibdiagvalid = PETSC_TRUE; 728 PetscFunctionReturn(0); 729 } 730 731 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 732 { 733 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 734 735 PetscFunctionBegin; 736 *B = kaij->AIJ; 737 PetscFunctionReturn(0); 738 } 739 740 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 741 { 742 PetscErrorCode ierr; 743 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 744 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 745 const PetscScalar *aa = a->a, *T = kaij->T, *v; 746 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 747 const PetscScalar *b, *xb, *idiag; 748 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 749 PetscInt i, j, k, i2, bs, bs2, nz; 750 751 PetscFunctionBegin; 752 its = its*lits; 753 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 754 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 755 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 756 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 757 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks"); 758 else {bs = p; bs2 = bs*bs; } 759 760 if (!m) PetscFunctionReturn(0); 761 762 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); } 763 idiag = kaij->ibdiag; 764 diag = a->diag; 765 766 if (!kaij->sor.setup) { 767 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 768 kaij->sor.setup = PETSC_TRUE; 769 } 770 y = kaij->sor.y; 771 w = kaij->sor.w; 772 work = kaij->sor.work; 773 t = kaij->sor.t; 774 arr = kaij->sor.arr; 775 776 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 777 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 778 779 if (flag & SOR_ZERO_INITIAL_GUESS) { 780 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 781 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 782 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 783 i2 = bs; 784 idiag += bs2; 785 for (i=1; i<m; i++) { 786 v = aa + ai[i]; 787 vi = aj + ai[i]; 788 nz = diag[i] - ai[i]; 789 790 if (T) { /* b - T (Arow * x) */ 791 ierr = PetscMemzero(w,bs*sizeof(PetscScalar)); 792 for (j=0; j<nz; j++) { 793 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 794 } 795 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 796 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 797 } else if (kaij->isTI) { 798 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 799 for (j=0; j<nz; j++) { 800 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 801 } 802 } else { 803 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 804 } 805 806 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 807 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 808 809 idiag += bs2; 810 i2 += bs; 811 } 812 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 813 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 814 xb = t; 815 } else xb = b; 816 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 817 idiag = kaij->ibdiag+bs2*(m-1); 818 i2 = bs * (m-1); 819 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 820 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 821 i2 -= bs; 822 idiag -= bs2; 823 for (i=m-2; i>=0; i--) { 824 v = aa + diag[i] + 1 ; 825 vi = aj + diag[i] + 1; 826 nz = ai[i+1] - diag[i] - 1; 827 828 if (T) { /* FIXME: This branch untested */ 829 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 830 /* copy all rows of x that are needed into contiguous space */ 831 workt = work; 832 for (j=0; j<nz; j++) { 833 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 834 workt += bs; 835 } 836 arrt = arr; 837 for (j=0; j<nz; j++) { 838 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 839 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 840 arrt += bs2; 841 } 842 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 843 } else if (kaij->isTI) { 844 ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 845 for (j=0; j<nz; j++) { 846 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 847 } 848 } 849 850 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 851 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 852 853 idiag -= bs2; 854 i2 -= bs; 855 } 856 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 857 } 858 its--; 859 } 860 while (its--) { /* FIXME: This branch not updated */ 861 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 862 i2 = 0; 863 idiag = kaij->ibdiag; 864 for (i=0; i<m; i++) { 865 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 866 867 v = aa + ai[i]; 868 vi = aj + ai[i]; 869 nz = diag[i] - ai[i]; 870 workt = work; 871 for (j=0; j<nz; j++) { 872 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 873 workt += bs; 874 } 875 arrt = arr; 876 if (T) { 877 for (j=0; j<nz; j++) { 878 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 879 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 880 arrt += bs2; 881 } 882 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 883 } else if (kaij->isTI) { 884 for (j=0; j<nz; j++) { 885 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 886 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 887 arrt += bs2; 888 } 889 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 890 } 891 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 892 893 v = aa + diag[i] + 1; 894 vi = aj + diag[i] + 1; 895 nz = ai[i+1] - diag[i] - 1; 896 workt = work; 897 for (j=0; j<nz; j++) { 898 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 899 workt += bs; 900 } 901 arrt = arr; 902 if (T) { 903 for (j=0; j<nz; j++) { 904 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 905 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 906 arrt += bs2; 907 } 908 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 909 } else if (kaij->isTI) { 910 for (j=0; j<nz; j++) { 911 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 912 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 913 arrt += bs2; 914 } 915 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 916 } 917 918 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 919 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 920 921 idiag += bs2; 922 i2 += bs; 923 } 924 xb = t; 925 } 926 else xb = b; 927 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 928 idiag = kaij->ibdiag+bs2*(m-1); 929 i2 = bs * (m-1); 930 if (xb == b) { 931 for (i=m-1; i>=0; i--) { 932 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 933 934 v = aa + ai[i]; 935 vi = aj + ai[i]; 936 nz = diag[i] - ai[i]; 937 workt = work; 938 for (j=0; j<nz; j++) { 939 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 940 workt += bs; 941 } 942 arrt = arr; 943 if (T) { 944 for (j=0; j<nz; j++) { 945 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 946 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 947 arrt += bs2; 948 } 949 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 950 } else if (kaij->isTI) { 951 for (j=0; j<nz; j++) { 952 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 953 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 954 arrt += bs2; 955 } 956 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 957 } 958 959 v = aa + diag[i] + 1; 960 vi = aj + diag[i] + 1; 961 nz = ai[i+1] - diag[i] - 1; 962 workt = work; 963 for (j=0; j<nz; j++) { 964 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 965 workt += bs; 966 } 967 arrt = arr; 968 if (T) { 969 for (j=0; j<nz; j++) { 970 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 971 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 972 arrt += bs2; 973 } 974 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 975 } else if (kaij->isTI) { 976 for (j=0; j<nz; j++) { 977 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 978 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 979 arrt += bs2; 980 } 981 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 982 } 983 984 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 985 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 986 } 987 } else { 988 for (i=m-1; i>=0; i--) { 989 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 990 v = aa + diag[i] + 1; 991 vi = aj + diag[i] + 1; 992 nz = ai[i+1] - diag[i] - 1; 993 workt = work; 994 for (j=0; j<nz; j++) { 995 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 996 workt += bs; 997 } 998 arrt = arr; 999 if (T) { 1000 for (j=0; j<nz; j++) { 1001 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1002 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1003 arrt += bs2; 1004 } 1005 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1006 } else if (kaij->isTI) { 1007 for (j=0; j<nz; j++) { 1008 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1009 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1010 arrt += bs2; 1011 } 1012 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1013 } 1014 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1015 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1016 } 1017 idiag -= bs2; 1018 i2 -= bs; 1019 } 1020 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1021 } 1022 } 1023 1024 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1025 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1026 PetscFunctionReturn(0); 1027 } 1028 1029 /*===================================================================================*/ 1030 1031 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1032 { 1033 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1034 PetscErrorCode ierr; 1035 1036 PetscFunctionBegin; 1037 if (!yy) { 1038 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 1039 } else { 1040 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 1041 } 1042 /* start the scatter */ 1043 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1044 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 1045 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1046 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 1047 PetscFunctionReturn(0); 1048 } 1049 1050 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 1051 { 1052 PetscErrorCode ierr; 1053 PetscFunctionBegin; 1054 ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 1058 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 1059 { 1060 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1061 PetscErrorCode ierr; 1062 1063 PetscFunctionBegin; 1064 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 1065 PetscFunctionReturn(0); 1066 } 1067 1068 /* ----------------------------------------------------------------*/ 1069 1070 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1071 { 1072 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1073 PetscErrorCode diag = PETSC_FALSE; 1074 PetscErrorCode ierr; 1075 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1076 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1077 1078 PetscFunctionBegin; 1079 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1080 b->getrowactive = PETSC_TRUE; 1081 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 1082 1083 if ((!S) && (!T) && (!b->isTI)) { 1084 if (ncols) *ncols = 0; 1085 if (cols) *cols = NULL; 1086 if (values) *values = NULL; 1087 PetscFunctionReturn(0); 1088 } 1089 1090 if (T || b->isTI) { 1091 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 1092 c = nzaij; 1093 for (i=0; i<nzaij; i++) { 1094 /* check if this row contains a diagonal entry */ 1095 if (colsaij[i] == r) { 1096 diag = PETSC_TRUE; 1097 c = i; 1098 } 1099 } 1100 } else nzaij = c = 0; 1101 1102 /* calculate size of row */ 1103 nz = 0; 1104 if (S) nz += q; 1105 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1106 1107 if (cols || values) { 1108 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1109 for (i=0; i<q; i++) { 1110 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1111 v[i] = 0.0; 1112 } 1113 if (b->isTI) { 1114 for (i=0; i<nzaij; i++) { 1115 for (j=0; j<q; j++) { 1116 idx[i*q+j] = colsaij[i]*q+j; 1117 v[i*q+j] = (j==s ? vaij[i] : 0); 1118 } 1119 } 1120 } else if (T) { 1121 for (i=0; i<nzaij; i++) { 1122 for (j=0; j<q; j++) { 1123 idx[i*q+j] = colsaij[i]*q+j; 1124 v[i*q+j] = vaij[i]*T[s+j*p]; 1125 } 1126 } 1127 } 1128 if (S) { 1129 for (j=0; j<q; j++) { 1130 idx[c*q+j] = r*q+j; 1131 v[c*q+j] += S[s+j*p]; 1132 } 1133 } 1134 } 1135 1136 if (ncols) *ncols = nz; 1137 if (cols) *cols = idx; 1138 if (values) *values = v; 1139 PetscFunctionReturn(0); 1140 } 1141 1142 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1143 { 1144 PetscErrorCode ierr; 1145 PetscFunctionBegin; 1146 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1147 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1148 PetscFunctionReturn(0); 1149 } 1150 1151 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1152 { 1153 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1154 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1155 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1156 Mat AIJ = b->A; 1157 PetscBool diag = PETSC_FALSE; 1158 PetscErrorCode ierr; 1159 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1160 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1161 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1162 1163 PetscFunctionBegin; 1164 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1165 b->getrowactive = PETSC_TRUE; 1166 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1167 lrow = row - rstart; 1168 1169 if ((!S) && (!T) && (!b->isTI)) { 1170 if (ncols) *ncols = 0; 1171 if (cols) *cols = NULL; 1172 if (values) *values = NULL; 1173 PetscFunctionReturn(0); 1174 } 1175 1176 r = lrow/p; 1177 s = lrow%p; 1178 1179 if (T || b->isTI) { 1180 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 1181 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 1182 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 1183 1184 c = ncolsaij + ncolsoaij; 1185 for (i=0; i<ncolsaij; i++) { 1186 /* check if this row contains a diagonal entry */ 1187 if (colsaij[i] == r) { 1188 diag = PETSC_TRUE; 1189 c = i; 1190 } 1191 } 1192 } else c = 0; 1193 1194 /* calculate size of row */ 1195 nz = 0; 1196 if (S) nz += q; 1197 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1198 1199 if (cols || values) { 1200 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1201 for (i=0; i<q; i++) { 1202 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1203 v[i] = 0.0; 1204 } 1205 if (b->isTI) { 1206 for (i=0; i<ncolsaij; i++) { 1207 for (j=0; j<q; j++) { 1208 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1209 v[i*q+j] = (j==s ? vals[i] : 0.0); 1210 } 1211 } 1212 for (i=0; i<ncolsoaij; i++) { 1213 for (j=0; j<q; j++) { 1214 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1215 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1216 } 1217 } 1218 } else if (T) { 1219 for (i=0; i<ncolsaij; i++) { 1220 for (j=0; j<q; j++) { 1221 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1222 v[i*q+j] = vals[i]*T[s+j*p]; 1223 } 1224 } 1225 for (i=0; i<ncolsoaij; i++) { 1226 for (j=0; j<q; j++) { 1227 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1228 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1229 } 1230 } 1231 } 1232 if (S) { 1233 for (j=0; j<q; j++) { 1234 idx[c*q+j] = (r+rstart/p)*q+j; 1235 v[c*q+j] += S[s+j*p]; 1236 } 1237 } 1238 } 1239 1240 if (ncols) *ncols = nz; 1241 if (cols) *cols = idx; 1242 if (values) *values = v; 1243 PetscFunctionReturn(0); 1244 } 1245 1246 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1247 { 1248 PetscErrorCode ierr; 1249 PetscFunctionBegin; 1250 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1251 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1252 PetscFunctionReturn(0); 1253 } 1254 1255 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1256 { 1257 PetscErrorCode ierr; 1258 Mat A; 1259 1260 PetscFunctionBegin; 1261 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1262 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1263 ierr = MatDestroy(&A);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 /* ---------------------------------------------------------------------------------- */ 1268 /*@C 1269 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1270 1271 [I \otimes S + A \otimes T] 1272 1273 where 1274 S is a dense (p \times q) matrix 1275 T is a dense (p \times q) matrix 1276 A is an AIJ (n \times n) matrix 1277 I is the identity matrix 1278 The resulting matrix is (np \times nq) 1279 1280 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1281 1282 Collective 1283 1284 Input Parameters: 1285 + A - the AIJ matrix 1286 . p - number of rows in S and T 1287 . q - number of columns in S and T 1288 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1289 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1290 1291 Output Parameter: 1292 . kaij - the new KAIJ matrix 1293 1294 Notes: 1295 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1296 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 1297 1298 Level: advanced 1299 1300 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1301 @*/ 1302 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1303 { 1304 PetscErrorCode ierr; 1305 PetscMPIInt size; 1306 1307 PetscFunctionBegin; 1308 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1309 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1310 if (size == 1) { 1311 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1312 } else { 1313 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1314 } 1315 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1316 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1317 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1318 ierr = MatSetUp(*kaij); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 /*MC 1323 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 1324 1325 [I \otimes S + A \otimes T] 1326 1327 where 1328 S is a dense (p \times q) matrix 1329 T is a dense (p \times q) matrix 1330 A is an AIJ (n \times n) matrix 1331 I is the identity matrix 1332 The resulting matrix is (np \times nq) 1333 1334 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1335 1336 Level: advanced 1337 1338 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1339 M*/ 1340 1341 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1342 { 1343 PetscErrorCode ierr; 1344 Mat_MPIKAIJ *b; 1345 PetscMPIInt size; 1346 1347 PetscFunctionBegin; 1348 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1349 A->data = (void*)b; 1350 1351 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1352 1353 A->ops->setup = MatSetUp_KAIJ; 1354 1355 b->w = 0; 1356 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1357 if (size == 1) { 1358 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1359 A->ops->setup = MatSetUp_KAIJ; 1360 A->ops->destroy = MatDestroy_SeqKAIJ; 1361 A->ops->view = MatView_KAIJ; 1362 A->ops->mult = MatMult_SeqKAIJ; 1363 A->ops->multadd = MatMultAdd_SeqKAIJ; 1364 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1365 A->ops->getrow = MatGetRow_SeqKAIJ; 1366 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1367 A->ops->sor = MatSOR_SeqKAIJ; 1368 } else { 1369 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1370 A->ops->setup = MatSetUp_KAIJ; 1371 A->ops->destroy = MatDestroy_MPIKAIJ; 1372 A->ops->view = MatView_KAIJ; 1373 A->ops->mult = MatMult_MPIKAIJ; 1374 A->ops->multadd = MatMultAdd_MPIKAIJ; 1375 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1376 A->ops->getrow = MatGetRow_MPIKAIJ; 1377 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1378 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1379 } 1380 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1381 PetscFunctionReturn(0); 1382 } 1383