1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameters: 76 + m - the number of rows in S 77 . n - the number of columns in S 78 - S - the S matrix, in form of a scalar array in column-major format 79 80 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 if (m) *m = b->p; 91 if (n) *n = b->q; 92 if (S) *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98 99 Not Collective; the entire S is stored and returned independently on all processes. 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameters: 105 + m - the number of rows in S 106 . n - the number of columns in S 107 - S - the S matrix, in form of a scalar array in column-major format 108 109 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 if (m) *m = b->p; 120 if (n) *n = b->q; 121 if (S) *S = b->S; 122 PetscFunctionReturn(0); 123 } 124 125 /*@C 126 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127 128 Not collective 129 130 Input Parameter: 131 . A - the KAIJ matrix 132 133 Output Parameter: 134 . S - location of pointer to array obtained with MatKAIJGetS() 135 136 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 138 139 Level: advanced 140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141 @*/ 142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143 { 144 PetscFunctionBegin; 145 if (S) *S = NULL; 146 PetscObjectStateIncrease((PetscObject)A); 147 PetscFunctionReturn(0); 148 } 149 150 /*@C 151 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 152 153 Not collective 154 155 Input Parameter: 156 . A - the KAIJ matrix 157 158 Output Parameter: 159 . S - location of pointer to array obtained with MatKAIJGetS() 160 161 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 162 If NULL is passed, it will not attempt to zero the array pointer. 163 164 Level: advanced 165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 166 @*/ 167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 168 { 169 PetscFunctionBegin; 170 if (S) *S = NULL; 171 PetscFunctionReturn(0); 172 } 173 174 /*@C 175 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 176 177 Not Collective; the entire T is stored and returned independently on all processes 178 179 Input Parameter: 180 . A - the KAIJ matrix 181 182 Output Parameter: 183 + m - the number of rows in T 184 . n - the number of columns in T 185 - T - the T matrix, in form of a scalar array in column-major format 186 187 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 188 189 Level: advanced 190 191 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 192 @*/ 193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 194 { 195 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 196 PetscFunctionBegin; 197 if (m) *m = b->p; 198 if (n) *n = b->q; 199 if (T) *T = b->T; 200 PetscFunctionReturn(0); 201 } 202 203 /*@C 204 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 205 206 Not Collective; the entire T is stored and returned independently on all processes 207 208 Input Parameter: 209 . A - the KAIJ matrix 210 211 Output Parameter: 212 + m - the number of rows in T 213 . n - the number of columns in T 214 - T - the T matrix, in form of a scalar array in column-major format 215 216 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 217 218 Level: advanced 219 220 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 221 @*/ 222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 223 { 224 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 225 PetscFunctionBegin; 226 if (m) *m = b->p; 227 if (n) *n = b->q; 228 if (T) *T = b->T; 229 PetscFunctionReturn(0); 230 } 231 232 /*@C 233 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 234 235 Not collective 236 237 Input Parameter: 238 . A - the KAIJ matrix 239 240 Output Parameter: 241 . T - location of pointer to array obtained with MatKAIJGetS() 242 243 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 244 If NULL is passed, it will not attempt to zero the array pointer. 245 246 Level: advanced 247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 248 @*/ 249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 250 { 251 PetscFunctionBegin; 252 if (T) *T = NULL; 253 PetscObjectStateIncrease((PetscObject)A); 254 PetscFunctionReturn(0); 255 } 256 257 /*@C 258 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 259 260 Not collective 261 262 Input Parameter: 263 . A - the KAIJ matrix 264 265 Output Parameter: 266 . T - location of pointer to array obtained with MatKAIJGetS() 267 268 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 269 If NULL is passed, it will not attempt to zero the array pointer. 270 271 Level: advanced 272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 273 @*/ 274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 275 { 276 PetscFunctionBegin; 277 if (T) *T = NULL; 278 PetscFunctionReturn(0); 279 } 280 281 /*@ 282 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 283 284 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 285 286 Input Parameters: 287 + A - the KAIJ matrix 288 - B - the AIJ matrix 289 290 Notes: 291 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 292 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 293 294 Level: advanced 295 296 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 297 @*/ 298 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 299 { 300 PetscErrorCode ierr; 301 PetscMPIInt size; 302 303 PetscFunctionBegin; 304 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 305 if (size == 1) { 306 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 307 a->AIJ = B; 308 } else { 309 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 310 a->A = B; 311 } 312 ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 /*@C 317 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 318 319 Logically Collective; the entire S is stored independently on all processes. 320 321 Input Parameters: 322 + A - the KAIJ matrix 323 . p - the number of rows in S 324 . q - the number of columns in S 325 - S - the S matrix, in form of a scalar array in column-major format 326 327 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 328 The S matrix is copied, so the user can destroy this array. 329 330 Level: Advanced 331 332 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 333 @*/ 334 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 335 { 336 PetscErrorCode ierr; 337 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 338 339 PetscFunctionBegin; 340 ierr = PetscFree(a->S);CHKERRQ(ierr); 341 if (S) { 342 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 343 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 344 } else a->S = NULL; 345 346 a->p = p; 347 a->q = q; 348 PetscFunctionReturn(0); 349 } 350 351 /*@C 352 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 353 354 Logically Collective; the entire T is stored independently on all processes. 355 356 Input Parameters: 357 + A - the KAIJ matrix 358 . p - the number of rows in S 359 . q - the number of columns in S 360 - T - the T matrix, in form of a scalar array in column-major format 361 362 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 363 The T matrix is copied, so the user can destroy this array. 364 365 Level: Advanced 366 367 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 368 @*/ 369 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 370 { 371 PetscErrorCode ierr; 372 PetscInt i,j; 373 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 374 PetscBool isTI = PETSC_FALSE; 375 376 PetscFunctionBegin; 377 /* check if T is an identity matrix */ 378 if (T && (p == q)) { 379 isTI = PETSC_TRUE; 380 for (i=0; i<p; i++) { 381 for (j=0; j<q; j++) { 382 if (i == j) { 383 /* diagonal term must be 1 */ 384 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 385 } else { 386 /* off-diagonal term must be 0 */ 387 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 388 } 389 } 390 } 391 } 392 a->isTI = isTI; 393 394 ierr = PetscFree(a->T);CHKERRQ(ierr); 395 if (T && (!isTI)) { 396 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 397 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 398 } else a->T = NULL; 399 400 a->p = p; 401 a->q = q; 402 PetscFunctionReturn(0); 403 } 404 405 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 406 { 407 PetscErrorCode ierr; 408 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 409 410 PetscFunctionBegin; 411 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 412 ierr = PetscFree(b->S);CHKERRQ(ierr); 413 ierr = PetscFree(b->T);CHKERRQ(ierr); 414 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 415 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 416 ierr = PetscFree(A->data);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 PetscErrorCode MatSetUp_KAIJ(Mat A) 421 { 422 PetscErrorCode ierr; 423 PetscInt n; 424 PetscMPIInt size; 425 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 426 427 PetscFunctionBegin; 428 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 429 if (size == 1) { 430 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 431 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 432 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 433 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 434 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 435 } else { 436 Mat_MPIKAIJ *a; 437 Mat_MPIAIJ *mpiaij; 438 IS from,to; 439 Vec gvec; 440 PetscScalar *T; 441 PetscInt i,j; 442 443 a = (Mat_MPIKAIJ*)A->data; 444 mpiaij = (Mat_MPIAIJ*)a->A->data; 445 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 446 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 447 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 448 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 449 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 450 451 if (a->isTI) { 452 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 453 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 454 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 455 * to pass in. */ 456 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 457 for (i=0; i<a->p; i++) { 458 for (j=0; j<a->q; j++) { 459 if (i==j) T[i+j*a->p] = 1.0; 460 else T[i+j*a->p] = 0.0; 461 } 462 } 463 } else T = a->T; 464 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 465 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 466 if (a->isTI) { 467 ierr = PetscFree(T);CHKERRQ(ierr); 468 } 469 470 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 471 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 472 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 473 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 474 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 475 476 /* create two temporary Index sets for build scatter gather */ 477 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 478 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 479 480 /* create temporary global vector to generate scatter context */ 481 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 482 483 /* generate the scatter context */ 484 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 485 486 ierr = ISDestroy(&from);CHKERRQ(ierr); 487 ierr = ISDestroy(&to);CHKERRQ(ierr); 488 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 489 } 490 491 A->assembled = PETSC_TRUE; 492 PetscFunctionReturn(0); 493 } 494 495 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer) 496 { 497 PetscViewerFormat format; 498 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 499 Mat B; 500 PetscInt i; 501 PetscErrorCode ierr; 502 PetscBool ismpikaij; 503 504 PetscFunctionBegin; 505 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 506 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 507 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 508 ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr); 509 510 /* Print appropriate details for S. */ 511 if (!a->S) { 512 ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n"); 513 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 514 ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr); 515 for (i=0; i<(a->p * a->q); i++) { 516 #if defined(PETSC_USE_COMPLEX) 517 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr); 518 #else 519 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr); 520 #endif 521 } 522 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 523 } 524 525 /* Print appropriate details for T. */ 526 if (a->isTI) { 527 ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n"); 528 } else if (!a->T) { 529 ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n"); 530 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 531 ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr); 532 for (i=0; i<(a->p * a->q); i++) { 533 #if defined(PETSC_USE_COMPLEX) 534 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr); 535 #else 536 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr); 537 #endif 538 } 539 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 540 } 541 542 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 543 ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr); 544 if (ismpikaij) { 545 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 546 ierr = MatView(b->A,viewer);CHKERRQ(ierr); 547 } else { 548 ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr); 549 } 550 551 } else { 552 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 553 if (ismpikaij) { 554 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 555 } else { 556 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 557 } 558 ierr = MatView(B,viewer);CHKERRQ(ierr); 559 ierr = MatDestroy(&B);CHKERRQ(ierr); 560 } 561 PetscFunctionReturn(0); 562 } 563 564 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 565 { 566 PetscErrorCode ierr; 567 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 568 569 PetscFunctionBegin; 570 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 571 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 572 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 573 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 574 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 575 ierr = PetscFree(b->S);CHKERRQ(ierr); 576 ierr = PetscFree(b->T);CHKERRQ(ierr); 577 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 578 ierr = PetscFree(A->data);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 /* --------------------------------------------------------------------------------------*/ 583 584 /* zz = yy + Axx */ 585 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 586 { 587 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 588 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 589 const PetscScalar *s = b->S, *t = b->T; 590 const PetscScalar *x,*v,*bx; 591 PetscScalar *y,*sums; 592 PetscErrorCode ierr; 593 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 594 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 595 596 PetscFunctionBegin; 597 if (!yy) { 598 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 599 } else { 600 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 601 } 602 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 603 604 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 605 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 606 idx = a->j; 607 v = a->a; 608 ii = a->i; 609 610 if (b->isTI) { 611 for (i=0; i<m; i++) { 612 jrow = ii[i]; 613 n = ii[i+1] - jrow; 614 sums = y + p*i; 615 for (j=0; j<n; j++) { 616 for (k=0; k<p; k++) { 617 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 618 } 619 } 620 } 621 ierr = PetscLogFlops((a->nz)*3*p);CHKERRQ(ierr); 622 } else if (t) { 623 for (i=0; i<m; i++) { 624 jrow = ii[i]; 625 n = ii[i+1] - jrow; 626 sums = y + p*i; 627 bx = x + q*i; 628 for (j=0; j<n; j++) { 629 for (k=0; k<p; k++) { 630 for (l=0; l<q; l++) { 631 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 632 } 633 } 634 } 635 } 636 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 637 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 638 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 639 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 640 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 641 } 642 if (s) { 643 for (i=0; i<m; i++) { 644 sums = y + p*i; 645 bx = x + q*i; 646 if (i < b->AIJ->cmap->n) { 647 for (j=0; j<q; j++) { 648 for (k=0; k<p; k++) { 649 sums[k] += s[k+j*p]*bx[j]; 650 } 651 } 652 } 653 } 654 ierr = PetscLogFlops(m*2*p*q);CHKERRQ(ierr); 655 } 656 657 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 658 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 659 PetscFunctionReturn(0); 660 } 661 662 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 663 { 664 PetscErrorCode ierr; 665 PetscFunctionBegin; 666 ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 #include <petsc/private/kernels/blockinvert.h> 671 672 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 673 { 674 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 675 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 676 const PetscScalar *S = b->S; 677 const PetscScalar *T = b->T; 678 const PetscScalar *v = a->a; 679 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 680 PetscErrorCode ierr; 681 PetscInt i,j,*v_pivots,dof,dof2; 682 PetscScalar *diag,aval,*v_work; 683 684 PetscFunctionBegin; 685 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 686 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 687 688 dof = p; 689 dof2 = dof*dof; 690 691 if (b->ibdiagvalid) { 692 if (values) *values = b->ibdiag; 693 PetscFunctionReturn(0); 694 } 695 if (!b->ibdiag) { 696 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 697 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 698 } 699 if (values) *values = b->ibdiag; 700 diag = b->ibdiag; 701 702 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 703 for (i=0; i<m; i++) { 704 if (S) { 705 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 706 } else { 707 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 708 } 709 if (b->isTI) { 710 aval = 0; 711 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 712 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 713 } else if (T) { 714 aval = 0; 715 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 716 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 717 } 718 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 719 diag += dof2; 720 } 721 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 722 723 b->ibdiagvalid = PETSC_TRUE; 724 PetscFunctionReturn(0); 725 } 726 727 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 728 { 729 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 730 731 PetscFunctionBegin; 732 *B = kaij->AIJ; 733 PetscFunctionReturn(0); 734 } 735 736 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 737 { 738 PetscErrorCode ierr; 739 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 740 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 741 const PetscScalar *aa = a->a, *T = kaij->T, *v; 742 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 743 const PetscScalar *b, *xb, *idiag; 744 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 745 PetscInt i, j, k, i2, bs, bs2, nz; 746 747 PetscFunctionBegin; 748 its = its*lits; 749 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 750 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 751 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 752 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 753 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks"); 754 else {bs = p; bs2 = bs*bs; } 755 756 if (!m) PetscFunctionReturn(0); 757 758 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); } 759 idiag = kaij->ibdiag; 760 diag = a->diag; 761 762 if (!kaij->sor.setup) { 763 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 764 kaij->sor.setup = PETSC_TRUE; 765 } 766 y = kaij->sor.y; 767 w = kaij->sor.w; 768 work = kaij->sor.work; 769 t = kaij->sor.t; 770 arr = kaij->sor.arr; 771 772 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 773 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 774 775 if (flag & SOR_ZERO_INITIAL_GUESS) { 776 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 777 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 778 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 779 i2 = bs; 780 idiag += bs2; 781 for (i=1; i<m; i++) { 782 v = aa + ai[i]; 783 vi = aj + ai[i]; 784 nz = diag[i] - ai[i]; 785 786 if (T) { /* b - T (Arow * x) */ 787 ierr = PetscMemzero(w,bs*sizeof(PetscScalar)); 788 for (j=0; j<nz; j++) { 789 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 790 } 791 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 792 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 793 } else if (kaij->isTI) { 794 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 795 for (j=0; j<nz; j++) { 796 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 797 } 798 } else { 799 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 800 } 801 802 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 803 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 804 805 idiag += bs2; 806 i2 += bs; 807 } 808 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 809 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 810 xb = t; 811 } else xb = b; 812 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 813 idiag = kaij->ibdiag+bs2*(m-1); 814 i2 = bs * (m-1); 815 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 816 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 817 i2 -= bs; 818 idiag -= bs2; 819 for (i=m-2; i>=0; i--) { 820 v = aa + diag[i] + 1 ; 821 vi = aj + diag[i] + 1; 822 nz = ai[i+1] - diag[i] - 1; 823 824 if (T) { /* FIXME: This branch untested */ 825 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 826 /* copy all rows of x that are needed into contiguous space */ 827 workt = work; 828 for (j=0; j<nz; j++) { 829 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 830 workt += bs; 831 } 832 arrt = arr; 833 for (j=0; j<nz; j++) { 834 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 835 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 836 arrt += bs2; 837 } 838 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 839 } else if (kaij->isTI) { 840 ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 841 for (j=0; j<nz; j++) { 842 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 843 } 844 } 845 846 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 847 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 848 849 idiag -= bs2; 850 i2 -= bs; 851 } 852 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 853 } 854 its--; 855 } 856 while (its--) { /* FIXME: This branch not updated */ 857 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 858 i2 = 0; 859 idiag = kaij->ibdiag; 860 for (i=0; i<m; i++) { 861 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 862 863 v = aa + ai[i]; 864 vi = aj + ai[i]; 865 nz = diag[i] - ai[i]; 866 workt = work; 867 for (j=0; j<nz; j++) { 868 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 869 workt += bs; 870 } 871 arrt = arr; 872 if (T) { 873 for (j=0; j<nz; j++) { 874 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 875 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 876 arrt += bs2; 877 } 878 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 879 } else if (kaij->isTI) { 880 for (j=0; j<nz; j++) { 881 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 882 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 883 arrt += bs2; 884 } 885 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 886 } 887 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 888 889 v = aa + diag[i] + 1; 890 vi = aj + diag[i] + 1; 891 nz = ai[i+1] - diag[i] - 1; 892 workt = work; 893 for (j=0; j<nz; j++) { 894 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 895 workt += bs; 896 } 897 arrt = arr; 898 if (T) { 899 for (j=0; j<nz; j++) { 900 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 901 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 902 arrt += bs2; 903 } 904 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 905 } else if (kaij->isTI) { 906 for (j=0; j<nz; j++) { 907 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 908 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 909 arrt += bs2; 910 } 911 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 912 } 913 914 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 915 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 916 917 idiag += bs2; 918 i2 += bs; 919 } 920 xb = t; 921 } 922 else xb = b; 923 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 924 idiag = kaij->ibdiag+bs2*(m-1); 925 i2 = bs * (m-1); 926 if (xb == b) { 927 for (i=m-1; i>=0; i--) { 928 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 929 930 v = aa + ai[i]; 931 vi = aj + ai[i]; 932 nz = diag[i] - ai[i]; 933 workt = work; 934 for (j=0; j<nz; j++) { 935 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 936 workt += bs; 937 } 938 arrt = arr; 939 if (T) { 940 for (j=0; j<nz; j++) { 941 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 942 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 943 arrt += bs2; 944 } 945 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 946 } else if (kaij->isTI) { 947 for (j=0; j<nz; j++) { 948 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 949 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 950 arrt += bs2; 951 } 952 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 953 } 954 955 v = aa + diag[i] + 1; 956 vi = aj + diag[i] + 1; 957 nz = ai[i+1] - diag[i] - 1; 958 workt = work; 959 for (j=0; j<nz; j++) { 960 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 961 workt += bs; 962 } 963 arrt = arr; 964 if (T) { 965 for (j=0; j<nz; j++) { 966 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 967 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 968 arrt += bs2; 969 } 970 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 971 } else if (kaij->isTI) { 972 for (j=0; j<nz; j++) { 973 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 974 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 975 arrt += bs2; 976 } 977 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 978 } 979 980 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 981 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 982 } 983 } else { 984 for (i=m-1; i>=0; i--) { 985 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 986 v = aa + diag[i] + 1; 987 vi = aj + diag[i] + 1; 988 nz = ai[i+1] - diag[i] - 1; 989 workt = work; 990 for (j=0; j<nz; j++) { 991 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 992 workt += bs; 993 } 994 arrt = arr; 995 if (T) { 996 for (j=0; j<nz; j++) { 997 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 998 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 999 arrt += bs2; 1000 } 1001 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1002 } else if (kaij->isTI) { 1003 for (j=0; j<nz; j++) { 1004 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1005 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1006 arrt += bs2; 1007 } 1008 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1009 } 1010 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1011 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1012 } 1013 idiag -= bs2; 1014 i2 -= bs; 1015 } 1016 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1017 } 1018 } 1019 1020 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1021 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1022 PetscFunctionReturn(0); 1023 } 1024 1025 /*===================================================================================*/ 1026 1027 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1028 { 1029 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1030 PetscErrorCode ierr; 1031 1032 PetscFunctionBegin; 1033 if (!yy) { 1034 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 1035 } else { 1036 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 1037 } 1038 /* start the scatter */ 1039 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1040 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 1043 PetscFunctionReturn(0); 1044 } 1045 1046 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 1047 { 1048 PetscErrorCode ierr; 1049 PetscFunctionBegin; 1050 ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 1051 PetscFunctionReturn(0); 1052 } 1053 1054 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 1055 { 1056 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 /* ----------------------------------------------------------------*/ 1065 1066 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1067 { 1068 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1069 PetscErrorCode diag = PETSC_FALSE; 1070 PetscErrorCode ierr; 1071 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1072 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1073 1074 PetscFunctionBegin; 1075 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1076 b->getrowactive = PETSC_TRUE; 1077 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 1078 1079 if ((!S) && (!T) && (!b->isTI)) { 1080 if (ncols) *ncols = 0; 1081 if (cols) *cols = NULL; 1082 if (values) *values = NULL; 1083 PetscFunctionReturn(0); 1084 } 1085 1086 if (T || b->isTI) { 1087 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 1088 c = nzaij; 1089 for (i=0; i<nzaij; i++) { 1090 /* check if this row contains a diagonal entry */ 1091 if (colsaij[i] == r) { 1092 diag = PETSC_TRUE; 1093 c = i; 1094 } 1095 } 1096 } else nzaij = c = 0; 1097 1098 /* calculate size of row */ 1099 nz = 0; 1100 if (S) nz += q; 1101 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1102 1103 if (cols || values) { 1104 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1105 for (i=0; i<q; i++) { 1106 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1107 v[i] = 0.0; 1108 } 1109 if (b->isTI) { 1110 for (i=0; i<nzaij; i++) { 1111 for (j=0; j<q; j++) { 1112 idx[i*q+j] = colsaij[i]*q+j; 1113 v[i*q+j] = (j==s ? vaij[i] : 0); 1114 } 1115 } 1116 } else if (T) { 1117 for (i=0; i<nzaij; i++) { 1118 for (j=0; j<q; j++) { 1119 idx[i*q+j] = colsaij[i]*q+j; 1120 v[i*q+j] = vaij[i]*T[s+j*p]; 1121 } 1122 } 1123 } 1124 if (S) { 1125 for (j=0; j<q; j++) { 1126 idx[c*q+j] = r*q+j; 1127 v[c*q+j] += S[s+j*p]; 1128 } 1129 } 1130 } 1131 1132 if (ncols) *ncols = nz; 1133 if (cols) *cols = idx; 1134 if (values) *values = v; 1135 PetscFunctionReturn(0); 1136 } 1137 1138 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1139 { 1140 PetscErrorCode ierr; 1141 PetscFunctionBegin; 1142 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1143 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1144 PetscFunctionReturn(0); 1145 } 1146 1147 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1148 { 1149 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1150 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1151 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1152 Mat AIJ = b->A; 1153 PetscBool diag = PETSC_FALSE; 1154 PetscErrorCode ierr; 1155 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1156 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1157 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1158 1159 PetscFunctionBegin; 1160 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1161 b->getrowactive = PETSC_TRUE; 1162 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1163 lrow = row - rstart; 1164 1165 if ((!S) && (!T) && (!b->isTI)) { 1166 if (ncols) *ncols = 0; 1167 if (cols) *cols = NULL; 1168 if (values) *values = NULL; 1169 PetscFunctionReturn(0); 1170 } 1171 1172 r = lrow/p; 1173 s = lrow%p; 1174 1175 if (T || b->isTI) { 1176 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 1177 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 1178 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 1179 1180 c = ncolsaij + ncolsoaij; 1181 for (i=0; i<ncolsaij; i++) { 1182 /* check if this row contains a diagonal entry */ 1183 if (colsaij[i] == r) { 1184 diag = PETSC_TRUE; 1185 c = i; 1186 } 1187 } 1188 } else c = 0; 1189 1190 /* calculate size of row */ 1191 nz = 0; 1192 if (S) nz += q; 1193 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1194 1195 if (cols || values) { 1196 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1197 for (i=0; i<q; i++) { 1198 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1199 v[i] = 0.0; 1200 } 1201 if (b->isTI) { 1202 for (i=0; i<ncolsaij; i++) { 1203 for (j=0; j<q; j++) { 1204 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1205 v[i*q+j] = (j==s ? vals[i] : 0.0); 1206 } 1207 } 1208 for (i=0; i<ncolsoaij; i++) { 1209 for (j=0; j<q; j++) { 1210 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1211 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1212 } 1213 } 1214 } else if (T) { 1215 for (i=0; i<ncolsaij; i++) { 1216 for (j=0; j<q; j++) { 1217 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1218 v[i*q+j] = vals[i]*T[s+j*p]; 1219 } 1220 } 1221 for (i=0; i<ncolsoaij; i++) { 1222 for (j=0; j<q; j++) { 1223 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1224 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1225 } 1226 } 1227 } 1228 if (S) { 1229 for (j=0; j<q; j++) { 1230 idx[c*q+j] = (r+rstart/p)*q+j; 1231 v[c*q+j] += S[s+j*p]; 1232 } 1233 } 1234 } 1235 1236 if (ncols) *ncols = nz; 1237 if (cols) *cols = idx; 1238 if (values) *values = v; 1239 PetscFunctionReturn(0); 1240 } 1241 1242 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1243 { 1244 PetscErrorCode ierr; 1245 PetscFunctionBegin; 1246 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1247 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1248 PetscFunctionReturn(0); 1249 } 1250 1251 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1252 { 1253 PetscErrorCode ierr; 1254 Mat A; 1255 1256 PetscFunctionBegin; 1257 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1258 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1259 ierr = MatDestroy(&A);CHKERRQ(ierr); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 /* ---------------------------------------------------------------------------------- */ 1264 /*@C 1265 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1266 1267 [I \otimes S + A \otimes T] 1268 1269 where 1270 S is a dense (p \times q) matrix 1271 T is a dense (p \times q) matrix 1272 A is an AIJ (n \times n) matrix 1273 I is the identity matrix 1274 The resulting matrix is (np \times nq) 1275 1276 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1277 1278 Collective 1279 1280 Input Parameters: 1281 + A - the AIJ matrix 1282 . p - number of rows in S and T 1283 . q - number of columns in S and T 1284 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1285 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1286 1287 Output Parameter: 1288 . kaij - the new KAIJ matrix 1289 1290 Notes: 1291 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1292 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 1293 1294 Level: advanced 1295 1296 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1297 @*/ 1298 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1299 { 1300 PetscErrorCode ierr; 1301 PetscMPIInt size; 1302 1303 PetscFunctionBegin; 1304 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1305 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1306 if (size == 1) { 1307 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1308 } else { 1309 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1310 } 1311 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1312 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1313 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1314 ierr = MatSetUp(*kaij); 1315 PetscFunctionReturn(0); 1316 } 1317 1318 /*MC 1319 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 1320 1321 [I \otimes S + A \otimes T] 1322 1323 where 1324 S is a dense (p \times q) matrix 1325 T is a dense (p \times q) matrix 1326 A is an AIJ (n \times n) matrix 1327 I is the identity matrix 1328 The resulting matrix is (np \times nq) 1329 1330 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1331 1332 Level: advanced 1333 1334 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1335 M*/ 1336 1337 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1338 { 1339 PetscErrorCode ierr; 1340 Mat_MPIKAIJ *b; 1341 PetscMPIInt size; 1342 1343 PetscFunctionBegin; 1344 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1345 A->data = (void*)b; 1346 1347 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1348 1349 A->ops->setup = MatSetUp_KAIJ; 1350 1351 b->w = 0; 1352 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1353 if (size == 1) { 1354 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1355 A->ops->setup = MatSetUp_KAIJ; 1356 A->ops->destroy = MatDestroy_SeqKAIJ; 1357 A->ops->view = MatView_KAIJ; 1358 A->ops->mult = MatMult_SeqKAIJ; 1359 A->ops->multadd = MatMultAdd_SeqKAIJ; 1360 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1361 A->ops->getrow = MatGetRow_SeqKAIJ; 1362 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1363 A->ops->sor = MatSOR_SeqKAIJ; 1364 } else { 1365 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1366 A->ops->setup = MatSetUp_KAIJ; 1367 A->ops->destroy = MatDestroy_MPIKAIJ; 1368 A->ops->view = MatView_KAIJ; 1369 A->ops->mult = MatMult_MPIKAIJ; 1370 A->ops->multadd = MatMultAdd_MPIKAIJ; 1371 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1372 A->ops->getrow = MatGetRow_MPIKAIJ; 1373 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1374 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1375 } 1376 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1377 PetscFunctionReturn(0); 1378 } 1379