1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 
71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameters: 76 + m - the number of rows in S 77 . n - the number of columns in S 78 - S - the S matrix, in form of a scalar array in column-major format 79 80 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 if (m) *m = b->p; 91 if (n) *n = b->q; 92 if (S) *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98 99 Not Collective; the entire S is stored and returned independently on all processes. 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameters: 105 + m - the number of rows in S 106 . n - the number of columns in S 107 - S - the S matrix, in form of a scalar array in column-major format 108 109 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 if (m) *m = b->p; 120 if (n) *n = b->q; 121 if (S) *S = b->S; 122 PetscFunctionReturn(0); 123 } 124 125 /*@C 126 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127 128 Not collective 129 130 Input Parameter: 131 . A - the KAIJ matrix 132 133 Output Parameter: 134 . S - location of pointer to array obtained with MatKAIJGetS() 135 136 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 
138 139 Level: advanced 140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141 @*/ 142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143 { 144 PetscFunctionBegin; 145 if (S) *S = NULL; 146 PetscObjectStateIncrease((PetscObject)A); 147 PetscFunctionReturn(0); 148 } 149 150 /*@C 151 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 152 153 Not collective 154 155 Input Parameter: 156 . A - the KAIJ matrix 157 158 Output Parameter: 159 . S - location of pointer to array obtained with MatKAIJGetS() 160 161 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 162 If NULL is passed, it will not attempt to zero the array pointer. 163 164 Level: advanced 165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 166 @*/ 167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 168 { 169 PetscFunctionBegin; 170 if (S) *S = NULL; 171 PetscFunctionReturn(0); 172 } 173 174 /*@C 175 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 176 177 Not Collective; the entire T is stored and returned independently on all processes 178 179 Input Parameter: 180 . A - the KAIJ matrix 181 182 Output Parameter: 183 + m - the number of rows in T 184 . 
n - the number of columns in T 185 - T - the T matrix, in form of a scalar array in column-major format 186 187 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 188 189 Level: advanced 190 191 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 192 @*/ 193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 194 { 195 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 196 PetscFunctionBegin; 197 if (m) *m = b->p; 198 if (n) *n = b->q; 199 if (T) *T = b->T; 200 PetscFunctionReturn(0); 201 } 202 203 /*@C 204 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 205 206 Not Collective; the entire T is stored and returned independently on all processes 207 208 Input Parameter: 209 . A - the KAIJ matrix 210 211 Output Parameter: 212 + m - the number of rows in T 213 . n - the number of columns in T 214 - T - the T matrix, in form of a scalar array in column-major format 215 216 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 217 218 Level: advanced 219 220 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 221 @*/ 222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 223 { 224 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 225 PetscFunctionBegin; 226 if (m) *m = b->p; 227 if (n) *n = b->q; 228 if (T) *T = b->T; 229 PetscFunctionReturn(0); 230 } 231 232 /*@C 233 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 234 235 Not collective 236 237 Input Parameter: 238 . A - the KAIJ matrix 239 240 Output Parameter: 241 . T - location of pointer to array obtained with MatKAIJGetS() 242 243 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 244 If NULL is passed, it will not attempt to zero the array pointer. 
245 246 Level: advanced 247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 248 @*/ 249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 250 { 251 PetscFunctionBegin; 252 if (T) *T = NULL; 253 PetscObjectStateIncrease((PetscObject)A); 254 PetscFunctionReturn(0); 255 } 256 257 /*@C 258 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 259 260 Not collective 261 262 Input Parameter: 263 . A - the KAIJ matrix 264 265 Output Parameter: 266 . T - location of pointer to array obtained with MatKAIJGetS() 267 268 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 269 If NULL is passed, it will not attempt to zero the array pointer. 270 271 Level: advanced 272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 273 @*/ 274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 275 { 276 PetscFunctionBegin; 277 if (T) *T = NULL; 278 PetscFunctionReturn(0); 279 } 280 281 /*@ 282 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 283 284 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 285 286 Input Parameters: 287 + A - the KAIJ matrix 288 - B - the AIJ matrix 289 290 Notes: 291 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 292 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 
293 294 Level: advanced 295 296 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 297 @*/ 298 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 299 { 300 PetscErrorCode ierr; 301 PetscMPIInt size; 302 303 PetscFunctionBegin; 304 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 305 if (size == 1) { 306 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 307 a->AIJ = B; 308 } else { 309 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 310 a->A = B; 311 } 312 ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 /*@C 317 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 318 319 Logically Collective; the entire S is stored independently on all processes. 320 321 Input Parameters: 322 + A - the KAIJ matrix 323 . p - the number of rows in S 324 . q - the number of columns in S 325 - S - the S matrix, in form of a scalar array in column-major format 326 327 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 328 The S matrix is copied, so the user can destroy this array. 329 330 Level: Advanced 331 332 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 333 @*/ 334 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 335 { 336 PetscErrorCode ierr; 337 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 338 339 PetscFunctionBegin; 340 ierr = PetscFree(a->S);CHKERRQ(ierr); 341 if (S) { 342 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 343 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 344 } else a->S = NULL; 345 346 a->p = p; 347 a->q = q; 348 PetscFunctionReturn(0); 349 } 350 351 /*@C 352 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 353 354 Logically Collective; the entire T is stored independently on all processes. 355 356 Input Parameters: 357 + A - the KAIJ matrix 358 . p - the number of rows in S 359 . 
q - the number of columns in S 360 - T - the T matrix, in form of a scalar array in column-major format 361 362 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 363 The T matrix is copied, so the user can destroy this array. 364 365 Level: Advanced 366 367 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 368 @*/ 369 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 370 { 371 PetscErrorCode ierr; 372 PetscInt i,j; 373 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 374 PetscBool isTI = PETSC_FALSE; 375 376 PetscFunctionBegin; 377 /* check if T is an identity matrix */ 378 if (T && (p == q)) { 379 isTI = PETSC_TRUE; 380 for (i=0; i<p; i++) { 381 for (j=0; j<q; j++) { 382 if (i == j) { 383 /* diagonal term must be 1 */ 384 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 385 } else { 386 /* off-diagonal term must be 0 */ 387 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 388 } 389 } 390 } 391 } 392 a->isTI = isTI; 393 394 ierr = PetscFree(a->T);CHKERRQ(ierr); 395 if (T && (!isTI)) { 396 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 397 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 398 } else a->T = NULL; 399 400 a->p = p; 401 a->q = q; 402 PetscFunctionReturn(0); 403 } 404 405 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 406 { 407 PetscErrorCode ierr; 408 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 409 410 PetscFunctionBegin; 411 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 412 ierr = PetscFree(b->S);CHKERRQ(ierr); 413 ierr = PetscFree(b->T);CHKERRQ(ierr); 414 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 415 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 416 ierr = PetscFree(A->data);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 PetscErrorCode MatSetUp_KAIJ(Mat A) 421 { 422 PetscErrorCode ierr; 423 PetscInt n; 424 PetscMPIInt size; 425 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 426 427 PetscFunctionBegin; 428 ierr = 
MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 429 if (size == 1) { 430 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 431 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 432 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 433 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 434 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 435 } else { 436 Mat_MPIKAIJ *a; 437 Mat_MPIAIJ *mpiaij; 438 IS from,to; 439 Vec gvec; 440 PetscScalar *T; 441 PetscInt i,j; 442 443 a = (Mat_MPIKAIJ*)A->data; 444 mpiaij = (Mat_MPIAIJ*)a->A->data; 445 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 446 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 447 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 448 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 449 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 450 451 if (a->isTI) { 452 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 453 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 454 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 455 * to pass in. 
*/ 456 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 457 for (i=0; i<a->p; i++) { 458 for (j=0; j<a->q; j++) { 459 if (i==j) T[i+j*a->p] = 1.0; 460 else T[i+j*a->p] = 0.0; 461 } 462 } 463 } else T = a->T; 464 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 465 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 466 ierr = PetscFree(T);CHKERRQ(ierr); 467 468 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 469 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 470 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 471 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 472 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 473 474 /* create two temporary Index sets for build scatter gather */ 475 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 476 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 477 478 /* create temporary global vector to generate scatter context */ 479 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 480 481 /* generate the scatter context */ 482 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 483 484 ierr = ISDestroy(&from);CHKERRQ(ierr); 485 ierr = ISDestroy(&to);CHKERRQ(ierr); 486 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 487 } 488 489 A->assembled = PETSC_TRUE; 490 PetscFunctionReturn(0); 491 } 492 493 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 494 { 495 PetscErrorCode ierr; 496 Mat B; 497 498 PetscFunctionBegin; 499 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 500 ierr = MatView(B,viewer);CHKERRQ(ierr); 501 ierr = MatDestroy(&B);CHKERRQ(ierr); 502 PetscFunctionReturn(0); 503 } 504 505 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 506 { 507 PetscErrorCode ierr; 508 Mat B; 509 510 PetscFunctionBegin; 511 ierr = 
MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 512 ierr = MatView(B,viewer);CHKERRQ(ierr); 513 ierr = MatDestroy(&B);CHKERRQ(ierr); 514 PetscFunctionReturn(0); 515 } 516 517 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 518 { 519 PetscErrorCode ierr; 520 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 521 522 PetscFunctionBegin; 523 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 524 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 525 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 526 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 527 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 528 ierr = PetscFree(b->S);CHKERRQ(ierr); 529 ierr = PetscFree(b->T);CHKERRQ(ierr); 530 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 531 ierr = PetscFree(A->data);CHKERRQ(ierr); 532 PetscFunctionReturn(0); 533 } 534 535 /* --------------------------------------------------------------------------------------*/ 536 537 /* zz = yy + Axx */ 538 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 539 { 540 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 541 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 542 const PetscScalar *s = b->S, *t = b->T; 543 const PetscScalar *x,*v,*bx; 544 PetscScalar *y,*sums; 545 PetscErrorCode ierr; 546 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 547 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 548 549 PetscFunctionBegin; 550 if (!yy) { 551 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 552 } else { 553 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 554 } 555 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 556 557 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 558 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 559 idx = a->j; 560 v = a->a; 561 ii = a->i; 562 563 if (b->isTI) { 564 for (i=0; i<m; i++) { 565 jrow = ii[i]; 566 n = ii[i+1] - jrow; 567 sums = y + p*i; 568 for (j=0; j<n; j++) { 569 for (k=0; k<p; k++) { 570 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 571 } 572 } 573 } 574 } else if (t) { 575 for (i=0; i<m; i++) { 576 jrow = ii[i]; 577 n = ii[i+1] - jrow; 578 sums = y + p*i; 579 bx = x + q*i; 580 for (j=0; 
j<n; j++) { 581 for (k=0; k<p; k++) { 582 for (l=0; l<q; l++) { 583 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 584 } 585 } 586 } 587 } 588 } 589 if (s) { 590 for (i=0; i<m; i++) { 591 sums = y + p*i; 592 bx = x + q*i; 593 if (i < b->AIJ->cmap->n) { 594 for (j=0; j<q; j++) { 595 for (k=0; k<p; k++) { 596 sums[k] += s[k+j*p]*bx[j]; 597 } 598 } 599 } 600 } 601 } 602 603 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 604 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 605 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 606 PetscFunctionReturn(0); 607 } 608 609 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 610 { 611 PetscErrorCode ierr; 612 PetscFunctionBegin; 613 ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 614 PetscFunctionReturn(0); 615 } 616 617 #include <petsc/private/kernels/blockinvert.h> 618 619 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 620 { 621 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 622 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 623 const PetscScalar *S = b->S; 624 const PetscScalar *T = b->T; 625 const PetscScalar *v = a->a; 626 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 627 PetscErrorCode ierr; 628 PetscInt i,j,*v_pivots,dof,dof2; 629 PetscScalar *diag,aval,*v_work; 630 631 PetscFunctionBegin; 632 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 633 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 634 635 dof = p; 636 dof2 = dof*dof; 637 638 if (b->ibdiagvalid) { 639 if (values) *values = b->ibdiag; 640 PetscFunctionReturn(0); 641 } 642 if (!b->ibdiag) { 643 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 644 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 645 } 646 if (values) *values = b->ibdiag; 647 diag = b->ibdiag; 648 
649 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 650 for (i=0; i<m; i++) { 651 if (S) { 652 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 653 } else { 654 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 655 } 656 if (b->isTI) { 657 aval = 0; 658 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 659 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 660 } else if (T) { 661 aval = 0; 662 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 663 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 664 } 665 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 666 diag += dof2; 667 } 668 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 669 670 b->ibdiagvalid = PETSC_TRUE; 671 PetscFunctionReturn(0); 672 } 673 674 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 675 { 676 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 677 678 PetscFunctionBegin; 679 *B = kaij->AIJ; 680 PetscFunctionReturn(0); 681 } 682 683 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 684 { 685 PetscErrorCode ierr; 686 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 687 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 688 const PetscScalar *aa = a->a, *T = kaij->T, *v; 689 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 690 const PetscScalar *b, *xb, *idiag; 691 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 692 PetscInt i, j, k, i2, bs, bs2, nz; 693 694 PetscFunctionBegin; 695 its = its*lits; 696 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 697 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 698 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 699 if ((flag & SOR_APPLY_UPPER) || 
(flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 700 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 701 else {bs = p; bs2 = bs*bs; } 702 703 if (!m) PetscFunctionReturn(0); 704 705 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); } 706 idiag = kaij->ibdiag; 707 diag = a->diag; 708 709 if (!kaij->sor.setup) { 710 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 711 kaij->sor.setup = PETSC_TRUE; 712 } 713 y = kaij->sor.y; 714 w = kaij->sor.w; 715 work = kaij->sor.work; 716 t = kaij->sor.t; 717 arr = kaij->sor.arr; 718 719 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 720 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 721 722 if (flag & SOR_ZERO_INITIAL_GUESS) { 723 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 724 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 725 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 726 i2 = bs; 727 idiag += bs2; 728 for (i=1; i<m; i++) { 729 v = aa + ai[i]; 730 vi = aj + ai[i]; 731 nz = diag[i] - ai[i]; 732 733 if (T) { /* b - T (Arow * x) */ 734 for (k=0; k<bs; k++) w[k] = 0; 735 for (j=0; j<nz; j++) { 736 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 737 } 738 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 739 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 740 } else if (kaij->isTI) { 741 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 742 for (j=0; j<nz; j++) { 743 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 744 } 745 } else { 746 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 747 } 748 749 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 750 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 751 752 idiag += bs2; 753 i2 += bs; 754 } 755 /* for logging purposes assume number of nonzero in lower half is 1/2 of 
total */ 756 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 757 xb = t; 758 } else xb = b; 759 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 760 idiag = kaij->ibdiag+bs2*(m-1); 761 i2 = bs * (m-1); 762 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 763 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 764 i2 -= bs; 765 idiag -= bs2; 766 for (i=m-2; i>=0; i--) { 767 v = aa + diag[i] + 1 ; 768 vi = aj + diag[i] + 1; 769 nz = ai[i+1] - diag[i] - 1; 770 771 if (T) { /* FIXME: This branch untested */ 772 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 773 /* copy all rows of x that are needed into contiguous space */ 774 workt = work; 775 for (j=0; j<nz; j++) { 776 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 777 workt += bs; 778 } 779 arrt = arr; 780 for (j=0; j<nz; j++) { 781 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 782 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 783 arrt += bs2; 784 } 785 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 786 } else if (kaij->isTI) { 787 for (k=0; k<bs; k++) w[k] = t[i2+k]; 788 for (j=0; j<nz; j++) { 789 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 790 } 791 } 792 793 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 794 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 795 796 idiag -= bs2; 797 i2 -= bs; 798 } 799 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 800 } 801 its--; 802 } 803 while (its--) { /* FIXME: This branch not updated */ 804 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 805 i2 = 0; 806 idiag = kaij->ibdiag; 807 for (i=0; i<m; i++) { 808 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 809 810 v = aa + ai[i]; 811 vi = aj + ai[i]; 812 nz = diag[i] - ai[i]; 813 workt = work; 814 for (j=0; j<nz; j++) { 815 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 816 workt 
+= bs; 817 } 818 arrt = arr; 819 if (T) { 820 for (j=0; j<nz; j++) { 821 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 822 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 823 arrt += bs2; 824 } 825 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 826 } else if (kaij->isTI) { 827 for (j=0; j<nz; j++) { 828 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 829 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 830 arrt += bs2; 831 } 832 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 833 } 834 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 835 836 v = aa + diag[i] + 1; 837 vi = aj + diag[i] + 1; 838 nz = ai[i+1] - diag[i] - 1; 839 workt = work; 840 for (j=0; j<nz; j++) { 841 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 842 workt += bs; 843 } 844 arrt = arr; 845 if (T) { 846 for (j=0; j<nz; j++) { 847 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 848 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 849 arrt += bs2; 850 } 851 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 852 } else if (kaij->isTI) { 853 for (j=0; j<nz; j++) { 854 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 855 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 856 arrt += bs2; 857 } 858 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 859 } 860 861 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 862 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 863 864 idiag += bs2; 865 i2 += bs; 866 } 867 xb = t; 868 } 869 else xb = b; 870 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 871 idiag = kaij->ibdiag+bs2*(m-1); 872 i2 = bs * (m-1); 873 if (xb == b) { 874 for (i=m-1; i>=0; i--) { 875 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 876 877 v = aa + ai[i]; 878 vi = aj + ai[i]; 879 nz = diag[i] - ai[i]; 880 workt = work; 881 for (j=0; j<nz; j++) { 882 ierr = PetscMemcpy(workt,x + 
bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 883 workt += bs; 884 } 885 arrt = arr; 886 if (T) { 887 for (j=0; j<nz; j++) { 888 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 889 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 890 arrt += bs2; 891 } 892 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 893 } else if (kaij->isTI) { 894 for (j=0; j<nz; j++) { 895 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 896 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 897 arrt += bs2; 898 } 899 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 900 } 901 902 v = aa + diag[i] + 1; 903 vi = aj + diag[i] + 1; 904 nz = ai[i+1] - diag[i] - 1; 905 workt = work; 906 for (j=0; j<nz; j++) { 907 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 908 workt += bs; 909 } 910 arrt = arr; 911 if (T) { 912 for (j=0; j<nz; j++) { 913 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 914 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 915 arrt += bs2; 916 } 917 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 918 } else if (kaij->isTI) { 919 for (j=0; j<nz; j++) { 920 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 921 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 922 arrt += bs2; 923 } 924 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 925 } 926 927 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 928 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 929 } 930 } else { 931 for (i=m-1; i>=0; i--) { 932 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 933 v = aa + diag[i] + 1; 934 vi = aj + diag[i] + 1; 935 nz = ai[i+1] - diag[i] - 1; 936 workt = work; 937 for (j=0; j<nz; j++) { 938 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 939 workt += bs; 940 } 941 arrt = arr; 942 if (T) { 943 for (j=0; j<nz; j++) { 944 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 945 for (k=0; k<bs2; k++) 
arrt[k] *= v[j]; 946 arrt += bs2; 947 } 948 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 949 } else if (kaij->isTI) { 950 for (j=0; j<nz; j++) { 951 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 952 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 953 arrt += bs2; 954 } 955 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 956 } 957 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 958 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 959 } 960 idiag -= bs2; 961 i2 -= bs; 962 } 963 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 964 } 965 } 966 967 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 968 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 /*===================================================================================*/ 973 974 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 975 { 976 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 977 PetscErrorCode ierr; 978 979 PetscFunctionBegin; 980 if (!yy) { 981 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 982 } else { 983 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 984 } 985 /* start the scatter */ 986 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 988 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 994 { 995 PetscErrorCode ierr; 996 PetscFunctionBegin; 997 ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 1002 { 1003 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1004 PetscErrorCode ierr; 1005 1006 PetscFunctionBegin; 1007 ierr = 
(*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 /* ----------------------------------------------------------------*/ 1012 1013 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1014 { 1015 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1016 PetscErrorCode diag = PETSC_FALSE; 1017 PetscErrorCode ierr; 1018 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1019 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1020 1021 PetscFunctionBegin; 1022 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1023 b->getrowactive = PETSC_TRUE; 1024 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 1025 1026 if ((!S) && (!T) && (!b->isTI)) { 1027 if (ncols) *ncols = 0; 1028 if (cols) *cols = NULL; 1029 if (values) *values = NULL; 1030 PetscFunctionReturn(0); 1031 } 1032 1033 if (T || b->isTI) { 1034 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 1035 c = nzaij; 1036 for (i=0; i<nzaij; i++) { 1037 /* check if this row contains a diagonal entry */ 1038 if (colsaij[i] == r) { 1039 diag = PETSC_TRUE; 1040 c = i; 1041 } 1042 } 1043 } else nzaij = c = 0; 1044 1045 /* calculate size of row */ 1046 nz = 0; 1047 if (S) nz += q; 1048 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1049 1050 if (cols || values) { 1051 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1052 for (i=0; i<q; i++) { 1053 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1054 v[i] = 0.0; 1055 } 1056 if (b->isTI) { 1057 for (i=0; i<nzaij; i++) { 1058 for (j=0; j<q; j++) { 1059 idx[i*q+j] = colsaij[i]*q+j; 1060 v[i*q+j] = (j==s ? 
vaij[i] : 0); 1061 } 1062 } 1063 } else if (T) { 1064 for (i=0; i<nzaij; i++) { 1065 for (j=0; j<q; j++) { 1066 idx[i*q+j] = colsaij[i]*q+j; 1067 v[i*q+j] = vaij[i]*T[s+j*p]; 1068 } 1069 } 1070 } 1071 if (S) { 1072 for (j=0; j<q; j++) { 1073 idx[c*q+j] = r*q+j; 1074 v[c*q+j] += S[s+j*p]; 1075 } 1076 } 1077 } 1078 1079 if (ncols) *ncols = nz; 1080 if (cols) *cols = idx; 1081 if (values) *values = v; 1082 PetscFunctionReturn(0); 1083 } 1084 1085 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1086 { 1087 PetscErrorCode ierr; 1088 PetscFunctionBegin; 1089 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1090 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1091 PetscFunctionReturn(0); 1092 } 1093 1094 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1095 { 1096 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1097 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1098 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1099 Mat AIJ = b->A; 1100 PetscBool diag = PETSC_FALSE; 1101 PetscErrorCode ierr; 1102 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1103 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1104 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1105 1106 PetscFunctionBegin; 1107 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1108 b->getrowactive = PETSC_TRUE; 1109 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1110 lrow = row - rstart; 1111 1112 if ((!S) && (!T) && (!b->isTI)) { 1113 if (ncols) *ncols = 0; 1114 if (cols) *cols = NULL; 1115 if (values) *values = NULL; 1116 PetscFunctionReturn(0); 1117 } 1118 1119 r = lrow/p; 1120 s = lrow%p; 1121 1122 if (T || b->isTI) { 1123 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 1124 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 1125 ierr = 
MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);

    /* c is the slot that will receive the S contribution: the diagonal entry of the
       local part if present, otherwise appended after all diag+offdiag entries */
    c = ncolsaij + ncolsoaij;
    for (i=0; i<ncolsaij; i++) {
      /* check if this row contains a diagonal entry */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c = i;
      }
    }
  } else c = 0;

  /* calculate size of row; the diagonal block is shared between S and T when both are present */
  nz = 0;
  if (S) nz += q;
  if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);

  if (cols || values) {
    ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
    for (i=0; i<q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) {
      /* T is the identity: each AIJ entry contributes only on the diagonal of its q-wide block.
         Diagonal-part columns are local, hence shifted by rstart/p to get global block columns. */
      for (i=0; i<ncolsaij; i++) {
        for (j=0; j<q; j++) {
          idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
          v[i*q+j] = (j==s ? vals[i] : 0.0);
        }
      }
      /* off-diagonal part: garray maps the compressed local column index to the global block column */
      for (i=0; i<ncolsoaij; i++) {
        for (j=0; j<q; j++) {
          idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
          v[(i+ncolsaij)*q+j] = (j==s ?
ovals[i]: 0.0);
        }
      }
    } else if (T) {
      /* general T: row s of block (r,col) is a_{r,col}*T[s,:] (T stored column-major, p rows) */
      for (i=0; i<ncolsaij; i++) {
        for (j=0; j<q; j++) {
          idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
          v[i*q+j] = vals[i]*T[s+j*p];
        }
      }
      for (i=0; i<ncolsoaij; i++) {
        for (j=0; j<q; j++) {
          idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
          v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p];
        }
      }
    }
    if (S) {
      /* add the I \otimes S contribution into the diagonal block (slot c) */
      for (j=0; j<q; j++) {
        idx[c*q+j] = (r+rstart/p)*q+j;
        v[c*q+j] += S[s+j*p];
      }
    }
  }

  if (ncols) *ncols = nz;
  if (cols) *cols = idx;
  if (values) *values = v;
  PetscFunctionReturn(0);
}

/* MatRestoreRow_MPIKAIJ - frees the idx/v arrays allocated by MatGetRow_MPIKAIJ and clears the active flag. */
PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
  /* NOTE(review): A->data is a Mat_MPIKAIJ here; this cast relies on Mat_SeqKAIJ and
     Mat_MPIKAIJ sharing the layout of the getrowactive member — confirm in kaij.h */
  ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
  MatCreateSubMatrix_KAIJ - extracts a submatrix by converting the KAIJ matrix to
  plain AIJ first (the KAIJ format itself provides no submatrix extraction), then
  delegating to MatCreateSubMatrix() on the temporary AIJ copy.
*/
PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            A;

  PetscFunctionBegin;
  ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
  ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------- */
/*@C
  MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:

    [I \otimes S + A \otimes T]

  where
    S is a dense (p \times q) matrix
    T is a dense (p \times q) matrix
    A is an AIJ  (n \times n) matrix
    I is the identity matrix
  The resulting matrix is (np \times nq)

  The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1224 S is always stored independently on all processes as a PetscScalar array in column-major format. 1225 1226 Collective 1227 1228 Input Parameters: 1229 + A - the AIJ matrix 1230 . S - the S matrix, stored as a PetscScalar array (column-major) 1231 . T - the T matrix, stored as a PetscScalar array (column-major) 1232 . p - number of rows in S and T 1233 - q - number of columns in S and T 1234 1235 Output Parameter: 1236 . kaij - the new KAIJ matrix 1237 1238 Operations provided: 1239 + MatMult 1240 . MatMultAdd 1241 . MatInvertBlockDiagonal 1242 - MatView 1243 1244 Level: advanced 1245 1246 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1247 @*/ 1248 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1249 { 1250 PetscErrorCode ierr; 1251 PetscMPIInt size; 1252 1253 PetscFunctionBegin; 1254 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1255 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1256 if (size == 1) { 1257 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1258 } else { 1259 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1260 } 1261 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1262 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1263 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1264 ierr = MatSetUp(*kaij); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 /*MC 1269 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 1270 1271 [I \otimes S + A \otimes T] 1272 1273 where 1274 S is a dense (p \times q) matrix 1275 T is a dense (p \times q) matrix 1276 A is an AIJ (n \times n) matrix 1277 I is the identity matrix 1278 The resulting matrix is (np \times nq) 1279 1280 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 
1281 S and T are always stored independently on all processes as a PetscScalar array in column-major format. 1282 1283 Operations provided: 1284 + MatMult 1285 . MatMultAdd 1286 . MatInvertBlockDiagonal 1287 - MatView 1288 1289 Level: advanced 1290 1291 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1292 M*/ 1293 1294 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1295 { 1296 PetscErrorCode ierr; 1297 Mat_MPIKAIJ *b; 1298 PetscMPIInt size; 1299 1300 PetscFunctionBegin; 1301 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1302 A->data = (void*)b; 1303 1304 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1305 1306 A->ops->setup = MatSetUp_KAIJ; 1307 1308 b->w = 0; 1309 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1310 if (size == 1) { 1311 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1312 A->ops->setup = MatSetUp_KAIJ; 1313 A->ops->destroy = MatDestroy_SeqKAIJ; 1314 A->ops->view = MatView_SeqKAIJ; 1315 A->ops->mult = MatMult_SeqKAIJ; 1316 A->ops->multadd = MatMultAdd_SeqKAIJ; 1317 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1318 A->ops->getrow = MatGetRow_SeqKAIJ; 1319 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1320 A->ops->sor = MatSOR_SeqKAIJ; 1321 } else { 1322 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1323 A->ops->setup = MatSetUp_KAIJ; 1324 A->ops->destroy = MatDestroy_MPIKAIJ; 1325 A->ops->view = MatView_MPIKAIJ; 1326 A->ops->mult = MatMult_MPIKAIJ; 1327 A->ops->multadd = MatMultAdd_MPIKAIJ; 1328 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1329 A->ops->getrow = MatGetRow_MPIKAIJ; 1330 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1331 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1332 } 1333 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1334 PetscFunctionReturn(0); 1335 } 1336