/*
  Defines the basic matrix operations for the KAIJ matrix storage format.
  This format is used to evaluate matrices of the form:

        [I \otimes S + A \otimes T]

  where
        S is a dense (p \times q) matrix
        T is a dense (p \times q) matrix
        A is an AIJ  (n \times n) matrix
        I is the identity matrix

  The resulting matrix is (np \times nq)

  We provide:
     MatMult()
     MatMultAdd()
     MatInvertBlockDiagonal()
  and
     MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)

  This single directory handles both the sequential and parallel codes
*/

#include <../src/mat/impls/kaij/kaij.h>   /*I "petscmat.h" I*/
#include <../src/mat/utils/freespace.h>
#include <petsc/private/vecimpl.h>

/*@C
   MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix

   Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameter:
.  B - the AIJ matrix

   Level: advanced

   Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

.seealso: MatCreateKAIJ()
@*/
PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B)
{
  PetscErrorCode ierr;
  PetscBool      ismpikaij,isseqkaij;

  PetscFunctionBegin;
  /* The AIJ matrix lives in a different struct member depending on whether
     the KAIJ matrix is the sequential (Mat_SeqKAIJ.AIJ) or the parallel
     (Mat_MPIKAIJ.A) implementation, so dispatch on the concrete type. */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
  if (ismpikaij) {
    Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;

    *B = b->A;
  } else if (isseqkaij) {
    Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;

    *B = b->AIJ;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
  PetscFunctionReturn(0);
}

/*@C
  MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix

  Not Collective; the entire S is stored and returned independently on all processes.
  Input Parameter:
.  A - the KAIJ matrix

  Output Parameters:
+  m - the number of rows in S
.  n - the number of columns in S
-  S - the S matrix, in form of a scalar array in column-major format

  Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

  Level: advanced

.seealso: MatCreateKAIJ(), MatGetBlockSizes()
@*/
PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
  PetscFunctionBegin;
  /* S aliases the internal storage (no copy is made); pair with
     MatKAIJRestoreS() when done with the writable pointer */
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (S) *S = b->S;
  PetscFunctionReturn(0);
}

/*@C
  MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix

  Not Collective; the entire S is stored and returned independently on all processes.

  Input Parameter:
.  A - the KAIJ matrix

  Output Parameters:
+  m - the number of rows in S
.  n - the number of columns in S
-  S - the S matrix, in form of a scalar array in column-major format

  Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

  Level: advanced

.seealso: MatCreateKAIJ(), MatGetBlockSizes()
@*/
PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
  PetscFunctionBegin;
  /* read-only alias of the internal storage; pair with MatKAIJRestoreSRead() */
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (S) *S = b->S;
  PetscFunctionReturn(0);
}

/*@C
  MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()

  Not collective

  Input Parameter:
.  A - the KAIJ matrix

  Output Parameter:
.  S - location of pointer to array obtained with MatKAIJGetS()

  Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If NULL is passed, it will not attempt to zero the array pointer.
138 139 Level: advanced 140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141 @*/ 142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143 { 144 PetscFunctionBegin; 145 if (S) *S = NULL; 146 PetscObjectStateIncrease((PetscObject)A); 147 PetscFunctionReturn(0); 148 } 149 150 /*@C 151 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 152 153 Not collective 154 155 Input Parameter: 156 . A - the KAIJ matrix 157 158 Output Parameter: 159 . S - location of pointer to array obtained with MatKAIJGetS() 160 161 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 162 If NULL is passed, it will not attempt to zero the array pointer. 163 164 Level: advanced 165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 166 @*/ 167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 168 { 169 PetscFunctionBegin; 170 if (S) *S = NULL; 171 PetscFunctionReturn(0); 172 } 173 174 /*@C 175 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 176 177 Not Collective; the entire T is stored and returned independently on all processes 178 179 Input Parameter: 180 . A - the KAIJ matrix 181 182 Output Parameter: 183 + m - the number of rows in T 184 . 
n - the number of columns in T 185 - T - the T matrix, in form of a scalar array in column-major format 186 187 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 188 189 Level: advanced 190 191 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 192 @*/ 193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 194 { 195 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 196 PetscFunctionBegin; 197 if (m) *m = b->p; 198 if (n) *n = b->q; 199 if (T) *T = b->T; 200 PetscFunctionReturn(0); 201 } 202 203 /*@C 204 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 205 206 Not Collective; the entire T is stored and returned independently on all processes 207 208 Input Parameter: 209 . A - the KAIJ matrix 210 211 Output Parameter: 212 + m - the number of rows in T 213 . n - the number of columns in T 214 - T - the T matrix, in form of a scalar array in column-major format 215 216 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 217 218 Level: advanced 219 220 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 221 @*/ 222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 223 { 224 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 225 PetscFunctionBegin; 226 if (m) *m = b->p; 227 if (n) *n = b->q; 228 if (T) *T = b->T; 229 PetscFunctionReturn(0); 230 } 231 232 /*@C 233 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 234 235 Not collective 236 237 Input Parameter: 238 . A - the KAIJ matrix 239 240 Output Parameter: 241 . T - location of pointer to array obtained with MatKAIJGetS() 242 243 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 244 If NULL is passed, it will not attempt to zero the array pointer. 
245 246 Level: advanced 247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 248 @*/ 249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 250 { 251 PetscFunctionBegin; 252 if (T) *T = NULL; 253 PetscObjectStateIncrease((PetscObject)A); 254 PetscFunctionReturn(0); 255 } 256 257 /*@C 258 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 259 260 Not collective 261 262 Input Parameter: 263 . A - the KAIJ matrix 264 265 Output Parameter: 266 . T - location of pointer to array obtained with MatKAIJGetS() 267 268 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 269 If NULL is passed, it will not attempt to zero the array pointer. 270 271 Level: advanced 272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 273 @*/ 274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 275 { 276 PetscFunctionBegin; 277 if (T) *T = NULL; 278 PetscFunctionReturn(0); 279 } 280 281 /*@ 282 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 283 284 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 285 286 Input Parameters: 287 + A - the KAIJ matrix 288 - B - the AIJ matrix 289 290 Notes: 291 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 292 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 
293 294 Level: advanced 295 296 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 297 @*/ 298 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 299 { 300 PetscErrorCode ierr; 301 PetscMPIInt size; 302 303 PetscFunctionBegin; 304 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 305 if (size == 1) { 306 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 307 a->AIJ = B; 308 } else { 309 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 310 a->A = B; 311 } 312 ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 /*@C 317 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 318 319 Logically Collective; the entire S is stored independently on all processes. 320 321 Input Parameters: 322 + A - the KAIJ matrix 323 . p - the number of rows in S 324 . q - the number of columns in S 325 - S - the S matrix, in form of a scalar array in column-major format 326 327 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 328 The S matrix is copied, so the user can destroy this array. 329 330 Level: Advanced 331 332 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 333 @*/ 334 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 335 { 336 PetscErrorCode ierr; 337 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 338 339 PetscFunctionBegin; 340 ierr = PetscFree(a->S);CHKERRQ(ierr); 341 if (S) { 342 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 343 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 344 } else a->S = NULL; 345 346 a->p = p; 347 a->q = q; 348 PetscFunctionReturn(0); 349 } 350 351 /*@C 352 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 353 354 Logically Collective; the entire T is stored independently on all processes. 355 356 Input Parameters: 357 + A - the KAIJ matrix 358 . p - the number of rows in S 359 . 
q - the number of columns in S 360 - T - the T matrix, in form of a scalar array in column-major format 361 362 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 363 The T matrix is copied, so the user can destroy this array. 364 365 Level: Advanced 366 367 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 368 @*/ 369 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 370 { 371 PetscErrorCode ierr; 372 PetscInt i,j; 373 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 374 PetscBool isTI = PETSC_FALSE; 375 376 PetscFunctionBegin; 377 /* check if T is an identity matrix */ 378 if (T && (p == q)) { 379 isTI = PETSC_TRUE; 380 for (i=0; i<p; i++) { 381 for (j=0; j<q; j++) { 382 if (i == j) { 383 /* diagonal term must be 1 */ 384 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 385 } else { 386 /* off-diagonal term must be 0 */ 387 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 388 } 389 } 390 } 391 } 392 a->isTI = isTI; 393 394 ierr = PetscFree(a->T);CHKERRQ(ierr); 395 if (T && (!isTI)) { 396 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 397 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 398 } else a->T = NULL; 399 400 a->p = p; 401 a->q = q; 402 PetscFunctionReturn(0); 403 } 404 405 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 406 { 407 PetscErrorCode ierr; 408 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 409 410 PetscFunctionBegin; 411 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 412 ierr = PetscFree(b->S);CHKERRQ(ierr); 413 ierr = PetscFree(b->T);CHKERRQ(ierr); 414 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 415 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 416 ierr = PetscFree(A->data);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 PetscErrorCode MatSetUp_KAIJ(Mat A) 421 { 422 PetscErrorCode ierr; 423 PetscInt n; 424 PetscMPIInt size; 425 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 426 427 PetscFunctionBegin; 428 ierr = 
MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 429 if (size == 1) { 430 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 431 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 432 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 433 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 434 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 435 } else { 436 Mat_MPIKAIJ *a; 437 Mat_MPIAIJ *mpiaij; 438 IS from,to; 439 Vec gvec; 440 PetscScalar *T; 441 PetscInt i,j; 442 443 a = (Mat_MPIKAIJ*)A->data; 444 mpiaij = (Mat_MPIAIJ*)a->A->data; 445 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 446 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 447 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 448 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 449 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 450 451 if (a->isTI) { 452 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 453 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 454 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 455 * to pass in. 
*/ 456 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 457 for (i=0; i<a->p; i++) { 458 for (j=0; j<a->q; j++) { 459 if (i==j) T[i+j*a->p] = 1.0; 460 else T[i+j*a->p] = 0.0; 461 } 462 } 463 } else T = a->T; 464 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 465 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 466 ierr = PetscFree(T);CHKERRQ(ierr); 467 468 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 469 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 470 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 471 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 472 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 473 474 /* create two temporary Index sets for build scatter gather */ 475 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 476 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 477 478 /* create temporary global vector to generate scatter context */ 479 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 480 481 /* generate the scatter context */ 482 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 483 484 ierr = ISDestroy(&from);CHKERRQ(ierr); 485 ierr = ISDestroy(&to);CHKERRQ(ierr); 486 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 487 } 488 489 A->assembled = PETSC_TRUE; 490 PetscFunctionReturn(0); 491 } 492 493 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 494 { 495 PetscErrorCode ierr; 496 Mat B; 497 498 PetscFunctionBegin; 499 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 500 ierr = MatView(B,viewer);CHKERRQ(ierr); 501 ierr = MatDestroy(&B);CHKERRQ(ierr); 502 PetscFunctionReturn(0); 503 } 504 505 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 506 { 507 PetscErrorCode ierr; 508 Mat B; 509 510 PetscFunctionBegin; 511 ierr = 
MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 512 ierr = MatView(B,viewer);CHKERRQ(ierr); 513 ierr = MatDestroy(&B);CHKERRQ(ierr); 514 PetscFunctionReturn(0); 515 } 516 517 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 518 { 519 PetscErrorCode ierr; 520 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 521 522 PetscFunctionBegin; 523 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 524 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 525 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 526 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 527 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 528 ierr = PetscFree(b->S);CHKERRQ(ierr); 529 ierr = PetscFree(b->T);CHKERRQ(ierr); 530 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 531 ierr = PetscFree(A->data);CHKERRQ(ierr); 532 PetscFunctionReturn(0); 533 } 534 535 /* --------------------------------------------------------------------------------------*/ 536 537 /* zz = yy + Axx */ 538 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 539 { 540 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 541 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 542 const PetscScalar *s = b->S, *t = b->T; 543 const PetscScalar *x,*v,*bx; 544 PetscScalar *y,*sums; 545 PetscErrorCode ierr; 546 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 547 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 548 549 PetscFunctionBegin; 550 if (!yy) { 551 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 552 } else { 553 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 554 } 555 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 556 557 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 558 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 559 idx = a->j; 560 v = a->a; 561 ii = a->i; 562 563 if (b->isTI) { 564 for (i=0; i<m; i++) { 565 jrow = ii[i]; 566 n = ii[i+1] - jrow; 567 sums = y + p*i; 568 for (j=0; j<n; j++) { 569 for (k=0; k<p; k++) { 570 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 571 } 572 } 573 } 574 } else if (t) { 575 for (i=0; i<m; i++) { 576 jrow = ii[i]; 577 n = ii[i+1] - jrow; 578 sums = y + p*i; 579 bx = x + q*i; 580 for (j=0; 
j<n; j++) { 581 for (k=0; k<p; k++) { 582 for (l=0; l<q; l++) { 583 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 584 } 585 } 586 } 587 } 588 } 589 if (s) { 590 for (i=0; i<m; i++) { 591 sums = y + p*i; 592 bx = x + q*i; 593 if (i < b->AIJ->cmap->n) { 594 for (j=0; j<q; j++) { 595 for (k=0; k<p; k++) { 596 sums[k] += s[k+j*p]*bx[j]; 597 } 598 } 599 } 600 } 601 } 602 603 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 604 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 605 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 606 PetscFunctionReturn(0); 607 } 608 609 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy) 610 { 611 PetscErrorCode ierr; 612 PetscFunctionBegin; 613 ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 614 PetscFunctionReturn(0); 615 } 616 617 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 618 { 619 PetscErrorCode ierr; 620 PetscFunctionBegin; 621 ierr = MatMultAdd_SeqKAIJ(A,xx,yy,zz);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 #include <petsc/private/kernels/blockinvert.h> 626 627 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values) 628 { 629 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 630 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 631 const PetscScalar *S = b->S; 632 const PetscScalar *T = b->T; 633 const PetscScalar *v = a->a; 634 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 635 PetscErrorCode ierr; 636 PetscInt i,j,*v_pivots,dof,dof2; 637 PetscScalar *diag,aval,*v_work; 638 639 PetscFunctionBegin; 640 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 641 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 642 643 dof = p; 644 dof2 = dof*dof; 645 646 if (b->ibdiagvalid) { 647 if (values) *values = b->ibdiag; 648 PetscFunctionReturn(0); 649 } 650 if (!b->ibdiag) { 651 ierr = 
PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 652 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 653 } 654 if (values) *values = b->ibdiag; 655 diag = b->ibdiag; 656 657 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 658 for (i=0; i<m; i++) { 659 if (S) { 660 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 661 } else { 662 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 663 } 664 if (b->isTI) { 665 aval = 0; 666 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 667 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 668 } else if (T) { 669 aval = 0; 670 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 671 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 672 } 673 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 674 diag += dof2; 675 } 676 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 677 678 b->ibdiagvalid = PETSC_TRUE; 679 PetscFunctionReturn(0); 680 } 681 682 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 683 { 684 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 685 686 PetscFunctionBegin; 687 *B = kaij->AIJ; 688 PetscFunctionReturn(0); 689 } 690 691 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 692 { 693 PetscErrorCode ierr; 694 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 695 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 696 const PetscScalar *aa = a->a, *T = kaij->T, *v; 697 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 698 const PetscScalar *b, *xb, *idiag; 699 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 700 PetscInt i, j, k, i2, bs, bs2, nz; 701 702 PetscFunctionBegin; 703 its = its*lits; 704 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 705 if (its <= 0) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 706 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 707 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 708 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 709 else {bs = p; bs2 = bs*bs; } 710 711 if (!m) PetscFunctionReturn(0); 712 713 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); } 714 idiag = kaij->ibdiag; 715 diag = a->diag; 716 717 if (!kaij->sor.setup) { 718 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 719 kaij->sor.setup = PETSC_TRUE; 720 } 721 y = kaij->sor.y; 722 w = kaij->sor.w; 723 work = kaij->sor.work; 724 t = kaij->sor.t; 725 arr = kaij->sor.arr; 726 727 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 728 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 729 730 if (flag & SOR_ZERO_INITIAL_GUESS) { 731 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 732 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 733 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 734 i2 = bs; 735 idiag += bs2; 736 for (i=1; i<m; i++) { 737 v = aa + ai[i]; 738 vi = aj + ai[i]; 739 nz = diag[i] - ai[i]; 740 741 if (T) { /* b - T (Arow * x) */ 742 for (k=0; k<bs; k++) w[k] = 0; 743 for (j=0; j<nz; j++) { 744 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 745 } 746 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 747 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 748 } else if (kaij->isTI) { 749 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 750 for (j=0; j<nz; j++) { 751 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 752 } 753 } else { 754 for (k=0; k<bs; 
k++) t[i2+k] = b[i2+k]; 755 } 756 757 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 758 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 759 760 idiag += bs2; 761 i2 += bs; 762 } 763 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 764 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 765 xb = t; 766 } else xb = b; 767 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 768 idiag = kaij->ibdiag+bs2*(m-1); 769 i2 = bs * (m-1); 770 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 771 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 772 i2 -= bs; 773 idiag -= bs2; 774 for (i=m-2; i>=0; i--) { 775 v = aa + diag[i] + 1 ; 776 vi = aj + diag[i] + 1; 777 nz = ai[i+1] - diag[i] - 1; 778 779 if (T) { /* FIXME: This branch untested */ 780 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 781 /* copy all rows of x that are needed into contiguous space */ 782 workt = work; 783 for (j=0; j<nz; j++) { 784 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 785 workt += bs; 786 } 787 arrt = arr; 788 for (j=0; j<nz; j++) { 789 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 790 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 791 arrt += bs2; 792 } 793 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 794 } else if (kaij->isTI) { 795 for (k=0; k<bs; k++) w[k] = t[i2+k]; 796 for (j=0; j<nz; j++) { 797 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 798 } 799 } 800 801 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 802 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 803 804 idiag -= bs2; 805 i2 -= bs; 806 } 807 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 808 } 809 its--; 810 } 811 while (its--) { /* FIXME: This branch not updated */ 812 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 813 i2 = 0; 814 idiag = kaij->ibdiag; 815 for (i=0; i<m; i++) { 816 ierr = 
PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 817 818 v = aa + ai[i]; 819 vi = aj + ai[i]; 820 nz = diag[i] - ai[i]; 821 workt = work; 822 for (j=0; j<nz; j++) { 823 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 824 workt += bs; 825 } 826 arrt = arr; 827 if (T) { 828 for (j=0; j<nz; j++) { 829 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 830 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 831 arrt += bs2; 832 } 833 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 834 } else if (kaij->isTI) { 835 for (j=0; j<nz; j++) { 836 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 837 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 838 arrt += bs2; 839 } 840 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 841 } 842 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 843 844 v = aa + diag[i] + 1; 845 vi = aj + diag[i] + 1; 846 nz = ai[i+1] - diag[i] - 1; 847 workt = work; 848 for (j=0; j<nz; j++) { 849 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 850 workt += bs; 851 } 852 arrt = arr; 853 if (T) { 854 for (j=0; j<nz; j++) { 855 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 856 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 857 arrt += bs2; 858 } 859 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 860 } else if (kaij->isTI) { 861 for (j=0; j<nz; j++) { 862 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 863 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 864 arrt += bs2; 865 } 866 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 867 } 868 869 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 870 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 871 872 idiag += bs2; 873 i2 += bs; 874 } 875 xb = t; 876 } 877 else xb = b; 878 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 879 idiag = kaij->ibdiag+bs2*(m-1); 880 i2 = bs * (m-1); 881 if (xb == b) { 882 for 
(i=m-1; i>=0; i--) { 883 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 884 885 v = aa + ai[i]; 886 vi = aj + ai[i]; 887 nz = diag[i] - ai[i]; 888 workt = work; 889 for (j=0; j<nz; j++) { 890 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 891 workt += bs; 892 } 893 arrt = arr; 894 if (T) { 895 for (j=0; j<nz; j++) { 896 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 897 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 898 arrt += bs2; 899 } 900 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 901 } else if (kaij->isTI) { 902 for (j=0; j<nz; j++) { 903 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 904 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 905 arrt += bs2; 906 } 907 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 908 } 909 910 v = aa + diag[i] + 1; 911 vi = aj + diag[i] + 1; 912 nz = ai[i+1] - diag[i] - 1; 913 workt = work; 914 for (j=0; j<nz; j++) { 915 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 916 workt += bs; 917 } 918 arrt = arr; 919 if (T) { 920 for (j=0; j<nz; j++) { 921 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 922 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 923 arrt += bs2; 924 } 925 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 926 } else if (kaij->isTI) { 927 for (j=0; j<nz; j++) { 928 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 929 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 930 arrt += bs2; 931 } 932 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 933 } 934 935 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 936 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 937 } 938 } else { 939 for (i=m-1; i>=0; i--) { 940 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 941 v = aa + diag[i] + 1; 942 vi = aj + diag[i] + 1; 943 nz = ai[i+1] - diag[i] - 1; 944 workt = work; 945 for (j=0; j<nz; j++) { 946 ierr = 
PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 947 workt += bs; 948 } 949 arrt = arr; 950 if (T) { 951 for (j=0; j<nz; j++) { 952 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 953 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 954 arrt += bs2; 955 } 956 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 957 } else if (kaij->isTI) { 958 for (j=0; j<nz; j++) { 959 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 960 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 961 arrt += bs2; 962 } 963 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 964 } 965 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 966 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 967 } 968 idiag -= bs2; 969 i2 -= bs; 970 } 971 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 972 } 973 } 974 975 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 976 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 977 PetscFunctionReturn(0); 978 } 979 980 /*===================================================================================*/ 981 982 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 983 { 984 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 985 PetscErrorCode ierr; 986 987 PetscFunctionBegin; 988 if (!yy) { 989 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 990 } else { 991 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 992 } 993 /* start the scatter */ 994 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 995 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 996 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 997 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy) 1002 { 1003 PetscErrorCode ierr; 1004 PetscFunctionBegin; 1005 ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 
PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz) 1010 { 1011 PetscErrorCode ierr; 1012 PetscFunctionBegin; 1013 ierr = MatMultAdd_MPIKAIJ(A,xx,yy,zz);CHKERRQ(ierr); 1014 PetscFunctionReturn(0); 1015 } 1016 1017 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values) 1018 { 1019 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1020 PetscErrorCode ierr; 1021 1022 PetscFunctionBegin; 1023 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 /* ----------------------------------------------------------------*/ 1028 1029 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1030 { 1031 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1032 PetscErrorCode diag = PETSC_FALSE; 1033 PetscErrorCode ierr; 1034 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1035 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1036 1037 PetscFunctionBegin; 1038 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1039 b->getrowactive = PETSC_TRUE; 1040 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 1041 1042 if ((!S) && (!T) && (!b->isTI)) { 1043 if (ncols) *ncols = 0; 1044 if (cols) *cols = NULL; 1045 if (values) *values = NULL; 1046 PetscFunctionReturn(0); 1047 } 1048 1049 if (T || b->isTI) { 1050 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 1051 c = nzaij; 1052 for (i=0; i<nzaij; i++) { 1053 /* check if this row contains a diagonal entry */ 1054 if (colsaij[i] == r) { 1055 diag = PETSC_TRUE; 1056 c = i; 1057 } 1058 } 1059 } else nzaij = c = 0; 1060 1061 /* calculate size of row */ 1062 nz = 0; 1063 if (S) nz += q; 1064 if (T || b->isTI) nz += (diag && S ? 
(nzaij-1)*q : nzaij*q); 1065 1066 if (cols || values) { 1067 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1068 for (i=0; i<q; i++) { 1069 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1070 v[i] = 0.0; 1071 } 1072 if (b->isTI) { 1073 for (i=0; i<nzaij; i++) { 1074 for (j=0; j<q; j++) { 1075 idx[i*q+j] = colsaij[i]*q+j; 1076 v[i*q+j] = (j==s ? vaij[i] : 0); 1077 } 1078 } 1079 } else if (T) { 1080 for (i=0; i<nzaij; i++) { 1081 for (j=0; j<q; j++) { 1082 idx[i*q+j] = colsaij[i]*q+j; 1083 v[i*q+j] = vaij[i]*T[s+j*p]; 1084 } 1085 } 1086 } 1087 if (S) { 1088 for (j=0; j<q; j++) { 1089 idx[c*q+j] = r*q+j; 1090 v[c*q+j] += S[s+j*p]; 1091 } 1092 } 1093 } 1094 1095 if (ncols) *ncols = nz; 1096 if (cols) *cols = idx; 1097 if (values) *values = v; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1102 { 1103 PetscErrorCode ierr; 1104 PetscFunctionBegin; 1105 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1106 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1107 PetscFunctionReturn(0); 1108 } 1109 1110 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1111 { 1112 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1113 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1114 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1115 Mat AIJ = b->A; 1116 PetscBool diag = PETSC_FALSE; 1117 PetscErrorCode ierr; 1118 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1119 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1120 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1121 1122 PetscFunctionBegin; 1123 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1124 b->getrowactive = PETSC_TRUE; 1125 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 
1126 lrow = row - rstart; 1127 1128 if ((!S) && (!T) && (!b->isTI)) { 1129 if (ncols) *ncols = 0; 1130 if (cols) *cols = NULL; 1131 if (values) *values = NULL; 1132 PetscFunctionReturn(0); 1133 } 1134 1135 r = lrow/p; 1136 s = lrow%p; 1137 1138 if (T || b->isTI) { 1139 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 1140 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 1141 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 1142 1143 c = ncolsaij + ncolsoaij; 1144 for (i=0; i<ncolsaij; i++) { 1145 /* check if this row contains a diagonal entry */ 1146 if (colsaij[i] == r) { 1147 diag = PETSC_TRUE; 1148 c = i; 1149 } 1150 } 1151 } else c = 0; 1152 1153 /* calculate size of row */ 1154 nz = 0; 1155 if (S) nz += q; 1156 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1157 1158 if (cols || values) { 1159 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1160 for (i=0; i<q; i++) { 1161 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1162 v[i] = 0.0; 1163 } 1164 if (b->isTI) { 1165 for (i=0; i<ncolsaij; i++) { 1166 for (j=0; j<q; j++) { 1167 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1168 v[i*q+j] = (j==s ? vals[i] : 0.0); 1169 } 1170 } 1171 for (i=0; i<ncolsoaij; i++) { 1172 for (j=0; j<q; j++) { 1173 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1174 v[(i+ncolsaij)*q+j] = (j==s ? 
ovals[i]: 0.0); 1175 } 1176 } 1177 } else if (T) { 1178 for (i=0; i<ncolsaij; i++) { 1179 for (j=0; j<q; j++) { 1180 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1181 v[i*q+j] = vals[i]*T[s+j*p]; 1182 } 1183 } 1184 for (i=0; i<ncolsoaij; i++) { 1185 for (j=0; j<q; j++) { 1186 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1187 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1188 } 1189 } 1190 } 1191 if (S) { 1192 for (j=0; j<q; j++) { 1193 idx[c*q+j] = (r+rstart/p)*q+j; 1194 v[c*q+j] += S[s+j*p]; 1195 } 1196 } 1197 } 1198 1199 if (ncols) *ncols = nz; 1200 if (cols) *cols = idx; 1201 if (values) *values = v; 1202 PetscFunctionReturn(0); 1203 } 1204 1205 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1206 { 1207 PetscErrorCode ierr; 1208 PetscFunctionBegin; 1209 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1210 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1211 PetscFunctionReturn(0); 1212 } 1213 1214 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1215 { 1216 PetscErrorCode ierr; 1217 Mat A; 1218 1219 PetscFunctionBegin; 1220 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1221 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1222 ierr = MatDestroy(&A);CHKERRQ(ierr); 1223 PetscFunctionReturn(0); 1224 } 1225 1226 /* ---------------------------------------------------------------------------------- */ 1227 /*@C 1228 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1229 1230 [I \otimes S + A \otimes T] 1231 1232 where 1233 S is a dense (p \times q) matrix 1234 T is a dense (p \times q) matrix 1235 A is an AIJ (n \times n) matrix 1236 I is the identity matrix 1237 The resulting matrix is (np \times nq) 1238 1239 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 
1240 S is always stored independently on all processes as a PetscScalar array in column-major format. 1241 1242 Collective 1243 1244 Input Parameters: 1245 + A - the AIJ matrix 1246 . S - the S matrix, stored as a PetscScalar array (column-major) 1247 . T - the T matrix, stored as a PetscScalar array (column-major) 1248 . p - number of rows in S and T 1249 - q - number of columns in S and T 1250 1251 Output Parameter: 1252 . kaij - the new KAIJ matrix 1253 1254 Operations provided: 1255 + MatMult 1256 . MatMultAdd 1257 . MatInvertBlockDiagonal 1258 - MatView 1259 1260 Level: advanced 1261 1262 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1263 @*/ 1264 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1265 { 1266 PetscErrorCode ierr; 1267 PetscMPIInt size; 1268 1269 PetscFunctionBegin; 1270 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1271 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1272 if (size == 1) { 1273 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1274 } else { 1275 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1276 } 1277 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1278 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1279 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1280 ierr = MatSetUp(*kaij); 1281 PetscFunctionReturn(0); 1282 } 1283 1284 /*MC 1285 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 1286 1287 [I \otimes S + A \otimes T] 1288 1289 where 1290 S is a dense (p \times q) matrix 1291 T is a dense (p \times q) matrix 1292 A is an AIJ (n \times n) matrix 1293 I is the identity matrix 1294 The resulting matrix is (np \times nq) 1295 1296 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 
1297 S and T are always stored independently on all processes as a PetscScalar array in column-major format. 1298 1299 Operations provided: 1300 + MatMult 1301 . MatMultAdd 1302 . MatInvertBlockDiagonal 1303 - MatView 1304 1305 Level: advanced 1306 1307 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1308 M*/ 1309 1310 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1311 { 1312 PetscErrorCode ierr; 1313 Mat_MPIKAIJ *b; 1314 PetscMPIInt size; 1315 1316 PetscFunctionBegin; 1317 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1318 A->data = (void*)b; 1319 1320 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1321 1322 A->ops->setup = MatSetUp_KAIJ; 1323 1324 b->w = 0; 1325 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1326 if (size == 1) { 1327 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1328 A->ops->setup = MatSetUp_KAIJ; 1329 A->ops->destroy = MatDestroy_SeqKAIJ; 1330 A->ops->view = MatView_SeqKAIJ; 1331 A->ops->mult = MatMult_SeqKAIJ_N; 1332 A->ops->multadd = MatMultAdd_SeqKAIJ_N; 1333 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N; 1334 A->ops->getrow = MatGetRow_SeqKAIJ; 1335 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1336 A->ops->sor = MatSOR_SeqKAIJ; 1337 } else { 1338 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1339 A->ops->setup = MatSetUp_KAIJ; 1340 A->ops->destroy = MatDestroy_MPIKAIJ; 1341 A->ops->view = MatView_MPIKAIJ; 1342 A->ops->mult = MatMult_MPIKAIJ_dof; 1343 A->ops->multadd = MatMultAdd_MPIKAIJ_dof; 1344 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof; 1345 A->ops->getrow = MatGetRow_MPIKAIJ; 1346 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1347 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1348 } 1349 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1350 
PetscFunctionReturn(0); 1351 } 1352