1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameters: 76 + m - the number of rows in S 77 . n - the number of columns in S 78 - S - the S matrix, in form of a scalar array in column-major format 79 80 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 if (m) *m = b->p; 91 if (n) *n = b->q; 92 if (S) *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98 99 Not Collective; the entire S is stored and returned independently on all processes. 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameters: 105 + m - the number of rows in S 106 . n - the number of columns in S 107 - S - the S matrix, in form of a scalar array in column-major format 108 109 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 if (m) *m = b->p; 120 if (n) *n = b->q; 121 if (S) *S = b->S; 122 PetscFunctionReturn(0); 123 } 124 125 /*@C 126 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127 128 Not collective 129 130 Input Parameter: 131 . A - the KAIJ matrix 132 133 Output Parameter: 134 . S - location of pointer to array obtained with MatKAIJGetS() 135 136 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 138 139 Level: advanced 140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141 @*/ 142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143 { 144 PetscFunctionBegin; 145 if (S) *S = NULL; 146 PetscObjectStateIncrease((PetscObject)A); 147 PetscFunctionReturn(0); 148 } 149 150 /*@C 151 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 152 153 Not collective 154 155 Input Parameter: 156 . A - the KAIJ matrix 157 158 Output Parameter: 159 . S - location of pointer to array obtained with MatKAIJGetS() 160 161 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 162 If NULL is passed, it will not attempt to zero the array pointer. 163 164 Level: advanced 165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 166 @*/ 167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 168 { 169 PetscFunctionBegin; 170 if (S) *S = NULL; 171 PetscFunctionReturn(0); 172 } 173 174 /*@C 175 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 176 177 Not Collective; the entire T is stored and returned independently on all processes 178 179 Input Parameter: 180 . A - the KAIJ matrix 181 182 Output Parameter: 183 + m - the number of rows in T 184 . n - the number of columns in T 185 - T - the T matrix, in form of a scalar array in column-major format 186 187 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 188 189 Level: advanced 190 191 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 192 @*/ 193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 194 { 195 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 196 PetscFunctionBegin; 197 if (m) *m = b->p; 198 if (n) *n = b->q; 199 if (T) *T = b->T; 200 PetscFunctionReturn(0); 201 } 202 203 /*@C 204 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 205 206 Not Collective; the entire T is stored and returned independently on all processes 207 208 Input Parameter: 209 . A - the KAIJ matrix 210 211 Output Parameter: 212 + m - the number of rows in T 213 . n - the number of columns in T 214 - T - the T matrix, in form of a scalar array in column-major format 215 216 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 217 218 Level: advanced 219 220 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 221 @*/ 222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 223 { 224 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 225 PetscFunctionBegin; 226 if (m) *m = b->p; 227 if (n) *n = b->q; 228 if (T) *T = b->T; 229 PetscFunctionReturn(0); 230 } 231 232 /*@C 233 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 234 235 Not collective 236 237 Input Parameter: 238 . A - the KAIJ matrix 239 240 Output Parameter: 241 . T - location of pointer to array obtained with MatKAIJGetS() 242 243 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 244 If NULL is passed, it will not attempt to zero the array pointer. 245 246 Level: advanced 247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 248 @*/ 249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 250 { 251 PetscFunctionBegin; 252 if (T) *T = NULL; 253 PetscObjectStateIncrease((PetscObject)A); 254 PetscFunctionReturn(0); 255 } 256 257 /*@C 258 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 259 260 Not collective 261 262 Input Parameter: 263 . A - the KAIJ matrix 264 265 Output Parameter: 266 . T - location of pointer to array obtained with MatKAIJGetS() 267 268 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 269 If NULL is passed, it will not attempt to zero the array pointer. 270 271 Level: advanced 272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 273 @*/ 274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 275 { 276 PetscFunctionBegin; 277 if (T) *T = NULL; 278 PetscFunctionReturn(0); 279 } 280 281 /*@ 282 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 283 284 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 285 286 Input Parameters: 287 + A - the KAIJ matrix 288 - B - the AIJ matrix 289 290 Notes: 291 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 292 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 293 294 Level: advanced 295 296 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 297 @*/ 298 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 299 { 300 PetscErrorCode ierr; 301 PetscMPIInt size; 302 303 PetscFunctionBegin; 304 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 305 if (size == 1) { 306 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 307 a->AIJ = B; 308 } else { 309 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 310 a->A = B; 311 } 312 ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 /*@C 317 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 318 319 Logically Collective; the entire S is stored independently on all processes. 320 321 Input Parameters: 322 + A - the KAIJ matrix 323 . p - the number of rows in S 324 . q - the number of columns in S 325 - S - the S matrix, in form of a scalar array in column-major format 326 327 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 328 The S matrix is copied, so the user can destroy this array. 329 330 Level: Advanced 331 332 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 333 @*/ 334 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 335 { 336 PetscErrorCode ierr; 337 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 338 339 PetscFunctionBegin; 340 ierr = PetscFree(a->S);CHKERRQ(ierr); 341 if (S) { 342 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 343 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 344 } else a->S = NULL; 345 346 a->p = p; 347 a->q = q; 348 PetscFunctionReturn(0); 349 } 350 351 /*@C 352 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 353 354 Logically Collective; the entire T is stored independently on all processes. 355 356 Input Parameters: 357 + A - the KAIJ matrix 358 . p - the number of rows in S 359 . q - the number of columns in S 360 - T - the T matrix, in form of a scalar array in column-major format 361 362 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 363 The T matrix is copied, so the user can destroy this array. 364 365 Level: Advanced 366 367 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 368 @*/ 369 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 370 { 371 PetscErrorCode ierr; 372 PetscInt i,j; 373 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 374 PetscBool isTI = PETSC_FALSE; 375 376 PetscFunctionBegin; 377 /* check if T is an identity matrix */ 378 if (T && (p == q)) { 379 isTI = PETSC_TRUE; 380 for (i=0; i<p; i++) { 381 for (j=0; j<q; j++) { 382 if (i == j) { 383 /* diagonal term must be 1 */ 384 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 385 } else { 386 /* off-diagonal term must be 0 */ 387 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 388 } 389 } 390 } 391 } 392 a->isTI = isTI; 393 394 ierr = PetscFree(a->T);CHKERRQ(ierr); 395 if (T && (!isTI)) { 396 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 397 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 398 } else a->T = NULL; 399 400 a->p = p; 401 a->q = q; 402 PetscFunctionReturn(0); 403 } 404 405 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 406 { 407 PetscErrorCode ierr; 408 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 409 410 PetscFunctionBegin; 411 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 412 ierr = PetscFree(b->S);CHKERRQ(ierr); 413 ierr = PetscFree(b->T);CHKERRQ(ierr); 414 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 415 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 416 ierr = PetscFree(A->data);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 PetscErrorCode MatSetUp_KAIJ(Mat A) 421 { 422 PetscErrorCode ierr; 423 PetscInt n; 424 PetscMPIInt size; 425 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 426 427 PetscFunctionBegin; 428 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 429 if (size == 1) { 430 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 431 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 432 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 433 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 434 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 435 } else { 436 Mat_MPIKAIJ *a; 437 Mat_MPIAIJ *mpiaij; 438 IS from,to; 439 Vec gvec; 440 PetscScalar *T; 441 PetscInt i,j; 442 443 a = (Mat_MPIKAIJ*)A->data; 444 mpiaij = (Mat_MPIAIJ*)a->A->data; 445 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 446 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 447 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 448 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 449 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 450 451 if (a->isTI) { 452 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 453 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 454 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 455 * to pass in. */ 456 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 457 for (i=0; i<a->p; i++) { 458 for (j=0; j<a->q; j++) { 459 if (i==j) T[i+j*a->p] = 1.0; 460 else T[i+j*a->p] = 0.0; 461 } 462 } 463 } else T = a->T; 464 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 465 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 466 if (a->isTI) { 467 ierr = PetscFree(T);CHKERRQ(ierr); 468 } 469 470 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 471 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 472 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 473 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 474 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 475 476 /* create two temporary Index sets for build scatter gather */ 477 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 478 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 479 480 /* create temporary global vector to generate scatter context */ 481 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 482 483 /* generate the scatter context */ 484 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 485 486 ierr = ISDestroy(&from);CHKERRQ(ierr); 487 ierr = ISDestroy(&to);CHKERRQ(ierr); 488 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 489 } 490 491 A->assembled = PETSC_TRUE; 492 PetscFunctionReturn(0); 493 } 494 495 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 496 { 497 PetscErrorCode ierr; 498 Mat B; 499 500 PetscFunctionBegin; 501 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 502 ierr = MatView(B,viewer);CHKERRQ(ierr); 503 ierr = MatDestroy(&B);CHKERRQ(ierr); 504 PetscFunctionReturn(0); 505 } 506 507 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 508 { 509 PetscErrorCode ierr; 510 Mat B; 511 512 PetscFunctionBegin; 513 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 514 ierr = MatView(B,viewer);CHKERRQ(ierr); 515 ierr = MatDestroy(&B);CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 520 { 521 PetscErrorCode ierr; 522 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 523 524 PetscFunctionBegin; 525 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 526 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 527 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 528 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 529 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 530 ierr = PetscFree(b->S);CHKERRQ(ierr); 531 ierr = PetscFree(b->T);CHKERRQ(ierr); 532 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 533 ierr = PetscFree(A->data);CHKERRQ(ierr); 534 PetscFunctionReturn(0); 535 } 536 537 /* --------------------------------------------------------------------------------------*/ 538 539 /* zz = yy + Axx */ 540 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 541 { 542 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 543 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 544 const PetscScalar *s = b->S, *t = b->T; 545 const PetscScalar *x,*v,*bx; 546 PetscScalar *y,*sums; 547 PetscErrorCode ierr; 548 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 549 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 550 551 PetscFunctionBegin; 552 if (!yy) { 553 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 554 } else { 555 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 556 } 557 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 558 559 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 560 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 561 idx = a->j; 562 v = a->a; 563 ii = a->i; 564 565 if (b->isTI) { 566 for (i=0; i<m; i++) { 567 jrow = ii[i]; 568 n = ii[i+1] - jrow; 569 sums = y + p*i; 570 for (j=0; j<n; j++) { 571 for (k=0; k<p; k++) { 572 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 573 } 574 } 575 } 576 } else if (t) { 577 for (i=0; i<m; i++) { 578 jrow = ii[i]; 579 n = ii[i+1] - jrow; 580 sums = y + p*i; 581 bx = x + q*i; 582 for (j=0; j<n; j++) { 583 for (k=0; k<p; k++) { 584 for (l=0; l<q; l++) { 585 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 586 } 587 } 588 } 589 } 590 } 591 if (s) { 592 for (i=0; i<m; i++) { 593 sums = y + p*i; 594 bx = x + q*i; 595 if (i < b->AIJ->cmap->n) { 596 for (j=0; j<q; j++) { 597 for (k=0; k<p; k++) { 598 sums[k] += s[k+j*p]*bx[j]; 599 } 600 } 601 } 602 } 603 } 604 605 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 606 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 607 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 608 PetscFunctionReturn(0); 609 } 610 611 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 612 { 613 PetscErrorCode ierr; 614 PetscFunctionBegin; 615 ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 619 #include <petsc/private/kernels/blockinvert.h> 620 621 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 622 { 623 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 624 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 625 const PetscScalar *S = b->S; 626 const PetscScalar *T = b->T; 627 const PetscScalar *v = a->a; 628 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 629 PetscErrorCode ierr; 630 PetscInt i,j,*v_pivots,dof,dof2; 631 PetscScalar *diag,aval,*v_work; 632 633 PetscFunctionBegin; 634 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 635 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 636 637 dof = p; 638 dof2 = dof*dof; 639 640 if (b->ibdiagvalid) { 641 if (values) *values = b->ibdiag; 642 PetscFunctionReturn(0); 643 } 644 if (!b->ibdiag) { 645 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 646 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 647 } 648 if (values) *values = b->ibdiag; 649 diag = b->ibdiag; 650 651 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 652 for (i=0; i<m; i++) { 653 if (S) { 654 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 655 } else { 656 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 657 } 658 if (b->isTI) { 659 aval = 0; 660 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 661 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 662 } else if (T) { 663 aval = 0; 664 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 665 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 666 } 667 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 668 diag += dof2; 669 } 670 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 671 672 b->ibdiagvalid = PETSC_TRUE; 673 PetscFunctionReturn(0); 674 } 675 676 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 677 { 678 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 679 680 PetscFunctionBegin; 681 *B = kaij->AIJ; 682 PetscFunctionReturn(0); 683 } 684 685 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 686 { 687 PetscErrorCode ierr; 688 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 689 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 690 const PetscScalar *aa = a->a, *T = kaij->T, *v; 691 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 692 const PetscScalar *b, *xb, *idiag; 693 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 694 PetscInt i, j, k, i2, bs, bs2, nz; 695 696 PetscFunctionBegin; 697 its = its*lits; 698 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 699 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 700 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 701 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 702 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks"); 703 else {bs = p; bs2 = bs*bs; } 704 705 if (!m) PetscFunctionReturn(0); 706 707 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); } 708 idiag = kaij->ibdiag; 709 diag = a->diag; 710 711 if (!kaij->sor.setup) { 712 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 713 kaij->sor.setup = PETSC_TRUE; 714 } 715 y = kaij->sor.y; 716 w = kaij->sor.w; 717 work = kaij->sor.work; 718 t = kaij->sor.t; 719 arr = kaij->sor.arr; 720 721 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 722 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 723 724 if (flag & SOR_ZERO_INITIAL_GUESS) { 725 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 726 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 727 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 728 i2 = bs; 729 idiag += bs2; 730 for (i=1; i<m; i++) { 731 v = aa + ai[i]; 732 vi = aj + ai[i]; 733 nz = diag[i] - ai[i]; 734 735 if (T) { /* b - T (Arow * x) */ 736 ierr = PetscMemzero(w,bs*sizeof(PetscScalar)); 737 for (j=0; j<nz; j++) { 738 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 739 } 740 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 741 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 742 } else if (kaij->isTI) { 743 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 744 for (j=0; j<nz; j++) { 745 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 746 } 747 } else { 748 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 749 } 750 751 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 752 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 753 754 idiag += bs2; 755 i2 += bs; 756 } 757 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 758 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 759 xb = t; 760 } else xb = b; 761 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 762 idiag = kaij->ibdiag+bs2*(m-1); 763 i2 = bs * (m-1); 764 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 765 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 766 i2 -= bs; 767 idiag -= bs2; 768 for (i=m-2; i>=0; i--) { 769 v = aa + diag[i] + 1 ; 770 vi = aj + diag[i] + 1; 771 nz = ai[i+1] - diag[i] - 1; 772 773 if (T) { /* FIXME: This branch untested */ 774 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 775 /* copy all rows of x that are needed into contiguous space */ 776 workt = work; 777 for (j=0; j<nz; j++) { 778 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 779 workt += bs; 780 } 781 arrt = arr; 782 for (j=0; j<nz; j++) { 783 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 784 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 785 arrt += bs2; 786 } 787 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 788 } else if (kaij->isTI) { 789 ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 790 for (j=0; j<nz; j++) { 791 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 792 } 793 } 794 795 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 796 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 797 798 idiag -= bs2; 799 i2 -= bs; 800 } 801 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 802 } 803 its--; 804 } 805 while (its--) { /* FIXME: This branch not updated */ 806 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 807 i2 = 0; 808 idiag = kaij->ibdiag; 809 for (i=0; i<m; i++) { 810 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 811 812 v = aa + ai[i]; 813 vi = aj + ai[i]; 814 nz = diag[i] - ai[i]; 815 workt = work; 816 for (j=0; j<nz; j++) { 817 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 818 workt += bs; 819 } 820 arrt = arr; 821 if (T) { 822 for (j=0; j<nz; j++) { 823 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 824 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 825 arrt += bs2; 826 } 827 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 828 } else if (kaij->isTI) { 829 for (j=0; j<nz; j++) { 830 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 831 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 832 arrt += bs2; 833 } 834 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 835 } 836 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 837 838 v = aa + diag[i] + 1; 839 vi = aj + diag[i] + 1; 840 nz = ai[i+1] - diag[i] - 1; 841 workt = work; 842 for (j=0; j<nz; j++) { 843 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 844 workt += bs; 845 } 846 arrt = arr; 847 if (T) { 848 for (j=0; j<nz; j++) { 849 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 850 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 851 arrt += bs2; 852 } 853 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 854 } else if (kaij->isTI) { 855 for (j=0; j<nz; j++) { 856 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 857 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 858 arrt += bs2; 859 } 860 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 861 } 862 863 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 864 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 865 866 idiag += bs2; 867 i2 += bs; 868 } 869 xb = t; 870 } 871 else xb = b; 872 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 873 idiag = kaij->ibdiag+bs2*(m-1); 874 i2 = bs * (m-1); 875 if (xb == b) { 876 for (i=m-1; i>=0; i--) { 877 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 878 879 v = aa + ai[i]; 880 vi = aj + ai[i]; 881 nz = diag[i] - ai[i]; 882 workt = work; 883 for (j=0; j<nz; j++) { 884 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 885 workt += bs; 886 } 887 arrt = arr; 888 if (T) { 889 for (j=0; j<nz; j++) { 890 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 891 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 892 arrt += bs2; 893 } 894 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 895 } else if (kaij->isTI) { 896 for (j=0; j<nz; j++) { 897 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 898 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 899 arrt += bs2; 900 } 901 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 902 } 903 904 v = aa + diag[i] + 1; 905 vi = aj + diag[i] + 1; 906 nz = ai[i+1] - diag[i] - 1; 907 workt = work; 908 for (j=0; j<nz; j++) { 909 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 910 workt += bs; 911 } 912 arrt = arr; 913 if (T) { 914 for (j=0; j<nz; j++) { 915 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 916 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 917 arrt += bs2; 918 } 919 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 920 } else if (kaij->isTI) { 921 for (j=0; j<nz; j++) { 922 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 923 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 924 arrt += bs2; 925 } 926 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 927 } 928 929 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 930 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 931 } 932 } else { 933 for (i=m-1; i>=0; i--) { 934 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 935 v = aa + diag[i] + 1; 936 vi = aj + diag[i] + 1; 937 nz = ai[i+1] - diag[i] - 1; 938 workt = work; 939 for (j=0; j<nz; j++) { 940 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 941 workt += bs; 942 } 943 arrt = arr; 944 if (T) { 945 for (j=0; j<nz; j++) { 946 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 947 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 948 arrt += bs2; 949 } 950 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 951 } else if (kaij->isTI) { 952 for (j=0; j<nz; j++) { 953 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 954 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 955 arrt += bs2; 956 } 957 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 958 } 959 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 960 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 961 } 962 idiag -= bs2; 963 i2 -= bs; 964 } 965 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 966 } 967 } 968 969 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 970 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 971 PetscFunctionReturn(0); 972 } 973 974 /*===================================================================================*/ 975 976 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 977 { 978 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 979 PetscErrorCode ierr; 980 981 PetscFunctionBegin; 982 if (!yy) { 983 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 984 } else { 985 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 986 } 987 /* start the scatter */ 988 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 990 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 992 PetscFunctionReturn(0); 993 } 994 995 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 996 { 997 PetscErrorCode ierr; 998 PetscFunctionBegin; 999 ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 1004 { 1005 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1006 PetscErrorCode ierr; 1007 1008 PetscFunctionBegin; 1009 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 /* ----------------------------------------------------------------*/ 1014 1015 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1016 { 1017 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1018 PetscErrorCode diag = PETSC_FALSE; 1019 PetscErrorCode ierr; 1020 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1021 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1022 1023 PetscFunctionBegin; 1024 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1025 b->getrowactive = PETSC_TRUE; 1026 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 1027 1028 if ((!S) && (!T) && (!b->isTI)) { 1029 if (ncols) *ncols = 0; 1030 if (cols) *cols = NULL; 1031 if (values) *values = NULL; 1032 PetscFunctionReturn(0); 1033 } 1034 1035 if (T || b->isTI) { 1036 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 1037 c = nzaij; 1038 for (i=0; i<nzaij; i++) { 1039 /* check if this row contains a diagonal entry */ 1040 if (colsaij[i] == r) { 1041 diag = PETSC_TRUE; 1042 c = i; 1043 } 1044 } 1045 } else nzaij = c = 0; 1046 1047 /* calculate size of row */ 1048 nz = 0; 1049 if (S) nz += q; 1050 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1051 1052 if (cols || values) { 1053 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1054 for (i=0; i<q; i++) { 1055 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1056 v[i] = 0.0; 1057 } 1058 if (b->isTI) { 1059 for (i=0; i<nzaij; i++) { 1060 for (j=0; j<q; j++) { 1061 idx[i*q+j] = colsaij[i]*q+j; 1062 v[i*q+j] = (j==s ? vaij[i] : 0); 1063 } 1064 } 1065 } else if (T) { 1066 for (i=0; i<nzaij; i++) { 1067 for (j=0; j<q; j++) { 1068 idx[i*q+j] = colsaij[i]*q+j; 1069 v[i*q+j] = vaij[i]*T[s+j*p]; 1070 } 1071 } 1072 } 1073 if (S) { 1074 for (j=0; j<q; j++) { 1075 idx[c*q+j] = r*q+j; 1076 v[c*q+j] += S[s+j*p]; 1077 } 1078 } 1079 } 1080 1081 if (ncols) *ncols = nz; 1082 if (cols) *cols = idx; 1083 if (values) *values = v; 1084 PetscFunctionReturn(0); 1085 } 1086 1087 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1088 { 1089 PetscErrorCode ierr; 1090 PetscFunctionBegin; 1091 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1092 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1097 { 1098 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1099 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1100 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1101 Mat AIJ = b->A; 1102 PetscBool diag = PETSC_FALSE; 1103 PetscErrorCode ierr; 1104 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1105 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1106 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1107 1108 PetscFunctionBegin; 1109 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1110 b->getrowactive = PETSC_TRUE; 1111 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1112 lrow = row - rstart; 1113 1114 if ((!S) && (!T) && (!b->isTI)) { 1115 if (ncols) *ncols = 0; 1116 if (cols) *cols = NULL; 1117 if (values) *values = NULL; 1118 PetscFunctionReturn(0); 1119 } 1120 1121 r = lrow/p; 1122 s = lrow%p; 1123 1124 if (T || b->isTI) { 1125 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 1126 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 1127 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 1128 1129 c = ncolsaij + ncolsoaij; 1130 for (i=0; i<ncolsaij; i++) { 1131 /* check if this row contains a diagonal entry */ 1132 if (colsaij[i] == r) { 1133 diag = PETSC_TRUE; 1134 c = i; 1135 } 1136 } 1137 } else c = 0; 1138 1139 /* calculate size of row */ 1140 nz = 0; 1141 if (S) nz += q; 1142 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1143 1144 if (cols || values) { 1145 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1146 for (i=0; i<q; i++) { 1147 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1148 v[i] = 0.0; 1149 } 1150 if (b->isTI) { 1151 for (i=0; i<ncolsaij; i++) { 1152 for (j=0; j<q; j++) { 1153 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1154 v[i*q+j] = (j==s ? vals[i] : 0.0); 1155 } 1156 } 1157 for (i=0; i<ncolsoaij; i++) { 1158 for (j=0; j<q; j++) { 1159 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1160 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1161 } 1162 } 1163 } else if (T) { 1164 for (i=0; i<ncolsaij; i++) { 1165 for (j=0; j<q; j++) { 1166 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1167 v[i*q+j] = vals[i]*T[s+j*p]; 1168 } 1169 } 1170 for (i=0; i<ncolsoaij; i++) { 1171 for (j=0; j<q; j++) { 1172 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1173 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1174 } 1175 } 1176 } 1177 if (S) { 1178 for (j=0; j<q; j++) { 1179 idx[c*q+j] = (r+rstart/p)*q+j; 1180 v[c*q+j] += S[s+j*p]; 1181 } 1182 } 1183 } 1184 1185 if (ncols) *ncols = nz; 1186 if (cols) *cols = idx; 1187 if (values) *values = v; 1188 PetscFunctionReturn(0); 1189 } 1190 1191 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1192 { 1193 PetscErrorCode ierr; 1194 PetscFunctionBegin; 1195 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1196 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1197 PetscFunctionReturn(0); 1198 } 1199 1200 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1201 { 1202 PetscErrorCode ierr; 1203 Mat A; 1204 1205 PetscFunctionBegin; 1206 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1207 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1208 ierr = MatDestroy(&A);CHKERRQ(ierr); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 /* ---------------------------------------------------------------------------------- */ 1213 /*@C 1214 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1215 1216 [I \otimes S + A \otimes T] 1217 1218 where 1219 S is a dense (p \times q) matrix 1220 T is a dense (p \times q) matrix 1221 A is an AIJ (n \times n) matrix 1222 I is the identity matrix 1223 The resulting matrix is (np \times nq) 1224 1225 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1226 1227 Collective 1228 1229 Input Parameters: 1230 + A - the AIJ matrix 1231 . p - number of rows in S and T 1232 . q - number of columns in S and T 1233 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1234 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1235 1236 Output Parameter: 1237 . kaij - the new KAIJ matrix 1238 1239 Notes: 1240 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1241 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 1242 1243 Level: advanced 1244 1245 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1246 @*/ 1247 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1248 { 1249 PetscErrorCode ierr; 1250 PetscMPIInt size; 1251 1252 PetscFunctionBegin; 1253 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1254 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1255 if (size == 1) { 1256 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1257 } else { 1258 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1259 } 1260 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1261 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1262 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1263 ierr = MatSetUp(*kaij); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 /*MC 1268 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 1269 1270 [I \otimes S + A \otimes T] 1271 1272 where 1273 S is a dense (p \times q) matrix 1274 T is a dense (p \times q) matrix 1275 A is an AIJ (n \times n) matrix 1276 I is the identity matrix 1277 The resulting matrix is (np \times nq) 1278 1279 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1280 1281 Level: advanced 1282 1283 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1284 M*/ 1285 1286 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1287 { 1288 PetscErrorCode ierr; 1289 Mat_MPIKAIJ *b; 1290 PetscMPIInt size; 1291 1292 PetscFunctionBegin; 1293 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1294 A->data = (void*)b; 1295 1296 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1297 1298 A->ops->setup = MatSetUp_KAIJ; 1299 1300 b->w = 0; 1301 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1302 if (size == 1) { 1303 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1304 A->ops->setup = MatSetUp_KAIJ; 1305 A->ops->destroy = MatDestroy_SeqKAIJ; 1306 A->ops->view = MatView_SeqKAIJ; 1307 A->ops->mult = MatMult_SeqKAIJ; 1308 A->ops->multadd = MatMultAdd_SeqKAIJ; 1309 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1310 A->ops->getrow = MatGetRow_SeqKAIJ; 1311 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1312 A->ops->sor = MatSOR_SeqKAIJ; 1313 } else { 1314 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1315 A->ops->setup = MatSetUp_KAIJ; 1316 A->ops->destroy = MatDestroy_MPIKAIJ; 1317 A->ops->view = MatView_MPIKAIJ; 1318 A->ops->mult = MatMult_MPIKAIJ; 1319 A->ops->multadd = MatMultAdd_MPIKAIJ; 1320 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1321 A->ops->getrow = MatGetRow_MPIKAIJ; 1322 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1323 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1324 } 1325 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1326 PetscFunctionReturn(0); 1327 } 1328