1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameters: 76 + m - the number of rows in S 77 . n - the number of columns in S 78 - S - the S matrix, in form of a scalar array in column-major format 79 80 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 if (m) *m = b->p; 91 if (n) *n = b->q; 92 if (S) *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98 99 Not Collective; the entire S is stored and returned independently on all processes. 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameters: 105 + m - the number of rows in S 106 . n - the number of columns in S 107 - S - the S matrix, in form of a scalar array in column-major format 108 109 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 if (m) *m = b->p; 120 if (n) *n = b->q; 121 if (S) *S = b->S; 122 PetscFunctionReturn(0); 123 } 124 125 /*@C 126 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127 128 Not collective 129 130 Input Parameter: 131 . A - the KAIJ matrix 132 133 Output Parameter: 134 . S - location of pointer to array obtained with MatKAIJGetS() 135 136 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 138 139 Level: advanced 140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141 @*/ 142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143 { 144 PetscFunctionBegin; 145 if (S) *S = NULL; 146 PetscObjectStateIncrease((PetscObject)A); 147 PetscFunctionReturn(0); 148 } 149 150 /*@C 151 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 152 153 Not collective 154 155 Input Parameter: 156 . A - the KAIJ matrix 157 158 Output Parameter: 159 . S - location of pointer to array obtained with MatKAIJGetS() 160 161 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 162 If NULL is passed, it will not attempt to zero the array pointer. 163 164 Level: advanced 165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 166 @*/ 167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 168 { 169 PetscFunctionBegin; 170 if (S) *S = NULL; 171 PetscFunctionReturn(0); 172 } 173 174 /*@C 175 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 176 177 Not Collective; the entire T is stored and returned independently on all processes 178 179 Input Parameter: 180 . A - the KAIJ matrix 181 182 Output Parameter: 183 + m - the number of rows in T 184 . n - the number of columns in T 185 - T - the T matrix, in form of a scalar array in column-major format 186 187 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 188 189 Level: advanced 190 191 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 192 @*/ 193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 194 { 195 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 196 PetscFunctionBegin; 197 if (m) *m = b->p; 198 if (n) *n = b->q; 199 if (T) *T = b->T; 200 PetscFunctionReturn(0); 201 } 202 203 /*@C 204 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 205 206 Not Collective; the entire T is stored and returned independently on all processes 207 208 Input Parameter: 209 . A - the KAIJ matrix 210 211 Output Parameter: 212 + m - the number of rows in T 213 . n - the number of columns in T 214 - T - the T matrix, in form of a scalar array in column-major format 215 216 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 217 218 Level: advanced 219 220 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 221 @*/ 222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 223 { 224 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 225 PetscFunctionBegin; 226 if (m) *m = b->p; 227 if (n) *n = b->q; 228 if (T) *T = b->T; 229 PetscFunctionReturn(0); 230 } 231 232 /*@C 233 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 234 235 Not collective 236 237 Input Parameter: 238 . A - the KAIJ matrix 239 240 Output Parameter: 241 . T - location of pointer to array obtained with MatKAIJGetS() 242 243 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 244 If NULL is passed, it will not attempt to zero the array pointer. 245 246 Level: advanced 247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 248 @*/ 249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 250 { 251 PetscFunctionBegin; 252 if (T) *T = NULL; 253 PetscObjectStateIncrease((PetscObject)A); 254 PetscFunctionReturn(0); 255 } 256 257 /*@C 258 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 259 260 Not collective 261 262 Input Parameter: 263 . A - the KAIJ matrix 264 265 Output Parameter: 266 . T - location of pointer to array obtained with MatKAIJGetS() 267 268 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 269 If NULL is passed, it will not attempt to zero the array pointer. 270 271 Level: advanced 272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 273 @*/ 274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 275 { 276 PetscFunctionBegin; 277 if (T) *T = NULL; 278 PetscFunctionReturn(0); 279 } 280 281 /*@ 282 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 283 284 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 285 286 Input Parameters: 287 + A - the KAIJ matrix 288 - B - the AIJ matrix 289 290 Notes: 291 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 292 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 293 294 Level: advanced 295 296 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 297 @*/ 298 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 299 { 300 PetscErrorCode ierr; 301 PetscMPIInt size; 302 303 PetscFunctionBegin; 304 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 305 if (size == 1) { 306 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 307 a->AIJ = B; 308 } else { 309 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 310 a->A = B; 311 } 312 ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 /*@C 317 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 318 319 Logically Collective; the entire S is stored independently on all processes. 320 321 Input Parameters: 322 + A - the KAIJ matrix 323 . p - the number of rows in S 324 . q - the number of columns in S 325 - S - the S matrix, in form of a scalar array in column-major format 326 327 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 328 The S matrix is copied, so the user can destroy this array. 329 330 Level: Advanced 331 332 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 333 @*/ 334 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 335 { 336 PetscErrorCode ierr; 337 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 338 339 PetscFunctionBegin; 340 ierr = PetscFree(a->S);CHKERRQ(ierr); 341 if (S) { 342 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 343 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 344 } else a->S = NULL; 345 346 a->p = p; 347 a->q = q; 348 PetscFunctionReturn(0); 349 } 350 351 /*@C 352 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 353 354 Logically Collective; the entire T is stored independently on all processes. 355 356 Input Parameters: 357 + A - the KAIJ matrix 358 . p - the number of rows in S 359 . q - the number of columns in S 360 - T - the T matrix, in form of a scalar array in column-major format 361 362 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 363 The T matrix is copied, so the user can destroy this array. 364 365 Level: Advanced 366 367 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 368 @*/ 369 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 370 { 371 PetscErrorCode ierr; 372 PetscInt i,j; 373 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 374 PetscBool isTI = PETSC_FALSE; 375 376 PetscFunctionBegin; 377 /* check if T is an identity matrix */ 378 if (T && (p == q)) { 379 isTI = PETSC_TRUE; 380 for (i=0; i<p; i++) { 381 for (j=0; j<q; j++) { 382 if (i == j) { 383 /* diagonal term must be 1 */ 384 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 385 } else { 386 /* off-diagonal term must be 0 */ 387 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 388 } 389 } 390 } 391 } 392 a->isTI = isTI; 393 394 ierr = PetscFree(a->T);CHKERRQ(ierr); 395 if (T && (!isTI)) { 396 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 397 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 398 } else a->T = NULL; 399 400 a->p = p; 401 a->q = q; 402 PetscFunctionReturn(0); 403 } 404 405 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 406 { 407 PetscErrorCode ierr; 408 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 409 410 PetscFunctionBegin; 411 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 412 ierr = PetscFree(b->S);CHKERRQ(ierr); 413 ierr = PetscFree(b->T);CHKERRQ(ierr); 414 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 415 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 416 ierr = PetscFree(A->data);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 PetscErrorCode MatSetUp_KAIJ(Mat A) 421 { 422 PetscErrorCode ierr; 423 PetscInt n; 424 PetscMPIInt size; 425 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 426 427 PetscFunctionBegin; 428 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 429 if (size == 1) { 430 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 431 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 432 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 433 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 434 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 435 } else { 436 Mat_MPIKAIJ *a; 437 Mat_MPIAIJ *mpiaij; 438 IS from,to; 439 Vec gvec; 440 PetscScalar *T; 441 PetscInt i,j; 442 443 a = (Mat_MPIKAIJ*)A->data; 444 mpiaij = (Mat_MPIAIJ*)a->A->data; 445 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 446 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 447 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 448 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 449 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 450 451 if (a->isTI) { 452 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 453 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 454 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 455 * to pass in. */ 456 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 457 for (i=0; i<a->p; i++) { 458 for (j=0; j<a->q; j++) { 459 if (i==j) T[i+j*a->p] = 1.0; 460 else T[i+j*a->p] = 0.0; 461 } 462 } 463 } else T = a->T; 464 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 465 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 466 ierr = PetscFree(T);CHKERRQ(ierr); 467 468 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 469 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 470 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 471 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 472 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 473 474 /* create two temporary Index sets for build scatter gather */ 475 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 476 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 477 478 /* create temporary global vector to generate scatter context */ 479 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 480 481 /* generate the scatter context */ 482 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 483 484 ierr = ISDestroy(&from);CHKERRQ(ierr); 485 ierr = ISDestroy(&to);CHKERRQ(ierr); 486 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 487 } 488 489 A->assembled = PETSC_TRUE; 490 PetscFunctionReturn(0); 491 } 492 493 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 494 { 495 PetscErrorCode ierr; 496 Mat B; 497 498 PetscFunctionBegin; 499 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 500 ierr = MatView(B,viewer);CHKERRQ(ierr); 501 ierr = MatDestroy(&B);CHKERRQ(ierr); 502 PetscFunctionReturn(0); 503 } 504 505 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 506 { 507 PetscErrorCode ierr; 508 Mat B; 509 510 PetscFunctionBegin; 511 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 512 ierr = MatView(B,viewer);CHKERRQ(ierr); 513 ierr = MatDestroy(&B);CHKERRQ(ierr); 514 PetscFunctionReturn(0); 515 } 516 517 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 518 { 519 PetscErrorCode ierr; 520 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 521 522 PetscFunctionBegin; 523 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 524 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 525 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 526 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 527 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 528 ierr = PetscFree(b->S);CHKERRQ(ierr); 529 ierr = PetscFree(b->T);CHKERRQ(ierr); 530 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 531 ierr = PetscFree(A->data);CHKERRQ(ierr); 532 PetscFunctionReturn(0); 533 } 534 535 /* --------------------------------------------------------------------------------------*/ 536 537 /* zz = yy + Axx */ 538 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 539 { 540 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 541 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 542 const PetscScalar *s = b->S, *t = b->T; 543 const PetscScalar *x,*v,*bx; 544 PetscScalar *y,*sums; 545 PetscErrorCode ierr; 546 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 547 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 548 549 PetscFunctionBegin; 550 if (!yy) { 551 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 552 } else { 553 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 554 } 555 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 556 557 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 558 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 559 idx = a->j; 560 v = a->a; 561 ii = a->i; 562 563 if (b->isTI) { 564 for (i=0; i<m; i++) { 565 jrow = ii[i]; 566 n = ii[i+1] - jrow; 567 sums = y + p*i; 568 for (j=0; j<n; j++) { 569 for (k=0; k<p; k++) { 570 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 571 } 572 } 573 } 574 } else if (t) { 575 for (i=0; i<m; i++) { 576 jrow = ii[i]; 577 n = ii[i+1] - jrow; 578 sums = y + p*i; 579 bx = x + q*i; 580 for (j=0; j<n; j++) { 581 for (k=0; k<p; k++) { 582 for (l=0; l<q; l++) { 583 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 584 } 585 } 586 } 587 } 588 } 589 if (s) { 590 for (i=0; i<m; i++) { 591 jrow = ii[i]; 592 n = ii[i+1] - jrow; 593 sums = y + p*i; 594 bx = x + q*i; 595 if (i < b->AIJ->cmap->n) { 596 for (j=0; j<q; j++) { 597 for (k=0; k<p; k++) { 598 sums[k] += s[k+j*p]*bx[j]; 599 } 600 } 601 } 602 } 603 } 604 605 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 606 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 607 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 608 PetscFunctionReturn(0); 609 } 610 611 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy) 612 { 613 PetscErrorCode ierr; 614 PetscFunctionBegin; 615 ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 619 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 620 { 621 PetscErrorCode ierr; 622 PetscFunctionBegin; 623 ierr = MatMultAdd_SeqKAIJ(A,xx,yy,zz);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } 626 627 #include <petsc/private/kernels/blockinvert.h> 628 629 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values) 630 { 631 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 632 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 633 const PetscScalar *S = b->S; 634 const PetscScalar *T = b->T; 635 const PetscScalar *v = a->a; 636 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 637 PetscErrorCode ierr; 638 PetscInt i,j,*v_pivots,dof,dof2; 639 PetscScalar *diag,aval,*v_work; 640 641 PetscFunctionBegin; 642 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 643 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 644 645 dof = p; 646 dof2 = dof*dof; 647 648 if (b->ibdiagvalid) { 649 if (values) *values = b->ibdiag; 650 PetscFunctionReturn(0); 651 } 652 if (!b->ibdiag) { 653 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 654 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 655 } 656 if (values) *values = b->ibdiag; 657 diag = b->ibdiag; 658 659 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 660 for (i=0; i<m; i++) { 661 if (S) { 662 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 663 } else { 664 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 665 } 666 if (b->isTI) { 667 aval = 0; 668 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 669 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 670 } else if (T) { 671 aval = 0; 672 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 673 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 674 } 675 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 676 diag += dof2; 677 } 678 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 679 680 b->ibdiagvalid = PETSC_TRUE; 681 PetscFunctionReturn(0); 682 } 683 684 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 685 { 686 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 687 688 PetscFunctionBegin; 689 *B = kaij->AIJ; 690 PetscFunctionReturn(0); 691 } 692 693 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 694 { 695 PetscErrorCode ierr; 696 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 697 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 698 const PetscScalar *aa = a->a, *T = kaij->T, *v; 699 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 700 const PetscScalar *b, *xb, *idiag; 701 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 702 PetscInt i, j, k, i2, bs, bs2, nz; 703 704 PetscFunctionBegin; 705 its = its*lits; 706 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 707 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 708 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 709 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 710 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 711 else {bs = p; bs2 = bs*bs; } 712 713 if (!m) PetscFunctionReturn(0); 714 715 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); } 716 idiag = kaij->ibdiag; 717 diag = a->diag; 718 719 if (!kaij->sor.setup) { 720 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 721 kaij->sor.setup = PETSC_TRUE; 722 } 723 y = kaij->sor.y; 724 w = kaij->sor.w; 725 work = kaij->sor.work; 726 t = kaij->sor.t; 727 arr = kaij->sor.arr; 728 729 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 730 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 731 732 if (flag & SOR_ZERO_INITIAL_GUESS) { 733 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 734 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 735 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 736 i2 = bs; 737 idiag += bs2; 738 for (i=1; i<m; i++) { 739 v = aa + ai[i]; 740 vi = aj + ai[i]; 741 nz = diag[i] - ai[i]; 742 743 if (T) { /* b - T (Arow * x) */ 744 for (k=0; k<bs; k++) w[k] = 0; 745 for (j=0; j<nz; j++) { 746 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 747 } 748 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 749 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 750 } else if (kaij->isTI) { 751 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 752 for (j=0; j<nz; j++) { 753 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 754 } 755 } else { 756 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 757 } 758 759 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 760 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 761 762 idiag += bs2; 763 i2 += bs; 764 } 765 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 766 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 767 xb = t; 768 } else xb = b; 769 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 770 idiag = kaij->ibdiag+bs2*(m-1); 771 i2 = bs * (m-1); 772 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 773 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 774 i2 -= bs; 775 idiag -= bs2; 776 for (i=m-2; i>=0; i--) { 777 v = aa + diag[i] + 1 ; 778 vi = aj + diag[i] + 1; 779 nz = ai[i+1] - diag[i] - 1; 780 781 if (T) { /* FIXME: This branch untested */ 782 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 783 /* copy all rows of x that are needed into contiguous space */ 784 workt = work; 785 for (j=0; j<nz; j++) { 786 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 787 workt += bs; 788 } 789 arrt = arr; 790 for (j=0; j<nz; j++) { 791 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 792 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 793 arrt += bs2; 794 } 795 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 796 } else if (kaij->isTI) { 797 for (k=0; k<bs; k++) w[k] = t[i2+k]; 798 for (j=0; j<nz; j++) { 799 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 800 } 801 } 802 803 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 804 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 805 806 idiag -= bs2; 807 i2 -= bs; 808 } 809 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 810 } 811 its--; 812 } 813 while (its--) { /* FIXME: This branch not updated */ 814 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 815 i2 = 0; 816 idiag = kaij->ibdiag; 817 for (i=0; i<m; i++) { 818 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 819 820 v = aa + ai[i]; 821 vi = aj + ai[i]; 822 nz = diag[i] - ai[i]; 823 workt = work; 824 for (j=0; j<nz; j++) { 825 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 826 workt += bs; 827 } 828 arrt = arr; 829 if (T) { 830 for (j=0; j<nz; j++) { 831 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 832 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 833 arrt += bs2; 834 } 835 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 836 } else if (kaij->isTI) { 837 for (j=0; j<nz; j++) { 838 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 839 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 840 arrt += bs2; 841 } 842 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 843 } 844 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 845 846 v = aa + diag[i] + 1; 847 vi = aj + diag[i] + 1; 848 nz = ai[i+1] - diag[i] - 1; 849 workt = work; 850 for (j=0; j<nz; j++) { 851 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 852 workt += bs; 853 } 854 arrt = arr; 855 if (T) { 856 for (j=0; j<nz; j++) { 857 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 858 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 859 arrt += bs2; 860 } 861 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 862 } else if (kaij->isTI) { 863 for (j=0; j<nz; j++) { 864 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 865 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 866 arrt += bs2; 867 } 868 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 869 } 870 871 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 872 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 873 874 idiag += bs2; 875 i2 += bs; 876 } 877 xb = t; 878 } 879 else xb = b; 880 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 881 idiag = kaij->ibdiag+bs2*(m-1); 882 i2 = bs * (m-1); 883 if (xb == b) { 884 for (i=m-1; i>=0; i--) { 885 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 886 887 v = aa + ai[i]; 888 vi = aj + ai[i]; 889 nz = diag[i] - ai[i]; 890 workt = work; 891 for (j=0; j<nz; j++) { 892 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 893 workt += bs; 894 } 895 arrt = arr; 896 if (T) { 897 for (j=0; j<nz; j++) { 898 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 899 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 900 arrt += bs2; 901 } 902 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 903 } else if (kaij->isTI) { 904 for (j=0; j<nz; j++) { 905 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 906 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 907 arrt += bs2; 908 } 909 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 910 } 911 912 v = aa + diag[i] + 1; 913 vi = aj + diag[i] + 1; 914 nz = ai[i+1] - diag[i] - 1; 915 workt = work; 916 for (j=0; j<nz; j++) { 917 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 918 workt += bs; 919 } 920 arrt = arr; 921 if (T) { 922 for (j=0; j<nz; j++) { 923 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 924 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 925 arrt += bs2; 926 } 927 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 928 } else if (kaij->isTI) { 929 for (j=0; j<nz; j++) { 930 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 931 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 932 arrt += bs2; 933 } 934 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 935 } 936 937 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 938 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 939 } 940 } else { 941 for (i=m-1; i>=0; i--) { 942 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 943 v = aa + diag[i] + 1; 944 vi = aj + diag[i] + 1; 945 nz = ai[i+1] - diag[i] - 1; 946 workt = work; 947 for (j=0; j<nz; j++) { 948 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 949 workt += bs; 950 } 951 arrt = arr; 952 if (T) { 953 for (j=0; j<nz; j++) { 954 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 955 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 956 arrt += bs2; 957 } 958 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 959 } else if (kaij->isTI) { 960 for (j=0; j<nz; j++) { 961 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 962 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 963 arrt += bs2; 964 } 965 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 966 } 967 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 968 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 969 } 970 idiag -= bs2; 971 i2 -= bs; 972 } 973 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 974 } 975 } 976 977 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 978 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 979 PetscFunctionReturn(0); 980 } 981 982 /*===================================================================================*/ 983 984 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 985 { 986 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 if (!yy) { 991 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 992 } else { 993 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 994 } 995 /* start the scatter */ 996 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 997 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 998 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 999 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy) 1004 { 1005 PetscErrorCode ierr; 1006 PetscFunctionBegin; 1007 ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz) 1012 { 1013 PetscErrorCode ierr; 1014 PetscFunctionBegin; 1015 ierr = MatMultAdd_MPIKAIJ(A,xx,yy,zz);CHKERRQ(ierr); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values) 1020 { 1021 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1022 PetscErrorCode ierr; 1023 1024 PetscFunctionBegin; 1025 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 1026 PetscFunctionReturn(0); 1027 } 1028 1029 /* ----------------------------------------------------------------*/ 1030 1031 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1032 { 1033 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1034 PetscErrorCode diag = PETSC_FALSE; 1035 PetscErrorCode ierr; 1036 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1037 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1038 1039 PetscFunctionBegin; 1040 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1041 b->getrowactive = PETSC_TRUE; 1042 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 1043 1044 if ((!S) && (!T) && (!b->isTI)) { 1045 if (ncols) *ncols = 0; 1046 if (cols) *cols = NULL; 1047 if (values) *values = NULL; 1048 PetscFunctionReturn(0); 1049 } 1050 1051 if (T || b->isTI) { 1052 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 1053 c = nzaij; 1054 for (i=0; i<nzaij; i++) { 1055 /* check if this row contains a diagonal entry */ 1056 if (colsaij[i] == r) { 1057 diag = PETSC_TRUE; 1058 c = i; 1059 } 1060 } 1061 } else nzaij = c = 0; 1062 1063 /* calculate size of row */ 1064 nz = 0; 1065 if (S) nz += q; 1066 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1067 1068 if (cols || values) { 1069 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1070 for (i=0; i<q; i++) { 1071 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1072 v[i] = 0.0; 1073 } 1074 if (b->isTI) { 1075 for (i=0; i<nzaij; i++) { 1076 for (j=0; j<q; j++) { 1077 idx[i*q+j] = colsaij[i]*q+j; 1078 v[i*q+j] = (j==s ? vaij[i] : 0); 1079 } 1080 } 1081 } else if (T) { 1082 for (i=0; i<nzaij; i++) { 1083 for (j=0; j<q; j++) { 1084 idx[i*q+j] = colsaij[i]*q+j; 1085 v[i*q+j] = vaij[i]*T[s+j*p]; 1086 } 1087 } 1088 } 1089 if (S) { 1090 for (j=0; j<q; j++) { 1091 idx[c*q+j] = r*q+j; 1092 v[c*q+j] += S[s+j*p]; 1093 } 1094 } 1095 } 1096 1097 if (ncols) *ncols = nz; 1098 if (cols) *cols = idx; 1099 if (values) *values = v; 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1104 { 1105 PetscErrorCode ierr; 1106 PetscFunctionBegin; 1107 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1108 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1109 PetscFunctionReturn(0); 1110 } 1111 1112 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1113 { 1114 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1115 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1116 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1117 Mat AIJ = b->A; 1118 PetscBool diag = PETSC_FALSE; 1119 PetscErrorCode ierr; 1120 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1121 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1122 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1123 1124 PetscFunctionBegin; 1125 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1126 b->getrowactive = PETSC_TRUE; 1127 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1128 lrow = row - rstart; 1129 1130 if ((!S) && (!T) && (!b->isTI)) { 1131 if (ncols) *ncols = 0; 1132 if (cols) *cols = NULL; 1133 if (values) *values = NULL; 1134 PetscFunctionReturn(0); 1135 } 1136 1137 r = lrow/p; 1138 s = lrow%p; 1139 1140 if (T || b->isTI) { 1141 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 1142 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 1143 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 1144 1145 c = ncolsaij + ncolsoaij; 1146 for (i=0; i<ncolsaij; i++) { 1147 /* check if this row contains a diagonal entry */ 1148 if (colsaij[i] == r) { 1149 diag = PETSC_TRUE; 1150 c = i; 1151 } 1152 } 1153 } else c = 0; 1154 1155 /* calculate size of row */ 1156 nz = 0; 1157 if (S) nz += q; 1158 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1159 1160 if (cols || values) { 1161 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1162 for (i=0; i<q; i++) { 1163 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1164 v[i] = 0.0; 1165 } 1166 if (b->isTI) { 1167 for (i=0; i<ncolsaij; i++) { 1168 for (j=0; j<q; j++) { 1169 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1170 v[i*q+j] = (j==s ? vals[i] : 0.0); 1171 } 1172 } 1173 for (i=0; i<ncolsoaij; i++) { 1174 for (j=0; j<q; j++) { 1175 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1176 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1177 } 1178 } 1179 } else if (T) { 1180 for (i=0; i<ncolsaij; i++) { 1181 for (j=0; j<q; j++) { 1182 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1183 v[i*q+j] = vals[i]*T[s+j*p]; 1184 } 1185 } 1186 for (i=0; i<ncolsoaij; i++) { 1187 for (j=0; j<q; j++) { 1188 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1189 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1190 } 1191 } 1192 } 1193 if (S) { 1194 for (j=0; j<q; j++) { 1195 idx[c*q+j] = (r+rstart/p)*q+j; 1196 v[c*q+j] += S[s+j*p]; 1197 } 1198 } 1199 } 1200 1201 if (ncols) *ncols = nz; 1202 if (cols) *cols = idx; 1203 if (values) *values = v; 1204 PetscFunctionReturn(0); 1205 } 1206 1207 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1208 { 1209 PetscErrorCode ierr; 1210 PetscFunctionBegin; 1211 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1212 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1213 PetscFunctionReturn(0); 1214 } 1215 1216 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1217 { 1218 PetscErrorCode ierr; 1219 Mat A; 1220 1221 PetscFunctionBegin; 1222 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1223 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1224 ierr = MatDestroy(&A);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 /* ---------------------------------------------------------------------------------- */ 1229 /*@C 1230 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1231 1232 [I \otimes S + A \otimes T] 1233 1234 where 1235 S is a dense (p \times q) matrix 1236 T is a dense (p \times q) matrix 1237 A is an AIJ (n \times n) matrix 1238 I is the identity matrix 1239 The resulting matrix is (np \times nq) 1240 1241 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 1242 S is always stored independently on all processes as a PetscScalar array in column-major format. 1243 1244 Collective 1245 1246 Input Parameters: 1247 + A - the AIJ matrix 1248 . S - the S matrix, stored as a PetscScalar array (column-major) 1249 . T - the T matrix, stored as a PetscScalar array (column-major) 1250 . p - number of rows in S and T 1251 - q - number of columns in S and T 1252 1253 Output Parameter: 1254 . kaij - the new KAIJ matrix 1255 1256 Operations provided: 1257 + MatMult 1258 . MatMultAdd 1259 . MatInvertBlockDiagonal 1260 - MatView 1261 1262 Level: advanced 1263 1264 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1265 @*/ 1266 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1267 { 1268 PetscErrorCode ierr; 1269 PetscMPIInt size; 1270 1271 PetscFunctionBegin; 1272 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1273 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1274 if (size == 1) { 1275 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1276 } else { 1277 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1278 } 1279 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1280 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1281 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1282 ierr = MatSetUp(*kaij); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 /*MC 1287 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 1288 1289 [I \otimes S + A \otimes T] 1290 1291 where 1292 S is a dense (p \times q) matrix 1293 T is a dense (p \times q) matrix 1294 A is an AIJ (n \times n) matrix 1295 I is the identity matrix 1296 The resulting matrix is (np \times nq) 1297 1298 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 1299 S and T are always stored independently on all processes as a PetscScalar array in column-major format. 1300 1301 Operations provided: 1302 + MatMult 1303 . MatMultAdd 1304 . MatInvertBlockDiagonal 1305 - MatView 1306 1307 Level: advanced 1308 1309 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1310 M*/ 1311 1312 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1313 { 1314 PetscErrorCode ierr; 1315 Mat_MPIKAIJ *b; 1316 PetscMPIInt size; 1317 1318 PetscFunctionBegin; 1319 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1320 A->data = (void*)b; 1321 1322 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1323 1324 A->ops->setup = MatSetUp_KAIJ; 1325 1326 b->w = 0; 1327 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1328 if (size == 1) { 1329 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1330 A->ops->setup = MatSetUp_KAIJ; 1331 A->ops->destroy = MatDestroy_SeqKAIJ; 1332 A->ops->view = MatView_SeqKAIJ; 1333 A->ops->mult = MatMult_SeqKAIJ_N; 1334 A->ops->multadd = MatMultAdd_SeqKAIJ_N; 1335 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N; 1336 A->ops->getrow = MatGetRow_SeqKAIJ; 1337 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1338 A->ops->sor = MatSOR_SeqKAIJ; 1339 } else { 1340 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1341 A->ops->setup = MatSetUp_KAIJ; 1342 A->ops->destroy = MatDestroy_MPIKAIJ; 1343 A->ops->view = MatView_MPIKAIJ; 1344 A->ops->mult = MatMult_MPIKAIJ_dof; 1345 A->ops->multadd = MatMultAdd_MPIKAIJ_dof; 1346 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof; 1347 A->ops->getrow = MatGetRow_MPIKAIJ; 1348 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1349 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1350 } 1351 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1352 PetscFunctionReturn(0); 1353 } 1354