1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameters: 76 + m - the number of rows in S 77 . n - the number of columns in S 78 - S - the S matrix, in form of a scalar array in column-major format 79 80 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 if (m) *m = b->p; 91 if (n) *n = b->q; 92 if (S) *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98 99 Not Collective; the entire S is stored and returned independently on all processes. 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameters: 105 + m - the number of rows in S 106 . n - the number of columns in S 107 - S - the S matrix, in form of a scalar array in column-major format 108 109 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 if (m) *m = b->p; 120 if (n) *n = b->q; 121 if (S) *S = b->S; 122 PetscFunctionReturn(0); 123 } 124 125 /*@C 126 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127 128 Not collective 129 130 Input Parameter: 131 . A - the KAIJ matrix 132 133 Output Parameter: 134 . S - location of pointer to array obtained with MatKAIJGetS() 135 136 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 138 139 Level: advanced 140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141 @*/ 142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143 { 144 PetscFunctionBegin; 145 if (S) *S = NULL; 146 PetscObjectStateIncrease((PetscObject)A); 147 PetscFunctionReturn(0); 148 } 149 150 /*@C 151 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 152 153 Not collective 154 155 Input Parameter: 156 . A - the KAIJ matrix 157 158 Output Parameter: 159 . S - location of pointer to array obtained with MatKAIJGetS() 160 161 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 162 If NULL is passed, it will not attempt to zero the array pointer. 163 164 Level: advanced 165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 166 @*/ 167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 168 { 169 PetscFunctionBegin; 170 if (S) *S = NULL; 171 PetscFunctionReturn(0); 172 } 173 174 /*@C 175 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 176 177 Not Collective; the entire T is stored and returned independently on all processes 178 179 Input Parameter: 180 . A - the KAIJ matrix 181 182 Output Parameter: 183 + m - the number of rows in T 184 . n - the number of columns in T 185 - T - the T matrix, in form of a scalar array in column-major format 186 187 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 188 189 Level: advanced 190 191 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 192 @*/ 193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 194 { 195 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 196 PetscFunctionBegin; 197 if (m) *m = b->p; 198 if (n) *n = b->q; 199 if (T) *T = b->T; 200 PetscFunctionReturn(0); 201 } 202 203 /*@C 204 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 205 206 Not Collective; the entire T is stored and returned independently on all processes 207 208 Input Parameter: 209 . A - the KAIJ matrix 210 211 Output Parameter: 212 + m - the number of rows in T 213 . n - the number of columns in T 214 - T - the T matrix, in form of a scalar array in column-major format 215 216 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 217 218 Level: advanced 219 220 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 221 @*/ 222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 223 { 224 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 225 PetscFunctionBegin; 226 if (m) *m = b->p; 227 if (n) *n = b->q; 228 if (T) *T = b->T; 229 PetscFunctionReturn(0); 230 } 231 232 /*@C 233 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 234 235 Not collective 236 237 Input Parameter: 238 . A - the KAIJ matrix 239 240 Output Parameter: 241 . T - location of pointer to array obtained with MatKAIJGetS() 242 243 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 244 If NULL is passed, it will not attempt to zero the array pointer. 245 246 Level: advanced 247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 248 @*/ 249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 250 { 251 PetscFunctionBegin; 252 if (T) *T = NULL; 253 PetscObjectStateIncrease((PetscObject)A); 254 PetscFunctionReturn(0); 255 } 256 257 /*@C 258 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 259 260 Not collective 261 262 Input Parameter: 263 . A - the KAIJ matrix 264 265 Output Parameter: 266 . T - location of pointer to array obtained with MatKAIJGetS() 267 268 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 269 If NULL is passed, it will not attempt to zero the array pointer. 270 271 Level: advanced 272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 273 @*/ 274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 275 { 276 PetscFunctionBegin; 277 if (T) *T = NULL; 278 PetscFunctionReturn(0); 279 } 280 281 /*@ 282 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 283 284 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 285 286 Input Parameters: 287 + A - the KAIJ matrix 288 - B - the AIJ matrix 289 290 Level: advanced 291 292 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 293 @*/ 294 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 295 { 296 PetscErrorCode ierr; 297 PetscMPIInt size; 298 299 PetscFunctionBegin; 300 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 301 if (size == 1) { 302 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 303 a->AIJ = B; 304 } else { 305 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 306 a->A = B; 307 } 308 PetscFunctionReturn(0); 309 } 310 311 /*@C 312 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 313 314 Logically Collective; the entire S is stored independently on all processes. 315 316 Input Parameters: 317 + A - the KAIJ matrix 318 . p - the number of rows in S 319 . q - the number of columns in S 320 - S - the S matrix, in form of a scalar array in column-major format 321 322 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 323 324 Level: Advanced 325 326 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 327 @*/ 328 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 329 { 330 PetscErrorCode ierr; 331 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 332 333 PetscFunctionBegin; 334 ierr = PetscFree(a->S);CHKERRQ(ierr); 335 if (S) { 336 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 337 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 338 } else a->S = NULL; 339 340 a->p = p; 341 a->q = q; 342 PetscFunctionReturn(0); 343 } 344 345 /*@C 346 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 347 348 Logically Collective; the entire T is stored independently on all processes. 349 350 Input Parameters: 351 + A - the KAIJ matrix 352 . p - the number of rows in S 353 . q - the number of columns in S 354 - T - the T matrix, in form of a scalar array in column-major format 355 356 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 357 358 Level: Advanced 359 360 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 361 @*/ 362 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 363 { 364 PetscErrorCode ierr; 365 PetscInt i,j; 366 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 367 PetscBool isTI = PETSC_FALSE; 368 369 PetscFunctionBegin; 370 /* check if T is an identity matrix */ 371 if (T && (p == q)) { 372 isTI = PETSC_TRUE; 373 for (i=0; i<p; i++) { 374 for (j=0; j<q; j++) { 375 if (i == j) { 376 /* diagonal term must be 1 */ 377 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 378 } else { 379 /* off-diagonal term must be 0 */ 380 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 381 } 382 } 383 } 384 } 385 a->isTI = isTI; 386 387 ierr = PetscFree(a->T);CHKERRQ(ierr); 388 if (T && (!isTI)) { 389 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 390 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 391 } else a->T = NULL; 392 393 a->p = p; 394 a->q = q; 395 PetscFunctionReturn(0); 396 } 397 398 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 399 { 400 PetscErrorCode ierr; 401 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 402 403 PetscFunctionBegin; 404 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 405 ierr = PetscFree(b->S);CHKERRQ(ierr); 406 ierr = PetscFree(b->T);CHKERRQ(ierr); 407 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 408 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 409 ierr = PetscFree(A->data);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 PetscErrorCode MatSetUp_KAIJ(Mat A) 414 { 415 PetscErrorCode ierr; 416 PetscInt n; 417 PetscMPIInt size; 418 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 419 420 PetscFunctionBegin; 421 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 422 if (size == 1) { 423 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 424 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 425 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 426 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 427 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 428 ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr); 429 } else { 430 Mat_MPIKAIJ *a; 431 Mat_MPIAIJ *mpiaij; 432 IS from,to; 433 Vec gvec; 434 PetscScalar *T; 435 PetscInt i,j; 436 437 a = (Mat_MPIKAIJ*)A->data; 438 mpiaij = (Mat_MPIAIJ*)a->A->data; 439 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 440 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 441 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 442 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 443 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 444 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 445 446 if (a->isTI) { 447 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 448 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 449 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 450 * to pass in. */ 451 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 452 for (i=0; i<a->p; i++) { 453 for (j=0; j<a->q; j++) { 454 if (i==j) T[i+j*a->p] = 1.0; 455 else T[i+j*a->p] = 0.0; 456 } 457 } 458 } else T = a->T; 459 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 460 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 461 ierr = PetscFree(T);CHKERRQ(ierr); 462 463 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 464 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 465 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 466 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 467 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 468 469 /* create two temporary Index sets for build scatter gather */ 470 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 471 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 472 473 /* create temporary global vector to generate scatter context */ 474 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 475 476 /* generate the scatter context */ 477 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 478 479 ierr = ISDestroy(&from);CHKERRQ(ierr); 480 ierr = ISDestroy(&to);CHKERRQ(ierr); 481 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 482 } 483 484 A->assembled = PETSC_TRUE; 485 PetscFunctionReturn(0); 486 } 487 488 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 489 { 490 PetscErrorCode ierr; 491 Mat B; 492 493 PetscFunctionBegin; 494 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 495 ierr = MatView(B,viewer);CHKERRQ(ierr); 496 ierr = MatDestroy(&B);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 501 { 502 PetscErrorCode ierr; 503 Mat B; 504 505 PetscFunctionBegin; 506 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 507 ierr = MatView(B,viewer);CHKERRQ(ierr); 508 ierr = MatDestroy(&B);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 513 { 514 PetscErrorCode ierr; 515 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 516 517 PetscFunctionBegin; 518 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 519 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 520 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 521 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 522 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 523 ierr = PetscFree(b->S);CHKERRQ(ierr); 524 ierr = PetscFree(b->T);CHKERRQ(ierr); 525 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 526 ierr = PetscFree(A->data);CHKERRQ(ierr); 527 PetscFunctionReturn(0); 528 } 529 530 /* --------------------------------------------------------------------------------------*/ 531 532 /* zz = yy + Axx */ 533 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz) 534 { 535 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 537 const PetscScalar *s = b->S, *t = b->T; 538 const PetscScalar *x,*v,*bx; 539 PetscScalar *y,*sums; 540 PetscErrorCode ierr; 541 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 542 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 543 544 PetscFunctionBegin; 545 if (!yy) { 546 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 547 } else { 548 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 549 } 550 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 551 552 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 553 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 554 idx = a->j; 555 v = a->a; 556 ii = a->i; 557 558 if (b->isTI) { 559 for (i=0; i<m; i++) { 560 jrow = ii[i]; 561 n = ii[i+1] - jrow; 562 sums = y + p*i; 563 for (j=0; j<n; j++) { 564 for (k=0; k<p; k++) { 565 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 566 } 567 } 568 } 569 } else if (t) { 570 for (i=0; i<m; i++) { 571 jrow = ii[i]; 572 n = ii[i+1] - jrow; 573 sums = y + p*i; 574 bx = x + q*i; 575 for (j=0; j<n; j++) { 576 for (k=0; k<p; k++) { 577 for (l=0; l<q; l++) { 578 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 579 } 580 } 581 } 582 } 583 } 584 if (s) { 585 for (i=0; i<m; i++) { 586 jrow = ii[i]; 587 n = ii[i+1] - jrow; 588 sums = y + p*i; 589 bx = x + q*i; 590 if (i < b->AIJ->cmap->n) { 591 for (j=0; j<q; j++) { 592 for (k=0; k<p; k++) { 593 sums[k] += s[k+j*p]*bx[j]; 594 } 595 } 596 } 597 } 598 } 599 600 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 601 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 602 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 } 605 606 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy) 607 { 608 PetscErrorCode ierr; 609 PetscFunctionBegin; 610 ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 611 PetscFunctionReturn(0); 612 } 613 614 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 615 { 616 PetscErrorCode ierr; 617 PetscFunctionBegin; 618 ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr); 619 PetscFunctionReturn(0); 620 } 621 622 #include <petsc/private/kernels/blockinvert.h> 623 624 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values) 625 { 626 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 627 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 628 const PetscScalar *S = b->S; 629 const PetscScalar *T = b->T; 630 const PetscScalar *v = a->a; 631 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 632 PetscErrorCode ierr; 633 PetscInt i,j,*v_pivots,dof,dof2; 634 PetscScalar *diag,aval,*v_work; 635 636 PetscFunctionBegin; 637 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 638 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 639 640 dof = p; 641 dof2 = dof*dof; 642 643 if (b->ibdiagvalid) { 644 if (values) *values = b->ibdiag; 645 PetscFunctionReturn(0); 646 } 647 if (!b->ibdiag) { 648 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 649 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 650 } 651 if (values) *values = b->ibdiag; 652 diag = b->ibdiag; 653 654 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 655 for (i=0; i<m; i++) { 656 if (S) { 657 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 658 } else { 659 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 660 } 661 if (b->isTI) { 662 aval = 0; 663 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 664 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 665 } else if (T) { 666 aval = 0; 667 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 668 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 669 } 670 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 671 diag += dof2; 672 } 673 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 674 675 b->ibdiagvalid = PETSC_TRUE; 676 PetscFunctionReturn(0); 677 } 678 679 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 680 { 681 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 682 683 PetscFunctionBegin; 684 *B = kaij->AIJ; 685 PetscFunctionReturn(0); 686 } 687 688 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 689 { 690 PetscErrorCode ierr; 691 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 692 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 693 const PetscScalar *aa = a->a, *T = kaij->T, *v; 694 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 695 const PetscScalar *b, *xb, *idiag; 696 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 697 PetscInt i, j, k, i2, bs, bs2, nz; 698 699 PetscFunctionBegin; 700 its = its*lits; 701 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 702 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 703 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 704 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 705 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 706 else {bs = p; bs2 = bs*bs; } 707 708 if (!m) PetscFunctionReturn(0); 709 710 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); } 711 idiag = kaij->ibdiag; 712 diag = a->diag; 713 714 if (!kaij->sor.setup) { 715 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 716 kaij->sor.setup = PETSC_TRUE; 717 } 718 y = kaij->sor.y; 719 w = kaij->sor.w; 720 work = kaij->sor.work; 721 t = kaij->sor.t; 722 arr = kaij->sor.arr; 723 724 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 725 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 726 727 if (flag & SOR_ZERO_INITIAL_GUESS) { 728 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 729 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 730 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 731 i2 = bs; 732 idiag += bs2; 733 for (i=1; i<m; i++) { 734 v = aa + ai[i]; 735 vi = aj + ai[i]; 736 nz = diag[i] - ai[i]; 737 738 if (T) { /* b - T (Arow * x) */ 739 for (k=0; k<bs; k++) w[k] = 0; 740 for (j=0; j<nz; j++) { 741 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 742 } 743 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 744 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 745 } else if (kaij->isTI) { 746 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 747 for (j=0; j<nz; j++) { 748 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 749 } 750 } else { 751 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 752 } 753 754 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 755 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 756 757 idiag += bs2; 758 i2 += bs; 759 } 760 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 761 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 762 xb = t; 763 } else xb = b; 764 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 765 idiag = kaij->ibdiag+bs2*(m-1); 766 i2 = bs * (m-1); 767 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 768 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 769 i2 -= bs; 770 idiag -= bs2; 771 for (i=m-2; i>=0; i--) { 772 v = aa + diag[i] + 1 ; 773 vi = aj + diag[i] + 1; 774 nz = ai[i+1] - diag[i] - 1; 775 776 if (T) { /* FIXME: This branch untested */ 777 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 778 /* copy all rows of x that are needed into contiguous space */ 779 workt = work; 780 for (j=0; j<nz; j++) { 781 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 782 workt += bs; 783 } 784 arrt = arr; 785 for (j=0; j<nz; j++) { 786 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 787 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 788 arrt += bs2; 789 } 790 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 791 } else if (kaij->isTI) { 792 for (k=0; k<bs; k++) w[k] = t[i2+k]; 793 for (j=0; j<nz; j++) { 794 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 795 } 796 } 797 798 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 799 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 800 801 idiag -= bs2; 802 i2 -= bs; 803 } 804 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 805 } 806 its--; 807 } 808 while (its--) { /* FIXME: This branch not updated */ 809 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 810 i2 = 0; 811 idiag = kaij->ibdiag; 812 for (i=0; i<m; i++) { 813 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 814 815 v = aa + ai[i]; 816 vi = aj + ai[i]; 817 nz = diag[i] - ai[i]; 818 workt = work; 819 for (j=0; j<nz; j++) { 820 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 821 workt += bs; 822 } 823 arrt = arr; 824 if (T) { 825 for (j=0; j<nz; j++) { 826 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 827 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 828 arrt += bs2; 829 } 830 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 831 } else if (kaij->isTI) { 832 for (j=0; j<nz; j++) { 833 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 834 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 835 arrt += bs2; 836 } 837 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 838 } 839 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 840 841 v = aa + diag[i] + 1; 842 vi = aj + diag[i] + 1; 843 nz = ai[i+1] - diag[i] - 1; 844 workt = work; 845 for (j=0; j<nz; j++) { 846 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 847 workt += bs; 848 } 849 arrt = arr; 850 if (T) { 851 for (j=0; j<nz; j++) { 852 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 853 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 854 arrt += bs2; 855 } 856 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 857 } else if (kaij->isTI) { 858 for (j=0; j<nz; j++) { 859 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 860 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 861 arrt += bs2; 862 } 863 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 864 } 865 866 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 867 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 868 869 idiag += bs2; 870 i2 += bs; 871 } 872 xb = t; 873 } 874 else xb = b; 875 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 876 idiag = kaij->ibdiag+bs2*(m-1); 877 i2 = bs * (m-1); 878 if (xb == b) { 879 for (i=m-1; i>=0; i--) { 880 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 881 882 v = aa + ai[i]; 883 vi = aj + ai[i]; 884 nz = diag[i] - ai[i]; 885 workt = work; 886 for (j=0; j<nz; j++) { 887 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 888 workt += bs; 889 } 890 arrt = arr; 891 if (T) { 892 for (j=0; j<nz; j++) { 893 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 894 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 895 arrt += bs2; 896 } 897 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 898 } else if (kaij->isTI) { 899 for (j=0; j<nz; j++) { 900 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 901 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 902 arrt += bs2; 903 } 904 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 905 } 906 907 v = aa + diag[i] + 1; 908 vi = aj + diag[i] + 1; 909 nz = ai[i+1] - diag[i] - 1; 910 workt = work; 911 for (j=0; j<nz; j++) { 912 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 913 workt += bs; 914 } 915 arrt = arr; 916 if (T) { 917 for (j=0; j<nz; j++) { 918 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 919 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 920 arrt += bs2; 921 } 922 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 923 } else if (kaij->isTI) { 924 for (j=0; j<nz; j++) { 925 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 926 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 927 arrt += bs2; 928 } 929 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 930 } 931 932 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 933 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 934 } 935 } else { 936 for (i=m-1; i>=0; i--) { 937 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 938 v = aa + diag[i] + 1; 939 vi = aj + diag[i] + 1; 940 nz = ai[i+1] - diag[i] - 1; 941 workt = work; 942 for (j=0; j<nz; j++) { 943 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 944 workt += bs; 945 } 946 arrt = arr; 947 if (T) { 948 for (j=0; j<nz; j++) { 949 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 950 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 951 arrt += bs2; 952 } 953 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 954 } else if (kaij->isTI) { 955 for (j=0; j<nz; j++) { 956 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 957 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 958 arrt += bs2; 959 } 960 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 961 } 962 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 963 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 964 } 965 idiag -= bs2; 966 i2 -= bs; 967 } 968 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 969 } 970 } 971 972 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 973 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 974 PetscFunctionReturn(0); 975 } 976 977 /*===================================================================================*/ 978 979 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz) 980 { 981 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 982 PetscErrorCode ierr; 983 984 PetscFunctionBegin; 985 if (!yy) { 986 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 987 } else { 988 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 989 } 990 /* start the scatter */ 991 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 993 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 994 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 995 PetscFunctionReturn(0); 996 } 997 998 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy) 999 { 1000 PetscErrorCode ierr; 1001 PetscFunctionBegin; 1002 ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 1003 PetscFunctionReturn(0); 1004 } 1005 1006 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz) 1007 { 1008 PetscErrorCode ierr; 1009 PetscFunctionBegin; 1010 ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr); 1011 PetscFunctionReturn(0); 1012 } 1013 1014 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values) 1015 { 1016 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1017 PetscErrorCode ierr; 1018 1019 PetscFunctionBegin; 1020 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 /* ----------------------------------------------------------------*/ 1025 1026 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1027 { 1028 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1029 PetscErrorCode diag = PETSC_FALSE; 1030 PetscErrorCode ierr; 1031 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1032 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1033 1034 PetscFunctionBegin; 1035 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1036 b->getrowactive = PETSC_TRUE; 1037 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 1038 1039 if ((!S) && (!T) && (!b->isTI)) { 1040 if (ncols) *ncols = 0; 1041 if (cols) *cols = NULL; 1042 if (values) *values = NULL; 1043 PetscFunctionReturn(0); 1044 } 1045 1046 if (T || b->isTI) { 1047 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 1048 c = nzaij; 1049 for (i=0; i<nzaij; i++) { 1050 /* check if this row contains a diagonal entry */ 1051 if (colsaij[i] == r) { 1052 diag = PETSC_TRUE; 1053 c = i; 1054 } 1055 } 1056 } else nzaij = c = 0; 1057 1058 /* calculate size of row */ 1059 nz = 0; 1060 if (S) nz += q; 1061 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1062 1063 if (cols || values) { 1064 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1065 for (i=0; i<q; i++) { 1066 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1067 v[i] = 0.0; 1068 } 1069 if (b->isTI) { 1070 for (i=0; i<nzaij; i++) { 1071 for (j=0; j<q; j++) { 1072 idx[i*q+j] = colsaij[i]*q+j; 1073 v[i*q+j] = (j==s ? vaij[i] : 0); 1074 } 1075 } 1076 } else if (T) { 1077 for (i=0; i<nzaij; i++) { 1078 for (j=0; j<q; j++) { 1079 idx[i*q+j] = colsaij[i]*q+j; 1080 v[i*q+j] = vaij[i]*T[s+j*p]; 1081 } 1082 } 1083 } 1084 if (S) { 1085 for (j=0; j<q; j++) { 1086 idx[c*q+j] = r*q+j; 1087 v[c*q+j] += S[s+j*p]; 1088 } 1089 } 1090 } 1091 1092 if (ncols) *ncols = nz; 1093 if (cols) *cols = idx; 1094 if (values) *values = v; 1095 PetscFunctionReturn(0); 1096 } 1097 1098 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1099 { 1100 PetscErrorCode ierr; 1101 PetscFunctionBegin; 1102 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1103 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1108 { 1109 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1110 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1111 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1112 Mat AIJ = b->A; 1113 PetscBool diag = PETSC_FALSE; 1114 PetscErrorCode ierr; 1115 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1116 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1117 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1118 1119 PetscFunctionBegin; 1120 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1121 b->getrowactive = PETSC_TRUE; 1122 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1123 lrow = row - rstart; 1124 1125 if ((!S) && (!T) && (!b->isTI)) { 1126 if (ncols) *ncols = 0; 1127 if (cols) *cols = NULL; 1128 if (values) *values = NULL; 1129 PetscFunctionReturn(0); 1130 } 1131 1132 r = lrow/p; 1133 s = lrow%p; 1134 1135 if (T || b->isTI) { 1136 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 1137 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 1138 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 1139 1140 c = ncolsaij + ncolsoaij; 1141 for (i=0; i<ncolsaij; i++) { 1142 /* check if this row contains a diagonal entry */ 1143 if (colsaij[i] == r) { 1144 diag = PETSC_TRUE; 1145 c = i; 1146 } 1147 } 1148 } else c = 0; 1149 1150 /* calculate size of row */ 1151 nz = 0; 1152 if (S) nz += q; 1153 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1154 1155 if (cols || values) { 1156 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1157 for (i=0; i<q; i++) { 1158 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1159 v[i] = 0.0; 1160 } 1161 if (b->isTI) { 1162 for (i=0; i<ncolsaij; i++) { 1163 for (j=0; j<q; j++) { 1164 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1165 v[i*q+j] = (j==s ? vals[i] : 0.0); 1166 } 1167 } 1168 for (i=0; i<ncolsoaij; i++) { 1169 for (j=0; j<q; j++) { 1170 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1171 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1172 } 1173 } 1174 } else if (T) { 1175 for (i=0; i<ncolsaij; i++) { 1176 for (j=0; j<q; j++) { 1177 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1178 v[i*q+j] = vals[i]*T[s+j*p]; 1179 } 1180 } 1181 for (i=0; i<ncolsoaij; i++) { 1182 for (j=0; j<q; j++) { 1183 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1184 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1185 } 1186 } 1187 } 1188 if (S) { 1189 for (j=0; j<q; j++) { 1190 idx[c*q+j] = (r+rstart/p)*q+j; 1191 v[c*q+j] += S[s+j*p]; 1192 } 1193 } 1194 } 1195 1196 if (ncols) *ncols = nz; 1197 if (cols) *cols = idx; 1198 if (values) *values = v; 1199 PetscFunctionReturn(0); 1200 } 1201 1202 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1203 { 1204 PetscErrorCode ierr; 1205 PetscFunctionBegin; 1206 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1207 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1208 PetscFunctionReturn(0); 1209 } 1210 1211 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1212 { 1213 PetscErrorCode ierr; 1214 Mat A; 1215 1216 PetscFunctionBegin; 1217 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1218 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1219 ierr = MatDestroy(&A);CHKERRQ(ierr); 1220 PetscFunctionReturn(0); 1221 } 1222 1223 /* ---------------------------------------------------------------------------------- */ 1224 /*@C 1225 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1226 1227 [I \otimes S + A \otimes T] 1228 1229 where 1230 S is a dense (p \times q) matrix 1231 T is a dense (p \times q) matrix 1232 A is an AIJ (n \times n) matrix 1233 I is the identity matrix 1234 The resulting matrix is (np \times nq) 1235 1236 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 1237 S is always stored independently on all processes as a PetscScalar array in column-major format. 1238 1239 Collective 1240 1241 Input Parameters: 1242 + A - the AIJ matrix 1243 . S - the S matrix, stored as a PetscScalar array (column-major) 1244 . T - the T matrix, stored as a PetscScalar array (column-major) 1245 . p - number of rows in S and T 1246 - q - number of columns in S and T 1247 1248 Output Parameter: 1249 . kaij - the new KAIJ matrix 1250 1251 Operations provided: 1252 + MatMult 1253 . MatMultAdd 1254 . MatInvertBlockDiagonal 1255 - MatView 1256 1257 Level: advanced 1258 1259 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1260 @*/ 1261 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1262 { 1263 PetscErrorCode ierr; 1264 PetscMPIInt size; 1265 1266 PetscFunctionBegin; 1267 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1268 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1269 if (size == 1) { 1270 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1271 } else { 1272 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1273 } 1274 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1275 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1276 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1277 ierr = MatSetUp(*kaij); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 /*MC 1282 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 1283 1284 [I \otimes S + A \otimes T] 1285 1286 where 1287 S is a dense (p \times q) matrix 1288 T is a dense (p \times q) matrix 1289 A is an AIJ (n \times n) matrix 1290 I is the identity matrix 1291 The resulting matrix is (np \times nq) 1292 1293 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 1294 S and T are always stored independently on all processes as a PetscScalar array in column-major format. 1295 1296 Operations provided: 1297 + MatMult 1298 . MatMultAdd 1299 . MatInvertBlockDiagonal 1300 - MatView 1301 1302 Level: advanced 1303 1304 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1305 M*/ 1306 1307 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1308 { 1309 PetscErrorCode ierr; 1310 Mat_MPIKAIJ *b; 1311 PetscMPIInt size; 1312 1313 PetscFunctionBegin; 1314 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1315 A->data = (void*)b; 1316 1317 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1318 1319 A->ops->setup = MatSetUp_KAIJ; 1320 1321 b->w = 0; 1322 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1323 if (size == 1) { 1324 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1325 A->ops->setup = MatSetUp_KAIJ; 1326 A->ops->destroy = MatDestroy_SeqKAIJ; 1327 A->ops->view = MatView_SeqKAIJ; 1328 A->ops->mult = MatMult_SeqKAIJ_N; 1329 A->ops->multadd = MatMultAdd_SeqKAIJ_N; 1330 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N; 1331 A->ops->getrow = MatGetRow_SeqKAIJ; 1332 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1333 A->ops->sor = MatSOR_SeqKAIJ; 1334 } else { 1335 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1336 A->ops->setup = MatSetUp_KAIJ; 1337 A->ops->destroy = MatDestroy_MPIKAIJ; 1338 A->ops->view = MatView_MPIKAIJ; 1339 A->ops->mult = MatMult_MPIKAIJ_dof; 1340 A->ops->multadd = MatMultAdd_MPIKAIJ_dof; 1341 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof; 1342 A->ops->getrow = MatGetRow_MPIKAIJ; 1343 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1344 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1345 } 1346 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1347 PetscFunctionReturn(0); 1348 } 1349