1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else { 64 *B = A; 65 } 66 PetscFunctionReturn(0); 67 } 68 69 /*@C 70 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 71 72 Not Collective; the entire S is stored and returned independently on all processes. 73 74 Input Parameter: 75 . A - the KAIJ matrix 76 77 Output Parameter: 78 . S - the S matrix, in form of a scalar array in column-major format 79 80 Notes: 81 The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 82 The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S. 83 84 Level: advanced 85 86 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 87 @*/ 88 PetscErrorCode MatKAIJGetS(Mat A,const PetscScalar **S) 89 { 90 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 91 PetscFunctionBegin; 92 *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 98 99 Not Collective; the entire T is stored and returned independently on all processes 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameter: 105 . T - the T matrix, in form of a scalar array in column-major format 106 107 Notes: 108 The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 109 The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T. 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetT(Mat A,const PetscScalar **T) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 *T = b->T; 120 PetscFunctionReturn(0); 121 } 122 123 /*@ 124 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 125 126 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 127 128 Input Parameters: 129 + A - the KAIJ matrix 130 - B - the AIJ matrix 131 132 Level: advanced 133 134 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 135 @*/ 136 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 137 { 138 PetscErrorCode ierr; 139 PetscMPIInt size; 140 141 PetscFunctionBegin; 142 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 143 if (size == 1) { 144 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 145 a->AIJ = B; 146 } else { 147 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 148 a->A = B; 149 } 150 PetscFunctionReturn(0); 151 } 152 153 /*@C 154 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 155 156 Logically Collective; the entire S is stored independently on all processes. 157 158 Input Parameters: 159 + A - the KAIJ matrix 160 . p - the number of rows in S 161 . q - the number of columns in S 162 - S - the S matrix, in form of a scalar array in column-major format 163 164 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 165 166 Level: Advanced 167 168 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 169 @*/ 170 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 171 { 172 PetscErrorCode ierr; 173 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 174 175 PetscFunctionBegin; 176 if (a->S) { 177 ierr = PetscFree(a->S);CHKERRQ(ierr); 178 } 179 if (S) { 180 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 181 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 182 } else a->S = NULL; 183 184 a->p = p; 185 a->q = q; 186 187 PetscFunctionReturn(0); 188 } 189 190 /*@C 191 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 192 193 Logically Collective; the entire T is stored independently on all processes. 194 195 Input Parameters: 196 + A - the KAIJ matrix 197 . p - the number of rows in S 198 . q - the number of columns in S 199 - T - the T matrix, in form of a scalar array in column-major format 200 201 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 202 203 Level: Advanced 204 205 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 206 @*/ 207 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 208 { 209 PetscErrorCode ierr; 210 PetscInt i,j; 211 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 212 PetscBool isTI = PETSC_FALSE; 213 214 PetscFunctionBegin; 215 /* check if T is an identity matrix */ 216 if (T && (p == q)) { 217 isTI = PETSC_TRUE; 218 for (i=0; i<p; i++) { 219 for (j=0; j<q; j++) { 220 if (i == j) { 221 /* diagonal term must be 1 */ 222 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 223 } else { 224 /* off-diagonal term must be 0 */ 225 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 226 } 227 } 228 } 229 } 230 a->isTI = isTI; 231 232 if (a->T) { 233 ierr = PetscFree(a->T);CHKERRQ(ierr); 234 } 235 if (T && (!isTI)) { 236 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 237 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 238 } else a->T = NULL; 239 240 a->p = p; 241 a->q = q; 242 PetscFunctionReturn(0); 243 } 244 245 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 246 { 247 PetscErrorCode ierr; 248 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 249 250 PetscFunctionBegin; 251 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 252 ierr = PetscFree(b->S);CHKERRQ(ierr); 253 ierr = PetscFree(b->T);CHKERRQ(ierr); 254 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 255 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 256 ierr = PetscFree(A->data);CHKERRQ(ierr); 257 PetscFunctionReturn(0); 258 } 259 260 PetscErrorCode MatSetUp_KAIJ(Mat A) 261 { 262 PetscErrorCode ierr; 263 PetscInt n; 264 PetscMPIInt size; 265 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 266 267 PetscFunctionBegin; 268 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 269 if (size == 1) { 270 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 271 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 272 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 273 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 274 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 275 ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr); 276 } else { 277 Mat_MPIKAIJ *a; 278 Mat_MPIAIJ *mpiaij; 279 IS from,to; 280 Vec gvec; 281 PetscScalar *T; 282 PetscInt i,j; 283 284 a = (Mat_MPIKAIJ*)A->data; 285 mpiaij = (Mat_MPIAIJ*)a->A->data; 286 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 287 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 288 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 289 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 290 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 291 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 292 293 if (a->isTI) { 294 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 295 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 296 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 297 * to pass in. */ 298 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 299 for (i=0; i<a->p; i++) { 300 for (j=0; j<a->q; j++) { 301 if (i==j) T[i+j*a->p] = 1.0; 302 else T[i+j*a->p] = 0.0; 303 } 304 } 305 } else T = a->T; 306 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 307 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 308 if (a->isTI) { 309 ierr = PetscFree(T);CHKERRQ(ierr); 310 } 311 312 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 313 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 314 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 315 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 316 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 317 318 /* create two temporary Index sets for build scatter gather */ 319 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 320 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 321 322 /* create temporary global vector to generate scatter context */ 323 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 324 325 /* generate the scatter context */ 326 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 327 328 ierr = ISDestroy(&from);CHKERRQ(ierr); 329 ierr = ISDestroy(&to);CHKERRQ(ierr); 330 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 331 } 332 333 A->assembled = PETSC_TRUE; 334 PetscFunctionReturn(0); 335 } 336 337 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 338 { 339 PetscErrorCode ierr; 340 Mat B; 341 342 PetscFunctionBegin; 343 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 344 ierr = MatView(B,viewer);CHKERRQ(ierr); 345 ierr = MatDestroy(&B);CHKERRQ(ierr); 346 PetscFunctionReturn(0); 347 } 348 349 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 350 { 351 PetscErrorCode ierr; 352 Mat B; 353 354 PetscFunctionBegin; 355 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 356 ierr = MatView(B,viewer);CHKERRQ(ierr); 357 ierr = MatDestroy(&B);CHKERRQ(ierr); 358 PetscFunctionReturn(0); 359 } 360 361 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 362 { 363 PetscErrorCode ierr; 364 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 365 366 PetscFunctionBegin; 367 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 368 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 369 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 370 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 371 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 372 ierr = PetscFree(b->S);CHKERRQ(ierr); 373 ierr = PetscFree(b->T);CHKERRQ(ierr); 374 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 375 ierr = PetscFree(A->data);CHKERRQ(ierr); 376 PetscFunctionReturn(0); 377 } 378 379 380 /* --------------------------------------------------------------------------------------*/ 381 382 /* zz = yy + Axx */ 383 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz) 384 { 385 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 386 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 387 const PetscScalar *s = b->S, *t = b->T; 388 const PetscScalar *x,*v,*bx; 389 PetscScalar *y,*sums; 390 PetscErrorCode ierr; 391 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 392 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 393 394 PetscFunctionBegin; 395 if (!yy) { 396 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 397 } else { 398 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 399 } 400 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 401 402 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 403 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 404 idx = a->j; 405 v = a->a; 406 ii = a->i; 407 408 if (b->isTI) { 409 for (i=0; i<m; i++) { 410 jrow = ii[i]; 411 n = ii[i+1] - jrow; 412 sums = y + p*i; 413 for (j=0; j<n; j++) { 414 for (k=0; k<p; k++) { 415 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 416 } 417 } 418 } 419 } else if (t) { 420 for (i=0; i<m; i++) { 421 jrow = ii[i]; 422 n = ii[i+1] - jrow; 423 sums = y + p*i; 424 bx = x + q*i; 425 for (j=0; j<n; j++) { 426 for (k=0; k<p; k++) { 427 for (l=0; l<q; l++) { 428 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 429 } 430 } 431 } 432 } 433 } 434 if (s) { 435 for (i=0; i<m; i++) { 436 jrow = ii[i]; 437 n = ii[i+1] - jrow; 438 sums = y + p*i; 439 bx = x + q*i; 440 if (i < b->AIJ->cmap->n) { 441 for (j=0; j<q; j++) { 442 for (k=0; k<p; k++) { 443 sums[k] += s[k+j*p]*bx[j]; 444 } 445 } 446 } 447 } 448 } 449 450 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 451 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 452 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 453 PetscFunctionReturn(0); 454 } 455 456 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy) 457 { 458 PetscErrorCode ierr; 459 PetscFunctionBegin; 460 ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 465 { 466 PetscErrorCode ierr; 467 PetscFunctionBegin; 468 ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr); 469 PetscFunctionReturn(0); 470 } 471 472 #include <petsc/private/kernels/blockinvert.h> 473 474 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values) 475 { 476 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 477 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 478 const PetscScalar *S = b->S; 479 const PetscScalar *T = b->T; 480 const PetscScalar *v = a->a; 481 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 482 PetscErrorCode ierr; 483 PetscInt i,j,*v_pivots,dof,dof2; 484 PetscScalar *diag,aval,*v_work; 485 486 PetscFunctionBegin; 487 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 488 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 489 490 dof = p; 491 dof2 = dof*dof; 492 493 if (b->ibdiagvalid) { 494 if (values) *values = b->ibdiag; 495 PetscFunctionReturn(0); 496 } 497 if (!b->ibdiag) { 498 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 499 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 500 } 501 if (values) *values = b->ibdiag; 502 diag = b->ibdiag; 503 504 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 505 for (i=0; i<m; i++) { 506 if (S) { 507 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 508 } else { 509 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 510 } 511 if (b->isTI) { 512 aval = 0; 513 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 514 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 515 } else if (T) { 516 aval = 0; 517 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 518 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 519 } 520 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 521 diag += dof2; 522 } 523 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 524 525 b->ibdiagvalid = PETSC_TRUE; 526 PetscFunctionReturn(0); 527 } 528 529 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 530 { 531 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 532 533 PetscFunctionBegin; 534 *B = kaij->AIJ; 535 PetscFunctionReturn(0); 536 } 537 538 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 539 { 540 PetscErrorCode ierr; 541 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 542 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 543 const PetscScalar *aa = a->a, *T = kaij->T, *v; 544 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 545 const PetscScalar *b, *xb, *idiag; 546 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 547 PetscInt i, j, k, i2, bs, bs2, nz; 548 549 PetscFunctionBegin; 550 its = its*lits; 551 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 552 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 553 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 554 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 555 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 556 else {bs = p; bs2 = bs*bs; } 557 558 if (!m) PetscFunctionReturn(0); 559 560 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); } 561 idiag = kaij->ibdiag; 562 diag = a->diag; 563 564 if (!kaij->sor.setup) { 565 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 566 kaij->sor.setup = PETSC_TRUE; 567 } 568 y = kaij->sor.y; 569 w = kaij->sor.w; 570 work = kaij->sor.work; 571 t = kaij->sor.t; 572 arr = kaij->sor.arr; 573 574 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 575 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 576 577 if (flag & SOR_ZERO_INITIAL_GUESS) { 578 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 579 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 580 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 581 i2 = bs; 582 idiag += bs2; 583 for (i=1; i<m; i++) { 584 v = aa + ai[i]; 585 vi = aj + ai[i]; 586 nz = diag[i] - ai[i]; 587 588 if (T) { /* b - T (Arow * x) */ 589 for (k=0; k<bs; k++) w[k] = 0; 590 for (j=0; j<nz; j++) { 591 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 592 } 593 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 594 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 595 } else if (kaij->isTI) { 596 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 597 for (j=0; j<nz; j++) { 598 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 599 } 600 } else { 601 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 602 } 603 604 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 605 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 606 607 idiag += bs2; 608 i2 += bs; 609 } 610 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 611 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 612 xb = t; 613 } else xb = b; 614 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 615 idiag = kaij->ibdiag+bs2*(m-1); 616 i2 = bs * (m-1); 617 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 618 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 619 i2 -= bs; 620 idiag -= bs2; 621 for (i=m-2; i>=0; i--) { 622 v = aa + diag[i] + 1 ; 623 vi = aj + diag[i] + 1; 624 nz = ai[i+1] - diag[i] - 1; 625 626 if (T) { /* FIXME: This branch untested */ 627 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 628 /* copy all rows of x that are needed into contiguous space */ 629 workt = work; 630 for (j=0; j<nz; j++) { 631 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 632 workt += bs; 633 } 634 arrt = arr; 635 for (j=0; j<nz; j++) { 636 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 637 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 638 arrt += bs2; 639 } 640 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 641 } else if (kaij->isTI) { 642 for (k=0; k<bs; k++) w[k] = t[i2+k]; 643 for (j=0; j<nz; j++) { 644 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 645 } 646 } 647 648 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 649 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 650 651 idiag -= bs2; 652 i2 -= bs; 653 } 654 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 655 } 656 its--; 657 } 658 while (its--) { /* FIXME: This branch not updated */ 659 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 660 i2 = 0; 661 idiag = kaij->ibdiag; 662 for (i=0; i<m; i++) { 663 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 664 665 v = aa + ai[i]; 666 vi = aj + ai[i]; 667 nz = diag[i] - ai[i]; 668 workt = work; 669 for (j=0; j<nz; j++) { 670 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 671 workt += bs; 672 } 673 arrt = arr; 674 if (T) { 675 for (j=0; j<nz; j++) { 676 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 677 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 678 arrt += bs2; 679 } 680 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 681 } else if (kaij->isTI) { 682 for (j=0; j<nz; j++) { 683 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 684 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 685 arrt += bs2; 686 } 687 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 688 } 689 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 690 691 v = aa + diag[i] + 1; 692 vi = aj + diag[i] + 1; 693 nz = ai[i+1] - diag[i] - 1; 694 workt = work; 695 for (j=0; j<nz; j++) { 696 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 697 workt += bs; 698 } 699 arrt = arr; 700 if (T) { 701 for (j=0; j<nz; j++) { 702 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 703 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 704 arrt += bs2; 705 } 706 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 707 } else if (kaij->isTI) { 708 for (j=0; j<nz; j++) { 709 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 710 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 711 arrt += bs2; 712 } 713 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 714 } 715 716 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 717 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 718 719 idiag += bs2; 720 i2 += bs; 721 } 722 xb = t; 723 } 724 else xb = b; 725 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 726 idiag = kaij->ibdiag+bs2*(m-1); 727 i2 = bs * (m-1); 728 if (xb == b) { 729 for (i=m-1; i>=0; i--) { 730 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 731 732 v = aa + ai[i]; 733 vi = aj + ai[i]; 734 nz = diag[i] - ai[i]; 735 workt = work; 736 for (j=0; j<nz; j++) { 737 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 738 workt += bs; 739 } 740 arrt = arr; 741 if (T) { 742 for (j=0; j<nz; j++) { 743 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 744 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 745 arrt += bs2; 746 } 747 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 748 } else if (kaij->isTI) { 749 for (j=0; j<nz; j++) { 750 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 751 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 752 arrt += bs2; 753 } 754 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 755 } 756 757 v = aa + diag[i] + 1; 758 vi = aj + diag[i] + 1; 759 nz = ai[i+1] - diag[i] - 1; 760 workt = work; 761 for (j=0; j<nz; j++) { 762 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 763 workt += bs; 764 } 765 arrt = arr; 766 if (T) { 767 for (j=0; j<nz; j++) { 768 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 769 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 770 arrt += bs2; 771 } 772 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 773 } else if (kaij->isTI) { 774 for (j=0; j<nz; j++) { 775 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 776 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 777 arrt += bs2; 778 } 779 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 780 } 781 782 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 783 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 784 } 785 } else { 786 for (i=m-1; i>=0; i--) { 787 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 788 v = aa + diag[i] + 1; 789 vi = aj + diag[i] + 1; 790 nz = ai[i+1] - diag[i] - 1; 791 workt = work; 792 for (j=0; j<nz; j++) { 793 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 794 workt += bs; 795 } 796 arrt = arr; 797 if (T) { 798 for (j=0; j<nz; j++) { 799 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 800 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 801 arrt += bs2; 802 } 803 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 804 } else if (kaij->isTI) { 805 for (j=0; j<nz; j++) { 806 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 807 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 808 arrt += bs2; 809 } 810 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 811 } 812 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 813 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 814 } 815 idiag -= bs2; 816 i2 -= bs; 817 } 818 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 819 } 820 } 821 822 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 823 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 827 /*===================================================================================*/ 828 829 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz) 830 { 831 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 832 PetscErrorCode ierr; 833 834 PetscFunctionBegin; 835 if (!yy) { 836 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 837 } else { 838 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 839 } 840 /* start the scatter */ 841 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 843 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy) 849 { 850 PetscErrorCode ierr; 851 PetscFunctionBegin; 852 ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 853 PetscFunctionReturn(0); 854 } 855 856 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz) 857 { 858 PetscErrorCode ierr; 859 PetscFunctionBegin; 860 ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr); 861 PetscFunctionReturn(0); 862 } 863 864 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values) 865 { 866 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 867 PetscErrorCode ierr; 868 869 PetscFunctionBegin; 870 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 /* ----------------------------------------------------------------*/ 875 876 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 877 { 878 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 879 PetscErrorCode diag = PETSC_FALSE; 880 PetscErrorCode ierr; 881 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 882 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 883 884 PetscFunctionBegin; 885 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 886 b->getrowactive = PETSC_TRUE; 887 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 888 889 if ((!S) && (!T) && (!b->isTI)) { 890 if (ncols) *ncols = 0; 891 if (cols) *cols = NULL; 892 if (values) *values = NULL; 893 PetscFunctionReturn(0); 894 } 895 896 if (T || b->isTI) { 897 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 898 c = nzaij; 899 for (i=0; i<nzaij; i++) { 900 /* check if this row contains a diagonal entry */ 901 if (colsaij[i] == r) { 902 diag = PETSC_TRUE; 903 c = i; 904 } 905 } 906 } else nzaij = c = 0; 907 908 /* calculate size of row */ 909 nz = 0; 910 if (S) nz += q; 911 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 912 913 if (cols || values) { 914 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 915 for (i=0; i<q; i++) { 916 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 917 v[i] = 0.0; 918 } 919 if (b->isTI) { 920 for (i=0; i<nzaij; i++) { 921 for (j=0; j<q; j++) { 922 idx[i*q+j] = colsaij[i]*q+j; 923 v[i*q+j] = (j==s ? vaij[i] : 0); 924 } 925 } 926 } else if (T) { 927 for (i=0; i<nzaij; i++) { 928 for (j=0; j<q; j++) { 929 idx[i*q+j] = colsaij[i]*q+j; 930 v[i*q+j] = vaij[i]*T[s+j*p]; 931 } 932 } 933 } 934 if (S) { 935 for (j=0; j<q; j++) { 936 idx[c*q+j] = r*q+j; 937 v[c*q+j] += S[s+j*p]; 938 } 939 } 940 } 941 942 if (ncols) *ncols = nz; 943 if (cols) *cols = idx; 944 if (values) *values = v; 945 946 PetscFunctionReturn(0); 947 } 948 949 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 950 { 951 PetscErrorCode ierr; 952 PetscFunctionBegin; 953 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 954 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 955 PetscFunctionReturn(0); 956 } 957 958 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 959 { 960 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 961 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 962 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 963 Mat AIJ = b->A; 964 PetscBool diag = PETSC_FALSE; 965 PetscErrorCode ierr; 966 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 967 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 968 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 969 970 PetscFunctionBegin; 971 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 972 b->getrowactive = PETSC_TRUE; 973 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 974 lrow = row - rstart; 975 976 if ((!S) && (!T) && (!b->isTI)) { 977 if (ncols) *ncols = 0; 978 if (cols) *cols = NULL; 979 if (values) *values = NULL; 980 PetscFunctionReturn(0); 981 } 982 983 r = lrow/p; 984 s = lrow%p; 985 986 if (T || b->isTI) { 987 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 988 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 989 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 990 991 c = ncolsaij + ncolsoaij; 992 for (i=0; i<ncolsaij; i++) { 993 /* check if this row contains a diagonal entry */ 994 if (colsaij[i] == r) { 995 diag = PETSC_TRUE; 996 c = i; 997 } 998 } 999 } else c = 0; 1000 1001 /* calculate size of row */ 1002 nz = 0; 1003 if (S) nz += q; 1004 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1005 1006 if (cols || values) { 1007 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1008 for (i=0; i<q; i++) { 1009 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1010 v[i] = 0.0; 1011 } 1012 if (b->isTI) { 1013 for (i=0; i<ncolsaij; i++) { 1014 for (j=0; j<q; j++) { 1015 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1016 v[i*q+j] = (j==s ? vals[i] : 0.0); 1017 } 1018 } 1019 for (i=0; i<ncolsoaij; i++) { 1020 for (j=0; j<q; j++) { 1021 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1022 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1023 } 1024 } 1025 } else if (T) { 1026 for (i=0; i<ncolsaij; i++) { 1027 for (j=0; j<q; j++) { 1028 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1029 v[i*q+j] = vals[i]*T[s+j*p]; 1030 } 1031 } 1032 for (i=0; i<ncolsoaij; i++) { 1033 for (j=0; j<q; j++) { 1034 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1035 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1036 } 1037 } 1038 } 1039 if (S) { 1040 for (j=0; j<q; j++) { 1041 idx[c*q+j] = (r+rstart/p)*q+j; 1042 v[c*q+j] += S[s+j*p]; 1043 } 1044 } 1045 } 1046 1047 if (ncols) *ncols = nz; 1048 if (cols) *cols = idx; 1049 if (values) *values = v; 1050 PetscFunctionReturn(0); 1051 } 1052 1053 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1054 { 1055 PetscErrorCode ierr; 1056 PetscFunctionBegin; 1057 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1058 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1059 PetscFunctionReturn(0); 1060 } 1061 1062 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1063 { 1064 PetscErrorCode ierr; 1065 Mat A; 1066 1067 PetscFunctionBegin; 1068 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1069 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1070 ierr = MatDestroy(&A);CHKERRQ(ierr); 1071 PetscFunctionReturn(0); 1072 } 1073 1074 /* ---------------------------------------------------------------------------------- */ 1075 /*@C 1076 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1077 1078 [I \otimes S + A \otimes T] 1079 1080 where 1081 S is a dense (p \times q) matrix 1082 T is a dense (p \times q) matrix 1083 A is an AIJ (n \times n) matrix 1084 I is the identity matrix 1085 The resulting matrix is (np \times nq) 1086 1087 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 1088 S is always stored independently on all processes as a PetscScalar array in column-major format. 1089 1090 Collective 1091 1092 Input Parameters: 1093 + A - the AIJ matrix 1094 . S - the S matrix, stored as a PetscScalar array (column-major) 1095 . T - the T matrix, stored as a PetscScalar array (column-major) 1096 . p - number of rows in S and T 1097 - q - number of columns in S and T 1098 1099 Output Parameter: 1100 . kaij - the new KAIJ matrix 1101 1102 Operations provided: 1103 + MatMult 1104 . MatMultAdd 1105 . MatInvertBlockDiagonal 1106 - MatView 1107 1108 Level: advanced 1109 1110 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1111 @*/ 1112 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1113 { 1114 PetscErrorCode ierr; 1115 PetscMPIInt size; 1116 1117 PetscFunctionBegin; 1118 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1119 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1120 if (size == 1) { 1121 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1122 } else { 1123 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1124 } 1125 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1126 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1127 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1128 ierr = MatSetUp(*kaij); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 /*MC 1133 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 1134 1135 [I \otimes S + A \otimes T] 1136 1137 where 1138 S is a dense (p \times q) matrix 1139 T is a dense (p \times q) matrix 1140 A is an AIJ (n \times n) matrix 1141 I is the identity matrix 1142 The resulting matrix is (np \times nq) 1143 1144 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 1145 S and T are always stored independently on all processes as a PetscScalar array in column-major format. 1146 1147 Operations provided: 1148 + MatMult 1149 . MatMultAdd 1150 . MatInvertBlockDiagonal 1151 - MatView 1152 1153 Level: advanced 1154 1155 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1156 M*/ 1157 1158 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1159 { 1160 PetscErrorCode ierr; 1161 Mat_MPIKAIJ *b; 1162 PetscMPIInt size; 1163 1164 PetscFunctionBegin; 1165 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1166 A->data = (void*)b; 1167 1168 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1169 1170 A->ops->setup = MatSetUp_KAIJ; 1171 1172 b->w = 0; 1173 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1174 if (size == 1) { 1175 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1176 1177 A->ops->setup = MatSetUp_KAIJ; 1178 A->ops->destroy = MatDestroy_SeqKAIJ; 1179 A->ops->view = MatView_SeqKAIJ; 1180 A->ops->mult = MatMult_SeqKAIJ_N; 1181 A->ops->multadd = MatMultAdd_SeqKAIJ_N; 1182 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N; 1183 A->ops->getrow = MatGetRow_SeqKAIJ; 1184 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1185 A->ops->sor = MatSOR_SeqKAIJ; 1186 } else { 1187 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1188 A->ops->setup = MatSetUp_KAIJ; 1189 A->ops->destroy = MatDestroy_MPIKAIJ; 1190 A->ops->view = MatView_MPIKAIJ; 1191 A->ops->mult = MatMult_MPIKAIJ_dof; 1192 A->ops->multadd = MatMultAdd_MPIKAIJ_dof; 1193 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof; 1194 A->ops->getrow = MatGetRow_MPIKAIJ; 1195 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1196 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1197 } 1198 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1199 PetscFunctionReturn(0); 1200 } 1201