1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else { 64 *B = A; 65 } 66 PetscFunctionReturn(0); 67 } 68 69 /*@C 70 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 71 72 Not Collective; the entire S is stored and returned independently on all processes. 73 74 Input Parameter: 75 . A - the KAIJ matrix 76 77 Output Parameter: 78 . S - the S matrix, in form of a scalar array in column-major format 79 80 Notes: 81 The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 82 The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S. 83 84 Level: advanced 85 86 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 87 @*/ 88 PetscErrorCode MatKAIJGetS(Mat A,const PetscScalar **S) 89 { 90 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 91 PetscFunctionBegin; 92 *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 98 99 Not Collective; the entire T is stored and returned independently on all processes 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameter: 105 . T - the T matrix, in form of a scalar array in column-major format 106 107 Notes: 108 The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 109 The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T. 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetT(Mat A,const PetscScalar **T) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 *T = b->T; 120 PetscFunctionReturn(0); 121 } 122 123 /*@ 124 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 125 126 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 127 128 Input Parameters: 129 + A - the KAIJ matrix 130 - B - the AIJ matrix 131 132 Level: advanced 133 134 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 135 @*/ 136 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 137 { 138 PetscErrorCode ierr; 139 PetscMPIInt size; 140 141 PetscFunctionBegin; 142 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 143 if (size == 1) { 144 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 145 a->AIJ = B; 146 } else { 147 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 148 a->A = B; 149 } 150 PetscFunctionReturn(0); 151 } 152 153 /*@C 154 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 155 156 Logically Collective; the entire S is stored independently on all processes. 157 158 Input Parameters: 159 + A - the KAIJ matrix 160 . p - the number of rows in S 161 . q - the number of columns in S 162 - S - the S matrix, in form of a scalar array in column-major format 163 164 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 165 166 Level: Advanced 167 168 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 169 @*/ 170 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 171 { 172 PetscErrorCode ierr; 173 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 174 175 PetscFunctionBegin; 176 if (a->S) { 177 ierr = PetscFree(a->S);CHKERRQ(ierr); 178 } 179 if (S) { 180 ierr = PetscMalloc(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 181 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 182 } else a->S = NULL; 183 184 a->p = p; 185 a->q = q; 186 187 PetscFunctionReturn(0); 188 } 189 190 /*@C 191 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 192 193 Logically Collective; the entire T is stored independently on all processes. 194 195 Input Parameters: 196 + A - the KAIJ matrix 197 . p - the number of rows in S 198 . q - the number of columns in S 199 - T - the T matrix, in form of a scalar array in column-major format 200 201 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 202 203 Level: Advanced 204 205 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 206 @*/ 207 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 208 { 209 PetscErrorCode ierr; 210 PetscInt i,j; 211 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 212 PetscBool isTI = PETSC_FALSE; 213 214 PetscFunctionBegin; 215 /* check if T is an identity matrix */ 216 if (T && (p == q)) { 217 isTI = PETSC_TRUE; 218 for (i=0; i<p; i++) { 219 for (j=0; j<q; j++) { 220 if (i == j) { 221 /* diagonal term must be 1 */ 222 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 223 } else { 224 /* off-diagonal term must be 0 */ 225 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 226 } 227 } 228 } 229 } 230 a->isTI = isTI; 231 232 if (a->T) { 233 ierr = PetscFree(a->T);CHKERRQ(ierr); 234 } 235 if (T && (!isTI)) { 236 ierr = PetscMalloc(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 237 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 238 } else a->T = NULL; 239 240 a->p = p; 241 a->q = q; 242 PetscFunctionReturn(0); 243 } 244 245 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 246 { 247 PetscErrorCode ierr; 248 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 249 250 PetscFunctionBegin; 251 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 252 ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr); 253 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 254 ierr = PetscFree(A->data);CHKERRQ(ierr); 255 PetscFunctionReturn(0); 256 } 257 258 PetscErrorCode MatSetUp_KAIJ(Mat A) 259 { 260 PetscErrorCode ierr; 261 PetscInt n; 262 PetscMPIInt size; 263 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 264 265 PetscFunctionBegin; 266 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 267 if (size == 1) { 268 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 269 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 270 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 271 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 272 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 273 ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr); 274 } else { 275 Mat_MPIKAIJ *a; 276 Mat_MPIAIJ *mpiaij; 277 IS from,to; 278 Vec gvec; 279 PetscScalar *T; 280 PetscInt i,j; 281 282 a = (Mat_MPIKAIJ*)A->data; 283 mpiaij = (Mat_MPIAIJ*)a->A->data; 284 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 285 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 286 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 287 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 288 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 289 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 290 291 if (a->isTI) { 292 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 293 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 294 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 295 * to pass in. */ 296 ierr = PetscMalloc(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 297 for (i=0; i<a->p; i++) { 298 for (j=0; j<a->q; j++) { 299 if (i==j) T[i+j*a->p] = 1.0; 300 else T[i+j*a->p] = 0.0; 301 } 302 } 303 } else T = a->T; 304 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 305 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 306 if (a->isTI) { 307 ierr = PetscFree(T);CHKERRQ(ierr); 308 } 309 310 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 311 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 312 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 313 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 314 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 315 316 /* create two temporary Index sets for build scatter gather */ 317 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 318 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 319 320 /* create temporary global vector to generate scatter context */ 321 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 322 323 /* generate the scatter context */ 324 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 325 326 ierr = ISDestroy(&from);CHKERRQ(ierr); 327 ierr = ISDestroy(&to);CHKERRQ(ierr); 328 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 329 } 330 331 A->assembled = PETSC_TRUE; 332 PetscFunctionReturn(0); 333 } 334 335 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 336 { 337 PetscErrorCode ierr; 338 Mat B; 339 340 PetscFunctionBegin; 341 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 342 ierr = MatView(B,viewer);CHKERRQ(ierr); 343 ierr = MatDestroy(&B);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 348 { 349 PetscErrorCode ierr; 350 Mat B; 351 352 PetscFunctionBegin; 353 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 354 ierr = MatView(B,viewer);CHKERRQ(ierr); 355 ierr = MatDestroy(&B);CHKERRQ(ierr); 356 PetscFunctionReturn(0); 357 } 358 359 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 360 { 361 PetscErrorCode ierr; 362 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 363 364 PetscFunctionBegin; 365 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 366 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 367 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 368 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 369 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 370 ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr); 371 ierr = PetscFree(A->data);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 375 376 /* --------------------------------------------------------------------------------------*/ 377 378 /* zz = yy + Axx */ 379 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz) 380 { 381 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 382 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 383 const PetscScalar *s = b->S, *t = b->T; 384 const PetscScalar *x,*v,*bx; 385 PetscScalar *y,*sums; 386 PetscErrorCode ierr; 387 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 388 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 389 390 PetscFunctionBegin; 391 if (!yy) { 392 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 393 } else { 394 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 395 } 396 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 397 398 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 399 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 400 idx = a->j; 401 v = a->a; 402 ii = a->i; 403 404 if (b->isTI) { 405 for (i=0; i<m; i++) { 406 jrow = ii[i]; 407 n = ii[i+1] - jrow; 408 sums = y + p*i; 409 for (j=0; j<n; j++) { 410 for (k=0; k<p; k++) { 411 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 412 } 413 } 414 } 415 } else if (t) { 416 for (i=0; i<m; i++) { 417 jrow = ii[i]; 418 n = ii[i+1] - jrow; 419 sums = y + p*i; 420 bx = x + q*i; 421 for (j=0; j<n; j++) { 422 for (k=0; k<p; k++) { 423 for (l=0; l<q; l++) { 424 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 425 } 426 } 427 } 428 } 429 } 430 if (s) { 431 for (i=0; i<m; i++) { 432 jrow = ii[i]; 433 n = ii[i+1] - jrow; 434 sums = y + p*i; 435 bx = x + q*i; 436 if (i < b->AIJ->cmap->n) { 437 for (j=0; j<q; j++) { 438 for (k=0; k<p; k++) { 439 sums[k] += s[k+j*p]*bx[j]; 440 } 441 } 442 } 443 } 444 } 445 446 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 447 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 448 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy) 453 { 454 PetscErrorCode ierr; 455 PetscFunctionBegin; 456 ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 460 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 461 { 462 PetscErrorCode ierr; 463 PetscFunctionBegin; 464 ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr); 465 PetscFunctionReturn(0); 466 } 467 468 #include <petsc/private/kernels/blockinvert.h> 469 470 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values) 471 { 472 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 473 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 474 const PetscScalar *S = b->S; 475 const PetscScalar *T = b->T; 476 const PetscScalar *v = a->a; 477 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 478 PetscErrorCode ierr; 479 PetscInt i,j,*v_pivots,dof,dof2; 480 PetscScalar *diag,aval,*v_work; 481 482 PetscFunctionBegin; 483 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 484 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 485 486 dof = p; 487 dof2 = dof*dof; 488 489 if (b->ibdiagvalid) { 490 if (values) *values = b->ibdiag; 491 PetscFunctionReturn(0); 492 } 493 if (!b->ibdiag) { 494 ierr = PetscMalloc(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 495 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 496 } 497 if (values) *values = b->ibdiag; 498 diag = b->ibdiag; 499 500 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 501 for (i=0; i<m; i++) { 502 if (S) { 503 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 504 } else { 505 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 506 } 507 if (b->isTI) { 508 aval = 0; 509 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 510 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 511 } else if (T) { 512 aval = 0; 513 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 514 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 515 } 516 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 517 diag += dof2; 518 } 519 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 520 521 b->ibdiagvalid = PETSC_TRUE; 522 PetscFunctionReturn(0); 523 } 524 525 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 526 { 527 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 528 529 PetscFunctionBegin; 530 *B = kaij->AIJ; 531 PetscFunctionReturn(0); 532 } 533 534 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 535 { 536 PetscErrorCode ierr; 537 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 538 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 539 const PetscScalar *aa = a->a, *T = kaij->T, *v; 540 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 541 const PetscScalar *b, *xb, *idiag; 542 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 543 PetscInt i, j, k, i2, bs, bs2, nz; 544 545 PetscFunctionBegin; 546 its = its*lits; 547 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 548 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 549 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 550 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 551 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 552 else {bs = p; bs2 = bs*bs; } 553 554 if (!m) PetscFunctionReturn(0); 555 556 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); } 557 idiag = kaij->ibdiag; 558 diag = a->diag; 559 560 if (!kaij->sor.setup) { 561 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 562 kaij->sor.setup = PETSC_TRUE; 563 } 564 y = kaij->sor.y; 565 w = kaij->sor.w; 566 work = kaij->sor.work; 567 t = kaij->sor.t; 568 arr = kaij->sor.arr; 569 570 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 571 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 572 573 if (flag & SOR_ZERO_INITIAL_GUESS) { 574 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 575 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 576 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 577 i2 = bs; 578 idiag += bs2; 579 for (i=1; i<m; i++) { 580 v = aa + ai[i]; 581 vi = aj + ai[i]; 582 nz = diag[i] - ai[i]; 583 584 if (T) { /* b - T (Arow * x) */ 585 for (k=0; k<bs; k++) w[k] = 0; 586 for (j=0; j<nz; j++) { 587 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 588 } 589 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 590 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 591 } else if (kaij->isTI) { 592 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 593 for (j=0; j<nz; j++) { 594 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 595 } 596 } else { 597 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 598 } 599 600 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 601 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 602 603 idiag += bs2; 604 i2 += bs; 605 } 606 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 607 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 608 xb = t; 609 } else xb = b; 610 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 611 idiag = kaij->ibdiag+bs2*(m-1); 612 i2 = bs * (m-1); 613 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 614 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 615 i2 -= bs; 616 idiag -= bs2; 617 for (i=m-2; i>=0; i--) { 618 v = aa + diag[i] + 1 ; 619 vi = aj + diag[i] + 1; 620 nz = ai[i+1] - diag[i] - 1; 621 622 if (T) { /* FIXME: This branch untested */ 623 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 624 /* copy all rows of x that are needed into contiguous space */ 625 workt = work; 626 for (j=0; j<nz; j++) { 627 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 628 workt += bs; 629 } 630 arrt = arr; 631 for (j=0; j<nz; j++) { 632 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 633 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 634 arrt += bs2; 635 } 636 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 637 } else if (kaij->isTI) { 638 for (k=0; k<bs; k++) w[k] = t[i2+k]; 639 for (j=0; j<nz; j++) { 640 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 641 } 642 } 643 644 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 645 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 646 647 idiag -= bs2; 648 i2 -= bs; 649 } 650 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 651 } 652 its--; 653 } 654 while (its--) { /* FIXME: This branch not updated */ 655 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 656 i2 = 0; 657 idiag = kaij->ibdiag; 658 for (i=0; i<m; i++) { 659 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 660 661 v = aa + ai[i]; 662 vi = aj + ai[i]; 663 nz = diag[i] - ai[i]; 664 workt = work; 665 for (j=0; j<nz; j++) { 666 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 667 workt += bs; 668 } 669 arrt = arr; 670 if (T) { 671 for (j=0; j<nz; j++) { 672 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 673 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 674 arrt += bs2; 675 } 676 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 677 } else if (kaij->isTI) { 678 for (j=0; j<nz; j++) { 679 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 680 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 681 arrt += bs2; 682 } 683 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 684 } 685 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 686 687 v = aa + diag[i] + 1; 688 vi = aj + diag[i] + 1; 689 nz = ai[i+1] - diag[i] - 1; 690 workt = work; 691 for (j=0; j<nz; j++) { 692 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 693 workt += bs; 694 } 695 arrt = arr; 696 if (T) { 697 for (j=0; j<nz; j++) { 698 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 699 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 700 arrt += bs2; 701 } 702 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 703 } else if (kaij->isTI) { 704 for (j=0; j<nz; j++) { 705 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 706 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 707 arrt += bs2; 708 } 709 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 710 } 711 712 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 713 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 714 715 idiag += bs2; 716 i2 += bs; 717 } 718 xb = t; 719 } 720 else xb = b; 721 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 722 idiag = kaij->ibdiag+bs2*(m-1); 723 i2 = bs * (m-1); 724 if (xb == b) { 725 for (i=m-1; i>=0; i--) { 726 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 727 728 v = aa + ai[i]; 729 vi = aj + ai[i]; 730 nz = diag[i] - ai[i]; 731 workt = work; 732 for (j=0; j<nz; j++) { 733 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 734 workt += bs; 735 } 736 arrt = arr; 737 if (T) { 738 for (j=0; j<nz; j++) { 739 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 740 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 741 arrt += bs2; 742 } 743 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 744 } else if (kaij->isTI) { 745 for (j=0; j<nz; j++) { 746 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 747 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 748 arrt += bs2; 749 } 750 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 751 } 752 753 v = aa + diag[i] + 1; 754 vi = aj + diag[i] + 1; 755 nz = ai[i+1] - diag[i] - 1; 756 workt = work; 757 for (j=0; j<nz; j++) { 758 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 759 workt += bs; 760 } 761 arrt = arr; 762 if (T) { 763 for (j=0; j<nz; j++) { 764 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 765 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 766 arrt += bs2; 767 } 768 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 769 } else if (kaij->isTI) { 770 for (j=0; j<nz; j++) { 771 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 772 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 773 arrt += bs2; 774 } 775 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 776 } 777 778 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 779 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 780 } 781 } else { 782 for (i=m-1; i>=0; i--) { 783 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 784 v = aa + diag[i] + 1; 785 vi = aj + diag[i] + 1; 786 nz = ai[i+1] - diag[i] - 1; 787 workt = work; 788 for (j=0; j<nz; j++) { 789 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 790 workt += bs; 791 } 792 arrt = arr; 793 if (T) { 794 for (j=0; j<nz; j++) { 795 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 796 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 797 arrt += bs2; 798 } 799 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 800 } else if (kaij->isTI) { 801 for (j=0; j<nz; j++) { 802 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 803 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 804 arrt += bs2; 805 } 806 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 807 } 808 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 809 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 810 } 811 idiag -= bs2; 812 i2 -= bs; 813 } 814 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 815 } 816 } 817 818 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 819 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 820 PetscFunctionReturn(0); 821 } 822 823 /*===================================================================================*/ 824 825 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz) 826 { 827 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 828 PetscErrorCode ierr; 829 830 PetscFunctionBegin; 831 if (!yy) { 832 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 833 } else { 834 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 835 } 836 /* start the scatter */ 837 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 838 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 839 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 840 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 841 PetscFunctionReturn(0); 842 } 843 844 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy) 845 { 846 PetscErrorCode ierr; 847 PetscFunctionBegin; 848 ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 849 PetscFunctionReturn(0); 850 } 851 852 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz) 853 { 854 PetscErrorCode ierr; 855 PetscFunctionBegin; 856 ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr); 857 PetscFunctionReturn(0); 858 } 859 860 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values) 861 { 862 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 863 PetscErrorCode ierr; 864 865 PetscFunctionBegin; 866 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 /* ----------------------------------------------------------------*/ 871 872 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 873 { 874 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 875 PetscErrorCode ierr,diag; 876 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 877 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 878 879 PetscFunctionBegin; 880 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 881 b->getrowactive = PETSC_TRUE; 882 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 883 884 if ((!S) && (!T) && (!b->isTI)) { 885 if (ncols) *ncols = 0; 886 if (cols) *cols = NULL; 887 if (values) *values = NULL; 888 PetscFunctionReturn(0); 889 } 890 891 if (T || b->isTI) { 892 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 893 diag = PETSC_FALSE; 894 c = nzaij; 895 for (i=0; i<nzaij; i++) { 896 /* check if this row contains a diagonal entry */ 897 if (colsaij[i] == r) { 898 diag = PETSC_TRUE; 899 c = i; 900 } 901 } 902 } else nzaij = c = 0; 903 904 /* calculate size of row */ 905 nz = 0; 906 if (S) nz += q; 907 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 908 909 if (cols || values) { 910 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 911 if (b->isTI) { 912 for (i=0; i<nzaij; i++) { 913 for (j=0; j<q; j++) { 914 idx[i*q+j] = colsaij[i]*q+j; 915 v[i*q+j] = (j==s ? vaij[i] : 0); 916 } 917 } 918 } else if (T) { 919 for (i=0; i<nzaij; i++) { 920 for (j=0; j<q; j++) { 921 idx[i*q+j] = colsaij[i]*q+j; 922 v[i*q+j] = vaij[i]*T[s+j*p]; 923 } 924 } 925 } 926 if (S) { 927 for (j=0; j<q; j++) { 928 idx[c*q+j] = r*q+j; 929 v[c*q+j] += S[s+j*p]; 930 } 931 } 932 } 933 934 if (ncols) *ncols = nz; 935 if (cols) *cols = idx; 936 if (values) *values = v; 937 938 PetscFunctionReturn(0); 939 } 940 941 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 942 { 943 PetscErrorCode ierr; 944 PetscFunctionBegin; 945 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 946 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 947 PetscFunctionReturn(0); 948 } 949 950 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 951 { 952 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 953 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 954 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 955 Mat AIJ = b->A; 956 PetscBool diag = PETSC_FALSE; 957 PetscErrorCode ierr; 958 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 959 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 960 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 961 962 PetscFunctionBegin; 963 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 964 b->getrowactive = PETSC_TRUE; 965 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 966 lrow = row - rstart; 967 968 if ((!S) && (!T) && (!b->isTI)) { 969 if (ncols) *ncols = 0; 970 if (cols) *cols = NULL; 971 if (values) *values = NULL; 972 PetscFunctionReturn(0); 973 } 974 975 r = lrow/p; 976 s = lrow%p; 977 978 if (T || b->isTI) { 979 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 980 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 981 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 982 983 c = ncolsaij + ncolsoaij; 984 for (i=0; i<ncolsaij; i++) { 985 /* check if this row contains a diagonal entry */ 986 if (colsaij[i] == r) { 987 diag = PETSC_TRUE; 988 c = i; 989 } 990 } 991 } else c = 0; 992 993 /* calculate size of row */ 994 nz = 0; 995 if (S) nz += q; 996 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 997 998 if (cols || values) { 999 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1000 if (b->isTI) { 1001 for (i=0; i<ncolsaij; i++) { 1002 for (j=0; j<q; j++) { 1003 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1004 v[i*q+j] = (j==s ? vals[i] : 0.0); 1005 } 1006 } 1007 for (i=0; i<ncolsoaij; i++) { 1008 for (j=0; j<q; j++) { 1009 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1010 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1011 } 1012 } 1013 } else if (T) { 1014 for (i=0; i<ncolsaij; i++) { 1015 for (j=0; j<q; j++) { 1016 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1017 v[i*q+j] = vals[i]*T[s+j*p]; 1018 } 1019 } 1020 for (i=0; i<ncolsoaij; i++) { 1021 for (j=0; j<q; j++) { 1022 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1023 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1024 } 1025 } 1026 } 1027 if (S) { 1028 for (j=0; j<q; j++) { 1029 idx[c*q+j] = (r+rstart/p)*q+j; 1030 v[c*q+j] += S[s+j*p]; 1031 } 1032 } 1033 } 1034 1035 if (ncols) *ncols = nz; 1036 if (cols) *cols = idx; 1037 if (values) *values = v; 1038 PetscFunctionReturn(0); 1039 } 1040 1041 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1042 { 1043 PetscErrorCode ierr; 1044 PetscFunctionBegin; 1045 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1046 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1047 PetscFunctionReturn(0); 1048 } 1049 1050 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1051 { 1052 PetscErrorCode ierr; 1053 Mat A; 1054 1055 PetscFunctionBegin; 1056 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1057 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1058 ierr = MatDestroy(&A);CHKERRQ(ierr); 1059 PetscFunctionReturn(0); 1060 } 1061 1062 /* ---------------------------------------------------------------------------------- */ 1063 /*@C 1064 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1065 1066 [I \otimes S + A \otimes T] 1067 1068 where 1069 S is a dense (p \times q) matrix 1070 T is a dense (p \times q) matrix 1071 A is an AIJ (n \times n) matrix 1072 I is the identity matrix 1073 The resulting matrix is (np \times nq) 1074 1075 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 1076 S is always stored independently on all processes as a PetscScalar array in column-major format. 1077 1078 Collective 1079 1080 Input Parameters: 1081 + A - the AIJ matrix 1082 . S - the S matrix, stored as a PetscScalar array (column-major) 1083 . T - the T matrix, stored as a PetscScalar array (column-major) 1084 . p - number of rows in S and T 1085 - q - number of columns in S and T 1086 1087 Output Parameter: 1088 . kaij - the new KAIJ matrix 1089 1090 Operations provided: 1091 + MatMult 1092 . MatMultAdd 1093 . MatInvertBlockDiagonal 1094 - MatView 1095 1096 Level: advanced 1097 1098 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1099 @*/ 1100 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1101 { 1102 PetscErrorCode ierr; 1103 PetscMPIInt size; 1104 1105 PetscFunctionBegin; 1106 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1107 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1108 if (size == 1) { 1109 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1110 } else { 1111 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1112 } 1113 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1114 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1115 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1116 ierr = MatSetUp(*kaij); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 /*MC 1121 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 1122 1123 [I \otimes S + A \otimes T] 1124 1125 where 1126 S is a dense (p \times q) matrix 1127 T is a dense (p \times q) matrix 1128 A is an AIJ (n \times n) matrix 1129 I is the identity matrix 1130 The resulting matrix is (np \times nq) 1131 1132 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 1133 S and T are always stored independently on all processes as a PetscScalar array in column-major format. 1134 1135 Operations provided: 1136 + MatMult 1137 . MatMultAdd 1138 . MatInvertBlockDiagonal 1139 - MatView 1140 1141 Level: advanced 1142 1143 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1144 M*/ 1145 1146 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1147 { 1148 PetscErrorCode ierr; 1149 Mat_MPIKAIJ *b; 1150 PetscMPIInt size; 1151 1152 PetscFunctionBegin; 1153 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1154 A->data = (void*)b; 1155 1156 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1157 1158 A->ops->setup = MatSetUp_KAIJ; 1159 1160 b->w = 0; 1161 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1162 if (size == 1) { 1163 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1164 1165 A->ops->setup = MatSetUp_KAIJ; 1166 A->ops->destroy = MatDestroy_SeqKAIJ; 1167 A->ops->view = MatView_SeqKAIJ; 1168 A->ops->mult = MatMult_SeqKAIJ_N; 1169 A->ops->multadd = MatMultAdd_SeqKAIJ_N; 1170 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N; 1171 A->ops->getrow = MatGetRow_SeqKAIJ; 1172 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1173 A->ops->sor = MatSOR_SeqKAIJ; 1174 } else { 1175 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1176 A->ops->setup = MatSetUp_KAIJ; 1177 A->ops->destroy = MatDestroy_MPIKAIJ; 1178 A->ops->view = MatView_MPIKAIJ; 1179 A->ops->mult = MatMult_MPIKAIJ_dof; 1180 A->ops->multadd = MatMultAdd_MPIKAIJ_dof; 1181 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof; 1182 A->ops->getrow = MatGetRow_MPIKAIJ; 1183 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1184 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1185 } 1186 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1187 PetscFunctionReturn(0); 1188 } 1189