1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameter: 76 . S - the S matrix, in form of a scalar array in column-major format 77 78 Notes: 79 The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 80 The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S. 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,const PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 *S = b->S; 91 PetscFunctionReturn(0); 92 } 93 94 /*@C 95 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 96 97 Not Collective; the entire T is stored and returned independently on all processes 98 99 Input Parameter: 100 . A - the KAIJ matrix 101 102 Output Parameter: 103 . T - the T matrix, in form of a scalar array in column-major format 104 105 Notes: 106 The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 107 The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T. 108 109 Level: advanced 110 111 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 112 @*/ 113 PetscErrorCode MatKAIJGetT(Mat A,const PetscScalar **T) 114 { 115 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 116 PetscFunctionBegin; 117 *T = b->T; 118 PetscFunctionReturn(0); 119 } 120 121 /*@ 122 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 123 124 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 125 126 Input Parameters: 127 + A - the KAIJ matrix 128 - B - the AIJ matrix 129 130 Level: advanced 131 132 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 133 @*/ 134 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 135 { 136 PetscErrorCode ierr; 137 PetscMPIInt size; 138 139 PetscFunctionBegin; 140 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 141 if (size == 1) { 142 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 143 a->AIJ = B; 144 } else { 145 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 146 a->A = B; 147 } 148 PetscFunctionReturn(0); 149 } 150 151 /*@C 152 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 153 154 Logically Collective; the entire S is stored independently on all processes. 155 156 Input Parameters: 157 + A - the KAIJ matrix 158 . p - the number of rows in S 159 . q - the number of columns in S 160 - S - the S matrix, in form of a scalar array in column-major format 161 162 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 163 164 Level: Advanced 165 166 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 167 @*/ 168 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 169 { 170 PetscErrorCode ierr; 171 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 172 173 PetscFunctionBegin; 174 ierr = PetscFree(a->S);CHKERRQ(ierr); 175 if (S) { 176 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 177 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 178 } else a->S = NULL; 179 180 a->p = p; 181 a->q = q; 182 PetscFunctionReturn(0); 183 } 184 185 /*@C 186 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 187 188 Logically Collective; the entire T is stored independently on all processes. 189 190 Input Parameters: 191 + A - the KAIJ matrix 192 . p - the number of rows in S 193 . q - the number of columns in S 194 - T - the T matrix, in form of a scalar array in column-major format 195 196 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 197 198 Level: Advanced 199 200 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 201 @*/ 202 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 203 { 204 PetscErrorCode ierr; 205 PetscInt i,j; 206 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 207 PetscBool isTI = PETSC_FALSE; 208 209 PetscFunctionBegin; 210 /* check if T is an identity matrix */ 211 if (T && (p == q)) { 212 isTI = PETSC_TRUE; 213 for (i=0; i<p; i++) { 214 for (j=0; j<q; j++) { 215 if (i == j) { 216 /* diagonal term must be 1 */ 217 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 218 } else { 219 /* off-diagonal term must be 0 */ 220 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 221 } 222 } 223 } 224 } 225 a->isTI = isTI; 226 227 ierr = PetscFree(a->T);CHKERRQ(ierr); 228 if (T && (!isTI)) { 229 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 230 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 231 } else a->T = NULL; 232 233 a->p = p; 234 a->q = q; 235 PetscFunctionReturn(0); 236 } 237 238 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 239 { 240 PetscErrorCode ierr; 241 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 242 243 PetscFunctionBegin; 244 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 245 ierr = PetscFree(b->S);CHKERRQ(ierr); 246 ierr = PetscFree(b->T);CHKERRQ(ierr); 247 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 248 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 249 ierr = PetscFree(A->data);CHKERRQ(ierr); 250 PetscFunctionReturn(0); 251 } 252 253 PetscErrorCode MatSetUp_KAIJ(Mat A) 254 { 255 PetscErrorCode ierr; 256 PetscInt n; 257 PetscMPIInt size; 258 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 259 260 PetscFunctionBegin; 261 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 262 if (size == 1) { 263 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 264 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 265 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 266 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 267 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 268 ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr); 269 } else { 270 Mat_MPIKAIJ *a; 271 Mat_MPIAIJ *mpiaij; 272 IS from,to; 273 Vec gvec; 274 PetscScalar *T; 275 PetscInt i,j; 276 277 a = (Mat_MPIKAIJ*)A->data; 278 mpiaij = (Mat_MPIAIJ*)a->A->data; 279 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 280 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 281 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 282 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 283 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 284 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 285 286 if (a->isTI) { 287 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 288 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 289 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 290 * to pass in. */ 291 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 292 for (i=0; i<a->p; i++) { 293 for (j=0; j<a->q; j++) { 294 if (i==j) T[i+j*a->p] = 1.0; 295 else T[i+j*a->p] = 0.0; 296 } 297 } 298 } else T = a->T; 299 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 300 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 301 ierr = PetscFree(T);CHKERRQ(ierr); 302 303 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 304 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 305 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 306 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 307 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 308 309 /* create two temporary Index sets for build scatter gather */ 310 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 311 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 312 313 /* create temporary global vector to generate scatter context */ 314 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 315 316 /* generate the scatter context */ 317 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 318 319 ierr = ISDestroy(&from);CHKERRQ(ierr); 320 ierr = ISDestroy(&to);CHKERRQ(ierr); 321 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 322 } 323 324 A->assembled = PETSC_TRUE; 325 PetscFunctionReturn(0); 326 } 327 328 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 329 { 330 PetscErrorCode ierr; 331 Mat B; 332 333 PetscFunctionBegin; 334 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 335 ierr = MatView(B,viewer);CHKERRQ(ierr); 336 ierr = MatDestroy(&B);CHKERRQ(ierr); 337 PetscFunctionReturn(0); 338 } 339 340 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 341 { 342 PetscErrorCode ierr; 343 Mat B; 344 345 PetscFunctionBegin; 346 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 347 ierr = MatView(B,viewer);CHKERRQ(ierr); 348 ierr = MatDestroy(&B);CHKERRQ(ierr); 349 PetscFunctionReturn(0); 350 } 351 352 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 353 { 354 PetscErrorCode ierr; 355 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 356 357 PetscFunctionBegin; 358 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 359 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 360 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 361 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 362 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 363 ierr = PetscFree(b->S);CHKERRQ(ierr); 364 ierr = PetscFree(b->T);CHKERRQ(ierr); 365 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 366 ierr = PetscFree(A->data);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 /* --------------------------------------------------------------------------------------*/ 371 372 /* zz = yy + Axx */ 373 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz) 374 { 375 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 376 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 377 const PetscScalar *s = b->S, *t = b->T; 378 const PetscScalar *x,*v,*bx; 379 PetscScalar *y,*sums; 380 PetscErrorCode ierr; 381 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 382 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 383 384 PetscFunctionBegin; 385 if (!yy) { 386 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 387 } else { 388 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 389 } 390 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 391 392 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 393 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 394 idx = a->j; 395 v = a->a; 396 ii = a->i; 397 398 if (b->isTI) { 399 for (i=0; i<m; i++) { 400 jrow = ii[i]; 401 n = ii[i+1] - jrow; 402 sums = y + p*i; 403 for (j=0; j<n; j++) { 404 for (k=0; k<p; k++) { 405 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 406 } 407 } 408 } 409 } else if (t) { 410 for (i=0; i<m; i++) { 411 jrow = ii[i]; 412 n = ii[i+1] - jrow; 413 sums = y + p*i; 414 bx = x + q*i; 415 for (j=0; j<n; j++) { 416 for (k=0; k<p; k++) { 417 for (l=0; l<q; l++) { 418 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 419 } 420 } 421 } 422 } 423 } 424 if (s) { 425 for (i=0; i<m; i++) { 426 jrow = ii[i]; 427 n = ii[i+1] - jrow; 428 sums = y + p*i; 429 bx = x + q*i; 430 if (i < b->AIJ->cmap->n) { 431 for (j=0; j<q; j++) { 432 for (k=0; k<p; k++) { 433 sums[k] += s[k+j*p]*bx[j]; 434 } 435 } 436 } 437 } 438 } 439 440 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 441 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 442 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 443 PetscFunctionReturn(0); 444 } 445 446 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy) 447 { 448 PetscErrorCode ierr; 449 PetscFunctionBegin; 450 ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 451 PetscFunctionReturn(0); 452 } 453 454 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 455 { 456 PetscErrorCode ierr; 457 PetscFunctionBegin; 458 ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 #include <petsc/private/kernels/blockinvert.h> 463 464 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values) 465 { 466 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 467 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 468 const PetscScalar *S = b->S; 469 const PetscScalar *T = b->T; 470 const PetscScalar *v = a->a; 471 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 472 PetscErrorCode ierr; 473 PetscInt i,j,*v_pivots,dof,dof2; 474 PetscScalar *diag,aval,*v_work; 475 476 PetscFunctionBegin; 477 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 478 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 479 480 dof = p; 481 dof2 = dof*dof; 482 483 if (b->ibdiagvalid) { 484 if (values) *values = b->ibdiag; 485 PetscFunctionReturn(0); 486 } 487 if (!b->ibdiag) { 488 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 489 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 490 } 491 if (values) *values = b->ibdiag; 492 diag = b->ibdiag; 493 494 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 495 for (i=0; i<m; i++) { 496 if (S) { 497 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 498 } else { 499 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 500 } 501 if (b->isTI) { 502 aval = 0; 503 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 504 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 505 } else if (T) { 506 aval = 0; 507 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 508 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 509 } 510 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 511 diag += dof2; 512 } 513 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 514 515 b->ibdiagvalid = PETSC_TRUE; 516 PetscFunctionReturn(0); 517 } 518 519 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 520 { 521 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 522 523 PetscFunctionBegin; 524 *B = kaij->AIJ; 525 PetscFunctionReturn(0); 526 } 527 528 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 529 { 530 PetscErrorCode ierr; 531 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 532 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 533 const PetscScalar *aa = a->a, *T = kaij->T, *v; 534 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 535 const PetscScalar *b, *xb, *idiag; 536 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 537 PetscInt i, j, k, i2, bs, bs2, nz; 538 539 PetscFunctionBegin; 540 its = its*lits; 541 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 542 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 543 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 544 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 545 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 546 else {bs = p; bs2 = bs*bs; } 547 548 if (!m) PetscFunctionReturn(0); 549 550 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); } 551 idiag = kaij->ibdiag; 552 diag = a->diag; 553 554 if (!kaij->sor.setup) { 555 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 556 kaij->sor.setup = PETSC_TRUE; 557 } 558 y = kaij->sor.y; 559 w = kaij->sor.w; 560 work = kaij->sor.work; 561 t = kaij->sor.t; 562 arr = kaij->sor.arr; 563 564 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 565 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 566 567 if (flag & SOR_ZERO_INITIAL_GUESS) { 568 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 569 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 570 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 571 i2 = bs; 572 idiag += bs2; 573 for (i=1; i<m; i++) { 574 v = aa + ai[i]; 575 vi = aj + ai[i]; 576 nz = diag[i] - ai[i]; 577 578 if (T) { /* b - T (Arow * x) */ 579 for (k=0; k<bs; k++) w[k] = 0; 580 for (j=0; j<nz; j++) { 581 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 582 } 583 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 584 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 585 } else if (kaij->isTI) { 586 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 587 for (j=0; j<nz; j++) { 588 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 589 } 590 } else { 591 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 592 } 593 594 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 595 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 596 597 idiag += bs2; 598 i2 += bs; 599 } 600 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 601 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 602 xb = t; 603 } else xb = b; 604 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 605 idiag = kaij->ibdiag+bs2*(m-1); 606 i2 = bs * (m-1); 607 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 608 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 609 i2 -= bs; 610 idiag -= bs2; 611 for (i=m-2; i>=0; i--) { 612 v = aa + diag[i] + 1 ; 613 vi = aj + diag[i] + 1; 614 nz = ai[i+1] - diag[i] - 1; 615 616 if (T) { /* FIXME: This branch untested */ 617 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 618 /* copy all rows of x that are needed into contiguous space */ 619 workt = work; 620 for (j=0; j<nz; j++) { 621 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 622 workt += bs; 623 } 624 arrt = arr; 625 for (j=0; j<nz; j++) { 626 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 627 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 628 arrt += bs2; 629 } 630 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 631 } else if (kaij->isTI) { 632 for (k=0; k<bs; k++) w[k] = t[i2+k]; 633 for (j=0; j<nz; j++) { 634 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 635 } 636 } 637 638 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 639 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 640 641 idiag -= bs2; 642 i2 -= bs; 643 } 644 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 645 } 646 its--; 647 } 648 while (its--) { /* FIXME: This branch not updated */ 649 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 650 i2 = 0; 651 idiag = kaij->ibdiag; 652 for (i=0; i<m; i++) { 653 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 654 655 v = aa + ai[i]; 656 vi = aj + ai[i]; 657 nz = diag[i] - ai[i]; 658 workt = work; 659 for (j=0; j<nz; j++) { 660 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 661 workt += bs; 662 } 663 arrt = arr; 664 if (T) { 665 for (j=0; j<nz; j++) { 666 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 667 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 668 arrt += bs2; 669 } 670 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 671 } else if (kaij->isTI) { 672 for (j=0; j<nz; j++) { 673 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 674 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 675 arrt += bs2; 676 } 677 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 678 } 679 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 680 681 v = aa + diag[i] + 1; 682 vi = aj + diag[i] + 1; 683 nz = ai[i+1] - diag[i] - 1; 684 workt = work; 685 for (j=0; j<nz; j++) { 686 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 687 workt += bs; 688 } 689 arrt = arr; 690 if (T) { 691 for (j=0; j<nz; j++) { 692 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 693 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 694 arrt += bs2; 695 } 696 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 697 } else if (kaij->isTI) { 698 for (j=0; j<nz; j++) { 699 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 700 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 701 arrt += bs2; 702 } 703 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 704 } 705 706 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 707 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 708 709 idiag += bs2; 710 i2 += bs; 711 } 712 xb = t; 713 } 714 else xb = b; 715 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 716 idiag = kaij->ibdiag+bs2*(m-1); 717 i2 = bs * (m-1); 718 if (xb == b) { 719 for (i=m-1; i>=0; i--) { 720 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 721 722 v = aa + ai[i]; 723 vi = aj + ai[i]; 724 nz = diag[i] - ai[i]; 725 workt = work; 726 for (j=0; j<nz; j++) { 727 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 728 workt += bs; 729 } 730 arrt = arr; 731 if (T) { 732 for (j=0; j<nz; j++) { 733 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 734 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 735 arrt += bs2; 736 } 737 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 738 } else if (kaij->isTI) { 739 for (j=0; j<nz; j++) { 740 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 741 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 742 arrt += bs2; 743 } 744 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 745 } 746 747 v = aa + diag[i] + 1; 748 vi = aj + diag[i] + 1; 749 nz = ai[i+1] - diag[i] - 1; 750 workt = work; 751 for (j=0; j<nz; j++) { 752 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 753 workt += bs; 754 } 755 arrt = arr; 756 if (T) { 757 for (j=0; j<nz; j++) { 758 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 759 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 760 arrt += bs2; 761 } 762 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 763 } else if (kaij->isTI) { 764 for (j=0; j<nz; j++) { 765 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 766 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 767 arrt += bs2; 768 } 769 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 770 } 771 772 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 773 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 774 } 775 } else { 776 for (i=m-1; i>=0; i--) { 777 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 778 v = aa + diag[i] + 1; 779 vi = aj + diag[i] + 1; 780 nz = ai[i+1] - diag[i] - 1; 781 workt = work; 782 for (j=0; j<nz; j++) { 783 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 784 workt += bs; 785 } 786 arrt = arr; 787 if (T) { 788 for (j=0; j<nz; j++) { 789 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 790 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 791 arrt += bs2; 792 } 793 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 794 } else if (kaij->isTI) { 795 for (j=0; j<nz; j++) { 796 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 797 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 798 arrt += bs2; 799 } 800 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 801 } 802 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 803 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 804 } 805 idiag -= bs2; 806 i2 -= bs; 807 } 808 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 809 } 810 } 811 812 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 813 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 814 PetscFunctionReturn(0); 815 } 816 817 /*===================================================================================*/ 818 819 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz) 820 { 821 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 822 PetscErrorCode ierr; 823 824 PetscFunctionBegin; 825 if (!yy) { 826 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 827 } else { 828 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 829 } 830 /* start the scatter */ 831 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 832 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 833 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 834 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 835 PetscFunctionReturn(0); 836 } 837 838 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy) 839 { 840 PetscErrorCode ierr; 841 PetscFunctionBegin; 842 ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 843 PetscFunctionReturn(0); 844 } 845 846 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz) 847 { 848 PetscErrorCode ierr; 849 PetscFunctionBegin; 850 ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr); 851 PetscFunctionReturn(0); 852 } 853 854 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values) 855 { 856 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 857 PetscErrorCode ierr; 858 859 PetscFunctionBegin; 860 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 861 PetscFunctionReturn(0); 862 } 863 864 /* ----------------------------------------------------------------*/ 865 866 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 867 { 868 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 869 PetscErrorCode diag = PETSC_FALSE; 870 PetscErrorCode ierr; 871 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 872 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 873 874 PetscFunctionBegin; 875 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 876 b->getrowactive = PETSC_TRUE; 877 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 878 879 if ((!S) && (!T) && (!b->isTI)) { 880 if (ncols) *ncols = 0; 881 if (cols) *cols = NULL; 882 if (values) *values = NULL; 883 PetscFunctionReturn(0); 884 } 885 886 if (T || b->isTI) { 887 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 888 c = nzaij; 889 for (i=0; i<nzaij; i++) { 890 /* check if this row contains a diagonal entry */ 891 if (colsaij[i] == r) { 892 diag = PETSC_TRUE; 893 c = i; 894 } 895 } 896 } else nzaij = c = 0; 897 898 /* calculate size of row */ 899 nz = 0; 900 if (S) nz += q; 901 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 902 903 if (cols || values) { 904 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 905 for (i=0; i<q; i++) { 906 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 907 v[i] = 0.0; 908 } 909 if (b->isTI) { 910 for (i=0; i<nzaij; i++) { 911 for (j=0; j<q; j++) { 912 idx[i*q+j] = colsaij[i]*q+j; 913 v[i*q+j] = (j==s ? vaij[i] : 0); 914 } 915 } 916 } else if (T) { 917 for (i=0; i<nzaij; i++) { 918 for (j=0; j<q; j++) { 919 idx[i*q+j] = colsaij[i]*q+j; 920 v[i*q+j] = vaij[i]*T[s+j*p]; 921 } 922 } 923 } 924 if (S) { 925 for (j=0; j<q; j++) { 926 idx[c*q+j] = r*q+j; 927 v[c*q+j] += S[s+j*p]; 928 } 929 } 930 } 931 932 if (ncols) *ncols = nz; 933 if (cols) *cols = idx; 934 if (values) *values = v; 935 PetscFunctionReturn(0); 936 } 937 938 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 939 { 940 PetscErrorCode ierr; 941 PetscFunctionBegin; 942 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 943 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 944 PetscFunctionReturn(0); 945 } 946 947 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 948 { 949 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 950 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 951 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 952 Mat AIJ = b->A; 953 PetscBool diag = PETSC_FALSE; 954 PetscErrorCode ierr; 955 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 956 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 957 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 958 959 PetscFunctionBegin; 960 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 961 b->getrowactive = PETSC_TRUE; 962 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 963 lrow = row - rstart; 964 965 if ((!S) && (!T) && (!b->isTI)) { 966 if (ncols) *ncols = 0; 967 if (cols) *cols = NULL; 968 if (values) *values = NULL; 969 PetscFunctionReturn(0); 970 } 971 972 r = lrow/p; 973 s = lrow%p; 974 975 if (T || b->isTI) { 976 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 977 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 978 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 979 980 c = ncolsaij + ncolsoaij; 981 for (i=0; i<ncolsaij; i++) { 982 /* check if this row contains a diagonal entry */ 983 if (colsaij[i] == r) { 984 diag = PETSC_TRUE; 985 c = i; 986 } 987 } 988 } else c = 0; 989 990 /* calculate size of row */ 991 nz = 0; 992 if (S) nz += q; 993 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 994 995 if (cols || values) { 996 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 997 for (i=0; i<q; i++) { 998 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 999 v[i] = 0.0; 1000 } 1001 if (b->isTI) { 1002 for (i=0; i<ncolsaij; i++) { 1003 for (j=0; j<q; j++) { 1004 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1005 v[i*q+j] = (j==s ? vals[i] : 0.0); 1006 } 1007 } 1008 for (i=0; i<ncolsoaij; i++) { 1009 for (j=0; j<q; j++) { 1010 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1011 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1012 } 1013 } 1014 } else if (T) { 1015 for (i=0; i<ncolsaij; i++) { 1016 for (j=0; j<q; j++) { 1017 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1018 v[i*q+j] = vals[i]*T[s+j*p]; 1019 } 1020 } 1021 for (i=0; i<ncolsoaij; i++) { 1022 for (j=0; j<q; j++) { 1023 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1024 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1025 } 1026 } 1027 } 1028 if (S) { 1029 for (j=0; j<q; j++) { 1030 idx[c*q+j] = (r+rstart/p)*q+j; 1031 v[c*q+j] += S[s+j*p]; 1032 } 1033 } 1034 } 1035 1036 if (ncols) *ncols = nz; 1037 if (cols) *cols = idx; 1038 if (values) *values = v; 1039 PetscFunctionReturn(0); 1040 } 1041 1042 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1043 { 1044 PetscErrorCode ierr; 1045 PetscFunctionBegin; 1046 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1047 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1048 PetscFunctionReturn(0); 1049 } 1050 1051 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1052 { 1053 PetscErrorCode ierr; 1054 Mat A; 1055 1056 PetscFunctionBegin; 1057 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1058 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1059 ierr = MatDestroy(&A);CHKERRQ(ierr); 1060 PetscFunctionReturn(0); 1061 } 1062 1063 /* ---------------------------------------------------------------------------------- */ 1064 /*@C 1065 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1066 1067 [I \otimes S + A \otimes T] 1068 1069 where 1070 S is a dense (p \times q) matrix 1071 T is a dense (p \times q) matrix 1072 A is an AIJ (n \times n) matrix 1073 I is the identity matrix 1074 The resulting matrix is (np \times nq) 1075 1076 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 1077 S is always stored independently on all processes as a PetscScalar array in column-major format. 1078 1079 Collective 1080 1081 Input Parameters: 1082 + A - the AIJ matrix 1083 . S - the S matrix, stored as a PetscScalar array (column-major) 1084 . T - the T matrix, stored as a PetscScalar array (column-major) 1085 . p - number of rows in S and T 1086 - q - number of columns in S and T 1087 1088 Output Parameter: 1089 . kaij - the new KAIJ matrix 1090 1091 Operations provided: 1092 + MatMult 1093 . MatMultAdd 1094 . MatInvertBlockDiagonal 1095 - MatView 1096 1097 Level: advanced 1098 1099 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1100 @*/ 1101 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1102 { 1103 PetscErrorCode ierr; 1104 PetscMPIInt size; 1105 1106 PetscFunctionBegin; 1107 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1108 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1109 if (size == 1) { 1110 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1111 } else { 1112 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1113 } 1114 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1115 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1116 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1117 ierr = MatSetUp(*kaij); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 /*MC 1122 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 1123 1124 [I \otimes S + A \otimes T] 1125 1126 where 1127 S is a dense (p \times q) matrix 1128 T is a dense (p \times q) matrix 1129 A is an AIJ (n \times n) matrix 1130 I is the identity matrix 1131 The resulting matrix is (np \times nq) 1132 1133 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 1134 S and T are always stored independently on all processes as a PetscScalar array in column-major format. 1135 1136 Operations provided: 1137 + MatMult 1138 . MatMultAdd 1139 . MatInvertBlockDiagonal 1140 - MatView 1141 1142 Level: advanced 1143 1144 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1145 M*/ 1146 1147 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1148 { 1149 PetscErrorCode ierr; 1150 Mat_MPIKAIJ *b; 1151 PetscMPIInt size; 1152 1153 PetscFunctionBegin; 1154 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1155 A->data = (void*)b; 1156 1157 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1158 1159 A->ops->setup = MatSetUp_KAIJ; 1160 1161 b->w = 0; 1162 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1163 if (size == 1) { 1164 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1165 A->ops->setup = MatSetUp_KAIJ; 1166 A->ops->destroy = MatDestroy_SeqKAIJ; 1167 A->ops->view = MatView_SeqKAIJ; 1168 A->ops->mult = MatMult_SeqKAIJ_N; 1169 A->ops->multadd = MatMultAdd_SeqKAIJ_N; 1170 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N; 1171 A->ops->getrow = MatGetRow_SeqKAIJ; 1172 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1173 A->ops->sor = MatSOR_SeqKAIJ; 1174 } else { 1175 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1176 A->ops->setup = MatSetUp_KAIJ; 1177 A->ops->destroy = MatDestroy_MPIKAIJ; 1178 A->ops->view = MatView_MPIKAIJ; 1179 A->ops->mult = MatMult_MPIKAIJ_dof; 1180 A->ops->multadd = MatMultAdd_MPIKAIJ_dof; 1181 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof; 1182 A->ops->getrow = MatGetRow_MPIKAIJ; 1183 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1184 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1185 } 1186 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1187 PetscFunctionReturn(0); 1188 } 1189