1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameter: 76 . S - the S matrix, in form of a scalar array in column-major format 77 78 Notes: 79 The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 80 The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S. 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,const PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 *S = b->S; 91 PetscFunctionReturn(0); 92 } 93 94 /*@C 95 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 96 97 Not Collective; the entire T is stored and returned independently on all processes 98 99 Input Parameter: 100 . A - the KAIJ matrix 101 102 Output Parameter: 103 . T - the T matrix, in form of a scalar array in column-major format 104 105 Notes: 106 The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 107 The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T. 108 109 Level: advanced 110 111 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 112 @*/ 113 PetscErrorCode MatKAIJGetT(Mat A,const PetscScalar **T) 114 { 115 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 116 PetscFunctionBegin; 117 *T = b->T; 118 PetscFunctionReturn(0); 119 } 120 121 /*@ 122 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 123 124 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 125 126 Input Parameters: 127 + A - the KAIJ matrix 128 - B - the AIJ matrix 129 130 Level: advanced 131 132 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 133 @*/ 134 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 135 { 136 PetscErrorCode ierr; 137 PetscMPIInt size; 138 139 PetscFunctionBegin; 140 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 141 if (size == 1) { 142 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 143 a->AIJ = B; 144 } else { 145 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 146 a->A = B; 147 } 148 PetscFunctionReturn(0); 149 } 150 151 /*@C 152 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 153 154 Logically Collective; the entire S is stored independently on all processes. 155 156 Input Parameters: 157 + A - the KAIJ matrix 158 . p - the number of rows in S 159 . q - the number of columns in S 160 - S - the S matrix, in form of a scalar array in column-major format 161 162 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 163 164 Level: Advanced 165 166 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 167 @*/ 168 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 169 { 170 PetscErrorCode ierr; 171 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 172 173 PetscFunctionBegin; 174 if (a->S) { 175 ierr = PetscFree(a->S);CHKERRQ(ierr); 176 } 177 if (S) { 178 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 179 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 180 } else a->S = NULL; 181 182 a->p = p; 183 a->q = q; 184 185 PetscFunctionReturn(0); 186 } 187 188 /*@C 189 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 190 191 Logically Collective; the entire T is stored independently on all processes. 192 193 Input Parameters: 194 + A - the KAIJ matrix 195 . p - the number of rows in S 196 . q - the number of columns in S 197 - T - the T matrix, in form of a scalar array in column-major format 198 199 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 200 201 Level: Advanced 202 203 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 204 @*/ 205 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 206 { 207 PetscErrorCode ierr; 208 PetscInt i,j; 209 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 210 PetscBool isTI = PETSC_FALSE; 211 212 PetscFunctionBegin; 213 /* check if T is an identity matrix */ 214 if (T && (p == q)) { 215 isTI = PETSC_TRUE; 216 for (i=0; i<p; i++) { 217 for (j=0; j<q; j++) { 218 if (i == j) { 219 /* diagonal term must be 1 */ 220 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 221 } else { 222 /* off-diagonal term must be 0 */ 223 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 224 } 225 } 226 } 227 } 228 a->isTI = isTI; 229 230 if (a->T) { 231 ierr = PetscFree(a->T);CHKERRQ(ierr); 232 } 233 if (T && (!isTI)) { 234 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 235 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 236 } else a->T = NULL; 237 238 a->p = p; 239 a->q = q; 240 PetscFunctionReturn(0); 241 } 242 243 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 244 { 245 PetscErrorCode ierr; 246 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 247 248 PetscFunctionBegin; 249 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 250 ierr = PetscFree(b->S);CHKERRQ(ierr); 251 ierr = PetscFree(b->T);CHKERRQ(ierr); 252 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 253 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 254 ierr = PetscFree(A->data);CHKERRQ(ierr); 255 PetscFunctionReturn(0); 256 } 257 258 PetscErrorCode MatSetUp_KAIJ(Mat A) 259 { 260 PetscErrorCode ierr; 261 PetscInt n; 262 PetscMPIInt size; 263 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 264 265 PetscFunctionBegin; 266 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 267 if (size == 1) { 268 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 269 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 270 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 271 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 272 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 273 ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr); 274 } else { 275 Mat_MPIKAIJ *a; 276 Mat_MPIAIJ *mpiaij; 277 IS from,to; 278 Vec gvec; 279 PetscScalar *T; 280 PetscInt i,j; 281 282 a = (Mat_MPIKAIJ*)A->data; 283 mpiaij = (Mat_MPIAIJ*)a->A->data; 284 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 285 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 286 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 287 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 288 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 289 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 290 291 if (a->isTI) { 292 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 293 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 294 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 295 * to pass in. */ 296 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 297 for (i=0; i<a->p; i++) { 298 for (j=0; j<a->q; j++) { 299 if (i==j) T[i+j*a->p] = 1.0; 300 else T[i+j*a->p] = 0.0; 301 } 302 } 303 } else T = a->T; 304 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 305 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 306 if (a->isTI) { 307 ierr = PetscFree(T);CHKERRQ(ierr); 308 } 309 310 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 311 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 312 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 313 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 314 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 315 316 /* create two temporary Index sets for build scatter gather */ 317 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 318 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 319 320 /* create temporary global vector to generate scatter context */ 321 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 322 323 /* generate the scatter context */ 324 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 325 326 ierr = ISDestroy(&from);CHKERRQ(ierr); 327 ierr = ISDestroy(&to);CHKERRQ(ierr); 328 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 329 } 330 331 A->assembled = PETSC_TRUE; 332 PetscFunctionReturn(0); 333 } 334 335 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 336 { 337 PetscErrorCode ierr; 338 Mat B; 339 340 PetscFunctionBegin; 341 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 342 ierr = MatView(B,viewer);CHKERRQ(ierr); 343 ierr = MatDestroy(&B);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 348 { 349 PetscErrorCode ierr; 350 Mat B; 351 352 PetscFunctionBegin; 353 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 354 ierr = MatView(B,viewer);CHKERRQ(ierr); 355 ierr = MatDestroy(&B);CHKERRQ(ierr); 356 PetscFunctionReturn(0); 357 } 358 359 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 360 { 361 PetscErrorCode ierr; 362 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 363 364 PetscFunctionBegin; 365 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 366 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 367 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 368 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 369 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 370 ierr = PetscFree(b->S);CHKERRQ(ierr); 371 ierr = PetscFree(b->T);CHKERRQ(ierr); 372 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 373 ierr = PetscFree(A->data);CHKERRQ(ierr); 374 PetscFunctionReturn(0); 375 } 376 377 378 /* --------------------------------------------------------------------------------------*/ 379 380 /* zz = yy + Axx */ 381 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz) 382 { 383 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 384 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 385 const PetscScalar *s = b->S, *t = b->T; 386 const PetscScalar *x,*v,*bx; 387 PetscScalar *y,*sums; 388 PetscErrorCode ierr; 389 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 390 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 391 392 PetscFunctionBegin; 393 if (!yy) { 394 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 395 } else { 396 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 397 } 398 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 399 400 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 401 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 402 idx = a->j; 403 v = a->a; 404 ii = a->i; 405 406 if (b->isTI) { 407 for (i=0; i<m; i++) { 408 jrow = ii[i]; 409 n = ii[i+1] - jrow; 410 sums = y + p*i; 411 for (j=0; j<n; j++) { 412 for (k=0; k<p; k++) { 413 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 414 } 415 } 416 } 417 } else if (t) { 418 for (i=0; i<m; i++) { 419 jrow = ii[i]; 420 n = ii[i+1] - jrow; 421 sums = y + p*i; 422 bx = x + q*i; 423 for (j=0; j<n; j++) { 424 for (k=0; k<p; k++) { 425 for (l=0; l<q; l++) { 426 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 427 } 428 } 429 } 430 } 431 } 432 if (s) { 433 for (i=0; i<m; i++) { 434 jrow = ii[i]; 435 n = ii[i+1] - jrow; 436 sums = y + p*i; 437 bx = x + q*i; 438 if (i < b->AIJ->cmap->n) { 439 for (j=0; j<q; j++) { 440 for (k=0; k<p; k++) { 441 sums[k] += s[k+j*p]*bx[j]; 442 } 443 } 444 } 445 } 446 } 447 448 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 449 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 450 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 451 PetscFunctionReturn(0); 452 } 453 454 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy) 455 { 456 PetscErrorCode ierr; 457 PetscFunctionBegin; 458 ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 463 { 464 PetscErrorCode ierr; 465 PetscFunctionBegin; 466 ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr); 467 PetscFunctionReturn(0); 468 } 469 470 #include <petsc/private/kernels/blockinvert.h> 471 472 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values) 473 { 474 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 475 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 476 const PetscScalar *S = b->S; 477 const PetscScalar *T = b->T; 478 const PetscScalar *v = a->a; 479 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 480 PetscErrorCode ierr; 481 PetscInt i,j,*v_pivots,dof,dof2; 482 PetscScalar *diag,aval,*v_work; 483 484 PetscFunctionBegin; 485 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 486 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 487 488 dof = p; 489 dof2 = dof*dof; 490 491 if (b->ibdiagvalid) { 492 if (values) *values = b->ibdiag; 493 PetscFunctionReturn(0); 494 } 495 if (!b->ibdiag) { 496 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 497 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 498 } 499 if (values) *values = b->ibdiag; 500 diag = b->ibdiag; 501 502 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 503 for (i=0; i<m; i++) { 504 if (S) { 505 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 506 } else { 507 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 508 } 509 if (b->isTI) { 510 aval = 0; 511 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 512 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 513 } else if (T) { 514 aval = 0; 515 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 516 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 517 } 518 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 519 diag += dof2; 520 } 521 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 522 523 b->ibdiagvalid = PETSC_TRUE; 524 PetscFunctionReturn(0); 525 } 526 527 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 528 { 529 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 530 531 PetscFunctionBegin; 532 *B = kaij->AIJ; 533 PetscFunctionReturn(0); 534 } 535 536 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 537 { 538 PetscErrorCode ierr; 539 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 540 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 541 const PetscScalar *aa = a->a, *T = kaij->T, *v; 542 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 543 const PetscScalar *b, *xb, *idiag; 544 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 545 PetscInt i, j, k, i2, bs, bs2, nz; 546 547 PetscFunctionBegin; 548 its = its*lits; 549 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 550 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 551 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 552 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 553 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 554 else {bs = p; bs2 = bs*bs; } 555 556 if (!m) PetscFunctionReturn(0); 557 558 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); } 559 idiag = kaij->ibdiag; 560 diag = a->diag; 561 562 if (!kaij->sor.setup) { 563 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 564 kaij->sor.setup = PETSC_TRUE; 565 } 566 y = kaij->sor.y; 567 w = kaij->sor.w; 568 work = kaij->sor.work; 569 t = kaij->sor.t; 570 arr = kaij->sor.arr; 571 572 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 573 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 574 575 if (flag & SOR_ZERO_INITIAL_GUESS) { 576 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 577 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 578 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 579 i2 = bs; 580 idiag += bs2; 581 for (i=1; i<m; i++) { 582 v = aa + ai[i]; 583 vi = aj + ai[i]; 584 nz = diag[i] - ai[i]; 585 586 if (T) { /* b - T (Arow * x) */ 587 for (k=0; k<bs; k++) w[k] = 0; 588 for (j=0; j<nz; j++) { 589 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 590 } 591 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 592 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 593 } else if (kaij->isTI) { 594 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 595 for (j=0; j<nz; j++) { 596 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 597 } 598 } else { 599 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 600 } 601 602 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 603 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 604 605 idiag += bs2; 606 i2 += bs; 607 } 608 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 609 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 610 xb = t; 611 } else xb = b; 612 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 613 idiag = kaij->ibdiag+bs2*(m-1); 614 i2 = bs * (m-1); 615 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 616 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 617 i2 -= bs; 618 idiag -= bs2; 619 for (i=m-2; i>=0; i--) { 620 v = aa + diag[i] + 1 ; 621 vi = aj + diag[i] + 1; 622 nz = ai[i+1] - diag[i] - 1; 623 624 if (T) { /* FIXME: This branch untested */ 625 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 626 /* copy all rows of x that are needed into contiguous space */ 627 workt = work; 628 for (j=0; j<nz; j++) { 629 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 630 workt += bs; 631 } 632 arrt = arr; 633 for (j=0; j<nz; j++) { 634 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 635 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 636 arrt += bs2; 637 } 638 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 639 } else if (kaij->isTI) { 640 for (k=0; k<bs; k++) w[k] = t[i2+k]; 641 for (j=0; j<nz; j++) { 642 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 643 } 644 } 645 646 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 647 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 648 649 idiag -= bs2; 650 i2 -= bs; 651 } 652 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 653 } 654 its--; 655 } 656 while (its--) { /* FIXME: This branch not updated */ 657 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 658 i2 = 0; 659 idiag = kaij->ibdiag; 660 for (i=0; i<m; i++) { 661 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 662 663 v = aa + ai[i]; 664 vi = aj + ai[i]; 665 nz = diag[i] - ai[i]; 666 workt = work; 667 for (j=0; j<nz; j++) { 668 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 669 workt += bs; 670 } 671 arrt = arr; 672 if (T) { 673 for (j=0; j<nz; j++) { 674 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 675 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 676 arrt += bs2; 677 } 678 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 679 } else if (kaij->isTI) { 680 for (j=0; j<nz; j++) { 681 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 682 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 683 arrt += bs2; 684 } 685 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 686 } 687 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 688 689 v = aa + diag[i] + 1; 690 vi = aj + diag[i] + 1; 691 nz = ai[i+1] - diag[i] - 1; 692 workt = work; 693 for (j=0; j<nz; j++) { 694 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 695 workt += bs; 696 } 697 arrt = arr; 698 if (T) { 699 for (j=0; j<nz; j++) { 700 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 701 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 702 arrt += bs2; 703 } 704 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 705 } else if (kaij->isTI) { 706 for (j=0; j<nz; j++) { 707 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 708 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 709 arrt += bs2; 710 } 711 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 712 } 713 714 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 715 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 716 717 idiag += bs2; 718 i2 += bs; 719 } 720 xb = t; 721 } 722 else xb = b; 723 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 724 idiag = kaij->ibdiag+bs2*(m-1); 725 i2 = bs * (m-1); 726 if (xb == b) { 727 for (i=m-1; i>=0; i--) { 728 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 729 730 v = aa + ai[i]; 731 vi = aj + ai[i]; 732 nz = diag[i] - ai[i]; 733 workt = work; 734 for (j=0; j<nz; j++) { 735 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 736 workt += bs; 737 } 738 arrt = arr; 739 if (T) { 740 for (j=0; j<nz; j++) { 741 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 742 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 743 arrt += bs2; 744 } 745 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 746 } else if (kaij->isTI) { 747 for (j=0; j<nz; j++) { 748 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 749 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 750 arrt += bs2; 751 } 752 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 753 } 754 755 v = aa + diag[i] + 1; 756 vi = aj + diag[i] + 1; 757 nz = ai[i+1] - diag[i] - 1; 758 workt = work; 759 for (j=0; j<nz; j++) { 760 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 761 workt += bs; 762 } 763 arrt = arr; 764 if (T) { 765 for (j=0; j<nz; j++) { 766 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 767 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 768 arrt += bs2; 769 } 770 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 771 } else if (kaij->isTI) { 772 for (j=0; j<nz; j++) { 773 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 774 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 775 arrt += bs2; 776 } 777 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 778 } 779 780 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 781 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 782 } 783 } else { 784 for (i=m-1; i>=0; i--) { 785 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 786 v = aa + diag[i] + 1; 787 vi = aj + diag[i] + 1; 788 nz = ai[i+1] - diag[i] - 1; 789 workt = work; 790 for (j=0; j<nz; j++) { 791 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 792 workt += bs; 793 } 794 arrt = arr; 795 if (T) { 796 for (j=0; j<nz; j++) { 797 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 798 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 799 arrt += bs2; 800 } 801 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 802 } else if (kaij->isTI) { 803 for (j=0; j<nz; j++) { 804 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 805 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 806 arrt += bs2; 807 } 808 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 809 } 810 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 811 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 812 } 813 idiag -= bs2; 814 i2 -= bs; 815 } 816 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 817 } 818 } 819 820 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 821 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } 824 825 /*===================================================================================*/ 826 827 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz) 828 { 829 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 830 PetscErrorCode ierr; 831 832 PetscFunctionBegin; 833 if (!yy) { 834 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 835 } else { 836 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 837 } 838 /* start the scatter */ 839 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 840 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 841 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 843 PetscFunctionReturn(0); 844 } 845 846 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy) 847 { 848 PetscErrorCode ierr; 849 PetscFunctionBegin; 850 ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 851 PetscFunctionReturn(0); 852 } 853 854 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz) 855 { 856 PetscErrorCode ierr; 857 PetscFunctionBegin; 858 ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values) 863 { 864 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 865 PetscErrorCode ierr; 866 867 PetscFunctionBegin; 868 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 869 PetscFunctionReturn(0); 870 } 871 872 /* ----------------------------------------------------------------*/ 873 874 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 875 { 876 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 877 PetscErrorCode diag = PETSC_FALSE; 878 PetscErrorCode ierr; 879 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 880 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 881 882 PetscFunctionBegin; 883 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 884 b->getrowactive = PETSC_TRUE; 885 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 886 887 if ((!S) && (!T) && (!b->isTI)) { 888 if (ncols) *ncols = 0; 889 if (cols) *cols = NULL; 890 if (values) *values = NULL; 891 PetscFunctionReturn(0); 892 } 893 894 if (T || b->isTI) { 895 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 896 c = nzaij; 897 for (i=0; i<nzaij; i++) { 898 /* check if this row contains a diagonal entry */ 899 if (colsaij[i] == r) { 900 diag = PETSC_TRUE; 901 c = i; 902 } 903 } 904 } else nzaij = c = 0; 905 906 /* calculate size of row */ 907 nz = 0; 908 if (S) nz += q; 909 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 910 911 if (cols || values) { 912 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 913 for (i=0; i<q; i++) { 914 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 915 v[i] = 0.0; 916 } 917 if (b->isTI) { 918 for (i=0; i<nzaij; i++) { 919 for (j=0; j<q; j++) { 920 idx[i*q+j] = colsaij[i]*q+j; 921 v[i*q+j] = (j==s ? vaij[i] : 0); 922 } 923 } 924 } else if (T) { 925 for (i=0; i<nzaij; i++) { 926 for (j=0; j<q; j++) { 927 idx[i*q+j] = colsaij[i]*q+j; 928 v[i*q+j] = vaij[i]*T[s+j*p]; 929 } 930 } 931 } 932 if (S) { 933 for (j=0; j<q; j++) { 934 idx[c*q+j] = r*q+j; 935 v[c*q+j] += S[s+j*p]; 936 } 937 } 938 } 939 940 if (ncols) *ncols = nz; 941 if (cols) *cols = idx; 942 if (values) *values = v; 943 944 PetscFunctionReturn(0); 945 } 946 947 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 948 { 949 PetscErrorCode ierr; 950 PetscFunctionBegin; 951 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 952 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 953 PetscFunctionReturn(0); 954 } 955 956 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 957 { 958 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 959 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 960 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 961 Mat AIJ = b->A; 962 PetscBool diag = PETSC_FALSE; 963 PetscErrorCode ierr; 964 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 965 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 966 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 967 968 PetscFunctionBegin; 969 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 970 b->getrowactive = PETSC_TRUE; 971 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 972 lrow = row - rstart; 973 974 if ((!S) && (!T) && (!b->isTI)) { 975 if (ncols) *ncols = 0; 976 if (cols) *cols = NULL; 977 if (values) *values = NULL; 978 PetscFunctionReturn(0); 979 } 980 981 r = lrow/p; 982 s = lrow%p; 983 984 if (T || b->isTI) { 985 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 986 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 987 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 988 989 c = ncolsaij + ncolsoaij; 990 for (i=0; i<ncolsaij; i++) { 991 /* check if this row contains a diagonal entry */ 992 if (colsaij[i] == r) { 993 diag = PETSC_TRUE; 994 c = i; 995 } 996 } 997 } else c = 0; 998 999 /* calculate size of row */ 1000 nz = 0; 1001 if (S) nz += q; 1002 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1003 1004 if (cols || values) { 1005 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1006 for (i=0; i<q; i++) { 1007 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1008 v[i] = 0.0; 1009 } 1010 if (b->isTI) { 1011 for (i=0; i<ncolsaij; i++) { 1012 for (j=0; j<q; j++) { 1013 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1014 v[i*q+j] = (j==s ? vals[i] : 0.0); 1015 } 1016 } 1017 for (i=0; i<ncolsoaij; i++) { 1018 for (j=0; j<q; j++) { 1019 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1020 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1021 } 1022 } 1023 } else if (T) { 1024 for (i=0; i<ncolsaij; i++) { 1025 for (j=0; j<q; j++) { 1026 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1027 v[i*q+j] = vals[i]*T[s+j*p]; 1028 } 1029 } 1030 for (i=0; i<ncolsoaij; i++) { 1031 for (j=0; j<q; j++) { 1032 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1033 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1034 } 1035 } 1036 } 1037 if (S) { 1038 for (j=0; j<q; j++) { 1039 idx[c*q+j] = (r+rstart/p)*q+j; 1040 v[c*q+j] += S[s+j*p]; 1041 } 1042 } 1043 } 1044 1045 if (ncols) *ncols = nz; 1046 if (cols) *cols = idx; 1047 if (values) *values = v; 1048 PetscFunctionReturn(0); 1049 } 1050 1051 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1052 { 1053 PetscErrorCode ierr; 1054 PetscFunctionBegin; 1055 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1056 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1057 PetscFunctionReturn(0); 1058 } 1059 1060 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1061 { 1062 PetscErrorCode ierr; 1063 Mat A; 1064 1065 PetscFunctionBegin; 1066 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1067 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1068 ierr = MatDestroy(&A);CHKERRQ(ierr); 1069 PetscFunctionReturn(0); 1070 } 1071 1072 /* ---------------------------------------------------------------------------------- */ 1073 /*@C 1074 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1075 1076 [I \otimes S + A \otimes T] 1077 1078 where 1079 S is a dense (p \times q) matrix 1080 T is a dense (p \times q) matrix 1081 A is an AIJ (n \times n) matrix 1082 I is the identity matrix 1083 The resulting matrix is (np \times nq) 1084 1085 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 1086 S is always stored independently on all processes as a PetscScalar array in column-major format. 1087 1088 Collective 1089 1090 Input Parameters: 1091 + A - the AIJ matrix 1092 . S - the S matrix, stored as a PetscScalar array (column-major) 1093 . T - the T matrix, stored as a PetscScalar array (column-major) 1094 . p - number of rows in S and T 1095 - q - number of columns in S and T 1096 1097 Output Parameter: 1098 . kaij - the new KAIJ matrix 1099 1100 Operations provided: 1101 + MatMult 1102 . MatMultAdd 1103 . MatInvertBlockDiagonal 1104 - MatView 1105 1106 Level: advanced 1107 1108 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1109 @*/ 1110 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1111 { 1112 PetscErrorCode ierr; 1113 PetscMPIInt size; 1114 1115 PetscFunctionBegin; 1116 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1117 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1118 if (size == 1) { 1119 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1120 } else { 1121 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1122 } 1123 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1124 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1125 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1126 ierr = MatSetUp(*kaij); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 /*MC 1131 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 1132 1133 [I \otimes S + A \otimes T] 1134 1135 where 1136 S is a dense (p \times q) matrix 1137 T is a dense (p \times q) matrix 1138 A is an AIJ (n \times n) matrix 1139 I is the identity matrix 1140 The resulting matrix is (np \times nq) 1141 1142 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 1143 S and T are always stored independently on all processes as a PetscScalar array in column-major format. 1144 1145 Operations provided: 1146 + MatMult 1147 . MatMultAdd 1148 . MatInvertBlockDiagonal 1149 - MatView 1150 1151 Level: advanced 1152 1153 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1154 M*/ 1155 1156 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1157 { 1158 PetscErrorCode ierr; 1159 Mat_MPIKAIJ *b; 1160 PetscMPIInt size; 1161 1162 PetscFunctionBegin; 1163 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1164 A->data = (void*)b; 1165 1166 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1167 1168 A->ops->setup = MatSetUp_KAIJ; 1169 1170 b->w = 0; 1171 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1172 if (size == 1) { 1173 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1174 1175 A->ops->setup = MatSetUp_KAIJ; 1176 A->ops->destroy = MatDestroy_SeqKAIJ; 1177 A->ops->view = MatView_SeqKAIJ; 1178 A->ops->mult = MatMult_SeqKAIJ_N; 1179 A->ops->multadd = MatMultAdd_SeqKAIJ_N; 1180 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N; 1181 A->ops->getrow = MatGetRow_SeqKAIJ; 1182 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1183 A->ops->sor = MatSOR_SeqKAIJ; 1184 } else { 1185 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1186 A->ops->setup = MatSetUp_KAIJ; 1187 A->ops->destroy = MatDestroy_MPIKAIJ; 1188 A->ops->view = MatView_MPIKAIJ; 1189 A->ops->mult = MatMult_MPIKAIJ_dof; 1190 A->ops->multadd = MatMultAdd_MPIKAIJ_dof; 1191 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof; 1192 A->ops->getrow = MatGetRow_MPIKAIJ; 1193 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1194 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1195 } 1196 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1197 PetscFunctionReturn(0); 1198 } 1199