1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else { 64 *B = A; 65 } 66 PetscFunctionReturn(0); 67 } 68 69 /*@C 70 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 71 72 Not Collective, the entire S is stored and returned independently on all processes 73 74 Input Parameter: 75 . A - the KAIJ matrix 76 77 Output Parameter: 78 . S - the S matrix, in form of a scalar array in column-major format 79 80 Notes: 81 The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 82 The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S. 83 84 Level: advanced 85 86 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 87 @*/ 88 PetscErrorCode MatKAIJGetS(Mat A,const PetscScalar **S) 89 { 90 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 91 PetscFunctionBegin; 92 *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 98 99 Not Collective, the entire T is stored and returned independently on all processes 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameter: 105 . T - the T matrix, in form of a scalar array in column-major format 106 107 Notes: 108 The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 109 The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T. 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetT(Mat A,const PetscScalar **T) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 *T = b->T; 120 PetscFunctionReturn(0); 121 } 122 123 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 124 { 125 PetscErrorCode ierr; 126 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 127 128 PetscFunctionBegin; 129 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 130 ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr); 131 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 132 ierr = PetscFree(A->data);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatSetUp_KAIJ(Mat A) 137 { 138 PetscFunctionBegin; 139 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateKAIJ() to create KAIJ matrices"); 140 PetscFunctionReturn(0); 141 } 142 143 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 144 { 145 PetscErrorCode ierr; 146 Mat B; 147 148 PetscFunctionBegin; 149 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 150 ierr = MatView(B,viewer);CHKERRQ(ierr); 151 ierr = MatDestroy(&B);CHKERRQ(ierr); 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 156 { 157 PetscErrorCode ierr; 158 Mat B; 159 160 PetscFunctionBegin; 161 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 162 ierr = MatView(B,viewer);CHKERRQ(ierr); 163 ierr = MatDestroy(&B);CHKERRQ(ierr); 164 PetscFunctionReturn(0); 165 } 166 167 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 168 { 169 PetscErrorCode ierr; 170 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 171 172 PetscFunctionBegin; 173 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 174 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 175 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 176 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 177 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 178 ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr); 179 ierr = PetscFree(A->data);CHKERRQ(ierr); 180 PetscFunctionReturn(0); 181 } 182 183 /*MC 184 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 185 186 [I \otimes S + A \otimes T] 187 188 where 189 S is a dense (p \times q) matrix 190 T is a dense (p \times q) matrix 191 A is an AIJ (n \times n) matrix 192 I is the identity matrix 193 The resulting matrix is (np \times nq) 194 195 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 196 S and T are always stored independently on all processes as a PetscScalar array in column-major format. 197 198 Operations provided: 199 . MatMult 200 . MatMultAdd 201 . MatInvertBlockDiagonal 202 203 Level: advanced 204 205 .seealso: MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 206 M*/ 207 208 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 209 { 210 PetscErrorCode ierr; 211 Mat_MPIKAIJ *b; 212 PetscMPIInt size; 213 214 PetscFunctionBegin; 215 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 216 A->data = (void*)b; 217 218 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 219 220 A->ops->setup = MatSetUp_KAIJ; 221 222 b->w = 0; 223 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 224 if (size == 1) { 225 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 226 } else { 227 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 228 } 229 PetscFunctionReturn(0); 230 } 231 232 /* --------------------------------------------------------------------------------------*/ 233 234 /* zz = yy + Axx */ 235 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz) 236 { 237 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 238 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 239 const PetscScalar *s = b->S, *t = b->T; 240 const PetscScalar *x,*v,*bx; 241 PetscScalar *y,*sums; 242 PetscErrorCode ierr; 243 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 244 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 245 246 PetscFunctionBegin; 247 if (!yy) { 248 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 249 } else { 250 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 251 } 252 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 253 254 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 255 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 256 idx = a->j; 257 v = a->a; 258 ii = a->i; 259 260 if (b->isTI) { 261 for (i=0; i<m; i++) { 262 jrow = ii[i]; 263 n = ii[i+1] - jrow; 264 sums = y + p*i; 265 for (j=0; j<n; j++) { 266 for (k=0; k<p; k++) { 267 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 268 } 269 } 270 } 271 } else if (t) { 272 for (i=0; i<m; i++) { 273 jrow = ii[i]; 274 n = ii[i+1] - jrow; 275 sums = y + p*i; 276 bx = x + q*i; 277 for (j=0; j<n; j++) { 278 for (k=0; k<p; k++) { 279 for (l=0; l<q; l++) { 280 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 281 } 282 } 283 } 284 } 285 } 286 if (s) { 287 for (i=0; i<m; i++) { 288 jrow = ii[i]; 289 n = ii[i+1] - jrow; 290 sums = y + p*i; 291 bx = x + q*i; 292 if (i < b->AIJ->cmap->n) { 293 for (j=0; j<q; j++) { 294 for (k=0; k<p; k++) { 295 sums[k] += s[k+j*p]*bx[j]; 296 } 297 } 298 } 299 } 300 } 301 302 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 303 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 304 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 305 PetscFunctionReturn(0); 306 } 307 308 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy) 309 { 310 PetscErrorCode ierr; 311 PetscFunctionBegin; 312 ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 317 { 318 PetscErrorCode ierr; 319 PetscFunctionBegin; 320 ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323 324 #include <petsc/private/kernels/blockinvert.h> 325 326 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values) 327 { 328 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 329 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 330 const PetscScalar *S = b->S; 331 const PetscScalar *T = b->T; 332 const PetscScalar *v = a->a; 333 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 334 PetscErrorCode ierr; 335 PetscInt i,j,*v_pivots,dof,dof2; 336 PetscScalar *diag,aval,*v_work; 337 338 PetscFunctionBegin; 339 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 340 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 341 342 dof = p; 343 dof2 = dof*dof; 344 345 if (b->ibdiagvalid) { 346 if (values) *values = b->ibdiag; 347 PetscFunctionReturn(0); 348 } 349 if (!b->ibdiag) { 350 ierr = PetscMalloc(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 351 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 352 } 353 if (values) *values = b->ibdiag; 354 diag = b->ibdiag; 355 356 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 357 for (i=0; i<m; i++) { 358 if (S) { 359 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 360 } else { 361 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 362 } 363 if (b->isTI) { 364 aval = 0; 365 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 366 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 367 } else if (T) { 368 aval = 0; 369 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 370 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 371 } 372 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 373 diag += dof2; 374 } 375 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 376 377 b->ibdiagvalid = PETSC_TRUE; 378 PetscFunctionReturn(0); 379 } 380 381 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 382 { 383 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 384 385 PetscFunctionBegin; 386 *B = kaij->AIJ; 387 PetscFunctionReturn(0); 388 } 389 390 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 391 { 392 PetscErrorCode ierr; 393 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 394 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 395 const PetscScalar *aa = a->a, *T = kaij->T, *v; 396 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 397 const PetscScalar *b, *xb, *idiag; 398 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 399 PetscInt i, j, k, i2, bs, bs2, nz; 400 401 402 PetscFunctionBegin; 403 its = its*lits; 404 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 405 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 406 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 407 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 408 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 409 else {bs = p; bs2 = bs*bs; } 410 411 if (!m) PetscFunctionReturn(0); 412 413 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); } 414 idiag = kaij->ibdiag; 415 diag = a->diag; 416 417 if (!kaij->sor.setup) { 418 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 419 kaij->sor.setup = PETSC_TRUE; 420 } 421 y = kaij->sor.y; 422 w = kaij->sor.w; 423 work = kaij->sor.work; 424 t = kaij->sor.t; 425 arr = kaij->sor.arr; 426 427 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 428 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 429 430 if (flag & SOR_ZERO_INITIAL_GUESS) { 431 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 432 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 433 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 434 i2 = bs; 435 idiag += bs2; 436 for (i=1; i<m; i++) { 437 v = aa + ai[i]; 438 vi = aj + ai[i]; 439 nz = diag[i] - ai[i]; 440 441 if (T) { /* b - T (Arow * x) */ 442 for (k=0; k<bs; k++) w[k] = 0; 443 for (j=0; j<nz; j++) { 444 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 445 } 446 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 447 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 448 } else if (kaij->isTI) { 449 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 450 for (j=0; j<nz; j++) { 451 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 452 } 453 } else { 454 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 455 } 456 457 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 458 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 459 460 idiag += bs2; 461 i2 += bs; 462 } 463 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 464 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 465 xb = t; 466 } else xb = b; 467 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 468 idiag = kaij->ibdiag+bs2*(m-1); 469 i2 = bs * (m-1); 470 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 472 i2 -= bs; 473 idiag -= bs2; 474 for (i=m-2; i>=0; i--) { 475 v = aa + diag[i] + 1 ; 476 vi = aj + diag[i] + 1; 477 nz = ai[i+1] - diag[i] - 1; 478 479 if (T) { /* FIXME: This branch untested */ 480 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 481 /* copy all rows of x that are needed into contiguous space */ 482 workt = work; 483 for (j=0; j<nz; j++) { 484 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 485 workt += bs; 486 } 487 arrt = arr; 488 for (j=0; j<nz; j++) { 489 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 490 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 491 arrt += bs2; 492 } 493 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 494 } else if (kaij->isTI) { 495 for (k=0; k<bs; k++) w[k] = t[i2+k]; 496 for (j=0; j<nz; j++) { 497 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 498 } 499 } 500 501 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 502 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 503 504 idiag -= bs2; 505 i2 -= bs; 506 } 507 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 508 } 509 its--; 510 } 511 while (its--) { /* FIXME: This branch not updated */ 512 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 513 i2 = 0; 514 idiag = kaij->ibdiag; 515 for (i=0; i<m; i++) { 516 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 v = aa + ai[i]; 519 vi = aj + ai[i]; 520 nz = diag[i] - ai[i]; 521 workt = work; 522 for (j=0; j<nz; j++) { 523 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 524 workt += bs; 525 } 526 arrt = arr; 527 if (T) { 528 for (j=0; j<nz; j++) { 529 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 530 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 531 arrt += bs2; 532 } 533 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 534 } else if (kaij->isTI) { 535 for (j=0; j<nz; j++) { 536 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 537 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 538 arrt += bs2; 539 } 540 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 541 } 542 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 543 544 v = aa + diag[i] + 1; 545 vi = aj + diag[i] + 1; 546 nz = ai[i+1] - diag[i] - 1; 547 workt = work; 548 for (j=0; j<nz; j++) { 549 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 550 workt += bs; 551 } 552 arrt = arr; 553 if (T) { 554 for (j=0; j<nz; j++) { 555 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 556 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 557 arrt += bs2; 558 } 559 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 560 } else if (kaij->isTI) { 561 for (j=0; j<nz; j++) { 562 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 563 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 564 arrt += bs2; 565 } 566 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 567 } 568 569 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 570 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 571 572 idiag += bs2; 573 i2 += bs; 574 } 575 xb = t; 576 } 577 else xb = b; 578 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 579 idiag = kaij->ibdiag+bs2*(m-1); 580 i2 = bs * (m-1); 581 if (xb == b) { 582 for (i=m-1; i>=0; i--) { 583 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 584 585 v = aa + ai[i]; 586 vi = aj + ai[i]; 587 nz = diag[i] - ai[i]; 588 workt = work; 589 for (j=0; j<nz; j++) { 590 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 591 workt += bs; 592 } 593 arrt = arr; 594 if (T) { 595 for (j=0; j<nz; j++) { 596 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 597 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 598 arrt += bs2; 599 } 600 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 601 } else if (kaij->isTI) { 602 for (j=0; j<nz; j++) { 603 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 604 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 605 arrt += bs2; 606 } 607 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 608 } 609 610 v = aa + diag[i] + 1; 611 vi = aj + diag[i] + 1; 612 nz = ai[i+1] - diag[i] - 1; 613 workt = work; 614 for (j=0; j<nz; j++) { 615 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 616 workt += bs; 617 } 618 arrt = arr; 619 if (T) { 620 for (j=0; j<nz; j++) { 621 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 622 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 623 arrt += bs2; 624 } 625 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 626 } else if (kaij->isTI) { 627 for (j=0; j<nz; j++) { 628 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 629 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 630 arrt += bs2; 631 } 632 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 633 } 634 635 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 636 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 637 } 638 } else { 639 for (i=m-1; i>=0; i--) { 640 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 641 v = aa + diag[i] + 1; 642 vi = aj + diag[i] + 1; 643 nz = ai[i+1] - diag[i] - 1; 644 workt = work; 645 for (j=0; j<nz; j++) { 646 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 647 workt += bs; 648 } 649 arrt = arr; 650 if (T) { 651 for (j=0; j<nz; j++) { 652 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 653 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 654 arrt += bs2; 655 } 656 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 657 } else if (kaij->isTI) { 658 for (j=0; j<nz; j++) { 659 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 660 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 661 arrt += bs2; 662 } 663 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 664 } 665 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 666 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 667 } 668 idiag -= bs2; 669 i2 -= bs; 670 } 671 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 672 } 673 } 674 675 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 676 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 /*===================================================================================*/ 681 682 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz) 683 { 684 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 685 PetscErrorCode ierr; 686 687 PetscFunctionBegin; 688 if (!yy) { 689 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 690 } else { 691 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 692 } 693 /* start the scatter */ 694 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 695 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 696 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 697 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 698 PetscFunctionReturn(0); 699 } 700 701 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy) 702 { 703 PetscErrorCode ierr; 704 PetscFunctionBegin; 705 ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 706 PetscFunctionReturn(0); 707 } 708 709 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz) 710 { 711 PetscErrorCode ierr; 712 PetscFunctionBegin; 713 ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr); 714 PetscFunctionReturn(0); 715 } 716 717 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values) 718 { 719 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 720 PetscErrorCode ierr; 721 722 PetscFunctionBegin; 723 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 724 PetscFunctionReturn(0); 725 } 726 727 /* ----------------------------------------------------------------*/ 728 729 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 730 { 731 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 732 PetscErrorCode ierr,diag; 733 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 734 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 735 736 PetscFunctionBegin; 737 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 738 b->getrowactive = PETSC_TRUE; 739 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 740 741 if ((!S) && (!T) && (!b->isTI)) { 742 if (ncols) *ncols = 0; 743 if (cols) *cols = NULL; 744 if (values) *values = NULL; 745 PetscFunctionReturn(0); 746 } 747 748 if (T || b->isTI) { 749 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 750 diag = PETSC_FALSE; 751 c = nzaij; 752 for (i=0; i<nzaij; i++) { 753 /* check if this row contains a diagonal entry */ 754 if (colsaij[i] == r) { 755 diag = PETSC_TRUE; 756 c = i; 757 } 758 } 759 } else nzaij = c = 0; 760 761 /* calculate size of row */ 762 nz = 0; 763 if (S) nz += q; 764 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 765 766 if (cols || values) { 767 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 768 if (b->isTI) { 769 for (i=0; i<nzaij; i++) { 770 for (j=0; j<q; j++) { 771 idx[i*q+j] = colsaij[i]*q+j; 772 v[i*q+j] = (j==s ? vaij[i] : 0); 773 } 774 } 775 } else if (T) { 776 for (i=0; i<nzaij; i++) { 777 for (j=0; j<q; j++) { 778 idx[i*q+j] = colsaij[i]*q+j; 779 v[i*q+j] = vaij[i]*T[s+j*p]; 780 } 781 } 782 } 783 if (S) { 784 for (j=0; j<q; j++) { 785 idx[c*q+j] = r*q+j; 786 v[c*q+j] += S[s+j*p]; 787 } 788 } 789 } 790 791 if (ncols) *ncols = nz; 792 if (cols) *cols = idx; 793 if (values) *values = v; 794 795 PetscFunctionReturn(0); 796 } 797 798 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 799 { 800 PetscErrorCode ierr; 801 PetscFunctionBegin; 802 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 803 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 804 PetscFunctionReturn(0); 805 } 806 807 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 808 { 809 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 810 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 811 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 812 Mat AIJ = b->A; 813 PetscErrorCode ierr; 814 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 815 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 816 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 817 PetscBool diag; 818 819 PetscFunctionBegin; 820 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 821 b->getrowactive = PETSC_TRUE; 822 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 823 lrow = row - rstart; 824 825 if ((!S) && (!T) && (!b->isTI)) { 826 if (ncols) *ncols = 0; 827 if (cols) *cols = NULL; 828 if (values) *values = NULL; 829 PetscFunctionReturn(0); 830 } 831 832 r = lrow/p; 833 s = lrow%p; 834 835 if (T || b->isTI) { 836 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 837 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 838 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 839 840 diag = PETSC_FALSE; 841 c = ncolsaij + ncolsoaij; 842 for (i=0; i<ncolsaij; i++) { 843 /* check if this row contains a diagonal entry */ 844 if (colsaij[i] == r) { 845 diag = PETSC_TRUE; 846 c = i; 847 } 848 } 849 } else c = 0; 850 851 /* calculate size of row */ 852 nz = 0; 853 if (S) nz += q; 854 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 855 856 if (cols || values) { 857 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 858 if (b->isTI) { 859 for (i=0; i<ncolsaij; i++) { 860 for (j=0; j<q; j++) { 861 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 862 v[i*q+j] = (j==s ? vals[i] : 0.0); 863 } 864 } 865 for (i=0; i<ncolsoaij; i++) { 866 for (j=0; j<q; j++) { 867 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 868 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 869 } 870 } 871 } else if (T) { 872 for (i=0; i<ncolsaij; i++) { 873 for (j=0; j<q; j++) { 874 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 875 v[i*q+j] = vals[i]*T[s+j*p]; 876 } 877 } 878 for (i=0; i<ncolsoaij; i++) { 879 for (j=0; j<q; j++) { 880 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 881 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 882 } 883 } 884 } 885 if (S) { 886 for (j=0; j<q; j++) { 887 idx[c*q+j] = (r+rstart/p)*q+j; 888 v[c*q+j] += S[s+j*p]; 889 } 890 } 891 } 892 893 if (ncols) *ncols = nz; 894 if (cols) *cols = idx; 895 if (values) *values = v; 896 PetscFunctionReturn(0); 897 } 898 899 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 900 { 901 PetscErrorCode ierr; 902 PetscFunctionBegin; 903 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 904 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 905 PetscFunctionReturn(0); 906 } 907 908 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 909 { 910 PetscErrorCode ierr; 911 Mat A; 912 913 PetscFunctionBegin; 914 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 915 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 916 ierr = MatDestroy(&A);CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } 919 920 /* ---------------------------------------------------------------------------------- */ 921 /*@C 922 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 923 924 [I \otimes S + A \otimes T] 925 926 where 927 S is a dense (p \times q) matrix 928 T is a dense (p \times q) matrix 929 A is an AIJ (n \times n) matrix 930 I is the identity matrix 931 The resulting matrix is (np \times nq) 932 933 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 934 S is always stored independently on all processes as a PetscScalar array in column-major format. 935 936 Collective 937 938 Input Parameters: 939 + A - the AIJ matrix 940 . S - the S matrix, stored as a PetscScalar array (column-major) 941 . T - the T matrix, stored as a PetscScalar array (column-major) 942 . p - number of rows in S and T 943 - q - number of columns in S and T 944 945 Output Parameter: 946 . kaij - the new KAIJ matrix 947 948 Operations provided: 949 + MatMult 950 . MatMultAdd 951 . MatInvertBlockDiagonal 952 - MatView 953 954 Level: advanced 955 956 .seealso: MatKAIJGetAIJ(), MATKAIJ 957 @*/ 958 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 959 { 960 PetscErrorCode ierr; 961 PetscMPIInt size; 962 PetscInt n,i,j; 963 Mat B; 964 PetscBool isTI = PETSC_FALSE; 965 966 PetscFunctionBegin; 967 968 /* check if T is an identity matrix */ 969 if (T && (p == q)) { 970 isTI = PETSC_TRUE; 971 for (i=0; i<p; i++) { 972 for (j=0; j<q; j++) { 973 if (i == j) { 974 /* diagonal term must be 1 */ 975 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 976 } else { 977 /* off-diagonal term must be 0 */ 978 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 979 } 980 } 981 } 982 } 983 984 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 985 986 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 987 ierr = MatSetSizes(B,p*A->rmap->n,q*A->cmap->n,p*A->rmap->N,q*A->cmap->N);CHKERRQ(ierr); 988 ierr = PetscLayoutSetBlockSize(B->rmap,p);CHKERRQ(ierr); 989 ierr = PetscLayoutSetBlockSize(B->cmap,q);CHKERRQ(ierr); 990 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 991 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 992 993 B->assembled = PETSC_TRUE; 994 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 995 996 if (size == 1) { 997 Mat_SeqKAIJ *b; 998 999 ierr = MatSetType(B,MATSEQKAIJ);CHKERRQ(ierr); 1000 1001 B->ops->setup = NULL; 1002 B->ops->destroy = MatDestroy_SeqKAIJ; 1003 B->ops->view = MatView_SeqKAIJ; 1004 b = (Mat_SeqKAIJ*)B->data; 1005 b->p = p; 1006 b->q = q; 1007 b->AIJ = A; 1008 b->isTI = isTI; 1009 if (S) { 1010 ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr); 1011 ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 1012 } else b->S = NULL; 1013 if (T && (!isTI)) { 1014 ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr); 1015 ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 1016 } else b->T = NULL; 1017 1018 B->ops->mult = MatMult_SeqKAIJ_N; 1019 B->ops->multadd = MatMultAdd_SeqKAIJ_N; 1020 B->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N; 1021 B->ops->getrow = MatGetRow_SeqKAIJ; 1022 B->ops->restorerow = MatRestoreRow_SeqKAIJ; 1023 B->ops->sor = MatSOR_SeqKAIJ; 1024 } else { 1025 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 1026 Mat_MPIKAIJ *b; 1027 IS from,to; 1028 Vec gvec; 1029 1030 ierr = MatSetType(B,MATMPIKAIJ);CHKERRQ(ierr); 1031 1032 B->ops->setup = NULL; 1033 B->ops->destroy = MatDestroy_MPIKAIJ; 1034 B->ops->view = MatView_MPIKAIJ; 1035 1036 b = (Mat_MPIKAIJ*)B->data; 1037 b->p = p; 1038 b->q = q; 1039 b->A = A; 1040 b->isTI = isTI; 1041 if (S) { 1042 ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr); 1043 ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 1044 } else b->S = NULL; 1045 if (T &&(!isTI)) { 1046 ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr); 1047 ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 1048 } else b->T = NULL; 1049 1050 ierr = MatCreateKAIJ(mpiaij->A,p,q,S ,T,&b->AIJ);CHKERRQ(ierr); 1051 ierr = MatCreateKAIJ(mpiaij->B,p,q,NULL,T,&b->OAIJ);CHKERRQ(ierr); 1052 1053 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1054 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 1055 ierr = VecSetSizes(b->w,n*q,n*q);CHKERRQ(ierr); 1056 ierr = VecSetBlockSize(b->w,q);CHKERRQ(ierr); 1057 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 1058 1059 /* create two temporary Index sets for build scatter gather */ 1060 ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 1061 ierr = ISCreateStride(PETSC_COMM_SELF,n*q,0,1,&to);CHKERRQ(ierr); 1062 1063 /* create temporary global vector to generate scatter context */ 1064 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),q,q*A->cmap->n,q*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 1065 1066 /* generate the scatter context */ 1067 ierr = VecScatterCreateWithData(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1068 1069 ierr = ISDestroy(&from);CHKERRQ(ierr); 1070 ierr = ISDestroy(&to);CHKERRQ(ierr); 1071 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 1072 1073 B->ops->mult = MatMult_MPIKAIJ_dof; 1074 B->ops->multadd = MatMultAdd_MPIKAIJ_dof; 1075 B->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof; 1076 B->ops->getrow = MatGetRow_MPIKAIJ; 1077 B->ops->restorerow = MatRestoreRow_MPIKAIJ; 1078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1079 } 1080 B->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1081 B->assembled = PETSC_TRUE; 1082 ierr = MatSetUp(B);CHKERRQ(ierr); 1083 *kaij = B; 1084 ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 1085 1086 PetscFunctionReturn(0); 1087 } 1088