1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else { 64 *B = A; 65 } 66 PetscFunctionReturn(0); 67 } 68 69 /*@C 70 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 71 72 Not Collective, the entire S is stored and returned independently on all processes 73 74 Input Parameter: 75 . A - the KAIJ matrix 76 77 Output Parameter: 78 . S - the S matrix, in form of a scalar array in column-major format 79 80 Level: advanced 81 82 Notes: The reference count on the S matrix is not increased so you should not destroy it. 83 84 .seealso: MatCreateKAIJ() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,const PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 *S = b->S; 91 PetscFunctionReturn(0); 92 } 93 94 /*@C 95 MatKAIJGetT - Get the T matrix describing the shift action of the KAIJ matrix 96 97 Not Collective, the entire T is stored and returned independently on all processes 98 99 Input Parameter: 100 . A - the KAIJ matrix 101 102 Output Parameter: 103 . T - the T matrix, in form of a scalar array in column-major format 104 105 Level: advanced 106 107 Notes: The reference count on the T matrix is not increased so you should not destroy it. 108 109 .seealso: MatCreateKAIJ() 110 @*/ 111 PetscErrorCode MatKAIJGetT(Mat A,const PetscScalar **T) 112 { 113 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 114 PetscFunctionBegin; 115 *T = b->T; 116 PetscFunctionReturn(0); 117 } 118 119 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 120 { 121 PetscErrorCode ierr; 122 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 123 124 PetscFunctionBegin; 125 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 126 ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr); 127 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 128 ierr = PetscFree(A->data);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 PetscErrorCode MatSetUp_KAIJ(Mat A) 133 { 134 PetscFunctionBegin; 135 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateKAIJ() to create KAIJ matrices"); 136 PetscFunctionReturn(0); 137 } 138 139 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 140 { 141 PetscErrorCode ierr; 142 Mat B; 143 144 PetscFunctionBegin; 145 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 146 ierr = MatView(B,viewer);CHKERRQ(ierr); 147 ierr = MatDestroy(&B);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 152 { 153 PetscErrorCode ierr; 154 Mat B; 155 156 PetscFunctionBegin; 157 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 158 ierr = MatView(B,viewer);CHKERRQ(ierr); 159 ierr = MatDestroy(&B);CHKERRQ(ierr); 160 PetscFunctionReturn(0); 161 } 162 163 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 164 { 165 PetscErrorCode ierr; 166 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 167 168 PetscFunctionBegin; 169 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 170 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 171 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 172 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 173 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 174 ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr); 175 ierr = PetscFree(A->data);CHKERRQ(ierr); 176 177 PetscFunctionReturn(0); 178 } 179 180 /*MC 181 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 182 183 [I \otimes S + A \otimes T] 184 185 where 186 S is a dense (p \times q) matrix 187 T is a dense (p \times q) matrix 188 A is an AIJ (n \times n) matrix 189 I is the identity matrix 190 The resulting matrix is (np \times nq) 191 192 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 193 S and T are always stored independently on all processes as a PetscScalar array in column-major format. 194 195 Operations provided: 196 . MatMult 197 . MatMultAdd 198 . MatInvertBlockDiagonal 199 200 Level: advanced 201 202 .seealso: MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 203 M*/ 204 205 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 206 { 207 PetscErrorCode ierr; 208 Mat_MPIKAIJ *b; 209 PetscMPIInt size; 210 211 PetscFunctionBegin; 212 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 213 A->data = (void*)b; 214 215 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 216 217 A->ops->setup = MatSetUp_KAIJ; 218 219 b->w = 0; 220 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 221 if (size == 1) { 222 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 223 } else { 224 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 225 } 226 PetscFunctionReturn(0); 227 } 228 229 /* --------------------------------------------------------------------------------------*/ 230 231 /* zz = yy + Axx */ 232 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz) 233 { 234 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 235 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 236 const PetscScalar *s = b->S, *t = b->T; 237 const PetscScalar *x,*v,*bx; 238 PetscScalar *y,*sums; 239 PetscErrorCode ierr; 240 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 241 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 242 243 PetscFunctionBegin; 244 245 if (!yy) { 246 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 247 } else { 248 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 249 } 250 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 251 252 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 253 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 254 idx = a->j; 255 v = a->a; 256 ii = a->i; 257 258 if (b->isTI) { 259 for (i=0; i<m; i++) { 260 jrow = ii[i]; 261 n = ii[i+1] - jrow; 262 sums = y + p*i; 263 for (j=0; j<n; j++) { 264 for (k=0; k<p; k++) { 265 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 266 } 267 } 268 } 269 } else if (t) { 270 for (i=0; i<m; i++) { 271 jrow = ii[i]; 272 n = ii[i+1] - jrow; 273 sums = y + p*i; 274 bx = x + q*i; 275 for (j=0; j<n; j++) { 276 for (k=0; k<p; k++) { 277 for (l=0; l<q; l++) { 278 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 279 } 280 } 281 } 282 } 283 } 284 if (s) { 285 for (i=0; i<m; i++) { 286 jrow = ii[i]; 287 n = ii[i+1] - jrow; 288 sums = y + p*i; 289 bx = x + q*i; 290 if (i < b->AIJ->cmap->n) { 291 for (j=0; j<q; j++) { 292 for (k=0; k<p; k++) { 293 sums[k] += s[k+j*p]*bx[j]; 294 } 295 } 296 } 297 } 298 } 299 300 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 301 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 302 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy) 307 { 308 PetscErrorCode ierr; 309 PetscFunctionBegin; 310 ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 314 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 315 { 316 PetscErrorCode ierr; 317 PetscFunctionBegin; 318 ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 #include <petsc/private/kernels/blockinvert.h> 323 324 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values) 325 { 326 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 327 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 328 const PetscScalar *S = b->S; 329 const PetscScalar *T = b->T; 330 const PetscScalar *v = a->a; 331 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 332 PetscErrorCode ierr; 333 PetscInt i,j,*v_pivots,dof,dof2; 334 PetscScalar *diag,aval,*v_work; 335 336 PetscFunctionBegin; 337 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 338 if ((!S) && (!T) && (!b->isTI)) { 339 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 340 } 341 342 dof = p; 343 dof2 = dof*dof; 344 345 if (b->ibdiagvalid) { 346 if (values) *values = b->ibdiag; 347 PetscFunctionReturn(0); 348 } 349 if (!b->ibdiag) { 350 ierr = PetscMalloc(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 351 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 352 } 353 if (values) *values = b->ibdiag; 354 diag = b->ibdiag; 355 356 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 357 for (i=0; i<m; i++) { 358 if (S) { 359 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 360 } else { 361 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 362 } 363 if (b->isTI) { 364 aval = 0; 365 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 366 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 367 } else if (T) { 368 aval = 0; 369 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 370 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 371 } 372 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 373 diag += dof2; 374 } 375 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 376 377 b->ibdiagvalid = PETSC_TRUE; 378 PetscFunctionReturn(0); 379 } 380 381 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 382 { 383 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 384 385 PetscFunctionBegin; 386 *B = kaij->AIJ; 387 PetscFunctionReturn(0); 388 } 389 390 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 391 { 392 PetscErrorCode ierr; 393 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 394 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 395 const PetscScalar *aa = a->a, *T = kaij->T, *v; 396 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 397 const PetscScalar *b, *xb, *idiag; 398 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 399 PetscInt i, j, k, i2, bs, bs2, nz; 400 401 402 PetscFunctionBegin; 403 its = its*lits; 404 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 405 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 406 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 407 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) 408 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 409 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 410 else {bs = p; bs2 = bs*bs; } 411 412 if (!m) PetscFunctionReturn(0); 413 414 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); } 415 idiag = kaij->ibdiag; 416 diag = a->diag; 417 418 if (!kaij->sor.setup) { 419 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 420 kaij->sor.setup = PETSC_TRUE; 421 } 422 y = kaij->sor.y; 423 w = kaij->sor.w; 424 work = kaij->sor.work; 425 t = kaij->sor.t; 426 arr = kaij->sor.arr; 427 428 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 429 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 430 431 if (flag & SOR_ZERO_INITIAL_GUESS) { 432 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 433 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 434 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 435 i2 = bs; 436 idiag += bs2; 437 for (i=1; i<m; i++) { 438 v = aa + ai[i]; 439 vi = aj + ai[i]; 440 nz = diag[i] - ai[i]; 441 442 if (T) { /* b - T (Arow * x) */ 443 for (k=0; k<bs; k++) w[k] = 0; 444 for (j=0; j<nz; j++) { 445 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 446 } 447 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 448 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 449 } else if (kaij->isTI) { 450 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 451 for (j=0; j<nz; j++) { 452 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 453 } 454 } else { 455 for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 456 } 457 458 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 459 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 460 461 idiag += bs2; 462 i2 += bs; 463 } 464 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 465 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 466 xb = t; 467 } else xb = b; 468 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 469 idiag = kaij->ibdiag+bs2*(m-1); 470 i2 = bs * (m-1); 471 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 472 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 473 i2 -= bs; 474 idiag -= bs2; 475 for (i=m-2; i>=0; i--) { 476 v = aa + diag[i] + 1 ; 477 vi = aj + diag[i] + 1; 478 nz = ai[i+1] - diag[i] - 1; 479 480 if (T) { /* FIXME: This branch untested */ 481 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 482 /* copy all rows of x that are needed into contiguous space */ 483 workt = work; 484 for (j=0; j<nz; j++) { 485 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 486 workt += bs; 487 } 488 arrt = arr; 489 for (j=0; j<nz; j++) { 490 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 491 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 492 arrt += bs2; 493 } 494 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 495 } else if (kaij->isTI) { 496 for (k=0; k<bs; k++) w[k] = t[i2+k]; 497 for (j=0; j<nz; j++) { 498 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 499 } 500 } 501 502 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 503 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 504 505 idiag -= bs2; 506 i2 -= bs; 507 } 508 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 509 } 510 its--; 511 } 512 while (its--) { /* FIXME: This branch not updated */ 513 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 514 i2 = 0; 515 idiag = kaij->ibdiag; 516 for (i=0; i<m; i++) { 517 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 518 519 v = aa + ai[i]; 520 vi = aj + ai[i]; 521 nz = diag[i] - ai[i]; 522 workt = work; 523 for (j=0; j<nz; j++) { 524 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 525 workt += bs; 526 } 527 arrt = arr; 528 if (T) { 529 for (j=0; j<nz; j++) { 530 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 531 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 532 arrt += bs2; 533 } 534 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 535 } else if (kaij->isTI) { 536 for (j=0; j<nz; j++) { 537 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 538 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 539 arrt += bs2; 540 } 541 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 542 } 543 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 544 545 v = aa + diag[i] + 1; 546 vi = aj + diag[i] + 1; 547 nz = ai[i+1] - diag[i] - 1; 548 workt = work; 549 for (j=0; j<nz; j++) { 550 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 551 workt += bs; 552 } 553 arrt = arr; 554 if (T) { 555 for (j=0; j<nz; j++) { 556 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 557 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 558 arrt += bs2; 559 } 560 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 561 } else if (kaij->isTI) { 562 for (j=0; j<nz; j++) { 563 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 564 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 565 arrt += bs2; 566 } 567 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 568 } 569 570 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 571 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 572 573 idiag += bs2; 574 i2 += bs; 575 } 576 xb = t; 577 } 578 else xb = b; 579 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 580 idiag = kaij->ibdiag+bs2*(m-1); 581 i2 = bs * (m-1); 582 if (xb == b) { 583 for (i=m-1; i>=0; i--) { 584 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 585 586 v = aa + ai[i]; 587 vi = aj + ai[i]; 588 nz = diag[i] - ai[i]; 589 workt = work; 590 for (j=0; j<nz; j++) { 591 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 592 workt += bs; 593 } 594 arrt = arr; 595 if (T) { 596 for (j=0; j<nz; j++) { 597 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 598 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 599 arrt += bs2; 600 } 601 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 602 } else if (kaij->isTI) { 603 for (j=0; j<nz; j++) { 604 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 605 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 606 arrt += bs2; 607 } 608 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 609 } 610 611 v = aa + diag[i] + 1; 612 vi = aj + diag[i] + 1; 613 nz = ai[i+1] - diag[i] - 1; 614 workt = work; 615 for (j=0; j<nz; j++) { 616 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 617 workt += bs; 618 } 619 arrt = arr; 620 if (T) { 621 for (j=0; j<nz; j++) { 622 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 623 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 624 arrt += bs2; 625 } 626 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 627 } else if (kaij->isTI) { 628 for (j=0; j<nz; j++) { 629 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 630 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 631 arrt += bs2; 632 } 633 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 634 } 635 636 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 637 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 638 } 639 } else { 640 for (i=m-1; i>=0; i--) { 641 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 642 v = aa + diag[i] + 1; 643 vi = aj + diag[i] + 1; 644 nz = ai[i+1] - diag[i] - 1; 645 workt = work; 646 for (j=0; j<nz; j++) { 647 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 648 workt += bs; 649 } 650 arrt = arr; 651 if (T) { 652 for (j=0; j<nz; j++) { 653 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 654 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 655 arrt += bs2; 656 } 657 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 658 } else if (kaij->isTI) { 659 for (j=0; j<nz; j++) { 660 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 661 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 662 arrt += bs2; 663 } 664 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 665 } 666 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 667 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 668 } 669 idiag -= bs2; 670 i2 -= bs; 671 } 672 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 673 } 674 } 675 676 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 677 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 678 679 PetscFunctionReturn(0); 680 } 681 682 /*===================================================================================*/ 683 684 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz) 685 { 686 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 687 PetscErrorCode ierr; 688 689 PetscFunctionBegin; 690 if (!yy) { 691 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 692 } else { 693 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 694 } 695 /* start the scatter */ 696 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 697 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 698 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 699 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy) 704 { 705 PetscErrorCode ierr; 706 PetscFunctionBegin; 707 ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz) 712 { 713 PetscErrorCode ierr; 714 PetscFunctionBegin; 715 ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 719 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values) 720 { 721 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 722 PetscErrorCode ierr; 723 724 PetscFunctionBegin; 725 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 726 PetscFunctionReturn(0); 727 } 728 729 /* ----------------------------------------------------------------*/ 730 731 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 732 { 733 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 734 PetscErrorCode ierr,diag; 735 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 736 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 737 738 PetscFunctionBegin; 739 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 740 b->getrowactive = PETSC_TRUE; 741 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 742 743 if ((!S) && (!T) && (!b->isTI)) { 744 if (ncols) *ncols = 0; 745 if (cols) *cols = NULL; 746 if (values) *values = NULL; 747 PetscFunctionReturn(0); 748 } 749 750 if (T || b->isTI) { 751 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 752 diag = PETSC_FALSE; 753 c = nzaij; 754 for (i=0; i<nzaij; i++) { 755 /* check if this row contains a diagonal entry */ 756 if (colsaij[i] == r) { 757 diag = PETSC_TRUE; 758 c = i; 759 } 760 } 761 } else nzaij = c = 0; 762 763 /* calculate size of row */ 764 nz = 0; 765 if (S) nz += q; 766 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 767 768 if (cols || values) { 769 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 770 if (b->isTI) { 771 for (i=0; i<nzaij; i++) { 772 for (j=0; j<q; j++) { 773 idx[i*q+j] = colsaij[i]*q+j; 774 v[i*q+j] = (j==s ? vaij[i] : 0); 775 } 776 } 777 } else if (T) { 778 for (i=0; i<nzaij; i++) { 779 for (j=0; j<q; j++) { 780 idx[i*q+j] = colsaij[i]*q+j; 781 v[i*q+j] = vaij[i]*T[s+j*p]; 782 } 783 } 784 } 785 if (S) { 786 for (j=0; j<q; j++) { 787 idx[c*q+j] = r*q+j; 788 v[c*q+j] += S[s+j*p]; 789 } 790 } 791 } 792 793 if (ncols) *ncols = nz; 794 if (cols) *cols = idx; 795 if (values) *values = v; 796 797 PetscFunctionReturn(0); 798 } 799 800 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 801 { 802 PetscErrorCode ierr; 803 PetscFunctionBegin; 804 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 805 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 806 PetscFunctionReturn(0); 807 } 808 809 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 810 { 811 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 812 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 813 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 814 Mat AIJ = b->A; 815 PetscErrorCode ierr; 816 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 817 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 818 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 819 PetscBool diag; 820 821 PetscFunctionBegin; 822 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 823 b->getrowactive = PETSC_TRUE; 824 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 825 lrow = row - rstart; 826 827 if ((!S) && (!T) && (!b->isTI)) { 828 if (ncols) *ncols = 0; 829 if (cols) *cols = NULL; 830 if (values) *values = NULL; 831 PetscFunctionReturn(0); 832 } 833 834 r = lrow/p; 835 s = lrow%p; 836 837 if (T || b->isTI) { 838 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 839 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 840 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 841 842 diag = PETSC_FALSE; 843 c = ncolsaij + ncolsoaij; 844 for (i=0; i<ncolsaij; i++) { 845 /* check if this row contains a diagonal entry */ 846 if (colsaij[i] == r) { 847 diag = PETSC_TRUE; 848 c = i; 849 } 850 } 851 } else c = 0; 852 853 /* calculate size of row */ 854 nz = 0; 855 if (S) nz += q; 856 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 857 858 if (cols || values) { 859 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 860 if (b->isTI) { 861 for (i=0; i<ncolsaij; i++) { 862 for (j=0; j<q; j++) { 863 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 864 v[i*q+j] = (j==s ? vals[i] : 0.0); 865 } 866 } 867 for (i=0; i<ncolsoaij; i++) { 868 for (j=0; j<q; j++) { 869 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 870 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 871 } 872 } 873 } else if (T) { 874 for (i=0; i<ncolsaij; i++) { 875 for (j=0; j<q; j++) { 876 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 877 v[i*q+j] = vals[i]*T[s+j*p]; 878 } 879 } 880 for (i=0; i<ncolsoaij; i++) { 881 for (j=0; j<q; j++) { 882 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 883 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 884 } 885 } 886 } 887 if (S) { 888 for (j=0; j<q; j++) { 889 idx[c*q+j] = (r+rstart/p)*q+j; 890 v[c*q+j] += S[s+j*p]; 891 } 892 } 893 } 894 895 if (ncols) *ncols = nz; 896 if (cols) *cols = idx; 897 if (values) *values = v; 898 PetscFunctionReturn(0); 899 } 900 901 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 902 { 903 PetscErrorCode ierr; 904 PetscFunctionBegin; 905 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 906 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 907 PetscFunctionReturn(0); 908 } 909 910 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 911 { 912 PetscErrorCode ierr; 913 Mat A; 914 915 PetscFunctionBegin; 916 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 917 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 918 ierr = MatDestroy(&A);CHKERRQ(ierr); 919 PetscFunctionReturn(0); 920 } 921 922 /* ---------------------------------------------------------------------------------- */ 923 /*@C 924 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 925 926 [I \otimes S + A \otimes T] 927 928 where 929 S is a dense (p \times q) matrix 930 T is a dense (p \times q) matrix 931 A is an AIJ (n \times n) matrix 932 I is the identity matrix 933 The resulting matrix is (np \times nq) 934 935 The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 936 S is always stored independently on all processes as a PetscScalar array in column-major format. 937 938 Collective 939 940 Input Parameters: 941 + A - the AIJ matrix 942 . S - the S matrix, stored as a PetscScalar array (column-major) 943 . T - the T matrix, stored as a PetscScalar array (column-major) 944 . p - number of rows in S and T 945 - q - number of columns in S and T 946 947 Output Parameter: 948 . kaij - the new KAIJ matrix 949 950 Operations provided: 951 + MatMult 952 . MatMultAdd 953 . MatInvertBlockDiagonal 954 - MatView 955 956 Level: advanced 957 958 .seealso: MatKAIJGetAIJ(), MATKAIJ 959 @*/ 960 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 961 { 962 PetscErrorCode ierr; 963 PetscMPIInt size; 964 PetscInt n,i,j; 965 Mat B; 966 PetscBool isTI = PETSC_FALSE; 967 968 PetscFunctionBegin; 969 970 /* check if T is an identity matrix */ 971 if (T && (p == q)) { 972 isTI = PETSC_TRUE; 973 for (i=0; i<p; i++) { 974 for (j=0; j<q; j++) { 975 if (i == j) { 976 /* diagonal term must be 1 */ 977 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 978 } else { 979 /* off-diagonal term must be 0 */ 980 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 981 } 982 } 983 } 984 } 985 986 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 987 988 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 989 ierr = MatSetSizes(B,p*A->rmap->n,q*A->cmap->n,p*A->rmap->N,q*A->cmap->N);CHKERRQ(ierr); 990 ierr = PetscLayoutSetBlockSize(B->rmap,p);CHKERRQ(ierr); 991 ierr = PetscLayoutSetBlockSize(B->cmap,q);CHKERRQ(ierr); 992 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 993 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 994 995 B->assembled = PETSC_TRUE; 996 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 997 998 if (size == 1) { 999 Mat_SeqKAIJ *b; 1000 1001 ierr = MatSetType(B,MATSEQKAIJ);CHKERRQ(ierr); 1002 1003 B->ops->setup = NULL; 1004 B->ops->destroy = MatDestroy_SeqKAIJ; 1005 B->ops->view = MatView_SeqKAIJ; 1006 b = (Mat_SeqKAIJ*)B->data; 1007 b->p = p; 1008 b->q = q; 1009 b->AIJ = A; 1010 b->isTI = isTI; 1011 if (S) { 1012 ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr); 1013 ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 1014 } else b->S = NULL; 1015 if (T && (!isTI)) { 1016 ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr); 1017 ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 1018 } else b->T = NULL; 1019 1020 B->ops->mult = MatMult_SeqKAIJ_N; 1021 B->ops->multadd = MatMultAdd_SeqKAIJ_N; 1022 B->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N; 1023 B->ops->getrow = MatGetRow_SeqKAIJ; 1024 B->ops->restorerow = MatRestoreRow_SeqKAIJ; 1025 B->ops->sor = MatSOR_SeqKAIJ; 1026 1027 /* 1028 This is commented out since MATKAIJ will use the basic MatConvert (which uses MatGetRow()). 1029 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqkaij_seqaij_C",MatConvert_SeqKAIJ_SeqAIJ);CHKERRQ(ierr); 1030 */ 1031 1032 } else { 1033 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 1034 Mat_MPIKAIJ *b; 1035 IS from,to; 1036 Vec gvec; 1037 1038 ierr = MatSetType(B,MATMPIKAIJ);CHKERRQ(ierr); 1039 1040 B->ops->setup = NULL; 1041 B->ops->destroy = MatDestroy_MPIKAIJ; 1042 B->ops->view = MatView_MPIKAIJ; 1043 1044 b = (Mat_MPIKAIJ*)B->data; 1045 b->p = p; 1046 b->q = q; 1047 b->A = A; 1048 b->isTI = isTI; 1049 if (S) { 1050 ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr); 1051 ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 1052 } else b->S = NULL; 1053 if (T &&(!isTI)) { 1054 ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr); 1055 ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 1056 } else b->T = NULL; 1057 1058 ierr = MatCreateKAIJ(mpiaij->A,p,q,S ,T,&b->AIJ);CHKERRQ(ierr); 1059 ierr = MatCreateKAIJ(mpiaij->B,p,q,NULL,T,&b->OAIJ);CHKERRQ(ierr); 1060 1061 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1062 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 1063 ierr = VecSetSizes(b->w,n*q,n*q);CHKERRQ(ierr); 1064 ierr = VecSetBlockSize(b->w,q);CHKERRQ(ierr); 1065 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 1066 1067 /* create two temporary Index sets for build scatter gather */ 1068 ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 1069 ierr = ISCreateStride(PETSC_COMM_SELF,n*q,0,1,&to);CHKERRQ(ierr); 1070 1071 /* create temporary global vector to generate scatter context */ 1072 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),q,q*A->cmap->n,q*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 1073 1074 /* generate the scatter context */ 1075 ierr = VecScatterCreateWithData(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1076 1077 ierr = ISDestroy(&from);CHKERRQ(ierr); 1078 ierr = ISDestroy(&to);CHKERRQ(ierr); 1079 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 1080 1081 B->ops->mult = MatMult_MPIKAIJ_dof; 1082 B->ops->multadd = MatMultAdd_MPIKAIJ_dof; 1083 B->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof; 1084 B->ops->getrow = MatGetRow_MPIKAIJ; 1085 B->ops->restorerow = MatRestoreRow_MPIKAIJ; 1086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1087 } 1088 B->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1089 B->assembled = PETSC_TRUE; 1090 ierr = MatSetUp(B);CHKERRQ(ierr); 1091 *kaij = B; 1092 ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 1093 1094 PetscFunctionReturn(0); 1095 } 1096