1*49bd79ccSDebojyoti Ghosh 2*49bd79ccSDebojyoti Ghosh /* 3*49bd79ccSDebojyoti Ghosh Defines the basic matrix operations for the KAIJ matrix storage format. 4*49bd79ccSDebojyoti Ghosh This format is used to evaluate matrices of the form: 5*49bd79ccSDebojyoti Ghosh 6*49bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 7*49bd79ccSDebojyoti Ghosh 8*49bd79ccSDebojyoti Ghosh where 9*49bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 10*49bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 11*49bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 12*49bd79ccSDebojyoti Ghosh I is the identity matrix 13*49bd79ccSDebojyoti Ghosh 14*49bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 15*49bd79ccSDebojyoti Ghosh 16*49bd79ccSDebojyoti Ghosh We provide: 17*49bd79ccSDebojyoti Ghosh MatMult() 18*49bd79ccSDebojyoti Ghosh MatMultAdd() 19*49bd79ccSDebojyoti Ghosh MatInvertBlockDiagonal() 20*49bd79ccSDebojyoti Ghosh and 21*49bd79ccSDebojyoti Ghosh MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22*49bd79ccSDebojyoti Ghosh 23*49bd79ccSDebojyoti Ghosh This single directory handles both the sequential and parallel codes 24*49bd79ccSDebojyoti Ghosh */ 25*49bd79ccSDebojyoti Ghosh 26*49bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27*49bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h> 28*49bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h> 29*49bd79ccSDebojyoti Ghosh 30*49bd79ccSDebojyoti Ghosh /*@C 31*49bd79ccSDebojyoti Ghosh MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32*49bd79ccSDebojyoti Ghosh 33*49bd79ccSDebojyoti Ghosh Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34*49bd79ccSDebojyoti Ghosh 35*49bd79ccSDebojyoti Ghosh Input Parameter: 36*49bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 37*49bd79ccSDebojyoti Ghosh 38*49bd79ccSDebojyoti Ghosh Output Parameter: 39*49bd79ccSDebojyoti Ghosh . B - the AIJ matrix 40*49bd79ccSDebojyoti Ghosh 41*49bd79ccSDebojyoti Ghosh Level: advanced 42*49bd79ccSDebojyoti Ghosh 43*49bd79ccSDebojyoti Ghosh Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44*49bd79ccSDebojyoti Ghosh 45*49bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ() 46*49bd79ccSDebojyoti Ghosh @*/ 47*49bd79ccSDebojyoti Ghosh PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48*49bd79ccSDebojyoti Ghosh { 49*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 50*49bd79ccSDebojyoti Ghosh PetscBool ismpikaij,isseqkaij; 51*49bd79ccSDebojyoti Ghosh 52*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 53*49bd79ccSDebojyoti Ghosh ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54*49bd79ccSDebojyoti Ghosh ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55*49bd79ccSDebojyoti Ghosh if (ismpikaij) { 56*49bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57*49bd79ccSDebojyoti Ghosh 58*49bd79ccSDebojyoti Ghosh *B = b->A; 59*49bd79ccSDebojyoti Ghosh } else if (isseqkaij) { 60*49bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61*49bd79ccSDebojyoti Ghosh 62*49bd79ccSDebojyoti Ghosh *B = b->AIJ; 63*49bd79ccSDebojyoti Ghosh } else { 64*49bd79ccSDebojyoti Ghosh *B = A; 65*49bd79ccSDebojyoti Ghosh } 66*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 67*49bd79ccSDebojyoti Ghosh } 68*49bd79ccSDebojyoti Ghosh 69*49bd79ccSDebojyoti Ghosh /*@C 70*49bd79ccSDebojyoti Ghosh MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 71*49bd79ccSDebojyoti Ghosh 72*49bd79ccSDebojyoti Ghosh Not Collective, the entire S is stored and returned independently on all processes 73*49bd79ccSDebojyoti Ghosh 74*49bd79ccSDebojyoti Ghosh Input Parameter: 75*49bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 76*49bd79ccSDebojyoti Ghosh 77*49bd79ccSDebojyoti Ghosh Output Parameter: 78*49bd79ccSDebojyoti Ghosh . S - the S matrix, in form of a scalar array in column-major format 79*49bd79ccSDebojyoti Ghosh 80*49bd79ccSDebojyoti Ghosh Level: advanced 81*49bd79ccSDebojyoti Ghosh 82*49bd79ccSDebojyoti Ghosh Notes: The reference count on the S matrix is not increased so you should not destroy it. 83*49bd79ccSDebojyoti Ghosh 84*49bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ() 85*49bd79ccSDebojyoti Ghosh @*/ 86*49bd79ccSDebojyoti Ghosh PetscErrorCode MatKAIJGetS(Mat A,const PetscScalar **S) 87*49bd79ccSDebojyoti Ghosh { 88*49bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 90*49bd79ccSDebojyoti Ghosh *S = b->S; 91*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 92*49bd79ccSDebojyoti Ghosh } 93*49bd79ccSDebojyoti Ghosh 94*49bd79ccSDebojyoti Ghosh /*@C 95*49bd79ccSDebojyoti Ghosh MatKAIJGetT - Get the T matrix describing the shift action of the KAIJ matrix 96*49bd79ccSDebojyoti Ghosh 97*49bd79ccSDebojyoti Ghosh Not Collective, the entire T is stored and returned independently on all processes 98*49bd79ccSDebojyoti Ghosh 99*49bd79ccSDebojyoti Ghosh Input Parameter: 100*49bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 101*49bd79ccSDebojyoti Ghosh 102*49bd79ccSDebojyoti Ghosh Output Parameter: 103*49bd79ccSDebojyoti Ghosh . T - the T matrix, in form of a scalar array in column-major format 104*49bd79ccSDebojyoti Ghosh 105*49bd79ccSDebojyoti Ghosh Level: advanced 106*49bd79ccSDebojyoti Ghosh 107*49bd79ccSDebojyoti Ghosh Notes: The reference count on the T matrix is not increased so you should not destroy it. 108*49bd79ccSDebojyoti Ghosh 109*49bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ() 110*49bd79ccSDebojyoti Ghosh @*/ 111*49bd79ccSDebojyoti Ghosh PetscErrorCode MatKAIJGetT(Mat A,const PetscScalar **T) 112*49bd79ccSDebojyoti Ghosh { 113*49bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 114*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 115*49bd79ccSDebojyoti Ghosh *T = b->T; 116*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 117*49bd79ccSDebojyoti Ghosh } 118*49bd79ccSDebojyoti Ghosh 119*49bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 120*49bd79ccSDebojyoti Ghosh { 121*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 122*49bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 123*49bd79ccSDebojyoti Ghosh 124*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 125*49bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 126*49bd79ccSDebojyoti Ghosh ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr); 127*49bd79ccSDebojyoti Ghosh ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 128*49bd79ccSDebojyoti Ghosh ierr = PetscFree(A->data);CHKERRQ(ierr); 129*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 130*49bd79ccSDebojyoti Ghosh } 131*49bd79ccSDebojyoti Ghosh 132*49bd79ccSDebojyoti Ghosh PetscErrorCode MatSetUp_KAIJ(Mat A) 133*49bd79ccSDebojyoti Ghosh { 134*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 135*49bd79ccSDebojyoti Ghosh SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateKAIJ() to create KAIJ matrices"); 136*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 137*49bd79ccSDebojyoti Ghosh } 138*49bd79ccSDebojyoti Ghosh 139*49bd79ccSDebojyoti Ghosh PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 140*49bd79ccSDebojyoti Ghosh { 141*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 142*49bd79ccSDebojyoti Ghosh Mat B; 143*49bd79ccSDebojyoti Ghosh 144*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 145*49bd79ccSDebojyoti Ghosh ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 146*49bd79ccSDebojyoti Ghosh ierr = MatView(B,viewer);CHKERRQ(ierr); 147*49bd79ccSDebojyoti Ghosh ierr = MatDestroy(&B);CHKERRQ(ierr); 148*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 149*49bd79ccSDebojyoti Ghosh } 150*49bd79ccSDebojyoti Ghosh 151*49bd79ccSDebojyoti Ghosh PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 152*49bd79ccSDebojyoti Ghosh { 153*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 154*49bd79ccSDebojyoti Ghosh Mat B; 155*49bd79ccSDebojyoti Ghosh 156*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 157*49bd79ccSDebojyoti Ghosh ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 158*49bd79ccSDebojyoti Ghosh ierr = MatView(B,viewer);CHKERRQ(ierr); 159*49bd79ccSDebojyoti Ghosh ierr = MatDestroy(&B);CHKERRQ(ierr); 160*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 161*49bd79ccSDebojyoti Ghosh } 162*49bd79ccSDebojyoti Ghosh 163*49bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 164*49bd79ccSDebojyoti Ghosh { 165*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 166*49bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 167*49bd79ccSDebojyoti Ghosh 168*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 169*49bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 170*49bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 171*49bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->A);CHKERRQ(ierr); 172*49bd79ccSDebojyoti Ghosh ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 173*49bd79ccSDebojyoti Ghosh ierr = VecDestroy(&b->w);CHKERRQ(ierr); 174*49bd79ccSDebojyoti Ghosh ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr); 175*49bd79ccSDebojyoti Ghosh ierr = PetscFree(A->data);CHKERRQ(ierr); 176*49bd79ccSDebojyoti Ghosh 177*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 178*49bd79ccSDebojyoti Ghosh } 179*49bd79ccSDebojyoti Ghosh 180*49bd79ccSDebojyoti Ghosh /*MC 181*49bd79ccSDebojyoti Ghosh MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 182*49bd79ccSDebojyoti Ghosh 183*49bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 184*49bd79ccSDebojyoti Ghosh 185*49bd79ccSDebojyoti Ghosh where 186*49bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 187*49bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 188*49bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 189*49bd79ccSDebojyoti Ghosh I is the identity matrix 190*49bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 191*49bd79ccSDebojyoti Ghosh 192*49bd79ccSDebojyoti Ghosh The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 193*49bd79ccSDebojyoti Ghosh S and T are always stored independently on all processes as a PetscScalar array in column-major format. 194*49bd79ccSDebojyoti Ghosh 195*49bd79ccSDebojyoti Ghosh Operations provided: 196*49bd79ccSDebojyoti Ghosh . MatMult 197*49bd79ccSDebojyoti Ghosh . MatMultAdd 198*49bd79ccSDebojyoti Ghosh . MatInvertBlockDiagonal 199*49bd79ccSDebojyoti Ghosh 200*49bd79ccSDebojyoti Ghosh Level: advanced 201*49bd79ccSDebojyoti Ghosh 202*49bd79ccSDebojyoti Ghosh .seealso: MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 203*49bd79ccSDebojyoti Ghosh M*/ 204*49bd79ccSDebojyoti Ghosh 205*49bd79ccSDebojyoti Ghosh PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 206*49bd79ccSDebojyoti Ghosh { 207*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 208*49bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b; 209*49bd79ccSDebojyoti Ghosh PetscMPIInt size; 210*49bd79ccSDebojyoti Ghosh 211*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 212*49bd79ccSDebojyoti Ghosh ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 213*49bd79ccSDebojyoti Ghosh A->data = (void*)b; 214*49bd79ccSDebojyoti Ghosh 215*49bd79ccSDebojyoti Ghosh ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 216*49bd79ccSDebojyoti Ghosh 217*49bd79ccSDebojyoti Ghosh A->ops->setup = MatSetUp_KAIJ; 218*49bd79ccSDebojyoti Ghosh 219*49bd79ccSDebojyoti Ghosh b->w = 0; 220*49bd79ccSDebojyoti Ghosh ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 221*49bd79ccSDebojyoti Ghosh if (size == 1) { 222*49bd79ccSDebojyoti Ghosh ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 223*49bd79ccSDebojyoti Ghosh } else { 224*49bd79ccSDebojyoti Ghosh ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 225*49bd79ccSDebojyoti Ghosh } 226*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 227*49bd79ccSDebojyoti Ghosh } 228*49bd79ccSDebojyoti Ghosh 229*49bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/ 230*49bd79ccSDebojyoti Ghosh 231*49bd79ccSDebojyoti Ghosh /* zz = yy + Axx */ 232*49bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz) 233*49bd79ccSDebojyoti Ghosh { 234*49bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 235*49bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 236*49bd79ccSDebojyoti Ghosh const PetscScalar *s = b->S, *t = b->T; 237*49bd79ccSDebojyoti Ghosh const PetscScalar *x,*v,*bx; 238*49bd79ccSDebojyoti Ghosh PetscScalar *y,*sums; 239*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 240*49bd79ccSDebojyoti Ghosh const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 241*49bd79ccSDebojyoti Ghosh PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 242*49bd79ccSDebojyoti Ghosh 243*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 244*49bd79ccSDebojyoti Ghosh 245*49bd79ccSDebojyoti Ghosh if (!yy) { 246*49bd79ccSDebojyoti Ghosh ierr = VecSet(zz,0.0);CHKERRQ(ierr); 247*49bd79ccSDebojyoti Ghosh } else { 248*49bd79ccSDebojyoti Ghosh ierr = VecCopy(yy,zz);CHKERRQ(ierr); 249*49bd79ccSDebojyoti Ghosh } 250*49bd79ccSDebojyoti Ghosh if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 251*49bd79ccSDebojyoti Ghosh 252*49bd79ccSDebojyoti Ghosh ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 253*49bd79ccSDebojyoti Ghosh ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 254*49bd79ccSDebojyoti Ghosh idx = a->j; 255*49bd79ccSDebojyoti Ghosh v = a->a; 256*49bd79ccSDebojyoti Ghosh ii = a->i; 257*49bd79ccSDebojyoti Ghosh 258*49bd79ccSDebojyoti Ghosh if (b->isTI) { 259*49bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 260*49bd79ccSDebojyoti Ghosh jrow = ii[i]; 261*49bd79ccSDebojyoti Ghosh n = ii[i+1] - jrow; 262*49bd79ccSDebojyoti Ghosh sums = y + p*i; 263*49bd79ccSDebojyoti Ghosh for (j=0; j<n; j++) { 264*49bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 265*49bd79ccSDebojyoti Ghosh sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 266*49bd79ccSDebojyoti Ghosh } 267*49bd79ccSDebojyoti Ghosh } 268*49bd79ccSDebojyoti Ghosh } 269*49bd79ccSDebojyoti Ghosh } else if (t) { 270*49bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 271*49bd79ccSDebojyoti Ghosh jrow = ii[i]; 272*49bd79ccSDebojyoti Ghosh n = ii[i+1] - jrow; 273*49bd79ccSDebojyoti Ghosh sums = y + p*i; 274*49bd79ccSDebojyoti Ghosh bx = x + q*i; 275*49bd79ccSDebojyoti Ghosh for (j=0; j<n; j++) { 276*49bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 277*49bd79ccSDebojyoti Ghosh for (l=0; l<q; l++) { 278*49bd79ccSDebojyoti Ghosh sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 279*49bd79ccSDebojyoti Ghosh } 280*49bd79ccSDebojyoti Ghosh } 281*49bd79ccSDebojyoti Ghosh } 282*49bd79ccSDebojyoti Ghosh } 283*49bd79ccSDebojyoti Ghosh } 284*49bd79ccSDebojyoti Ghosh if (s) { 285*49bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 286*49bd79ccSDebojyoti Ghosh jrow = ii[i]; 287*49bd79ccSDebojyoti Ghosh n = ii[i+1] - jrow; 288*49bd79ccSDebojyoti Ghosh sums = y + p*i; 289*49bd79ccSDebojyoti Ghosh bx = x + q*i; 290*49bd79ccSDebojyoti Ghosh if (i < b->AIJ->cmap->n) { 291*49bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 292*49bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 293*49bd79ccSDebojyoti Ghosh sums[k] += s[k+j*p]*bx[j]; 294*49bd79ccSDebojyoti Ghosh } 295*49bd79ccSDebojyoti Ghosh } 296*49bd79ccSDebojyoti Ghosh } 297*49bd79ccSDebojyoti Ghosh } 298*49bd79ccSDebojyoti Ghosh } 299*49bd79ccSDebojyoti Ghosh 300*49bd79ccSDebojyoti Ghosh ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 301*49bd79ccSDebojyoti Ghosh ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 302*49bd79ccSDebojyoti Ghosh ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 303*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 304*49bd79ccSDebojyoti Ghosh } 305*49bd79ccSDebojyoti Ghosh 306*49bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy) 307*49bd79ccSDebojyoti Ghosh { 308*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 309*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 310*49bd79ccSDebojyoti Ghosh ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 311*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 312*49bd79ccSDebojyoti Ghosh } 313*49bd79ccSDebojyoti Ghosh 314*49bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 315*49bd79ccSDebojyoti Ghosh { 316*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 317*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 318*49bd79ccSDebojyoti Ghosh ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr); 319*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 320*49bd79ccSDebojyoti Ghosh } 321*49bd79ccSDebojyoti Ghosh 322*49bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h> 323*49bd79ccSDebojyoti Ghosh 324*49bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values) 325*49bd79ccSDebojyoti Ghosh { 326*49bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 327*49bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 328*49bd79ccSDebojyoti Ghosh const PetscScalar *S = b->S; 329*49bd79ccSDebojyoti Ghosh const PetscScalar *T = b->T; 330*49bd79ccSDebojyoti Ghosh const PetscScalar *v = a->a; 331*49bd79ccSDebojyoti Ghosh const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 332*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 333*49bd79ccSDebojyoti Ghosh PetscInt i,j,*v_pivots,dof,dof2; 334*49bd79ccSDebojyoti Ghosh PetscScalar *diag,aval,*v_work; 335*49bd79ccSDebojyoti Ghosh 336*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 337*49bd79ccSDebojyoti Ghosh if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 338*49bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 339*49bd79ccSDebojyoti Ghosh SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 340*49bd79ccSDebojyoti Ghosh } 341*49bd79ccSDebojyoti Ghosh 342*49bd79ccSDebojyoti Ghosh dof = p; 343*49bd79ccSDebojyoti Ghosh dof2 = dof*dof; 344*49bd79ccSDebojyoti Ghosh 345*49bd79ccSDebojyoti Ghosh if (b->ibdiagvalid) { 346*49bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 347*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 348*49bd79ccSDebojyoti Ghosh } 349*49bd79ccSDebojyoti Ghosh if (!b->ibdiag) { 350*49bd79ccSDebojyoti Ghosh ierr = PetscMalloc(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 351*49bd79ccSDebojyoti Ghosh ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 352*49bd79ccSDebojyoti Ghosh } 353*49bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 354*49bd79ccSDebojyoti Ghosh diag = b->ibdiag; 355*49bd79ccSDebojyoti Ghosh 356*49bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 357*49bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 358*49bd79ccSDebojyoti Ghosh if (S) { 359*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 360*49bd79ccSDebojyoti Ghosh } else { 361*49bd79ccSDebojyoti Ghosh ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 362*49bd79ccSDebojyoti Ghosh } 363*49bd79ccSDebojyoti Ghosh if (b->isTI) { 364*49bd79ccSDebojyoti Ghosh aval = 0; 365*49bd79ccSDebojyoti Ghosh for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 366*49bd79ccSDebojyoti Ghosh for (j=0; j<dof; j++) diag[j+dof*j] += aval; 367*49bd79ccSDebojyoti Ghosh } else if (T) { 368*49bd79ccSDebojyoti Ghosh aval = 0; 369*49bd79ccSDebojyoti Ghosh for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 370*49bd79ccSDebojyoti Ghosh for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 371*49bd79ccSDebojyoti Ghosh } 372*49bd79ccSDebojyoti Ghosh ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 373*49bd79ccSDebojyoti Ghosh diag += dof2; 374*49bd79ccSDebojyoti Ghosh } 375*49bd79ccSDebojyoti Ghosh ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 376*49bd79ccSDebojyoti Ghosh 377*49bd79ccSDebojyoti Ghosh b->ibdiagvalid = PETSC_TRUE; 378*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 379*49bd79ccSDebojyoti Ghosh } 380*49bd79ccSDebojyoti Ghosh 381*49bd79ccSDebojyoti Ghosh static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 382*49bd79ccSDebojyoti Ghosh { 383*49bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 384*49bd79ccSDebojyoti Ghosh 385*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 386*49bd79ccSDebojyoti Ghosh *B = kaij->AIJ; 387*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 388*49bd79ccSDebojyoti Ghosh } 389*49bd79ccSDebojyoti Ghosh 390*49bd79ccSDebojyoti Ghosh PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 391*49bd79ccSDebojyoti Ghosh { 392*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 393*49bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 394*49bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 395*49bd79ccSDebojyoti Ghosh const PetscScalar *aa = a->a, *T = kaij->T, *v; 396*49bd79ccSDebojyoti Ghosh const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 397*49bd79ccSDebojyoti Ghosh const PetscScalar *b, *xb, *idiag; 398*49bd79ccSDebojyoti Ghosh PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 399*49bd79ccSDebojyoti Ghosh PetscInt i, j, k, i2, bs, bs2, nz; 400*49bd79ccSDebojyoti Ghosh 401*49bd79ccSDebojyoti Ghosh 402*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 403*49bd79ccSDebojyoti Ghosh its = its*lits; 404*49bd79ccSDebojyoti Ghosh if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 405*49bd79ccSDebojyoti Ghosh if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 406*49bd79ccSDebojyoti Ghosh if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 407*49bd79ccSDebojyoti Ghosh if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) 408*49bd79ccSDebojyoti Ghosh SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 409*49bd79ccSDebojyoti Ghosh if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 410*49bd79ccSDebojyoti Ghosh else {bs = p; bs2 = bs*bs; } 411*49bd79ccSDebojyoti Ghosh 412*49bd79ccSDebojyoti Ghosh if (!m) PetscFunctionReturn(0); 413*49bd79ccSDebojyoti Ghosh 414*49bd79ccSDebojyoti Ghosh if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); } 415*49bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 416*49bd79ccSDebojyoti Ghosh diag = a->diag; 417*49bd79ccSDebojyoti Ghosh 418*49bd79ccSDebojyoti Ghosh if (!kaij->sor.setup) { 419*49bd79ccSDebojyoti Ghosh ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 420*49bd79ccSDebojyoti Ghosh kaij->sor.setup = PETSC_TRUE; 421*49bd79ccSDebojyoti Ghosh } 422*49bd79ccSDebojyoti Ghosh y = kaij->sor.y; 423*49bd79ccSDebojyoti Ghosh w = kaij->sor.w; 424*49bd79ccSDebojyoti Ghosh work = kaij->sor.work; 425*49bd79ccSDebojyoti Ghosh t = kaij->sor.t; 426*49bd79ccSDebojyoti Ghosh arr = kaij->sor.arr; 427*49bd79ccSDebojyoti Ghosh 428*49bd79ccSDebojyoti Ghosh ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 429*49bd79ccSDebojyoti Ghosh ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 430*49bd79ccSDebojyoti Ghosh 431*49bd79ccSDebojyoti Ghosh if (flag & SOR_ZERO_INITIAL_GUESS) { 432*49bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 433*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 434*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 435*49bd79ccSDebojyoti Ghosh i2 = bs; 436*49bd79ccSDebojyoti Ghosh idiag += bs2; 437*49bd79ccSDebojyoti Ghosh for (i=1; i<m; i++) { 438*49bd79ccSDebojyoti Ghosh v = aa + ai[i]; 439*49bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 440*49bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 441*49bd79ccSDebojyoti Ghosh 442*49bd79ccSDebojyoti Ghosh if (T) { /* b - T (Arow * x) */ 443*49bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] = 0; 444*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 445*49bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 446*49bd79ccSDebojyoti Ghosh } 447*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 448*49bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 449*49bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 450*49bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 451*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 452*49bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 453*49bd79ccSDebojyoti Ghosh } 454*49bd79ccSDebojyoti Ghosh } else { 455*49bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 456*49bd79ccSDebojyoti Ghosh } 457*49bd79ccSDebojyoti Ghosh 458*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 459*49bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 460*49bd79ccSDebojyoti Ghosh 461*49bd79ccSDebojyoti Ghosh idiag += bs2; 462*49bd79ccSDebojyoti Ghosh i2 += bs; 463*49bd79ccSDebojyoti Ghosh } 464*49bd79ccSDebojyoti Ghosh /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 465*49bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 466*49bd79ccSDebojyoti Ghosh xb = t; 467*49bd79ccSDebojyoti Ghosh } else xb = b; 468*49bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 469*49bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag+bs2*(m-1); 470*49bd79ccSDebojyoti Ghosh i2 = bs * (m-1); 471*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 472*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 473*49bd79ccSDebojyoti Ghosh i2 -= bs; 474*49bd79ccSDebojyoti Ghosh idiag -= bs2; 475*49bd79ccSDebojyoti Ghosh for (i=m-2; i>=0; i--) { 476*49bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1 ; 477*49bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 478*49bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 479*49bd79ccSDebojyoti Ghosh 480*49bd79ccSDebojyoti Ghosh if (T) { /* FIXME: This branch untested */ 481*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 482*49bd79ccSDebojyoti Ghosh /* copy all rows of x that are needed into contiguous space */ 483*49bd79ccSDebojyoti Ghosh workt = work; 484*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 485*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 486*49bd79ccSDebojyoti Ghosh workt += bs; 487*49bd79ccSDebojyoti Ghosh } 488*49bd79ccSDebojyoti Ghosh arrt = arr; 489*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 490*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 491*49bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 492*49bd79ccSDebojyoti Ghosh arrt += bs2; 493*49bd79ccSDebojyoti Ghosh } 494*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 495*49bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 496*49bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] = t[i2+k]; 497*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 498*49bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 499*49bd79ccSDebojyoti Ghosh } 500*49bd79ccSDebojyoti Ghosh } 501*49bd79ccSDebojyoti Ghosh 502*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 503*49bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 504*49bd79ccSDebojyoti Ghosh 505*49bd79ccSDebojyoti Ghosh idiag -= bs2; 506*49bd79ccSDebojyoti Ghosh i2 -= bs; 507*49bd79ccSDebojyoti Ghosh } 508*49bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 509*49bd79ccSDebojyoti Ghosh } 510*49bd79ccSDebojyoti Ghosh its--; 511*49bd79ccSDebojyoti Ghosh } 512*49bd79ccSDebojyoti Ghosh while (its--) { /* FIXME: This branch not updated */ 513*49bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 514*49bd79ccSDebojyoti Ghosh i2 = 0; 515*49bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 516*49bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 517*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 518*49bd79ccSDebojyoti Ghosh 519*49bd79ccSDebojyoti Ghosh v = aa + ai[i]; 520*49bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 521*49bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 522*49bd79ccSDebojyoti Ghosh workt = work; 523*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 524*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 525*49bd79ccSDebojyoti Ghosh workt += bs; 526*49bd79ccSDebojyoti Ghosh } 527*49bd79ccSDebojyoti Ghosh arrt = arr; 528*49bd79ccSDebojyoti Ghosh if (T) { 529*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 530*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 531*49bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 532*49bd79ccSDebojyoti Ghosh arrt += bs2; 533*49bd79ccSDebojyoti Ghosh } 534*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 535*49bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 536*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 537*49bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 538*49bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 539*49bd79ccSDebojyoti Ghosh arrt += bs2; 540*49bd79ccSDebojyoti Ghosh } 541*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 542*49bd79ccSDebojyoti Ghosh } 543*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 544*49bd79ccSDebojyoti Ghosh 545*49bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 546*49bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 547*49bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 548*49bd79ccSDebojyoti Ghosh workt = work; 549*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 550*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 551*49bd79ccSDebojyoti Ghosh workt += bs; 552*49bd79ccSDebojyoti Ghosh } 553*49bd79ccSDebojyoti Ghosh arrt = arr; 554*49bd79ccSDebojyoti Ghosh if (T) { 555*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 556*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 557*49bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 558*49bd79ccSDebojyoti Ghosh arrt += bs2; 559*49bd79ccSDebojyoti Ghosh } 560*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 561*49bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 562*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 563*49bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 564*49bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 565*49bd79ccSDebojyoti Ghosh arrt += bs2; 566*49bd79ccSDebojyoti Ghosh } 567*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 568*49bd79ccSDebojyoti Ghosh } 569*49bd79ccSDebojyoti Ghosh 570*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 571*49bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 572*49bd79ccSDebojyoti Ghosh 573*49bd79ccSDebojyoti Ghosh idiag += bs2; 574*49bd79ccSDebojyoti Ghosh i2 += bs; 575*49bd79ccSDebojyoti Ghosh } 576*49bd79ccSDebojyoti Ghosh xb = t; 577*49bd79ccSDebojyoti Ghosh } 578*49bd79ccSDebojyoti Ghosh else xb = b; 579*49bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 580*49bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag+bs2*(m-1); 581*49bd79ccSDebojyoti Ghosh i2 = bs * (m-1); 582*49bd79ccSDebojyoti Ghosh if (xb == b) { 583*49bd79ccSDebojyoti Ghosh for (i=m-1; i>=0; i--) { 584*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 585*49bd79ccSDebojyoti Ghosh 586*49bd79ccSDebojyoti Ghosh v = aa + ai[i]; 587*49bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 588*49bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 589*49bd79ccSDebojyoti Ghosh workt = work; 590*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 591*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 592*49bd79ccSDebojyoti Ghosh workt += bs; 593*49bd79ccSDebojyoti Ghosh } 594*49bd79ccSDebojyoti Ghosh arrt = arr; 595*49bd79ccSDebojyoti Ghosh if (T) { 596*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 597*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 598*49bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 599*49bd79ccSDebojyoti Ghosh arrt += bs2; 600*49bd79ccSDebojyoti Ghosh } 601*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 602*49bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 603*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 604*49bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 605*49bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 606*49bd79ccSDebojyoti Ghosh arrt += bs2; 607*49bd79ccSDebojyoti Ghosh } 608*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 609*49bd79ccSDebojyoti Ghosh } 610*49bd79ccSDebojyoti Ghosh 611*49bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 612*49bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 613*49bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 614*49bd79ccSDebojyoti Ghosh workt = work; 615*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 616*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 617*49bd79ccSDebojyoti Ghosh workt += bs; 618*49bd79ccSDebojyoti Ghosh } 619*49bd79ccSDebojyoti Ghosh arrt = arr; 620*49bd79ccSDebojyoti Ghosh if (T) { 621*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 622*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 623*49bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 624*49bd79ccSDebojyoti Ghosh arrt += bs2; 625*49bd79ccSDebojyoti Ghosh } 626*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 627*49bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 628*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 629*49bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 630*49bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 631*49bd79ccSDebojyoti Ghosh arrt += bs2; 632*49bd79ccSDebojyoti Ghosh } 633*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 634*49bd79ccSDebojyoti Ghosh } 635*49bd79ccSDebojyoti Ghosh 636*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 637*49bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 638*49bd79ccSDebojyoti Ghosh } 639*49bd79ccSDebojyoti Ghosh } else { 640*49bd79ccSDebojyoti Ghosh for (i=m-1; i>=0; i--) { 641*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 642*49bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 643*49bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 644*49bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 645*49bd79ccSDebojyoti Ghosh workt = work; 646*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 647*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 648*49bd79ccSDebojyoti Ghosh workt += bs; 649*49bd79ccSDebojyoti Ghosh } 650*49bd79ccSDebojyoti Ghosh arrt = arr; 651*49bd79ccSDebojyoti Ghosh if (T) { 652*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 653*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 654*49bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 655*49bd79ccSDebojyoti Ghosh arrt += bs2; 656*49bd79ccSDebojyoti Ghosh } 657*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 658*49bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 659*49bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 660*49bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 661*49bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 662*49bd79ccSDebojyoti Ghosh arrt += bs2; 663*49bd79ccSDebojyoti Ghosh } 664*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 665*49bd79ccSDebojyoti Ghosh } 666*49bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 667*49bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 668*49bd79ccSDebojyoti Ghosh } 669*49bd79ccSDebojyoti Ghosh idiag -= bs2; 670*49bd79ccSDebojyoti Ghosh i2 -= bs; 671*49bd79ccSDebojyoti Ghosh } 672*49bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 673*49bd79ccSDebojyoti Ghosh } 674*49bd79ccSDebojyoti Ghosh } 675*49bd79ccSDebojyoti Ghosh 676*49bd79ccSDebojyoti Ghosh ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 677*49bd79ccSDebojyoti Ghosh ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 678*49bd79ccSDebojyoti Ghosh 679*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 680*49bd79ccSDebojyoti Ghosh } 681*49bd79ccSDebojyoti Ghosh 682*49bd79ccSDebojyoti Ghosh /*===================================================================================*/ 683*49bd79ccSDebojyoti Ghosh 684*49bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz) 685*49bd79ccSDebojyoti Ghosh { 686*49bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 687*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 688*49bd79ccSDebojyoti Ghosh 689*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 690*49bd79ccSDebojyoti Ghosh if (!yy) { 691*49bd79ccSDebojyoti Ghosh ierr = VecSet(zz,0.0);CHKERRQ(ierr); 692*49bd79ccSDebojyoti Ghosh } else { 693*49bd79ccSDebojyoti Ghosh ierr = VecCopy(yy,zz);CHKERRQ(ierr); 694*49bd79ccSDebojyoti Ghosh } 695*49bd79ccSDebojyoti Ghosh /* start the scatter */ 696*49bd79ccSDebojyoti Ghosh ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 697*49bd79ccSDebojyoti Ghosh ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 698*49bd79ccSDebojyoti Ghosh ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 699*49bd79ccSDebojyoti Ghosh ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 700*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 701*49bd79ccSDebojyoti Ghosh } 702*49bd79ccSDebojyoti Ghosh 703*49bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy) 704*49bd79ccSDebojyoti Ghosh { 705*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 706*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 707*49bd79ccSDebojyoti Ghosh ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 708*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 709*49bd79ccSDebojyoti Ghosh } 710*49bd79ccSDebojyoti Ghosh 711*49bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz) 712*49bd79ccSDebojyoti Ghosh { 713*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 714*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 715*49bd79ccSDebojyoti Ghosh ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr); 716*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 717*49bd79ccSDebojyoti Ghosh } 718*49bd79ccSDebojyoti Ghosh 719*49bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values) 720*49bd79ccSDebojyoti Ghosh { 721*49bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 722*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 723*49bd79ccSDebojyoti Ghosh 724*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 725*49bd79ccSDebojyoti Ghosh ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 726*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 727*49bd79ccSDebojyoti Ghosh } 728*49bd79ccSDebojyoti Ghosh 729*49bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/ 730*49bd79ccSDebojyoti Ghosh 731*49bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 732*49bd79ccSDebojyoti Ghosh { 733*49bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 734*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr,diag; 735*49bd79ccSDebojyoti Ghosh PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 736*49bd79ccSDebojyoti Ghosh PetscScalar *vaij,*v,*S=b->S,*T=b->T; 737*49bd79ccSDebojyoti Ghosh 738*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 739*49bd79ccSDebojyoti Ghosh if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 740*49bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 741*49bd79ccSDebojyoti Ghosh if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 742*49bd79ccSDebojyoti Ghosh 743*49bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 744*49bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 745*49bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 746*49bd79ccSDebojyoti Ghosh if (values) *values = NULL; 747*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 748*49bd79ccSDebojyoti Ghosh } 749*49bd79ccSDebojyoti Ghosh 750*49bd79ccSDebojyoti Ghosh if (T || b->isTI) { 751*49bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 752*49bd79ccSDebojyoti Ghosh diag = PETSC_FALSE; 753*49bd79ccSDebojyoti Ghosh c = nzaij; 754*49bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 755*49bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 756*49bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 757*49bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 758*49bd79ccSDebojyoti Ghosh c = i; 759*49bd79ccSDebojyoti Ghosh } 760*49bd79ccSDebojyoti Ghosh } 761*49bd79ccSDebojyoti Ghosh } else nzaij = c = 0; 762*49bd79ccSDebojyoti Ghosh 763*49bd79ccSDebojyoti Ghosh /* calculate size of row */ 764*49bd79ccSDebojyoti Ghosh nz = 0; 765*49bd79ccSDebojyoti Ghosh if (S) nz += q; 766*49bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 767*49bd79ccSDebojyoti Ghosh 768*49bd79ccSDebojyoti Ghosh if (cols || values) { 769*49bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 770*49bd79ccSDebojyoti Ghosh if (b->isTI) { 771*49bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 772*49bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 773*49bd79ccSDebojyoti Ghosh idx[i*q+j] = colsaij[i]*q+j; 774*49bd79ccSDebojyoti Ghosh v[i*q+j] = (j==s ? vaij[i] : 0); 775*49bd79ccSDebojyoti Ghosh } 776*49bd79ccSDebojyoti Ghosh } 777*49bd79ccSDebojyoti Ghosh } else if (T) { 778*49bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 779*49bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 780*49bd79ccSDebojyoti Ghosh idx[i*q+j] = colsaij[i]*q+j; 781*49bd79ccSDebojyoti Ghosh v[i*q+j] = vaij[i]*T[s+j*p]; 782*49bd79ccSDebojyoti Ghosh } 783*49bd79ccSDebojyoti Ghosh } 784*49bd79ccSDebojyoti Ghosh } 785*49bd79ccSDebojyoti Ghosh if (S) { 786*49bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 787*49bd79ccSDebojyoti Ghosh idx[c*q+j] = r*q+j; 788*49bd79ccSDebojyoti Ghosh v[c*q+j] += S[s+j*p]; 789*49bd79ccSDebojyoti Ghosh } 790*49bd79ccSDebojyoti Ghosh } 791*49bd79ccSDebojyoti Ghosh } 792*49bd79ccSDebojyoti Ghosh 793*49bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 794*49bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 795*49bd79ccSDebojyoti Ghosh if (values) *values = v; 796*49bd79ccSDebojyoti Ghosh 797*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 798*49bd79ccSDebojyoti Ghosh } 799*49bd79ccSDebojyoti Ghosh 800*49bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 801*49bd79ccSDebojyoti Ghosh { 802*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 803*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 804*49bd79ccSDebojyoti Ghosh ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 805*49bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 806*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 807*49bd79ccSDebojyoti Ghosh } 808*49bd79ccSDebojyoti Ghosh 809*49bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 810*49bd79ccSDebojyoti Ghosh { 811*49bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 812*49bd79ccSDebojyoti Ghosh Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 813*49bd79ccSDebojyoti Ghosh Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 814*49bd79ccSDebojyoti Ghosh Mat AIJ = b->A; 815*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 816*49bd79ccSDebojyoti Ghosh const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 817*49bd79ccSDebojyoti Ghosh PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 818*49bd79ccSDebojyoti Ghosh PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 819*49bd79ccSDebojyoti Ghosh PetscBool diag; 820*49bd79ccSDebojyoti Ghosh 821*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 822*49bd79ccSDebojyoti Ghosh if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 823*49bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 824*49bd79ccSDebojyoti Ghosh if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 825*49bd79ccSDebojyoti Ghosh lrow = row - rstart; 826*49bd79ccSDebojyoti Ghosh 827*49bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 828*49bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 829*49bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 830*49bd79ccSDebojyoti Ghosh if (values) *values = NULL; 831*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 832*49bd79ccSDebojyoti Ghosh } 833*49bd79ccSDebojyoti Ghosh 834*49bd79ccSDebojyoti Ghosh r = lrow/p; 835*49bd79ccSDebojyoti Ghosh s = lrow%p; 836*49bd79ccSDebojyoti Ghosh 837*49bd79ccSDebojyoti Ghosh if (T || b->isTI) { 838*49bd79ccSDebojyoti Ghosh ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 839*49bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 840*49bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 841*49bd79ccSDebojyoti Ghosh 842*49bd79ccSDebojyoti Ghosh diag = PETSC_FALSE; 843*49bd79ccSDebojyoti Ghosh c = ncolsaij + ncolsoaij; 844*49bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 845*49bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 846*49bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 847*49bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 848*49bd79ccSDebojyoti Ghosh c = i; 849*49bd79ccSDebojyoti Ghosh } 850*49bd79ccSDebojyoti Ghosh } 851*49bd79ccSDebojyoti Ghosh } else c = 0; 852*49bd79ccSDebojyoti Ghosh 853*49bd79ccSDebojyoti Ghosh /* calculate size of row */ 854*49bd79ccSDebojyoti Ghosh nz = 0; 855*49bd79ccSDebojyoti Ghosh if (S) nz += q; 856*49bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 857*49bd79ccSDebojyoti Ghosh 858*49bd79ccSDebojyoti Ghosh if (cols || values) { 859*49bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 860*49bd79ccSDebojyoti Ghosh if (b->isTI) { 861*49bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 862*49bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 863*49bd79ccSDebojyoti Ghosh idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 864*49bd79ccSDebojyoti Ghosh v[i*q+j] = (j==s ? vals[i] : 0.0); 865*49bd79ccSDebojyoti Ghosh } 866*49bd79ccSDebojyoti Ghosh } 867*49bd79ccSDebojyoti Ghosh for (i=0; i<ncolsoaij; i++) { 868*49bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 869*49bd79ccSDebojyoti Ghosh idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 870*49bd79ccSDebojyoti Ghosh v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 871*49bd79ccSDebojyoti Ghosh } 872*49bd79ccSDebojyoti Ghosh } 873*49bd79ccSDebojyoti Ghosh } else if (T) { 874*49bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 875*49bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 876*49bd79ccSDebojyoti Ghosh idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 877*49bd79ccSDebojyoti Ghosh v[i*q+j] = vals[i]*T[s+j*p]; 878*49bd79ccSDebojyoti Ghosh } 879*49bd79ccSDebojyoti Ghosh } 880*49bd79ccSDebojyoti Ghosh for (i=0; i<ncolsoaij; i++) { 881*49bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 882*49bd79ccSDebojyoti Ghosh idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 883*49bd79ccSDebojyoti Ghosh v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 884*49bd79ccSDebojyoti Ghosh } 885*49bd79ccSDebojyoti Ghosh } 886*49bd79ccSDebojyoti Ghosh } 887*49bd79ccSDebojyoti Ghosh if (S) { 888*49bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 889*49bd79ccSDebojyoti Ghosh idx[c*q+j] = (r+rstart/p)*q+j; 890*49bd79ccSDebojyoti Ghosh v[c*q+j] += S[s+j*p]; 891*49bd79ccSDebojyoti Ghosh } 892*49bd79ccSDebojyoti Ghosh } 893*49bd79ccSDebojyoti Ghosh } 894*49bd79ccSDebojyoti Ghosh 895*49bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 896*49bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 897*49bd79ccSDebojyoti Ghosh if (values) *values = v; 898*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 899*49bd79ccSDebojyoti Ghosh } 900*49bd79ccSDebojyoti Ghosh 901*49bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 902*49bd79ccSDebojyoti Ghosh { 903*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 904*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 905*49bd79ccSDebojyoti Ghosh ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 906*49bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 907*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 908*49bd79ccSDebojyoti Ghosh } 909*49bd79ccSDebojyoti Ghosh 910*49bd79ccSDebojyoti Ghosh PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 911*49bd79ccSDebojyoti Ghosh { 912*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 913*49bd79ccSDebojyoti Ghosh Mat A; 914*49bd79ccSDebojyoti Ghosh 915*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 916*49bd79ccSDebojyoti Ghosh ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 917*49bd79ccSDebojyoti Ghosh ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 918*49bd79ccSDebojyoti Ghosh ierr = MatDestroy(&A);CHKERRQ(ierr); 919*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 920*49bd79ccSDebojyoti Ghosh } 921*49bd79ccSDebojyoti Ghosh 922*49bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */ 923*49bd79ccSDebojyoti Ghosh /*@C 924*49bd79ccSDebojyoti Ghosh MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 925*49bd79ccSDebojyoti Ghosh 926*49bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 927*49bd79ccSDebojyoti Ghosh 928*49bd79ccSDebojyoti Ghosh where 929*49bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 930*49bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 931*49bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 932*49bd79ccSDebojyoti Ghosh I is the identity matrix 933*49bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 934*49bd79ccSDebojyoti Ghosh 935*49bd79ccSDebojyoti Ghosh The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 936*49bd79ccSDebojyoti Ghosh S is always stored independently on all processes as a PetscScalar array in column-major format. 937*49bd79ccSDebojyoti Ghosh 938*49bd79ccSDebojyoti Ghosh Collective 939*49bd79ccSDebojyoti Ghosh 940*49bd79ccSDebojyoti Ghosh Input Parameters: 941*49bd79ccSDebojyoti Ghosh + A - the AIJ matrix 942*49bd79ccSDebojyoti Ghosh . S - the S matrix, stored as a PetscScalar array (column-major) 943*49bd79ccSDebojyoti Ghosh . T - the T matrix, stored as a PetscScalar array (column-major) 944*49bd79ccSDebojyoti Ghosh . p - number of rows in S and T 945*49bd79ccSDebojyoti Ghosh - q - number of columns in S and T 946*49bd79ccSDebojyoti Ghosh 947*49bd79ccSDebojyoti Ghosh Output Parameter: 948*49bd79ccSDebojyoti Ghosh . kaij - the new KAIJ matrix 949*49bd79ccSDebojyoti Ghosh 950*49bd79ccSDebojyoti Ghosh Operations provided: 951*49bd79ccSDebojyoti Ghosh + MatMult 952*49bd79ccSDebojyoti Ghosh . MatMultAdd 953*49bd79ccSDebojyoti Ghosh . MatInvertBlockDiagonal 954*49bd79ccSDebojyoti Ghosh - MatView 955*49bd79ccSDebojyoti Ghosh 956*49bd79ccSDebojyoti Ghosh Level: advanced 957*49bd79ccSDebojyoti Ghosh 958*49bd79ccSDebojyoti Ghosh .seealso: MatKAIJGetAIJ(), MATKAIJ 959*49bd79ccSDebojyoti Ghosh @*/ 960*49bd79ccSDebojyoti Ghosh PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 961*49bd79ccSDebojyoti Ghosh { 962*49bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 963*49bd79ccSDebojyoti Ghosh PetscMPIInt size; 964*49bd79ccSDebojyoti Ghosh PetscInt n,i,j; 965*49bd79ccSDebojyoti Ghosh Mat B; 966*49bd79ccSDebojyoti Ghosh PetscBool isTI = PETSC_FALSE; 967*49bd79ccSDebojyoti Ghosh 968*49bd79ccSDebojyoti Ghosh PetscFunctionBegin; 969*49bd79ccSDebojyoti Ghosh 970*49bd79ccSDebojyoti Ghosh /* check if T is an identity matrix */ 971*49bd79ccSDebojyoti Ghosh if (T && (p == q)) { 972*49bd79ccSDebojyoti Ghosh isTI = PETSC_TRUE; 973*49bd79ccSDebojyoti Ghosh for (i=0; i<p; i++) { 974*49bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 975*49bd79ccSDebojyoti Ghosh if (i == j) { 976*49bd79ccSDebojyoti Ghosh /* diagonal term must be 1 */ 977*49bd79ccSDebojyoti Ghosh if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 978*49bd79ccSDebojyoti Ghosh } else { 979*49bd79ccSDebojyoti Ghosh /* off-diagonal term must be 0 */ 980*49bd79ccSDebojyoti Ghosh if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 981*49bd79ccSDebojyoti Ghosh } 982*49bd79ccSDebojyoti Ghosh } 983*49bd79ccSDebojyoti Ghosh } 984*49bd79ccSDebojyoti Ghosh } 985*49bd79ccSDebojyoti Ghosh 986*49bd79ccSDebojyoti Ghosh ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 987*49bd79ccSDebojyoti Ghosh 988*49bd79ccSDebojyoti Ghosh ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 989*49bd79ccSDebojyoti Ghosh ierr = MatSetSizes(B,p*A->rmap->n,q*A->cmap->n,p*A->rmap->N,q*A->cmap->N);CHKERRQ(ierr); 990*49bd79ccSDebojyoti Ghosh ierr = PetscLayoutSetBlockSize(B->rmap,p);CHKERRQ(ierr); 991*49bd79ccSDebojyoti Ghosh ierr = PetscLayoutSetBlockSize(B->cmap,q);CHKERRQ(ierr); 992*49bd79ccSDebojyoti Ghosh ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 993*49bd79ccSDebojyoti Ghosh ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 994*49bd79ccSDebojyoti Ghosh 995*49bd79ccSDebojyoti Ghosh B->assembled = PETSC_TRUE; 996*49bd79ccSDebojyoti Ghosh ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 997*49bd79ccSDebojyoti Ghosh 998*49bd79ccSDebojyoti Ghosh if (size == 1) { 999*49bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b; 1000*49bd79ccSDebojyoti Ghosh 1001*49bd79ccSDebojyoti Ghosh ierr = MatSetType(B,MATSEQKAIJ);CHKERRQ(ierr); 1002*49bd79ccSDebojyoti Ghosh 1003*49bd79ccSDebojyoti Ghosh B->ops->setup = NULL; 1004*49bd79ccSDebojyoti Ghosh B->ops->destroy = MatDestroy_SeqKAIJ; 1005*49bd79ccSDebojyoti Ghosh B->ops->view = MatView_SeqKAIJ; 1006*49bd79ccSDebojyoti Ghosh b = (Mat_SeqKAIJ*)B->data; 1007*49bd79ccSDebojyoti Ghosh b->p = p; 1008*49bd79ccSDebojyoti Ghosh b->q = q; 1009*49bd79ccSDebojyoti Ghosh b->AIJ = A; 1010*49bd79ccSDebojyoti Ghosh b->isTI = isTI; 1011*49bd79ccSDebojyoti Ghosh if (S) { 1012*49bd79ccSDebojyoti Ghosh ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr); 1013*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 1014*49bd79ccSDebojyoti Ghosh } else b->S = NULL; 1015*49bd79ccSDebojyoti Ghosh if (T && (!isTI)) { 1016*49bd79ccSDebojyoti Ghosh ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr); 1017*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 1018*49bd79ccSDebojyoti Ghosh } else b->T = NULL; 1019*49bd79ccSDebojyoti Ghosh 1020*49bd79ccSDebojyoti Ghosh B->ops->mult = MatMult_SeqKAIJ_N; 1021*49bd79ccSDebojyoti Ghosh B->ops->multadd = MatMultAdd_SeqKAIJ_N; 1022*49bd79ccSDebojyoti Ghosh B->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N; 1023*49bd79ccSDebojyoti Ghosh B->ops->getrow = MatGetRow_SeqKAIJ; 1024*49bd79ccSDebojyoti Ghosh B->ops->restorerow = MatRestoreRow_SeqKAIJ; 1025*49bd79ccSDebojyoti Ghosh B->ops->sor = MatSOR_SeqKAIJ; 1026*49bd79ccSDebojyoti Ghosh 1027*49bd79ccSDebojyoti Ghosh /* 1028*49bd79ccSDebojyoti Ghosh This is commented out since MATKAIJ will use the basic MatConvert (which uses MatGetRow()). 1029*49bd79ccSDebojyoti Ghosh ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqkaij_seqaij_C",MatConvert_SeqKAIJ_SeqAIJ);CHKERRQ(ierr); 1030*49bd79ccSDebojyoti Ghosh */ 1031*49bd79ccSDebojyoti Ghosh 1032*49bd79ccSDebojyoti Ghosh } else { 1033*49bd79ccSDebojyoti Ghosh Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 1034*49bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b; 1035*49bd79ccSDebojyoti Ghosh IS from,to; 1036*49bd79ccSDebojyoti Ghosh Vec gvec; 1037*49bd79ccSDebojyoti Ghosh 1038*49bd79ccSDebojyoti Ghosh ierr = MatSetType(B,MATMPIKAIJ);CHKERRQ(ierr); 1039*49bd79ccSDebojyoti Ghosh 1040*49bd79ccSDebojyoti Ghosh B->ops->setup = NULL; 1041*49bd79ccSDebojyoti Ghosh B->ops->destroy = MatDestroy_MPIKAIJ; 1042*49bd79ccSDebojyoti Ghosh B->ops->view = MatView_MPIKAIJ; 1043*49bd79ccSDebojyoti Ghosh 1044*49bd79ccSDebojyoti Ghosh b = (Mat_MPIKAIJ*)B->data; 1045*49bd79ccSDebojyoti Ghosh b->p = p; 1046*49bd79ccSDebojyoti Ghosh b->q = q; 1047*49bd79ccSDebojyoti Ghosh b->A = A; 1048*49bd79ccSDebojyoti Ghosh b->isTI = isTI; 1049*49bd79ccSDebojyoti Ghosh if (S) { 1050*49bd79ccSDebojyoti Ghosh ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr); 1051*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 1052*49bd79ccSDebojyoti Ghosh } else b->S = NULL; 1053*49bd79ccSDebojyoti Ghosh if (T &&(!isTI)) { 1054*49bd79ccSDebojyoti Ghosh ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr); 1055*49bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 1056*49bd79ccSDebojyoti Ghosh } else b->T = NULL; 1057*49bd79ccSDebojyoti Ghosh 1058*49bd79ccSDebojyoti Ghosh ierr = MatCreateKAIJ(mpiaij->A,p,q,S ,T,&b->AIJ);CHKERRQ(ierr); 1059*49bd79ccSDebojyoti Ghosh ierr = MatCreateKAIJ(mpiaij->B,p,q,NULL,T,&b->OAIJ);CHKERRQ(ierr); 1060*49bd79ccSDebojyoti Ghosh 1061*49bd79ccSDebojyoti Ghosh ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1062*49bd79ccSDebojyoti Ghosh ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 1063*49bd79ccSDebojyoti Ghosh ierr = VecSetSizes(b->w,n*q,n*q);CHKERRQ(ierr); 1064*49bd79ccSDebojyoti Ghosh ierr = VecSetBlockSize(b->w,q);CHKERRQ(ierr); 1065*49bd79ccSDebojyoti Ghosh ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 1066*49bd79ccSDebojyoti Ghosh 1067*49bd79ccSDebojyoti Ghosh /* create two temporary Index sets for build scatter gather */ 1068*49bd79ccSDebojyoti Ghosh ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 1069*49bd79ccSDebojyoti Ghosh ierr = ISCreateStride(PETSC_COMM_SELF,n*q,0,1,&to);CHKERRQ(ierr); 1070*49bd79ccSDebojyoti Ghosh 1071*49bd79ccSDebojyoti Ghosh /* create temporary global vector to generate scatter context */ 1072*49bd79ccSDebojyoti Ghosh ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),q,q*A->cmap->n,q*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 1073*49bd79ccSDebojyoti Ghosh 1074*49bd79ccSDebojyoti Ghosh /* generate the scatter context */ 1075*49bd79ccSDebojyoti Ghosh ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1076*49bd79ccSDebojyoti Ghosh 1077*49bd79ccSDebojyoti Ghosh ierr = ISDestroy(&from);CHKERRQ(ierr); 1078*49bd79ccSDebojyoti Ghosh ierr = ISDestroy(&to);CHKERRQ(ierr); 1079*49bd79ccSDebojyoti Ghosh ierr = VecDestroy(&gvec);CHKERRQ(ierr); 1080*49bd79ccSDebojyoti Ghosh 1081*49bd79ccSDebojyoti Ghosh B->ops->mult = MatMult_MPIKAIJ_dof; 1082*49bd79ccSDebojyoti Ghosh B->ops->multadd = MatMultAdd_MPIKAIJ_dof; 1083*49bd79ccSDebojyoti Ghosh B->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof; 1084*49bd79ccSDebojyoti Ghosh B->ops->getrow = MatGetRow_MPIKAIJ; 1085*49bd79ccSDebojyoti Ghosh B->ops->restorerow = MatRestoreRow_MPIKAIJ; 1086*49bd79ccSDebojyoti Ghosh ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1087*49bd79ccSDebojyoti Ghosh } 1088*49bd79ccSDebojyoti Ghosh B->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1089*49bd79ccSDebojyoti Ghosh B->assembled = PETSC_TRUE; 1090*49bd79ccSDebojyoti Ghosh ierr = MatSetUp(B);CHKERRQ(ierr); 1091*49bd79ccSDebojyoti Ghosh *kaij = B; 1092*49bd79ccSDebojyoti Ghosh ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 1093*49bd79ccSDebojyoti Ghosh 1094*49bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 1095*49bd79ccSDebojyoti Ghosh } 1096