149bd79ccSDebojyoti Ghosh 249bd79ccSDebojyoti Ghosh /* 349bd79ccSDebojyoti Ghosh Defines the basic matrix operations for the KAIJ matrix storage format. 449bd79ccSDebojyoti Ghosh This format is used to evaluate matrices of the form: 549bd79ccSDebojyoti Ghosh 649bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 749bd79ccSDebojyoti Ghosh 849bd79ccSDebojyoti Ghosh where 949bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 1049bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 1149bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 1249bd79ccSDebojyoti Ghosh I is the identity matrix 1349bd79ccSDebojyoti Ghosh 1449bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 1549bd79ccSDebojyoti Ghosh 1649bd79ccSDebojyoti Ghosh We provide: 1749bd79ccSDebojyoti Ghosh MatMult() 1849bd79ccSDebojyoti Ghosh MatMultAdd() 1949bd79ccSDebojyoti Ghosh MatInvertBlockDiagonal() 2049bd79ccSDebojyoti Ghosh and 2149bd79ccSDebojyoti Ghosh MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 2249bd79ccSDebojyoti Ghosh 2349bd79ccSDebojyoti Ghosh This single directory handles both the sequential and parallel codes 2449bd79ccSDebojyoti Ghosh */ 2549bd79ccSDebojyoti Ghosh 2649bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 2749bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h> 2849bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h> 2949bd79ccSDebojyoti Ghosh 3049bd79ccSDebojyoti Ghosh /*@C 3149bd79ccSDebojyoti Ghosh MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 3249bd79ccSDebojyoti Ghosh 3349bd79ccSDebojyoti Ghosh Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 3449bd79ccSDebojyoti Ghosh 3549bd79ccSDebojyoti Ghosh Input Parameter: 3649bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 3749bd79ccSDebojyoti Ghosh 3849bd79ccSDebojyoti Ghosh Output Parameter: 3949bd79ccSDebojyoti Ghosh . B - the AIJ matrix 4049bd79ccSDebojyoti Ghosh 4149bd79ccSDebojyoti Ghosh Level: advanced 4249bd79ccSDebojyoti Ghosh 4349bd79ccSDebojyoti Ghosh Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 4449bd79ccSDebojyoti Ghosh 4549bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ() 4649bd79ccSDebojyoti Ghosh @*/ 4749bd79ccSDebojyoti Ghosh PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 4849bd79ccSDebojyoti Ghosh { 4949bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 5049bd79ccSDebojyoti Ghosh PetscBool ismpikaij,isseqkaij; 5149bd79ccSDebojyoti Ghosh 5249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 5349bd79ccSDebojyoti Ghosh ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 5449bd79ccSDebojyoti Ghosh ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 5549bd79ccSDebojyoti Ghosh if (ismpikaij) { 5649bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 5749bd79ccSDebojyoti Ghosh 5849bd79ccSDebojyoti Ghosh *B = b->A; 5949bd79ccSDebojyoti Ghosh } else if (isseqkaij) { 6049bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 6149bd79ccSDebojyoti Ghosh 6249bd79ccSDebojyoti Ghosh *B = b->AIJ; 6349bd79ccSDebojyoti Ghosh } else { 6449bd79ccSDebojyoti Ghosh *B = A; 6549bd79ccSDebojyoti Ghosh } 6649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 6749bd79ccSDebojyoti Ghosh } 6849bd79ccSDebojyoti Ghosh 6949bd79ccSDebojyoti Ghosh /*@C 7049bd79ccSDebojyoti Ghosh MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 7149bd79ccSDebojyoti Ghosh 7249bd79ccSDebojyoti Ghosh Not Collective, the entire S is stored and returned independently on all processes 7349bd79ccSDebojyoti Ghosh 7449bd79ccSDebojyoti Ghosh Input Parameter: 7549bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 7649bd79ccSDebojyoti Ghosh 7749bd79ccSDebojyoti Ghosh Output Parameter: 7849bd79ccSDebojyoti Ghosh . S - the S matrix, in form of a scalar array in column-major format 7949bd79ccSDebojyoti Ghosh 80*31a97b9aSRichard Tran Mills Notes: 81*31a97b9aSRichard Tran Mills The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 82*31a97b9aSRichard Tran Mills The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S. 83*31a97b9aSRichard Tran Mills 8449bd79ccSDebojyoti Ghosh Level: advanced 8549bd79ccSDebojyoti Ghosh 86*31a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes() 8749bd79ccSDebojyoti Ghosh @*/ 8849bd79ccSDebojyoti Ghosh PetscErrorCode MatKAIJGetS(Mat A,const PetscScalar **S) 8949bd79ccSDebojyoti Ghosh { 9049bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 9149bd79ccSDebojyoti Ghosh PetscFunctionBegin; 9249bd79ccSDebojyoti Ghosh *S = b->S; 9349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 9449bd79ccSDebojyoti Ghosh } 9549bd79ccSDebojyoti Ghosh 9649bd79ccSDebojyoti Ghosh /*@C 97*31a97b9aSRichard Tran Mills MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 9849bd79ccSDebojyoti Ghosh 9949bd79ccSDebojyoti Ghosh Not Collective, the entire T is stored and returned independently on all processes 10049bd79ccSDebojyoti Ghosh 10149bd79ccSDebojyoti Ghosh Input Parameter: 10249bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 10349bd79ccSDebojyoti Ghosh 10449bd79ccSDebojyoti Ghosh Output Parameter: 10549bd79ccSDebojyoti Ghosh . T - the T matrix, in form of a scalar array in column-major format 10649bd79ccSDebojyoti Ghosh 107*31a97b9aSRichard Tran Mills Notes: 108*31a97b9aSRichard Tran Mills The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 109*31a97b9aSRichard Tran Mills The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T. 110*31a97b9aSRichard Tran Mills 11149bd79ccSDebojyoti Ghosh Level: advanced 11249bd79ccSDebojyoti Ghosh 113*31a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes() 11449bd79ccSDebojyoti Ghosh @*/ 11549bd79ccSDebojyoti Ghosh PetscErrorCode MatKAIJGetT(Mat A,const PetscScalar **T) 11649bd79ccSDebojyoti Ghosh { 11749bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 11849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 11949bd79ccSDebojyoti Ghosh *T = b->T; 12049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 12149bd79ccSDebojyoti Ghosh } 12249bd79ccSDebojyoti Ghosh 12349bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 12449bd79ccSDebojyoti Ghosh { 12549bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 12649bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 12749bd79ccSDebojyoti Ghosh 12849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 12949bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 13049bd79ccSDebojyoti Ghosh ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr); 13149bd79ccSDebojyoti Ghosh ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 13249bd79ccSDebojyoti Ghosh ierr = PetscFree(A->data);CHKERRQ(ierr); 13349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 13449bd79ccSDebojyoti Ghosh } 13549bd79ccSDebojyoti Ghosh 13649bd79ccSDebojyoti Ghosh PetscErrorCode MatSetUp_KAIJ(Mat A) 13749bd79ccSDebojyoti Ghosh { 13849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 13949bd79ccSDebojyoti Ghosh SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateKAIJ() to create KAIJ matrices"); 14049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 14149bd79ccSDebojyoti Ghosh } 14249bd79ccSDebojyoti Ghosh 14349bd79ccSDebojyoti Ghosh PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 14449bd79ccSDebojyoti Ghosh { 14549bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 14649bd79ccSDebojyoti Ghosh Mat B; 14749bd79ccSDebojyoti Ghosh 14849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 14949bd79ccSDebojyoti Ghosh ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 15049bd79ccSDebojyoti Ghosh ierr = MatView(B,viewer);CHKERRQ(ierr); 15149bd79ccSDebojyoti Ghosh ierr = MatDestroy(&B);CHKERRQ(ierr); 15249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 15349bd79ccSDebojyoti Ghosh } 15449bd79ccSDebojyoti Ghosh 15549bd79ccSDebojyoti Ghosh PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 15649bd79ccSDebojyoti Ghosh { 15749bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 15849bd79ccSDebojyoti Ghosh Mat B; 15949bd79ccSDebojyoti Ghosh 16049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 16149bd79ccSDebojyoti Ghosh ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 16249bd79ccSDebojyoti Ghosh ierr = MatView(B,viewer);CHKERRQ(ierr); 16349bd79ccSDebojyoti Ghosh ierr = MatDestroy(&B);CHKERRQ(ierr); 16449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 16549bd79ccSDebojyoti Ghosh } 16649bd79ccSDebojyoti Ghosh 16749bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 16849bd79ccSDebojyoti Ghosh { 16949bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 17049bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 17149bd79ccSDebojyoti Ghosh 17249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 17349bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 17449bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 17549bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->A);CHKERRQ(ierr); 17649bd79ccSDebojyoti Ghosh ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 17749bd79ccSDebojyoti Ghosh ierr = VecDestroy(&b->w);CHKERRQ(ierr); 17849bd79ccSDebojyoti Ghosh ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr); 17949bd79ccSDebojyoti Ghosh ierr = PetscFree(A->data);CHKERRQ(ierr); 18049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 18149bd79ccSDebojyoti Ghosh } 18249bd79ccSDebojyoti Ghosh 18349bd79ccSDebojyoti Ghosh /*MC 18449bd79ccSDebojyoti Ghosh MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 18549bd79ccSDebojyoti Ghosh 18649bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 18749bd79ccSDebojyoti Ghosh 18849bd79ccSDebojyoti Ghosh where 18949bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 19049bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 19149bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 19249bd79ccSDebojyoti Ghosh I is the identity matrix 19349bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 19449bd79ccSDebojyoti Ghosh 19549bd79ccSDebojyoti Ghosh The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 19649bd79ccSDebojyoti Ghosh S and T are always stored independently on all processes as a PetscScalar array in column-major format. 19749bd79ccSDebojyoti Ghosh 19849bd79ccSDebojyoti Ghosh Operations provided: 19949bd79ccSDebojyoti Ghosh . MatMult 20049bd79ccSDebojyoti Ghosh . MatMultAdd 20149bd79ccSDebojyoti Ghosh . MatInvertBlockDiagonal 20249bd79ccSDebojyoti Ghosh 20349bd79ccSDebojyoti Ghosh Level: advanced 20449bd79ccSDebojyoti Ghosh 20549bd79ccSDebojyoti Ghosh .seealso: MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 20649bd79ccSDebojyoti Ghosh M*/ 20749bd79ccSDebojyoti Ghosh 20849bd79ccSDebojyoti Ghosh PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 20949bd79ccSDebojyoti Ghosh { 21049bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 21149bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b; 21249bd79ccSDebojyoti Ghosh PetscMPIInt size; 21349bd79ccSDebojyoti Ghosh 21449bd79ccSDebojyoti Ghosh PetscFunctionBegin; 21549bd79ccSDebojyoti Ghosh ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 21649bd79ccSDebojyoti Ghosh A->data = (void*)b; 21749bd79ccSDebojyoti Ghosh 21849bd79ccSDebojyoti Ghosh ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 21949bd79ccSDebojyoti Ghosh 22049bd79ccSDebojyoti Ghosh A->ops->setup = MatSetUp_KAIJ; 22149bd79ccSDebojyoti Ghosh 22249bd79ccSDebojyoti Ghosh b->w = 0; 22349bd79ccSDebojyoti Ghosh ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 22449bd79ccSDebojyoti Ghosh if (size == 1) { 22549bd79ccSDebojyoti Ghosh ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 22649bd79ccSDebojyoti Ghosh } else { 22749bd79ccSDebojyoti Ghosh ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 22849bd79ccSDebojyoti Ghosh } 22949bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 23049bd79ccSDebojyoti Ghosh } 23149bd79ccSDebojyoti Ghosh 23249bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/ 23349bd79ccSDebojyoti Ghosh 23449bd79ccSDebojyoti Ghosh /* zz = yy + Axx */ 23549bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz) 23649bd79ccSDebojyoti Ghosh { 23749bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 23849bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 23949bd79ccSDebojyoti Ghosh const PetscScalar *s = b->S, *t = b->T; 24049bd79ccSDebojyoti Ghosh const PetscScalar *x,*v,*bx; 24149bd79ccSDebojyoti Ghosh PetscScalar *y,*sums; 24249bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 24349bd79ccSDebojyoti Ghosh const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 24449bd79ccSDebojyoti Ghosh PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 24549bd79ccSDebojyoti Ghosh 24649bd79ccSDebojyoti Ghosh PetscFunctionBegin; 24749bd79ccSDebojyoti Ghosh if (!yy) { 24849bd79ccSDebojyoti Ghosh ierr = VecSet(zz,0.0);CHKERRQ(ierr); 24949bd79ccSDebojyoti Ghosh } else { 25049bd79ccSDebojyoti Ghosh ierr = VecCopy(yy,zz);CHKERRQ(ierr); 25149bd79ccSDebojyoti Ghosh } 25249bd79ccSDebojyoti Ghosh if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 25349bd79ccSDebojyoti Ghosh 25449bd79ccSDebojyoti Ghosh ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 25549bd79ccSDebojyoti Ghosh ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 25649bd79ccSDebojyoti Ghosh idx = a->j; 25749bd79ccSDebojyoti Ghosh v = a->a; 25849bd79ccSDebojyoti Ghosh ii = a->i; 25949bd79ccSDebojyoti Ghosh 26049bd79ccSDebojyoti Ghosh if (b->isTI) { 26149bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 26249bd79ccSDebojyoti Ghosh jrow = ii[i]; 26349bd79ccSDebojyoti Ghosh n = ii[i+1] - jrow; 26449bd79ccSDebojyoti Ghosh sums = y + p*i; 26549bd79ccSDebojyoti Ghosh for (j=0; j<n; j++) { 26649bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 26749bd79ccSDebojyoti Ghosh sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 26849bd79ccSDebojyoti Ghosh } 26949bd79ccSDebojyoti Ghosh } 27049bd79ccSDebojyoti Ghosh } 27149bd79ccSDebojyoti Ghosh } else if (t) { 27249bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 27349bd79ccSDebojyoti Ghosh jrow = ii[i]; 27449bd79ccSDebojyoti Ghosh n = ii[i+1] - jrow; 27549bd79ccSDebojyoti Ghosh sums = y + p*i; 27649bd79ccSDebojyoti Ghosh bx = x + q*i; 27749bd79ccSDebojyoti Ghosh for (j=0; j<n; j++) { 27849bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 27949bd79ccSDebojyoti Ghosh for (l=0; l<q; l++) { 28049bd79ccSDebojyoti Ghosh sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 28149bd79ccSDebojyoti Ghosh } 28249bd79ccSDebojyoti Ghosh } 28349bd79ccSDebojyoti Ghosh } 28449bd79ccSDebojyoti Ghosh } 28549bd79ccSDebojyoti Ghosh } 28649bd79ccSDebojyoti Ghosh if (s) { 28749bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 28849bd79ccSDebojyoti Ghosh jrow = ii[i]; 28949bd79ccSDebojyoti Ghosh n = ii[i+1] - jrow; 29049bd79ccSDebojyoti Ghosh sums = y + p*i; 29149bd79ccSDebojyoti Ghosh bx = x + q*i; 29249bd79ccSDebojyoti Ghosh if (i < b->AIJ->cmap->n) { 29349bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 29449bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 29549bd79ccSDebojyoti Ghosh sums[k] += s[k+j*p]*bx[j]; 29649bd79ccSDebojyoti Ghosh } 29749bd79ccSDebojyoti Ghosh } 29849bd79ccSDebojyoti Ghosh } 29949bd79ccSDebojyoti Ghosh } 30049bd79ccSDebojyoti Ghosh } 30149bd79ccSDebojyoti Ghosh 30249bd79ccSDebojyoti Ghosh ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 30349bd79ccSDebojyoti Ghosh ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 30449bd79ccSDebojyoti Ghosh ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 30549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 30649bd79ccSDebojyoti Ghosh } 30749bd79ccSDebojyoti Ghosh 30849bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy) 30949bd79ccSDebojyoti Ghosh { 31049bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 31149bd79ccSDebojyoti Ghosh PetscFunctionBegin; 31249bd79ccSDebojyoti Ghosh ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 31349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 31449bd79ccSDebojyoti Ghosh } 31549bd79ccSDebojyoti Ghosh 31649bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 31749bd79ccSDebojyoti Ghosh { 31849bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 31949bd79ccSDebojyoti Ghosh PetscFunctionBegin; 32049bd79ccSDebojyoti Ghosh ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr); 32149bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 32249bd79ccSDebojyoti Ghosh } 32349bd79ccSDebojyoti Ghosh 32449bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h> 32549bd79ccSDebojyoti Ghosh 32649bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values) 32749bd79ccSDebojyoti Ghosh { 32849bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 32949bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 33049bd79ccSDebojyoti Ghosh const PetscScalar *S = b->S; 33149bd79ccSDebojyoti Ghosh const PetscScalar *T = b->T; 33249bd79ccSDebojyoti Ghosh const PetscScalar *v = a->a; 33349bd79ccSDebojyoti Ghosh const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 33449bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 33549bd79ccSDebojyoti Ghosh PetscInt i,j,*v_pivots,dof,dof2; 33649bd79ccSDebojyoti Ghosh PetscScalar *diag,aval,*v_work; 33749bd79ccSDebojyoti Ghosh 33849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 33949bd79ccSDebojyoti Ghosh if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 340*31a97b9aSRichard Tran Mills if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 34149bd79ccSDebojyoti Ghosh 34249bd79ccSDebojyoti Ghosh dof = p; 34349bd79ccSDebojyoti Ghosh dof2 = dof*dof; 34449bd79ccSDebojyoti Ghosh 34549bd79ccSDebojyoti Ghosh if (b->ibdiagvalid) { 34649bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 34749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 34849bd79ccSDebojyoti Ghosh } 34949bd79ccSDebojyoti Ghosh if (!b->ibdiag) { 35049bd79ccSDebojyoti Ghosh ierr = PetscMalloc(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 35149bd79ccSDebojyoti Ghosh ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 35249bd79ccSDebojyoti Ghosh } 35349bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 35449bd79ccSDebojyoti Ghosh diag = b->ibdiag; 35549bd79ccSDebojyoti Ghosh 35649bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 35749bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 35849bd79ccSDebojyoti Ghosh if (S) { 35949bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 36049bd79ccSDebojyoti Ghosh } else { 36149bd79ccSDebojyoti Ghosh ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 36249bd79ccSDebojyoti Ghosh } 36349bd79ccSDebojyoti Ghosh if (b->isTI) { 36449bd79ccSDebojyoti Ghosh aval = 0; 36549bd79ccSDebojyoti Ghosh for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 36649bd79ccSDebojyoti Ghosh for (j=0; j<dof; j++) diag[j+dof*j] += aval; 36749bd79ccSDebojyoti Ghosh } else if (T) { 36849bd79ccSDebojyoti Ghosh aval = 0; 36949bd79ccSDebojyoti Ghosh for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 37049bd79ccSDebojyoti Ghosh for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 37149bd79ccSDebojyoti Ghosh } 37249bd79ccSDebojyoti Ghosh ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 37349bd79ccSDebojyoti Ghosh diag += dof2; 37449bd79ccSDebojyoti Ghosh } 37549bd79ccSDebojyoti Ghosh ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 37649bd79ccSDebojyoti Ghosh 37749bd79ccSDebojyoti Ghosh b->ibdiagvalid = PETSC_TRUE; 37849bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 37949bd79ccSDebojyoti Ghosh } 38049bd79ccSDebojyoti Ghosh 38149bd79ccSDebojyoti Ghosh static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 38249bd79ccSDebojyoti Ghosh { 38349bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 38449bd79ccSDebojyoti Ghosh 38549bd79ccSDebojyoti Ghosh PetscFunctionBegin; 38649bd79ccSDebojyoti Ghosh *B = kaij->AIJ; 38749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 38849bd79ccSDebojyoti Ghosh } 38949bd79ccSDebojyoti Ghosh 39049bd79ccSDebojyoti Ghosh PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 39149bd79ccSDebojyoti Ghosh { 39249bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 39349bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 39449bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 39549bd79ccSDebojyoti Ghosh const PetscScalar *aa = a->a, *T = kaij->T, *v; 39649bd79ccSDebojyoti Ghosh const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 39749bd79ccSDebojyoti Ghosh const PetscScalar *b, *xb, *idiag; 39849bd79ccSDebojyoti Ghosh PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 39949bd79ccSDebojyoti Ghosh PetscInt i, j, k, i2, bs, bs2, nz; 40049bd79ccSDebojyoti Ghosh 40149bd79ccSDebojyoti Ghosh 40249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 40349bd79ccSDebojyoti Ghosh its = its*lits; 40449bd79ccSDebojyoti Ghosh if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 40549bd79ccSDebojyoti Ghosh if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 40649bd79ccSDebojyoti Ghosh if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 407*31a97b9aSRichard Tran Mills if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 40849bd79ccSDebojyoti Ghosh if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 40949bd79ccSDebojyoti Ghosh else {bs = p; bs2 = bs*bs; } 41049bd79ccSDebojyoti Ghosh 41149bd79ccSDebojyoti Ghosh if (!m) PetscFunctionReturn(0); 41249bd79ccSDebojyoti Ghosh 41349bd79ccSDebojyoti Ghosh if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); } 41449bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 41549bd79ccSDebojyoti Ghosh diag = a->diag; 41649bd79ccSDebojyoti Ghosh 41749bd79ccSDebojyoti Ghosh if (!kaij->sor.setup) { 41849bd79ccSDebojyoti Ghosh ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 41949bd79ccSDebojyoti Ghosh kaij->sor.setup = PETSC_TRUE; 42049bd79ccSDebojyoti Ghosh } 42149bd79ccSDebojyoti Ghosh y = kaij->sor.y; 42249bd79ccSDebojyoti Ghosh w = kaij->sor.w; 42349bd79ccSDebojyoti Ghosh work = kaij->sor.work; 42449bd79ccSDebojyoti Ghosh t = kaij->sor.t; 42549bd79ccSDebojyoti Ghosh arr = kaij->sor.arr; 42649bd79ccSDebojyoti Ghosh 42749bd79ccSDebojyoti Ghosh ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 42849bd79ccSDebojyoti Ghosh ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 42949bd79ccSDebojyoti Ghosh 43049bd79ccSDebojyoti Ghosh if (flag & SOR_ZERO_INITIAL_GUESS) { 43149bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 43249bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 43349bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 43449bd79ccSDebojyoti Ghosh i2 = bs; 43549bd79ccSDebojyoti Ghosh idiag += bs2; 43649bd79ccSDebojyoti Ghosh for (i=1; i<m; i++) { 43749bd79ccSDebojyoti Ghosh v = aa + ai[i]; 43849bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 43949bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 44049bd79ccSDebojyoti Ghosh 44149bd79ccSDebojyoti Ghosh if (T) { /* b - T (Arow * x) */ 44249bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] = 0; 44349bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 44449bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 44549bd79ccSDebojyoti Ghosh } 44649bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 44749bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 44849bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 44949bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 45049bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 45149bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 45249bd79ccSDebojyoti Ghosh } 45349bd79ccSDebojyoti Ghosh } else { 45449bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 45549bd79ccSDebojyoti Ghosh } 45649bd79ccSDebojyoti Ghosh 45749bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 45849bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 45949bd79ccSDebojyoti Ghosh 46049bd79ccSDebojyoti Ghosh idiag += bs2; 46149bd79ccSDebojyoti Ghosh i2 += bs; 46249bd79ccSDebojyoti Ghosh } 46349bd79ccSDebojyoti Ghosh /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 46449bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 46549bd79ccSDebojyoti Ghosh xb = t; 46649bd79ccSDebojyoti Ghosh } else xb = b; 46749bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 46849bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag+bs2*(m-1); 46949bd79ccSDebojyoti Ghosh i2 = bs * (m-1); 47049bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 47149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 47249bd79ccSDebojyoti Ghosh i2 -= bs; 47349bd79ccSDebojyoti Ghosh idiag -= bs2; 47449bd79ccSDebojyoti Ghosh for (i=m-2; i>=0; i--) { 47549bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1 ; 47649bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 47749bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 47849bd79ccSDebojyoti Ghosh 47949bd79ccSDebojyoti Ghosh if (T) { /* FIXME: This branch untested */ 48049bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 48149bd79ccSDebojyoti Ghosh /* copy all rows of x that are needed into contiguous space */ 48249bd79ccSDebojyoti Ghosh workt = work; 48349bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 48449bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 48549bd79ccSDebojyoti Ghosh workt += bs; 48649bd79ccSDebojyoti Ghosh } 48749bd79ccSDebojyoti Ghosh arrt = arr; 48849bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 48949bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 49049bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 49149bd79ccSDebojyoti Ghosh arrt += bs2; 49249bd79ccSDebojyoti Ghosh } 49349bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 49449bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 49549bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] = t[i2+k]; 49649bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 49749bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 49849bd79ccSDebojyoti Ghosh } 49949bd79ccSDebojyoti Ghosh } 50049bd79ccSDebojyoti Ghosh 50149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 50249bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 50349bd79ccSDebojyoti Ghosh 50449bd79ccSDebojyoti Ghosh idiag -= bs2; 50549bd79ccSDebojyoti Ghosh i2 -= bs; 50649bd79ccSDebojyoti Ghosh } 50749bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 50849bd79ccSDebojyoti Ghosh } 50949bd79ccSDebojyoti Ghosh its--; 51049bd79ccSDebojyoti Ghosh } 51149bd79ccSDebojyoti Ghosh while (its--) { /* FIXME: This branch not updated */ 51249bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 51349bd79ccSDebojyoti Ghosh i2 = 0; 51449bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 51549bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 51649bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 51749bd79ccSDebojyoti Ghosh 51849bd79ccSDebojyoti Ghosh v = aa + ai[i]; 51949bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 52049bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 52149bd79ccSDebojyoti Ghosh workt = work; 52249bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 52349bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 52449bd79ccSDebojyoti Ghosh workt += bs; 52549bd79ccSDebojyoti Ghosh } 52649bd79ccSDebojyoti Ghosh arrt = arr; 52749bd79ccSDebojyoti Ghosh if (T) { 52849bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 52949bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 53049bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 53149bd79ccSDebojyoti Ghosh arrt += bs2; 53249bd79ccSDebojyoti Ghosh } 53349bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 53449bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 53549bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 53649bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 53749bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 53849bd79ccSDebojyoti Ghosh arrt += bs2; 53949bd79ccSDebojyoti Ghosh } 54049bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 54149bd79ccSDebojyoti Ghosh } 54249bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 54349bd79ccSDebojyoti Ghosh 54449bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 54549bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 54649bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 54749bd79ccSDebojyoti Ghosh workt = work; 54849bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 54949bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 55049bd79ccSDebojyoti Ghosh workt += bs; 55149bd79ccSDebojyoti Ghosh } 55249bd79ccSDebojyoti Ghosh arrt = arr; 55349bd79ccSDebojyoti Ghosh if (T) { 55449bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 55549bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 55649bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 55749bd79ccSDebojyoti Ghosh arrt += bs2; 55849bd79ccSDebojyoti Ghosh } 55949bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 56049bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 56149bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 56249bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 56349bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 56449bd79ccSDebojyoti Ghosh arrt += bs2; 56549bd79ccSDebojyoti Ghosh } 56649bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 56749bd79ccSDebojyoti Ghosh } 56849bd79ccSDebojyoti Ghosh 56949bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 57049bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 57149bd79ccSDebojyoti Ghosh 57249bd79ccSDebojyoti Ghosh idiag += bs2; 57349bd79ccSDebojyoti Ghosh i2 += bs; 57449bd79ccSDebojyoti Ghosh } 57549bd79ccSDebojyoti Ghosh xb = t; 57649bd79ccSDebojyoti Ghosh } 57749bd79ccSDebojyoti Ghosh else xb = b; 57849bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 57949bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag+bs2*(m-1); 58049bd79ccSDebojyoti Ghosh i2 = bs * (m-1); 58149bd79ccSDebojyoti Ghosh if (xb == b) { 58249bd79ccSDebojyoti Ghosh for (i=m-1; i>=0; i--) { 58349bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 58449bd79ccSDebojyoti Ghosh 58549bd79ccSDebojyoti Ghosh v = aa + ai[i]; 58649bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 58749bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 58849bd79ccSDebojyoti Ghosh workt = work; 58949bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 59049bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 59149bd79ccSDebojyoti Ghosh workt += bs; 59249bd79ccSDebojyoti Ghosh } 59349bd79ccSDebojyoti Ghosh arrt = arr; 59449bd79ccSDebojyoti Ghosh if (T) { 59549bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 59649bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 59749bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 59849bd79ccSDebojyoti Ghosh arrt += bs2; 59949bd79ccSDebojyoti Ghosh } 60049bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 60149bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 60249bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 60349bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 60449bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 60549bd79ccSDebojyoti Ghosh arrt += bs2; 60649bd79ccSDebojyoti Ghosh } 60749bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 60849bd79ccSDebojyoti Ghosh } 60949bd79ccSDebojyoti Ghosh 61049bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 61149bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 61249bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 61349bd79ccSDebojyoti Ghosh workt = work; 61449bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 61549bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 61649bd79ccSDebojyoti Ghosh workt += bs; 61749bd79ccSDebojyoti Ghosh } 61849bd79ccSDebojyoti Ghosh arrt = arr; 61949bd79ccSDebojyoti Ghosh if (T) { 62049bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 62149bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 62249bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 62349bd79ccSDebojyoti Ghosh arrt += bs2; 62449bd79ccSDebojyoti Ghosh } 62549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 62649bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 62749bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 62849bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 62949bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 63049bd79ccSDebojyoti Ghosh arrt += bs2; 63149bd79ccSDebojyoti Ghosh } 63249bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 63349bd79ccSDebojyoti Ghosh } 63449bd79ccSDebojyoti Ghosh 63549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 63649bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 63749bd79ccSDebojyoti Ghosh } 63849bd79ccSDebojyoti Ghosh } else { 63949bd79ccSDebojyoti Ghosh for (i=m-1; i>=0; i--) { 64049bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 64149bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 64249bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 64349bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 64449bd79ccSDebojyoti Ghosh workt = work; 64549bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 64649bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 64749bd79ccSDebojyoti Ghosh workt += bs; 64849bd79ccSDebojyoti Ghosh } 64949bd79ccSDebojyoti Ghosh arrt = arr; 65049bd79ccSDebojyoti Ghosh if (T) { 65149bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 65249bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 65349bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 65449bd79ccSDebojyoti Ghosh arrt += bs2; 65549bd79ccSDebojyoti Ghosh } 65649bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 65749bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 65849bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 65949bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 66049bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 66149bd79ccSDebojyoti Ghosh arrt += bs2; 66249bd79ccSDebojyoti Ghosh } 66349bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 66449bd79ccSDebojyoti Ghosh } 66549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 66649bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 66749bd79ccSDebojyoti Ghosh } 66849bd79ccSDebojyoti Ghosh idiag -= bs2; 66949bd79ccSDebojyoti Ghosh i2 -= bs; 67049bd79ccSDebojyoti Ghosh } 67149bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 67249bd79ccSDebojyoti Ghosh } 67349bd79ccSDebojyoti Ghosh } 67449bd79ccSDebojyoti Ghosh 67549bd79ccSDebojyoti Ghosh ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 67649bd79ccSDebojyoti Ghosh ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 67749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 67849bd79ccSDebojyoti Ghosh } 67949bd79ccSDebojyoti Ghosh 68049bd79ccSDebojyoti Ghosh /*===================================================================================*/ 68149bd79ccSDebojyoti Ghosh 68249bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz) 68349bd79ccSDebojyoti Ghosh { 68449bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 68549bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 68649bd79ccSDebojyoti Ghosh 68749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 68849bd79ccSDebojyoti Ghosh if (!yy) { 68949bd79ccSDebojyoti Ghosh ierr = VecSet(zz,0.0);CHKERRQ(ierr); 69049bd79ccSDebojyoti Ghosh } else { 69149bd79ccSDebojyoti Ghosh ierr = VecCopy(yy,zz);CHKERRQ(ierr); 69249bd79ccSDebojyoti Ghosh } 69349bd79ccSDebojyoti Ghosh /* start the scatter */ 69449bd79ccSDebojyoti Ghosh ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 69549bd79ccSDebojyoti Ghosh ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 69649bd79ccSDebojyoti Ghosh ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 69749bd79ccSDebojyoti Ghosh ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 69849bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 69949bd79ccSDebojyoti Ghosh } 70049bd79ccSDebojyoti Ghosh 70149bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy) 70249bd79ccSDebojyoti Ghosh { 70349bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 70449bd79ccSDebojyoti Ghosh PetscFunctionBegin; 70549bd79ccSDebojyoti Ghosh ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 70649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 70749bd79ccSDebojyoti Ghosh } 70849bd79ccSDebojyoti Ghosh 70949bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz) 71049bd79ccSDebojyoti Ghosh { 71149bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 71249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 71349bd79ccSDebojyoti Ghosh ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr); 71449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 71549bd79ccSDebojyoti Ghosh } 71649bd79ccSDebojyoti Ghosh 71749bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values) 71849bd79ccSDebojyoti Ghosh { 71949bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 72049bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 72149bd79ccSDebojyoti Ghosh 72249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 72349bd79ccSDebojyoti Ghosh ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 72449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 72549bd79ccSDebojyoti Ghosh } 72649bd79ccSDebojyoti Ghosh 72749bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/ 72849bd79ccSDebojyoti Ghosh 72949bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 73049bd79ccSDebojyoti Ghosh { 73149bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 73249bd79ccSDebojyoti Ghosh PetscErrorCode ierr,diag; 73349bd79ccSDebojyoti Ghosh PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 73449bd79ccSDebojyoti Ghosh PetscScalar *vaij,*v,*S=b->S,*T=b->T; 73549bd79ccSDebojyoti Ghosh 73649bd79ccSDebojyoti Ghosh PetscFunctionBegin; 73749bd79ccSDebojyoti Ghosh if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 73849bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 73949bd79ccSDebojyoti Ghosh if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 74049bd79ccSDebojyoti Ghosh 74149bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 74249bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 74349bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 74449bd79ccSDebojyoti Ghosh if (values) *values = NULL; 74549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 74649bd79ccSDebojyoti Ghosh } 74749bd79ccSDebojyoti Ghosh 74849bd79ccSDebojyoti Ghosh if (T || b->isTI) { 74949bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 75049bd79ccSDebojyoti Ghosh diag = PETSC_FALSE; 75149bd79ccSDebojyoti Ghosh c = nzaij; 75249bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 75349bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 75449bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 75549bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 75649bd79ccSDebojyoti Ghosh c = i; 75749bd79ccSDebojyoti Ghosh } 75849bd79ccSDebojyoti Ghosh } 75949bd79ccSDebojyoti Ghosh } else nzaij = c = 0; 76049bd79ccSDebojyoti Ghosh 76149bd79ccSDebojyoti Ghosh /* calculate size of row */ 76249bd79ccSDebojyoti Ghosh nz = 0; 76349bd79ccSDebojyoti Ghosh if (S) nz += q; 76449bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 76549bd79ccSDebojyoti Ghosh 76649bd79ccSDebojyoti Ghosh if (cols || values) { 76749bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 76849bd79ccSDebojyoti Ghosh if (b->isTI) { 76949bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 77049bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 77149bd79ccSDebojyoti Ghosh idx[i*q+j] = colsaij[i]*q+j; 77249bd79ccSDebojyoti Ghosh v[i*q+j] = (j==s ? vaij[i] : 0); 77349bd79ccSDebojyoti Ghosh } 77449bd79ccSDebojyoti Ghosh } 77549bd79ccSDebojyoti Ghosh } else if (T) { 77649bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 77749bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 77849bd79ccSDebojyoti Ghosh idx[i*q+j] = colsaij[i]*q+j; 77949bd79ccSDebojyoti Ghosh v[i*q+j] = vaij[i]*T[s+j*p]; 78049bd79ccSDebojyoti Ghosh } 78149bd79ccSDebojyoti Ghosh } 78249bd79ccSDebojyoti Ghosh } 78349bd79ccSDebojyoti Ghosh if (S) { 78449bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 78549bd79ccSDebojyoti Ghosh idx[c*q+j] = r*q+j; 78649bd79ccSDebojyoti Ghosh v[c*q+j] += S[s+j*p]; 78749bd79ccSDebojyoti Ghosh } 78849bd79ccSDebojyoti Ghosh } 78949bd79ccSDebojyoti Ghosh } 79049bd79ccSDebojyoti Ghosh 79149bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 79249bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 79349bd79ccSDebojyoti Ghosh if (values) *values = v; 79449bd79ccSDebojyoti Ghosh 79549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 79649bd79ccSDebojyoti Ghosh } 79749bd79ccSDebojyoti Ghosh 79849bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 79949bd79ccSDebojyoti Ghosh { 80049bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 80149bd79ccSDebojyoti Ghosh PetscFunctionBegin; 80249bd79ccSDebojyoti Ghosh ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 80349bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 80449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 80549bd79ccSDebojyoti Ghosh } 80649bd79ccSDebojyoti Ghosh 80749bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 80849bd79ccSDebojyoti Ghosh { 80949bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 81049bd79ccSDebojyoti Ghosh Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 81149bd79ccSDebojyoti Ghosh Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 81249bd79ccSDebojyoti Ghosh Mat AIJ = b->A; 81349bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 81449bd79ccSDebojyoti Ghosh const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 81549bd79ccSDebojyoti Ghosh PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 81649bd79ccSDebojyoti Ghosh PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 81749bd79ccSDebojyoti Ghosh PetscBool diag; 81849bd79ccSDebojyoti Ghosh 81949bd79ccSDebojyoti Ghosh PetscFunctionBegin; 82049bd79ccSDebojyoti Ghosh if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 82149bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 82249bd79ccSDebojyoti Ghosh if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 82349bd79ccSDebojyoti Ghosh lrow = row - rstart; 82449bd79ccSDebojyoti Ghosh 82549bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 82649bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 82749bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 82849bd79ccSDebojyoti Ghosh if (values) *values = NULL; 82949bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 83049bd79ccSDebojyoti Ghosh } 83149bd79ccSDebojyoti Ghosh 83249bd79ccSDebojyoti Ghosh r = lrow/p; 83349bd79ccSDebojyoti Ghosh s = lrow%p; 83449bd79ccSDebojyoti Ghosh 83549bd79ccSDebojyoti Ghosh if (T || b->isTI) { 83649bd79ccSDebojyoti Ghosh ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 83749bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 83849bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 83949bd79ccSDebojyoti Ghosh 84049bd79ccSDebojyoti Ghosh diag = PETSC_FALSE; 84149bd79ccSDebojyoti Ghosh c = ncolsaij + ncolsoaij; 84249bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 84349bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 84449bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 84549bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 84649bd79ccSDebojyoti Ghosh c = i; 84749bd79ccSDebojyoti Ghosh } 84849bd79ccSDebojyoti Ghosh } 84949bd79ccSDebojyoti Ghosh } else c = 0; 85049bd79ccSDebojyoti Ghosh 85149bd79ccSDebojyoti Ghosh /* calculate size of row */ 85249bd79ccSDebojyoti Ghosh nz = 0; 85349bd79ccSDebojyoti Ghosh if (S) nz += q; 85449bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 85549bd79ccSDebojyoti Ghosh 85649bd79ccSDebojyoti Ghosh if (cols || values) { 85749bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 85849bd79ccSDebojyoti Ghosh if (b->isTI) { 85949bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 86049bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 86149bd79ccSDebojyoti Ghosh idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 86249bd79ccSDebojyoti Ghosh v[i*q+j] = (j==s ? vals[i] : 0.0); 86349bd79ccSDebojyoti Ghosh } 86449bd79ccSDebojyoti Ghosh } 86549bd79ccSDebojyoti Ghosh for (i=0; i<ncolsoaij; i++) { 86649bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 86749bd79ccSDebojyoti Ghosh idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 86849bd79ccSDebojyoti Ghosh v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 86949bd79ccSDebojyoti Ghosh } 87049bd79ccSDebojyoti Ghosh } 87149bd79ccSDebojyoti Ghosh } else if (T) { 87249bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 87349bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 87449bd79ccSDebojyoti Ghosh idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 87549bd79ccSDebojyoti Ghosh v[i*q+j] = vals[i]*T[s+j*p]; 87649bd79ccSDebojyoti Ghosh } 87749bd79ccSDebojyoti Ghosh } 87849bd79ccSDebojyoti Ghosh for (i=0; i<ncolsoaij; i++) { 87949bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 88049bd79ccSDebojyoti Ghosh idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 88149bd79ccSDebojyoti Ghosh v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 88249bd79ccSDebojyoti Ghosh } 88349bd79ccSDebojyoti Ghosh } 88449bd79ccSDebojyoti Ghosh } 88549bd79ccSDebojyoti Ghosh if (S) { 88649bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 88749bd79ccSDebojyoti Ghosh idx[c*q+j] = (r+rstart/p)*q+j; 88849bd79ccSDebojyoti Ghosh v[c*q+j] += S[s+j*p]; 88949bd79ccSDebojyoti Ghosh } 89049bd79ccSDebojyoti Ghosh } 89149bd79ccSDebojyoti Ghosh } 89249bd79ccSDebojyoti Ghosh 89349bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 89449bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 89549bd79ccSDebojyoti Ghosh if (values) *values = v; 89649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 89749bd79ccSDebojyoti Ghosh } 89849bd79ccSDebojyoti Ghosh 89949bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 90049bd79ccSDebojyoti Ghosh { 90149bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 90249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 90349bd79ccSDebojyoti Ghosh ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 90449bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 90549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 90649bd79ccSDebojyoti Ghosh } 90749bd79ccSDebojyoti Ghosh 90849bd79ccSDebojyoti Ghosh PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 90949bd79ccSDebojyoti Ghosh { 91049bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 91149bd79ccSDebojyoti Ghosh Mat A; 91249bd79ccSDebojyoti Ghosh 91349bd79ccSDebojyoti Ghosh PetscFunctionBegin; 91449bd79ccSDebojyoti Ghosh ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 91549bd79ccSDebojyoti Ghosh ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 91649bd79ccSDebojyoti Ghosh ierr = MatDestroy(&A);CHKERRQ(ierr); 91749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 91849bd79ccSDebojyoti Ghosh } 91949bd79ccSDebojyoti Ghosh 92049bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */ 92149bd79ccSDebojyoti Ghosh /*@C 92249bd79ccSDebojyoti Ghosh MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 92349bd79ccSDebojyoti Ghosh 92449bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 92549bd79ccSDebojyoti Ghosh 92649bd79ccSDebojyoti Ghosh where 92749bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 92849bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 92949bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 93049bd79ccSDebojyoti Ghosh I is the identity matrix 93149bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 93249bd79ccSDebojyoti Ghosh 93349bd79ccSDebojyoti Ghosh The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 93449bd79ccSDebojyoti Ghosh S is always stored independently on all processes as a PetscScalar array in column-major format. 93549bd79ccSDebojyoti Ghosh 93649bd79ccSDebojyoti Ghosh Collective 93749bd79ccSDebojyoti Ghosh 93849bd79ccSDebojyoti Ghosh Input Parameters: 93949bd79ccSDebojyoti Ghosh + A - the AIJ matrix 94049bd79ccSDebojyoti Ghosh . S - the S matrix, stored as a PetscScalar array (column-major) 94149bd79ccSDebojyoti Ghosh . T - the T matrix, stored as a PetscScalar array (column-major) 94249bd79ccSDebojyoti Ghosh . p - number of rows in S and T 94349bd79ccSDebojyoti Ghosh - q - number of columns in S and T 94449bd79ccSDebojyoti Ghosh 94549bd79ccSDebojyoti Ghosh Output Parameter: 94649bd79ccSDebojyoti Ghosh . kaij - the new KAIJ matrix 94749bd79ccSDebojyoti Ghosh 94849bd79ccSDebojyoti Ghosh Operations provided: 94949bd79ccSDebojyoti Ghosh + MatMult 95049bd79ccSDebojyoti Ghosh . MatMultAdd 95149bd79ccSDebojyoti Ghosh . MatInvertBlockDiagonal 95249bd79ccSDebojyoti Ghosh - MatView 95349bd79ccSDebojyoti Ghosh 95449bd79ccSDebojyoti Ghosh Level: advanced 95549bd79ccSDebojyoti Ghosh 95649bd79ccSDebojyoti Ghosh .seealso: MatKAIJGetAIJ(), MATKAIJ 95749bd79ccSDebojyoti Ghosh @*/ 95849bd79ccSDebojyoti Ghosh PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 95949bd79ccSDebojyoti Ghosh { 96049bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 96149bd79ccSDebojyoti Ghosh PetscMPIInt size; 96249bd79ccSDebojyoti Ghosh PetscInt n,i,j; 96349bd79ccSDebojyoti Ghosh Mat B; 96449bd79ccSDebojyoti Ghosh PetscBool isTI = PETSC_FALSE; 96549bd79ccSDebojyoti Ghosh 96649bd79ccSDebojyoti Ghosh PetscFunctionBegin; 96749bd79ccSDebojyoti Ghosh 96849bd79ccSDebojyoti Ghosh /* check if T is an identity matrix */ 96949bd79ccSDebojyoti Ghosh if (T && (p == q)) { 97049bd79ccSDebojyoti Ghosh isTI = PETSC_TRUE; 97149bd79ccSDebojyoti Ghosh for (i=0; i<p; i++) { 97249bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 97349bd79ccSDebojyoti Ghosh if (i == j) { 97449bd79ccSDebojyoti Ghosh /* diagonal term must be 1 */ 97549bd79ccSDebojyoti Ghosh if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 97649bd79ccSDebojyoti Ghosh } else { 97749bd79ccSDebojyoti Ghosh /* off-diagonal term must be 0 */ 97849bd79ccSDebojyoti Ghosh if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 97949bd79ccSDebojyoti Ghosh } 98049bd79ccSDebojyoti Ghosh } 98149bd79ccSDebojyoti Ghosh } 98249bd79ccSDebojyoti Ghosh } 98349bd79ccSDebojyoti Ghosh 98449bd79ccSDebojyoti Ghosh ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 98549bd79ccSDebojyoti Ghosh 98649bd79ccSDebojyoti Ghosh ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 98749bd79ccSDebojyoti Ghosh ierr = MatSetSizes(B,p*A->rmap->n,q*A->cmap->n,p*A->rmap->N,q*A->cmap->N);CHKERRQ(ierr); 98849bd79ccSDebojyoti Ghosh ierr = PetscLayoutSetBlockSize(B->rmap,p);CHKERRQ(ierr); 98949bd79ccSDebojyoti Ghosh ierr = PetscLayoutSetBlockSize(B->cmap,q);CHKERRQ(ierr); 99049bd79ccSDebojyoti Ghosh ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 99149bd79ccSDebojyoti Ghosh ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 99249bd79ccSDebojyoti Ghosh 99349bd79ccSDebojyoti Ghosh B->assembled = PETSC_TRUE; 99449bd79ccSDebojyoti Ghosh ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 99549bd79ccSDebojyoti Ghosh 99649bd79ccSDebojyoti Ghosh if (size == 1) { 99749bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b; 99849bd79ccSDebojyoti Ghosh 99949bd79ccSDebojyoti Ghosh ierr = MatSetType(B,MATSEQKAIJ);CHKERRQ(ierr); 100049bd79ccSDebojyoti Ghosh 100149bd79ccSDebojyoti Ghosh B->ops->setup = NULL; 100249bd79ccSDebojyoti Ghosh B->ops->destroy = MatDestroy_SeqKAIJ; 100349bd79ccSDebojyoti Ghosh B->ops->view = MatView_SeqKAIJ; 100449bd79ccSDebojyoti Ghosh b = (Mat_SeqKAIJ*)B->data; 100549bd79ccSDebojyoti Ghosh b->p = p; 100649bd79ccSDebojyoti Ghosh b->q = q; 100749bd79ccSDebojyoti Ghosh b->AIJ = A; 100849bd79ccSDebojyoti Ghosh b->isTI = isTI; 100949bd79ccSDebojyoti Ghosh if (S) { 101049bd79ccSDebojyoti Ghosh ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr); 101149bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 101249bd79ccSDebojyoti Ghosh } else b->S = NULL; 101349bd79ccSDebojyoti Ghosh if (T && (!isTI)) { 101449bd79ccSDebojyoti Ghosh ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr); 101549bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 101649bd79ccSDebojyoti Ghosh } else b->T = NULL; 101749bd79ccSDebojyoti Ghosh 101849bd79ccSDebojyoti Ghosh B->ops->mult = MatMult_SeqKAIJ_N; 101949bd79ccSDebojyoti Ghosh B->ops->multadd = MatMultAdd_SeqKAIJ_N; 102049bd79ccSDebojyoti Ghosh B->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N; 102149bd79ccSDebojyoti Ghosh B->ops->getrow = MatGetRow_SeqKAIJ; 102249bd79ccSDebojyoti Ghosh B->ops->restorerow = MatRestoreRow_SeqKAIJ; 102349bd79ccSDebojyoti Ghosh B->ops->sor = MatSOR_SeqKAIJ; 102449bd79ccSDebojyoti Ghosh } else { 102549bd79ccSDebojyoti Ghosh Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 102649bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b; 102749bd79ccSDebojyoti Ghosh IS from,to; 102849bd79ccSDebojyoti Ghosh Vec gvec; 102949bd79ccSDebojyoti Ghosh 103049bd79ccSDebojyoti Ghosh ierr = MatSetType(B,MATMPIKAIJ);CHKERRQ(ierr); 103149bd79ccSDebojyoti Ghosh 103249bd79ccSDebojyoti Ghosh B->ops->setup = NULL; 103349bd79ccSDebojyoti Ghosh B->ops->destroy = MatDestroy_MPIKAIJ; 103449bd79ccSDebojyoti Ghosh B->ops->view = MatView_MPIKAIJ; 103549bd79ccSDebojyoti Ghosh 103649bd79ccSDebojyoti Ghosh b = (Mat_MPIKAIJ*)B->data; 103749bd79ccSDebojyoti Ghosh b->p = p; 103849bd79ccSDebojyoti Ghosh b->q = q; 103949bd79ccSDebojyoti Ghosh b->A = A; 104049bd79ccSDebojyoti Ghosh b->isTI = isTI; 104149bd79ccSDebojyoti Ghosh if (S) { 104249bd79ccSDebojyoti Ghosh ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr); 104349bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 104449bd79ccSDebojyoti Ghosh } else b->S = NULL; 104549bd79ccSDebojyoti Ghosh if (T &&(!isTI)) { 104649bd79ccSDebojyoti Ghosh ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr); 104749bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 104849bd79ccSDebojyoti Ghosh } else b->T = NULL; 104949bd79ccSDebojyoti Ghosh 105049bd79ccSDebojyoti Ghosh ierr = MatCreateKAIJ(mpiaij->A,p,q,S ,T,&b->AIJ);CHKERRQ(ierr); 105149bd79ccSDebojyoti Ghosh ierr = MatCreateKAIJ(mpiaij->B,p,q,NULL,T,&b->OAIJ);CHKERRQ(ierr); 105249bd79ccSDebojyoti Ghosh 105349bd79ccSDebojyoti Ghosh ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 105449bd79ccSDebojyoti Ghosh ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 105549bd79ccSDebojyoti Ghosh ierr = VecSetSizes(b->w,n*q,n*q);CHKERRQ(ierr); 105649bd79ccSDebojyoti Ghosh ierr = VecSetBlockSize(b->w,q);CHKERRQ(ierr); 105749bd79ccSDebojyoti Ghosh ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 105849bd79ccSDebojyoti Ghosh 105949bd79ccSDebojyoti Ghosh /* create two temporary Index sets for build scatter gather */ 106049bd79ccSDebojyoti Ghosh ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 106149bd79ccSDebojyoti Ghosh ierr = ISCreateStride(PETSC_COMM_SELF,n*q,0,1,&to);CHKERRQ(ierr); 106249bd79ccSDebojyoti Ghosh 106349bd79ccSDebojyoti Ghosh /* create temporary global vector to generate scatter context */ 106449bd79ccSDebojyoti Ghosh ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),q,q*A->cmap->n,q*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 106549bd79ccSDebojyoti Ghosh 106649bd79ccSDebojyoti Ghosh /* generate the scatter context */ 1067b5a089c7SRichard Tran Mills ierr = VecScatterCreateWithData(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 106849bd79ccSDebojyoti Ghosh 106949bd79ccSDebojyoti Ghosh ierr = ISDestroy(&from);CHKERRQ(ierr); 107049bd79ccSDebojyoti Ghosh ierr = ISDestroy(&to);CHKERRQ(ierr); 107149bd79ccSDebojyoti Ghosh ierr = VecDestroy(&gvec);CHKERRQ(ierr); 107249bd79ccSDebojyoti Ghosh 107349bd79ccSDebojyoti Ghosh B->ops->mult = MatMult_MPIKAIJ_dof; 107449bd79ccSDebojyoti Ghosh B->ops->multadd = MatMultAdd_MPIKAIJ_dof; 107549bd79ccSDebojyoti Ghosh B->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof; 107649bd79ccSDebojyoti Ghosh B->ops->getrow = MatGetRow_MPIKAIJ; 107749bd79ccSDebojyoti Ghosh B->ops->restorerow = MatRestoreRow_MPIKAIJ; 107849bd79ccSDebojyoti Ghosh ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 107949bd79ccSDebojyoti Ghosh } 108049bd79ccSDebojyoti Ghosh B->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 108149bd79ccSDebojyoti Ghosh B->assembled = PETSC_TRUE; 108249bd79ccSDebojyoti Ghosh ierr = MatSetUp(B);CHKERRQ(ierr); 108349bd79ccSDebojyoti Ghosh *kaij = B; 108449bd79ccSDebojyoti Ghosh ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 108549bd79ccSDebojyoti Ghosh 108649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 108749bd79ccSDebojyoti Ghosh } 1088