xref: /petsc/src/mat/impls/kaij/kaij.c (revision b5a089c7208fc0d2d664fc82f0d8188edb3b6a89)
149bd79ccSDebojyoti Ghosh 
249bd79ccSDebojyoti Ghosh /*
349bd79ccSDebojyoti Ghosh   Defines the basic matrix operations for the KAIJ  matrix storage format.
449bd79ccSDebojyoti Ghosh   This format is used to evaluate matrices of the form:
549bd79ccSDebojyoti Ghosh 
649bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
749bd79ccSDebojyoti Ghosh 
849bd79ccSDebojyoti Ghosh   where
949bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
1049bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
1149bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
1249bd79ccSDebojyoti Ghosh     I is the identity matrix
1349bd79ccSDebojyoti Ghosh 
1449bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
1549bd79ccSDebojyoti Ghosh 
1649bd79ccSDebojyoti Ghosh   We provide:
1749bd79ccSDebojyoti Ghosh      MatMult()
1849bd79ccSDebojyoti Ghosh      MatMultAdd()
1949bd79ccSDebojyoti Ghosh      MatInvertBlockDiagonal()
2049bd79ccSDebojyoti Ghosh   and
2149bd79ccSDebojyoti Ghosh      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
2249bd79ccSDebojyoti Ghosh 
2349bd79ccSDebojyoti Ghosh   This single directory handles both the sequential and parallel codes
2449bd79ccSDebojyoti Ghosh */
2549bd79ccSDebojyoti Ghosh 
2649bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
2749bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h>
2849bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h>
2949bd79ccSDebojyoti Ghosh 
3049bd79ccSDebojyoti Ghosh /*@C
3149bd79ccSDebojyoti Ghosh    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
3249bd79ccSDebojyoti Ghosh 
3349bd79ccSDebojyoti Ghosh    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
3449bd79ccSDebojyoti Ghosh 
3549bd79ccSDebojyoti Ghosh    Input Parameter:
3649bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
3749bd79ccSDebojyoti Ghosh 
3849bd79ccSDebojyoti Ghosh    Output Parameter:
3949bd79ccSDebojyoti Ghosh .  B - the AIJ matrix
4049bd79ccSDebojyoti Ghosh 
4149bd79ccSDebojyoti Ghosh    Level: advanced
4249bd79ccSDebojyoti Ghosh 
4349bd79ccSDebojyoti Ghosh    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
4449bd79ccSDebojyoti Ghosh 
4549bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ()
4649bd79ccSDebojyoti Ghosh @*/
4749bd79ccSDebojyoti Ghosh PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
4849bd79ccSDebojyoti Ghosh {
4949bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
5049bd79ccSDebojyoti Ghosh   PetscBool      ismpikaij,isseqkaij;
5149bd79ccSDebojyoti Ghosh 
5249bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
5349bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
5449bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
5549bd79ccSDebojyoti Ghosh   if (ismpikaij) {
5649bd79ccSDebojyoti Ghosh     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
5749bd79ccSDebojyoti Ghosh 
5849bd79ccSDebojyoti Ghosh     *B = b->A;
5949bd79ccSDebojyoti Ghosh   } else if (isseqkaij) {
6049bd79ccSDebojyoti Ghosh     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
6149bd79ccSDebojyoti Ghosh 
6249bd79ccSDebojyoti Ghosh     *B = b->AIJ;
6349bd79ccSDebojyoti Ghosh   } else {
6449bd79ccSDebojyoti Ghosh     *B = A;
6549bd79ccSDebojyoti Ghosh   }
6649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
6749bd79ccSDebojyoti Ghosh }
6849bd79ccSDebojyoti Ghosh 
6949bd79ccSDebojyoti Ghosh /*@C
7049bd79ccSDebojyoti Ghosh    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
7149bd79ccSDebojyoti Ghosh 
7249bd79ccSDebojyoti Ghosh    Not Collective, the entire S is stored and returned independently on all processes
7349bd79ccSDebojyoti Ghosh 
7449bd79ccSDebojyoti Ghosh    Input Parameter:
7549bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
7649bd79ccSDebojyoti Ghosh 
7749bd79ccSDebojyoti Ghosh    Output Parameter:
7849bd79ccSDebojyoti Ghosh .  S - the S matrix, in form of a scalar array in column-major format
7949bd79ccSDebojyoti Ghosh 
8049bd79ccSDebojyoti Ghosh    Level: advanced
8149bd79ccSDebojyoti Ghosh 
8249bd79ccSDebojyoti Ghosh    Notes: The reference count on the S matrix is not increased so you should not destroy it.
8349bd79ccSDebojyoti Ghosh 
8449bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ()
8549bd79ccSDebojyoti Ghosh @*/
8649bd79ccSDebojyoti Ghosh PetscErrorCode  MatKAIJGetS(Mat A,const PetscScalar **S)
8749bd79ccSDebojyoti Ghosh {
8849bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
8949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
9049bd79ccSDebojyoti Ghosh   *S = b->S;
9149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
9249bd79ccSDebojyoti Ghosh }
9349bd79ccSDebojyoti Ghosh 
9449bd79ccSDebojyoti Ghosh /*@C
9549bd79ccSDebojyoti Ghosh    MatKAIJGetT - Get the T matrix describing the shift action of the KAIJ matrix
9649bd79ccSDebojyoti Ghosh 
9749bd79ccSDebojyoti Ghosh    Not Collective, the entire T is stored and returned independently on all processes
9849bd79ccSDebojyoti Ghosh 
9949bd79ccSDebojyoti Ghosh    Input Parameter:
10049bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
10149bd79ccSDebojyoti Ghosh 
10249bd79ccSDebojyoti Ghosh    Output Parameter:
10349bd79ccSDebojyoti Ghosh .  T - the T matrix, in form of a scalar array in column-major format
10449bd79ccSDebojyoti Ghosh 
10549bd79ccSDebojyoti Ghosh    Level: advanced
10649bd79ccSDebojyoti Ghosh 
10749bd79ccSDebojyoti Ghosh    Notes: The reference count on the T matrix is not increased so you should not destroy it.
10849bd79ccSDebojyoti Ghosh 
10949bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ()
11049bd79ccSDebojyoti Ghosh @*/
11149bd79ccSDebojyoti Ghosh PetscErrorCode  MatKAIJGetT(Mat A,const PetscScalar **T)
11249bd79ccSDebojyoti Ghosh {
11349bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
11449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
11549bd79ccSDebojyoti Ghosh   *T = b->T;
11649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
11749bd79ccSDebojyoti Ghosh }
11849bd79ccSDebojyoti Ghosh 
11949bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
12049bd79ccSDebojyoti Ghosh {
12149bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
12249bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
12349bd79ccSDebojyoti Ghosh 
12449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
12549bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
12649bd79ccSDebojyoti Ghosh   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
12749bd79ccSDebojyoti Ghosh   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
12849bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
12949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
13049bd79ccSDebojyoti Ghosh }
13149bd79ccSDebojyoti Ghosh 
13249bd79ccSDebojyoti Ghosh PetscErrorCode MatSetUp_KAIJ(Mat A)
13349bd79ccSDebojyoti Ghosh {
13449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
13549bd79ccSDebojyoti Ghosh   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateKAIJ() to create KAIJ matrices");
13649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
13749bd79ccSDebojyoti Ghosh }
13849bd79ccSDebojyoti Ghosh 
13949bd79ccSDebojyoti Ghosh PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
14049bd79ccSDebojyoti Ghosh {
14149bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
14249bd79ccSDebojyoti Ghosh   Mat            B;
14349bd79ccSDebojyoti Ghosh 
14449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
14549bd79ccSDebojyoti Ghosh   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
14649bd79ccSDebojyoti Ghosh   ierr = MatView(B,viewer);CHKERRQ(ierr);
14749bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&B);CHKERRQ(ierr);
14849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
14949bd79ccSDebojyoti Ghosh }
15049bd79ccSDebojyoti Ghosh 
15149bd79ccSDebojyoti Ghosh PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
15249bd79ccSDebojyoti Ghosh {
15349bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
15449bd79ccSDebojyoti Ghosh   Mat            B;
15549bd79ccSDebojyoti Ghosh 
15649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
15749bd79ccSDebojyoti Ghosh   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
15849bd79ccSDebojyoti Ghosh   ierr = MatView(B,viewer);CHKERRQ(ierr);
15949bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&B);CHKERRQ(ierr);
16049bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
16149bd79ccSDebojyoti Ghosh }
16249bd79ccSDebojyoti Ghosh 
16349bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
16449bd79ccSDebojyoti Ghosh {
16549bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
16649bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
16749bd79ccSDebojyoti Ghosh 
16849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
16949bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
17049bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
17149bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
17249bd79ccSDebojyoti Ghosh   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
17349bd79ccSDebojyoti Ghosh   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
17449bd79ccSDebojyoti Ghosh   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
17549bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
17649bd79ccSDebojyoti Ghosh 
17749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
17849bd79ccSDebojyoti Ghosh }
17949bd79ccSDebojyoti Ghosh 
18049bd79ccSDebojyoti Ghosh /*MC
18149bd79ccSDebojyoti Ghosh   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
18249bd79ccSDebojyoti Ghosh 
18349bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
18449bd79ccSDebojyoti Ghosh 
18549bd79ccSDebojyoti Ghosh   where
18649bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
18749bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
18849bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
18949bd79ccSDebojyoti Ghosh     I is the identity matrix
19049bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
19149bd79ccSDebojyoti Ghosh 
19249bd79ccSDebojyoti Ghosh   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
19349bd79ccSDebojyoti Ghosh   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
19449bd79ccSDebojyoti Ghosh 
19549bd79ccSDebojyoti Ghosh   Operations provided:
19649bd79ccSDebojyoti Ghosh . MatMult
19749bd79ccSDebojyoti Ghosh . MatMultAdd
19849bd79ccSDebojyoti Ghosh . MatInvertBlockDiagonal
19949bd79ccSDebojyoti Ghosh 
20049bd79ccSDebojyoti Ghosh   Level: advanced
20149bd79ccSDebojyoti Ghosh 
20249bd79ccSDebojyoti Ghosh .seealso: MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
20349bd79ccSDebojyoti Ghosh M*/
20449bd79ccSDebojyoti Ghosh 
20549bd79ccSDebojyoti Ghosh PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
20649bd79ccSDebojyoti Ghosh {
20749bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
20849bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b;
20949bd79ccSDebojyoti Ghosh   PetscMPIInt    size;
21049bd79ccSDebojyoti Ghosh 
21149bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
21249bd79ccSDebojyoti Ghosh   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
21349bd79ccSDebojyoti Ghosh   A->data  = (void*)b;
21449bd79ccSDebojyoti Ghosh 
21549bd79ccSDebojyoti Ghosh   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
21649bd79ccSDebojyoti Ghosh 
21749bd79ccSDebojyoti Ghosh   A->ops->setup = MatSetUp_KAIJ;
21849bd79ccSDebojyoti Ghosh 
21949bd79ccSDebojyoti Ghosh   b->w    = 0;
22049bd79ccSDebojyoti Ghosh   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
22149bd79ccSDebojyoti Ghosh   if (size == 1) {
22249bd79ccSDebojyoti Ghosh     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
22349bd79ccSDebojyoti Ghosh   } else {
22449bd79ccSDebojyoti Ghosh     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
22549bd79ccSDebojyoti Ghosh   }
22649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
22749bd79ccSDebojyoti Ghosh }
22849bd79ccSDebojyoti Ghosh 
22949bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/
23049bd79ccSDebojyoti Ghosh 
23149bd79ccSDebojyoti Ghosh /* zz = yy + Axx */
23249bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
23349bd79ccSDebojyoti Ghosh {
23449bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
23549bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
23649bd79ccSDebojyoti Ghosh   const PetscScalar *s = b->S, *t = b->T;
23749bd79ccSDebojyoti Ghosh   const PetscScalar *x,*v,*bx;
23849bd79ccSDebojyoti Ghosh   PetscScalar       *y,*sums;
23949bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
24049bd79ccSDebojyoti Ghosh   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
24149bd79ccSDebojyoti Ghosh   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
24249bd79ccSDebojyoti Ghosh 
24349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
24449bd79ccSDebojyoti Ghosh 
24549bd79ccSDebojyoti Ghosh   if (!yy) {
24649bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
24749bd79ccSDebojyoti Ghosh   } else {
24849bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
24949bd79ccSDebojyoti Ghosh   }
25049bd79ccSDebojyoti Ghosh   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
25149bd79ccSDebojyoti Ghosh 
25249bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
25349bd79ccSDebojyoti Ghosh   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
25449bd79ccSDebojyoti Ghosh   idx  = a->j;
25549bd79ccSDebojyoti Ghosh   v    = a->a;
25649bd79ccSDebojyoti Ghosh   ii   = a->i;
25749bd79ccSDebojyoti Ghosh 
25849bd79ccSDebojyoti Ghosh   if (b->isTI) {
25949bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
26049bd79ccSDebojyoti Ghosh       jrow = ii[i];
26149bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
26249bd79ccSDebojyoti Ghosh       sums = y + p*i;
26349bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
26449bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
26549bd79ccSDebojyoti Ghosh           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
26649bd79ccSDebojyoti Ghosh         }
26749bd79ccSDebojyoti Ghosh       }
26849bd79ccSDebojyoti Ghosh     }
26949bd79ccSDebojyoti Ghosh   } else if (t) {
27049bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
27149bd79ccSDebojyoti Ghosh       jrow = ii[i];
27249bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
27349bd79ccSDebojyoti Ghosh       sums = y + p*i;
27449bd79ccSDebojyoti Ghosh       bx   = x + q*i;
27549bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
27649bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
27749bd79ccSDebojyoti Ghosh           for (l=0; l<q; l++) {
27849bd79ccSDebojyoti Ghosh             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
27949bd79ccSDebojyoti Ghosh           }
28049bd79ccSDebojyoti Ghosh         }
28149bd79ccSDebojyoti Ghosh       }
28249bd79ccSDebojyoti Ghosh     }
28349bd79ccSDebojyoti Ghosh   }
28449bd79ccSDebojyoti Ghosh   if (s) {
28549bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
28649bd79ccSDebojyoti Ghosh       jrow = ii[i];
28749bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
28849bd79ccSDebojyoti Ghosh       sums = y + p*i;
28949bd79ccSDebojyoti Ghosh       bx   = x + q*i;
29049bd79ccSDebojyoti Ghosh       if (i < b->AIJ->cmap->n) {
29149bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
29249bd79ccSDebojyoti Ghosh           for (k=0; k<p; k++) {
29349bd79ccSDebojyoti Ghosh             sums[k] += s[k+j*p]*bx[j];
29449bd79ccSDebojyoti Ghosh           }
29549bd79ccSDebojyoti Ghosh         }
29649bd79ccSDebojyoti Ghosh       }
29749bd79ccSDebojyoti Ghosh     }
29849bd79ccSDebojyoti Ghosh   }
29949bd79ccSDebojyoti Ghosh 
30049bd79ccSDebojyoti Ghosh   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
30149bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
30249bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
30349bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
30449bd79ccSDebojyoti Ghosh }
30549bd79ccSDebojyoti Ghosh 
30649bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
30749bd79ccSDebojyoti Ghosh {
30849bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
30949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
31049bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
31149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
31249bd79ccSDebojyoti Ghosh }
31349bd79ccSDebojyoti Ghosh 
31449bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
31549bd79ccSDebojyoti Ghosh {
31649bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
31749bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
31849bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
31949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
32049bd79ccSDebojyoti Ghosh }
32149bd79ccSDebojyoti Ghosh 
32249bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h>
32349bd79ccSDebojyoti Ghosh 
32449bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
32549bd79ccSDebojyoti Ghosh {
32649bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
32749bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
32849bd79ccSDebojyoti Ghosh   const PetscScalar *S  = b->S;
32949bd79ccSDebojyoti Ghosh   const PetscScalar *T  = b->T;
33049bd79ccSDebojyoti Ghosh   const PetscScalar *v  = a->a;
33149bd79ccSDebojyoti Ghosh   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
33249bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
33349bd79ccSDebojyoti Ghosh   PetscInt          i,j,*v_pivots,dof,dof2;
33449bd79ccSDebojyoti Ghosh   PetscScalar       *diag,aval,*v_work;
33549bd79ccSDebojyoti Ghosh 
33649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
33749bd79ccSDebojyoti Ghosh   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
33849bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
33949bd79ccSDebojyoti Ghosh     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
34049bd79ccSDebojyoti Ghosh   }
34149bd79ccSDebojyoti Ghosh 
34249bd79ccSDebojyoti Ghosh   dof  = p;
34349bd79ccSDebojyoti Ghosh   dof2 = dof*dof;
34449bd79ccSDebojyoti Ghosh 
34549bd79ccSDebojyoti Ghosh   if (b->ibdiagvalid) {
34649bd79ccSDebojyoti Ghosh     if (values) *values = b->ibdiag;
34749bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
34849bd79ccSDebojyoti Ghosh   }
34949bd79ccSDebojyoti Ghosh   if (!b->ibdiag) {
35049bd79ccSDebojyoti Ghosh     ierr = PetscMalloc(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
35149bd79ccSDebojyoti Ghosh     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
35249bd79ccSDebojyoti Ghosh   }
35349bd79ccSDebojyoti Ghosh   if (values) *values = b->ibdiag;
35449bd79ccSDebojyoti Ghosh   diag = b->ibdiag;
35549bd79ccSDebojyoti Ghosh 
35649bd79ccSDebojyoti Ghosh   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
35749bd79ccSDebojyoti Ghosh   for (i=0; i<m; i++) {
35849bd79ccSDebojyoti Ghosh     if (S) {
35949bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
36049bd79ccSDebojyoti Ghosh     } else {
36149bd79ccSDebojyoti Ghosh       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
36249bd79ccSDebojyoti Ghosh     }
36349bd79ccSDebojyoti Ghosh     if (b->isTI) {
36449bd79ccSDebojyoti Ghosh       aval = 0;
36549bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
36649bd79ccSDebojyoti Ghosh       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
36749bd79ccSDebojyoti Ghosh     } else if (T) {
36849bd79ccSDebojyoti Ghosh       aval = 0;
36949bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
37049bd79ccSDebojyoti Ghosh       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
37149bd79ccSDebojyoti Ghosh     }
37249bd79ccSDebojyoti Ghosh     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
37349bd79ccSDebojyoti Ghosh     diag += dof2;
37449bd79ccSDebojyoti Ghosh   }
37549bd79ccSDebojyoti Ghosh   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
37649bd79ccSDebojyoti Ghosh 
37749bd79ccSDebojyoti Ghosh   b->ibdiagvalid = PETSC_TRUE;
37849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
37949bd79ccSDebojyoti Ghosh }
38049bd79ccSDebojyoti Ghosh 
38149bd79ccSDebojyoti Ghosh static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
38249bd79ccSDebojyoti Ghosh {
38349bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
38449bd79ccSDebojyoti Ghosh 
38549bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
38649bd79ccSDebojyoti Ghosh   *B = kaij->AIJ;
38749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
38849bd79ccSDebojyoti Ghosh }
38949bd79ccSDebojyoti Ghosh 
39049bd79ccSDebojyoti Ghosh PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
39149bd79ccSDebojyoti Ghosh {
39249bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
39349bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
39449bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
39549bd79ccSDebojyoti Ghosh   const PetscScalar *aa = a->a, *T = kaij->T, *v;
39649bd79ccSDebojyoti Ghosh   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
39749bd79ccSDebojyoti Ghosh   const PetscScalar *b, *xb, *idiag;
39849bd79ccSDebojyoti Ghosh   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
39949bd79ccSDebojyoti Ghosh   PetscInt          i, j, k, i2, bs, bs2, nz;
40049bd79ccSDebojyoti Ghosh 
40149bd79ccSDebojyoti Ghosh 
40249bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
40349bd79ccSDebojyoti Ghosh   its = its*lits;
40449bd79ccSDebojyoti Ghosh   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
40549bd79ccSDebojyoti Ghosh   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
40649bd79ccSDebojyoti Ghosh   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
40749bd79ccSDebojyoti Ghosh   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER))
40849bd79ccSDebojyoti Ghosh     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
40949bd79ccSDebojyoti Ghosh   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
41049bd79ccSDebojyoti Ghosh   else        {bs = p; bs2 = bs*bs; }
41149bd79ccSDebojyoti Ghosh 
41249bd79ccSDebojyoti Ghosh   if (!m) PetscFunctionReturn(0);
41349bd79ccSDebojyoti Ghosh 
41449bd79ccSDebojyoti Ghosh   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
41549bd79ccSDebojyoti Ghosh   idiag = kaij->ibdiag;
41649bd79ccSDebojyoti Ghosh   diag  = a->diag;
41749bd79ccSDebojyoti Ghosh 
41849bd79ccSDebojyoti Ghosh   if (!kaij->sor.setup) {
41949bd79ccSDebojyoti Ghosh     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
42049bd79ccSDebojyoti Ghosh     kaij->sor.setup = PETSC_TRUE;
42149bd79ccSDebojyoti Ghosh   }
42249bd79ccSDebojyoti Ghosh   y     = kaij->sor.y;
42349bd79ccSDebojyoti Ghosh   w     = kaij->sor.w;
42449bd79ccSDebojyoti Ghosh   work  = kaij->sor.work;
42549bd79ccSDebojyoti Ghosh   t     = kaij->sor.t;
42649bd79ccSDebojyoti Ghosh   arr   = kaij->sor.arr;
42749bd79ccSDebojyoti Ghosh 
42849bd79ccSDebojyoti Ghosh   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
42949bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
43049bd79ccSDebojyoti Ghosh 
43149bd79ccSDebojyoti Ghosh   if (flag & SOR_ZERO_INITIAL_GUESS) {
43249bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
43349bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
43449bd79ccSDebojyoti Ghosh       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
43549bd79ccSDebojyoti Ghosh       i2     =  bs;
43649bd79ccSDebojyoti Ghosh       idiag  += bs2;
43749bd79ccSDebojyoti Ghosh       for (i=1; i<m; i++) {
43849bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
43949bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
44049bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
44149bd79ccSDebojyoti Ghosh 
44249bd79ccSDebojyoti Ghosh         if (T) {                /* b - T (Arow * x) */
44349bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) w[k] = 0;
44449bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
44549bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
44649bd79ccSDebojyoti Ghosh           }
44749bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
44849bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
44949bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
45049bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
45149bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
45249bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
45349bd79ccSDebojyoti Ghosh           }
45449bd79ccSDebojyoti Ghosh         } else {
45549bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
45649bd79ccSDebojyoti Ghosh         }
45749bd79ccSDebojyoti Ghosh 
45849bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
45949bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
46049bd79ccSDebojyoti Ghosh 
46149bd79ccSDebojyoti Ghosh         idiag += bs2;
46249bd79ccSDebojyoti Ghosh         i2    += bs;
46349bd79ccSDebojyoti Ghosh       }
46449bd79ccSDebojyoti Ghosh       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
46549bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
46649bd79ccSDebojyoti Ghosh       xb = t;
46749bd79ccSDebojyoti Ghosh     } else xb = b;
46849bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
46949bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
47049bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
47149bd79ccSDebojyoti Ghosh       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
47249bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
47349bd79ccSDebojyoti Ghosh       i2    -= bs;
47449bd79ccSDebojyoti Ghosh       idiag -= bs2;
47549bd79ccSDebojyoti Ghosh       for (i=m-2; i>=0; i--) {
47649bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1 ;
47749bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
47849bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
47949bd79ccSDebojyoti Ghosh 
48049bd79ccSDebojyoti Ghosh         if (T) {                /* FIXME: This branch untested */
48149bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
48249bd79ccSDebojyoti Ghosh           /* copy all rows of x that are needed into contiguous space */
48349bd79ccSDebojyoti Ghosh           workt = work;
48449bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
48549bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
48649bd79ccSDebojyoti Ghosh             workt += bs;
48749bd79ccSDebojyoti Ghosh           }
48849bd79ccSDebojyoti Ghosh           arrt = arr;
48949bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
49049bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
49149bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
49249bd79ccSDebojyoti Ghosh             arrt += bs2;
49349bd79ccSDebojyoti Ghosh           }
49449bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
49549bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
49649bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) w[k] = t[i2+k];
49749bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
49849bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
49949bd79ccSDebojyoti Ghosh           }
50049bd79ccSDebojyoti Ghosh         }
50149bd79ccSDebojyoti Ghosh 
50249bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
50349bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
50449bd79ccSDebojyoti Ghosh 
50549bd79ccSDebojyoti Ghosh         idiag -= bs2;
50649bd79ccSDebojyoti Ghosh         i2    -= bs;
50749bd79ccSDebojyoti Ghosh       }
50849bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
50949bd79ccSDebojyoti Ghosh     }
51049bd79ccSDebojyoti Ghosh     its--;
51149bd79ccSDebojyoti Ghosh   }
51249bd79ccSDebojyoti Ghosh   while (its--) {               /* FIXME: This branch not updated */
51349bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
51449bd79ccSDebojyoti Ghosh       i2     =  0;
51549bd79ccSDebojyoti Ghosh       idiag  = kaij->ibdiag;
51649bd79ccSDebojyoti Ghosh       for (i=0; i<m; i++) {
51749bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
51849bd79ccSDebojyoti Ghosh 
51949bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
52049bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
52149bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
52249bd79ccSDebojyoti Ghosh         workt = work;
52349bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
52449bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
52549bd79ccSDebojyoti Ghosh           workt += bs;
52649bd79ccSDebojyoti Ghosh         }
52749bd79ccSDebojyoti Ghosh         arrt = arr;
52849bd79ccSDebojyoti Ghosh         if (T) {
52949bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
53049bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
53149bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
53249bd79ccSDebojyoti Ghosh             arrt += bs2;
53349bd79ccSDebojyoti Ghosh           }
53449bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
53549bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
53649bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
53749bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
53849bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
53949bd79ccSDebojyoti Ghosh             arrt += bs2;
54049bd79ccSDebojyoti Ghosh           }
54149bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
54249bd79ccSDebojyoti Ghosh         }
54349bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
54449bd79ccSDebojyoti Ghosh 
54549bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1;
54649bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
54749bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
54849bd79ccSDebojyoti Ghosh         workt = work;
54949bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
55049bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
55149bd79ccSDebojyoti Ghosh           workt += bs;
55249bd79ccSDebojyoti Ghosh         }
55349bd79ccSDebojyoti Ghosh         arrt = arr;
55449bd79ccSDebojyoti Ghosh         if (T) {
55549bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
55649bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
55749bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
55849bd79ccSDebojyoti Ghosh             arrt += bs2;
55949bd79ccSDebojyoti Ghosh           }
56049bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
56149bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
56249bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
56349bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
56449bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
56549bd79ccSDebojyoti Ghosh             arrt += bs2;
56649bd79ccSDebojyoti Ghosh           }
56749bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
56849bd79ccSDebojyoti Ghosh         }
56949bd79ccSDebojyoti Ghosh 
57049bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
57149bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
57249bd79ccSDebojyoti Ghosh 
57349bd79ccSDebojyoti Ghosh         idiag += bs2;
57449bd79ccSDebojyoti Ghosh         i2    += bs;
57549bd79ccSDebojyoti Ghosh       }
57649bd79ccSDebojyoti Ghosh       xb = t;
57749bd79ccSDebojyoti Ghosh     }
57849bd79ccSDebojyoti Ghosh     else xb = b;
57949bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
58049bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
58149bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
58249bd79ccSDebojyoti Ghosh       if (xb == b) {
58349bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
58449bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
58549bd79ccSDebojyoti Ghosh 
58649bd79ccSDebojyoti Ghosh           v  = aa + ai[i];
58749bd79ccSDebojyoti Ghosh           vi = aj + ai[i];
58849bd79ccSDebojyoti Ghosh           nz = diag[i] - ai[i];
58949bd79ccSDebojyoti Ghosh           workt = work;
59049bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
59149bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
59249bd79ccSDebojyoti Ghosh             workt += bs;
59349bd79ccSDebojyoti Ghosh           }
59449bd79ccSDebojyoti Ghosh           arrt = arr;
59549bd79ccSDebojyoti Ghosh           if (T) {
59649bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
59749bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
59849bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
59949bd79ccSDebojyoti Ghosh               arrt += bs2;
60049bd79ccSDebojyoti Ghosh             }
60149bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
60249bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
60349bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
60449bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
60549bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
60649bd79ccSDebojyoti Ghosh               arrt += bs2;
60749bd79ccSDebojyoti Ghosh             }
60849bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
60949bd79ccSDebojyoti Ghosh           }
61049bd79ccSDebojyoti Ghosh 
61149bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
61249bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
61349bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
61449bd79ccSDebojyoti Ghosh           workt = work;
61549bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
61649bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
61749bd79ccSDebojyoti Ghosh             workt += bs;
61849bd79ccSDebojyoti Ghosh           }
61949bd79ccSDebojyoti Ghosh           arrt = arr;
62049bd79ccSDebojyoti Ghosh           if (T) {
62149bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
62249bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
62349bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
62449bd79ccSDebojyoti Ghosh               arrt += bs2;
62549bd79ccSDebojyoti Ghosh             }
62649bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
62749bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
62849bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
62949bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
63049bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
63149bd79ccSDebojyoti Ghosh               arrt += bs2;
63249bd79ccSDebojyoti Ghosh             }
63349bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
63449bd79ccSDebojyoti Ghosh           }
63549bd79ccSDebojyoti Ghosh 
63649bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
63749bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
63849bd79ccSDebojyoti Ghosh         }
63949bd79ccSDebojyoti Ghosh       } else {
64049bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
64149bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
64249bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
64349bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
64449bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
64549bd79ccSDebojyoti Ghosh           workt = work;
64649bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
64749bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
64849bd79ccSDebojyoti Ghosh             workt += bs;
64949bd79ccSDebojyoti Ghosh           }
65049bd79ccSDebojyoti Ghosh           arrt = arr;
65149bd79ccSDebojyoti Ghosh           if (T) {
65249bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
65349bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
65449bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
65549bd79ccSDebojyoti Ghosh               arrt += bs2;
65649bd79ccSDebojyoti Ghosh             }
65749bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
65849bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
65949bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
66049bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
66149bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
66249bd79ccSDebojyoti Ghosh               arrt += bs2;
66349bd79ccSDebojyoti Ghosh             }
66449bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
66549bd79ccSDebojyoti Ghosh           }
66649bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
66749bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
66849bd79ccSDebojyoti Ghosh         }
66949bd79ccSDebojyoti Ghosh         idiag -= bs2;
67049bd79ccSDebojyoti Ghosh         i2    -= bs;
67149bd79ccSDebojyoti Ghosh       }
67249bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
67349bd79ccSDebojyoti Ghosh     }
67449bd79ccSDebojyoti Ghosh   }
67549bd79ccSDebojyoti Ghosh 
67649bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
67749bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
67849bd79ccSDebojyoti Ghosh 
67949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
68049bd79ccSDebojyoti Ghosh }
68149bd79ccSDebojyoti Ghosh 
68249bd79ccSDebojyoti Ghosh /*===================================================================================*/
68349bd79ccSDebojyoti Ghosh 
68449bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
68549bd79ccSDebojyoti Ghosh {
68649bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
68749bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
68849bd79ccSDebojyoti Ghosh 
68949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
69049bd79ccSDebojyoti Ghosh   if (!yy) {
69149bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
69249bd79ccSDebojyoti Ghosh   } else {
69349bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
69449bd79ccSDebojyoti Ghosh   }
69549bd79ccSDebojyoti Ghosh   /* start the scatter */
69649bd79ccSDebojyoti Ghosh   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
69749bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
69849bd79ccSDebojyoti Ghosh   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
69949bd79ccSDebojyoti Ghosh   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
70049bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
70149bd79ccSDebojyoti Ghosh }
70249bd79ccSDebojyoti Ghosh 
70349bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
70449bd79ccSDebojyoti Ghosh {
70549bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
70649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
70749bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
70849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
70949bd79ccSDebojyoti Ghosh }
71049bd79ccSDebojyoti Ghosh 
71149bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
71249bd79ccSDebojyoti Ghosh {
71349bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
71449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
71549bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
71649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
71749bd79ccSDebojyoti Ghosh }
71849bd79ccSDebojyoti Ghosh 
71949bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
72049bd79ccSDebojyoti Ghosh {
72149bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
72249bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
72349bd79ccSDebojyoti Ghosh 
72449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
72549bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
72649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
72749bd79ccSDebojyoti Ghosh }
72849bd79ccSDebojyoti Ghosh 
72949bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/
73049bd79ccSDebojyoti Ghosh 
73149bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
73249bd79ccSDebojyoti Ghosh {
73349bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ     *b    = (Mat_SeqKAIJ*) A->data;
73449bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr,diag;
73549bd79ccSDebojyoti Ghosh   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
73649bd79ccSDebojyoti Ghosh   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
73749bd79ccSDebojyoti Ghosh 
73849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
73949bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
74049bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
74149bd79ccSDebojyoti Ghosh   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
74249bd79ccSDebojyoti Ghosh 
74349bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
74449bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
74549bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
74649bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
74749bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
74849bd79ccSDebojyoti Ghosh   }
74949bd79ccSDebojyoti Ghosh 
75049bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
75149bd79ccSDebojyoti Ghosh     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
75249bd79ccSDebojyoti Ghosh     diag  = PETSC_FALSE;
75349bd79ccSDebojyoti Ghosh     c     = nzaij;
75449bd79ccSDebojyoti Ghosh     for (i=0; i<nzaij; i++) {
75549bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
75649bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
75749bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
75849bd79ccSDebojyoti Ghosh         c = i;
75949bd79ccSDebojyoti Ghosh       }
76049bd79ccSDebojyoti Ghosh     }
76149bd79ccSDebojyoti Ghosh   } else nzaij = c = 0;
76249bd79ccSDebojyoti Ghosh 
76349bd79ccSDebojyoti Ghosh   /* calculate size of row */
76449bd79ccSDebojyoti Ghosh   nz = 0;
76549bd79ccSDebojyoti Ghosh   if (S)            nz += q;
76649bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
76749bd79ccSDebojyoti Ghosh 
76849bd79ccSDebojyoti Ghosh   if (cols || values) {
76949bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
77049bd79ccSDebojyoti Ghosh     if (b->isTI) {
77149bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
77249bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
77349bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
77449bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vaij[i] : 0);
77549bd79ccSDebojyoti Ghosh         }
77649bd79ccSDebojyoti Ghosh       }
77749bd79ccSDebojyoti Ghosh     } else if (T) {
77849bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
77949bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
78049bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
78149bd79ccSDebojyoti Ghosh           v[i*q+j]   = vaij[i]*T[s+j*p];
78249bd79ccSDebojyoti Ghosh         }
78349bd79ccSDebojyoti Ghosh       }
78449bd79ccSDebojyoti Ghosh     }
78549bd79ccSDebojyoti Ghosh     if (S) {
78649bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
78749bd79ccSDebojyoti Ghosh         idx[c*q+j] = r*q+j;
78849bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
78949bd79ccSDebojyoti Ghosh       }
79049bd79ccSDebojyoti Ghosh     }
79149bd79ccSDebojyoti Ghosh   }
79249bd79ccSDebojyoti Ghosh 
79349bd79ccSDebojyoti Ghosh   if (ncols)    *ncols  = nz;
79449bd79ccSDebojyoti Ghosh   if (cols)     *cols   = idx;
79549bd79ccSDebojyoti Ghosh   if (values)   *values = v;
79649bd79ccSDebojyoti Ghosh 
79749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
79849bd79ccSDebojyoti Ghosh }
79949bd79ccSDebojyoti Ghosh 
80049bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
80149bd79ccSDebojyoti Ghosh {
80249bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
80349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
80449bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
80549bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
80649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
80749bd79ccSDebojyoti Ghosh }
80849bd79ccSDebojyoti Ghosh 
80949bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
81049bd79ccSDebojyoti Ghosh {
81149bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
81249bd79ccSDebojyoti Ghosh   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
81349bd79ccSDebojyoti Ghosh   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
81449bd79ccSDebojyoti Ghosh   Mat             AIJ     = b->A;
81549bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
81649bd79ccSDebojyoti Ghosh   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
81749bd79ccSDebojyoti Ghosh   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
81849bd79ccSDebojyoti Ghosh   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
81949bd79ccSDebojyoti Ghosh   PetscBool       diag;
82049bd79ccSDebojyoti Ghosh 
82149bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
82249bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
82349bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
82449bd79ccSDebojyoti Ghosh   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
82549bd79ccSDebojyoti Ghosh   lrow = row - rstart;
82649bd79ccSDebojyoti Ghosh 
82749bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
82849bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
82949bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
83049bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
83149bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
83249bd79ccSDebojyoti Ghosh   }
83349bd79ccSDebojyoti Ghosh 
83449bd79ccSDebojyoti Ghosh   r = lrow/p;
83549bd79ccSDebojyoti Ghosh   s = lrow%p;
83649bd79ccSDebojyoti Ghosh 
83749bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
83849bd79ccSDebojyoti Ghosh     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
83949bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
84049bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
84149bd79ccSDebojyoti Ghosh 
84249bd79ccSDebojyoti Ghosh     diag  = PETSC_FALSE;
84349bd79ccSDebojyoti Ghosh     c     = ncolsaij + ncolsoaij;
84449bd79ccSDebojyoti Ghosh     for (i=0; i<ncolsaij; i++) {
84549bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
84649bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
84749bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
84849bd79ccSDebojyoti Ghosh         c = i;
84949bd79ccSDebojyoti Ghosh       }
85049bd79ccSDebojyoti Ghosh     }
85149bd79ccSDebojyoti Ghosh   } else c = 0;
85249bd79ccSDebojyoti Ghosh 
85349bd79ccSDebojyoti Ghosh   /* calculate size of row */
85449bd79ccSDebojyoti Ghosh   nz = 0;
85549bd79ccSDebojyoti Ghosh   if (S)            nz += q;
85649bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
85749bd79ccSDebojyoti Ghosh 
85849bd79ccSDebojyoti Ghosh   if (cols || values) {
85949bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
86049bd79ccSDebojyoti Ghosh     if (b->isTI) {
86149bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
86249bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
86349bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
86449bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vals[i] : 0.0);
86549bd79ccSDebojyoti Ghosh         }
86649bd79ccSDebojyoti Ghosh       }
86749bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
86849bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
86949bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
87049bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
87149bd79ccSDebojyoti Ghosh         }
87249bd79ccSDebojyoti Ghosh       }
87349bd79ccSDebojyoti Ghosh     } else if (T) {
87449bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
87549bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
87649bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
87749bd79ccSDebojyoti Ghosh           v[i*q+j]   = vals[i]*T[s+j*p];
87849bd79ccSDebojyoti Ghosh         }
87949bd79ccSDebojyoti Ghosh       }
88049bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
88149bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
88249bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
88349bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
88449bd79ccSDebojyoti Ghosh         }
88549bd79ccSDebojyoti Ghosh       }
88649bd79ccSDebojyoti Ghosh     }
88749bd79ccSDebojyoti Ghosh     if (S) {
88849bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
88949bd79ccSDebojyoti Ghosh         idx[c*q+j] = (r+rstart/p)*q+j;
89049bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
89149bd79ccSDebojyoti Ghosh       }
89249bd79ccSDebojyoti Ghosh     }
89349bd79ccSDebojyoti Ghosh   }
89449bd79ccSDebojyoti Ghosh 
89549bd79ccSDebojyoti Ghosh   if (ncols)  *ncols  = nz;
89649bd79ccSDebojyoti Ghosh   if (cols)   *cols   = idx;
89749bd79ccSDebojyoti Ghosh   if (values) *values = v;
89849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
89949bd79ccSDebojyoti Ghosh }
90049bd79ccSDebojyoti Ghosh 
90149bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
90249bd79ccSDebojyoti Ghosh {
90349bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
90449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
90549bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
90649bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
90749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
90849bd79ccSDebojyoti Ghosh }
90949bd79ccSDebojyoti Ghosh 
91049bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
91149bd79ccSDebojyoti Ghosh {
91249bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
91349bd79ccSDebojyoti Ghosh   Mat            A;
91449bd79ccSDebojyoti Ghosh 
91549bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
91649bd79ccSDebojyoti Ghosh   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
91749bd79ccSDebojyoti Ghosh   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
91849bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&A);CHKERRQ(ierr);
91949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
92049bd79ccSDebojyoti Ghosh }
92149bd79ccSDebojyoti Ghosh 
92249bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */
92349bd79ccSDebojyoti Ghosh /*@C
92449bd79ccSDebojyoti Ghosh   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
92549bd79ccSDebojyoti Ghosh 
92649bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
92749bd79ccSDebojyoti Ghosh 
92849bd79ccSDebojyoti Ghosh   where
92949bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
93049bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
93149bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
93249bd79ccSDebojyoti Ghosh     I is the identity matrix
93349bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
93449bd79ccSDebojyoti Ghosh 
93549bd79ccSDebojyoti Ghosh   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
93649bd79ccSDebojyoti Ghosh   S is always stored independently on all processes as a PetscScalar array in column-major format.
93749bd79ccSDebojyoti Ghosh 
93849bd79ccSDebojyoti Ghosh   Collective
93949bd79ccSDebojyoti Ghosh 
94049bd79ccSDebojyoti Ghosh   Input Parameters:
94149bd79ccSDebojyoti Ghosh + A - the AIJ matrix
94249bd79ccSDebojyoti Ghosh . S - the S matrix, stored as a PetscScalar array (column-major)
94349bd79ccSDebojyoti Ghosh . T - the T matrix, stored as a PetscScalar array (column-major)
94449bd79ccSDebojyoti Ghosh . p - number of rows in S and T
94549bd79ccSDebojyoti Ghosh - q - number of columns in S and T
94649bd79ccSDebojyoti Ghosh 
94749bd79ccSDebojyoti Ghosh   Output Parameter:
94849bd79ccSDebojyoti Ghosh . kaij - the new KAIJ matrix
94949bd79ccSDebojyoti Ghosh 
95049bd79ccSDebojyoti Ghosh   Operations provided:
95149bd79ccSDebojyoti Ghosh + MatMult
95249bd79ccSDebojyoti Ghosh . MatMultAdd
95349bd79ccSDebojyoti Ghosh . MatInvertBlockDiagonal
95449bd79ccSDebojyoti Ghosh - MatView
95549bd79ccSDebojyoti Ghosh 
95649bd79ccSDebojyoti Ghosh   Level: advanced
95749bd79ccSDebojyoti Ghosh 
95849bd79ccSDebojyoti Ghosh .seealso: MatKAIJGetAIJ(), MATKAIJ
95949bd79ccSDebojyoti Ghosh @*/
96049bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
96149bd79ccSDebojyoti Ghosh {
96249bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
96349bd79ccSDebojyoti Ghosh   PetscMPIInt    size;
96449bd79ccSDebojyoti Ghosh   PetscInt       n,i,j;
96549bd79ccSDebojyoti Ghosh   Mat            B;
96649bd79ccSDebojyoti Ghosh   PetscBool      isTI = PETSC_FALSE;
96749bd79ccSDebojyoti Ghosh 
96849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
96949bd79ccSDebojyoti Ghosh 
97049bd79ccSDebojyoti Ghosh   /* check if T is an identity matrix */
97149bd79ccSDebojyoti Ghosh   if (T && (p == q)) {
97249bd79ccSDebojyoti Ghosh     isTI = PETSC_TRUE;
97349bd79ccSDebojyoti Ghosh     for (i=0; i<p; i++) {
97449bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
97549bd79ccSDebojyoti Ghosh         if (i == j) {
97649bd79ccSDebojyoti Ghosh           /* diagonal term must be 1 */
97749bd79ccSDebojyoti Ghosh           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
97849bd79ccSDebojyoti Ghosh         } else {
97949bd79ccSDebojyoti Ghosh           /* off-diagonal term must be 0 */
98049bd79ccSDebojyoti Ghosh           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
98149bd79ccSDebojyoti Ghosh         }
98249bd79ccSDebojyoti Ghosh       }
98349bd79ccSDebojyoti Ghosh     }
98449bd79ccSDebojyoti Ghosh   }
98549bd79ccSDebojyoti Ghosh 
98649bd79ccSDebojyoti Ghosh   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
98749bd79ccSDebojyoti Ghosh 
98849bd79ccSDebojyoti Ghosh   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
98949bd79ccSDebojyoti Ghosh   ierr = MatSetSizes(B,p*A->rmap->n,q*A->cmap->n,p*A->rmap->N,q*A->cmap->N);CHKERRQ(ierr);
99049bd79ccSDebojyoti Ghosh   ierr = PetscLayoutSetBlockSize(B->rmap,p);CHKERRQ(ierr);
99149bd79ccSDebojyoti Ghosh   ierr = PetscLayoutSetBlockSize(B->cmap,q);CHKERRQ(ierr);
99249bd79ccSDebojyoti Ghosh   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
99349bd79ccSDebojyoti Ghosh   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
99449bd79ccSDebojyoti Ghosh 
99549bd79ccSDebojyoti Ghosh   B->assembled = PETSC_TRUE;
99649bd79ccSDebojyoti Ghosh   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
99749bd79ccSDebojyoti Ghosh 
99849bd79ccSDebojyoti Ghosh   if (size == 1) {
99949bd79ccSDebojyoti Ghosh     Mat_SeqKAIJ *b;
100049bd79ccSDebojyoti Ghosh 
100149bd79ccSDebojyoti Ghosh     ierr = MatSetType(B,MATSEQKAIJ);CHKERRQ(ierr);
100249bd79ccSDebojyoti Ghosh 
100349bd79ccSDebojyoti Ghosh     B->ops->setup   = NULL;
100449bd79ccSDebojyoti Ghosh     B->ops->destroy = MatDestroy_SeqKAIJ;
100549bd79ccSDebojyoti Ghosh     B->ops->view    = MatView_SeqKAIJ;
100649bd79ccSDebojyoti Ghosh     b               = (Mat_SeqKAIJ*)B->data;
100749bd79ccSDebojyoti Ghosh     b->p            = p;
100849bd79ccSDebojyoti Ghosh     b->q            = q;
100949bd79ccSDebojyoti Ghosh     b->AIJ          = A;
101049bd79ccSDebojyoti Ghosh     b->isTI         = isTI;
101149bd79ccSDebojyoti Ghosh     if (S) {
101249bd79ccSDebojyoti Ghosh       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr);
101349bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
101449bd79ccSDebojyoti Ghosh     } else  b->S = NULL;
101549bd79ccSDebojyoti Ghosh     if (T && (!isTI)) {
101649bd79ccSDebojyoti Ghosh       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr);
101749bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
101849bd79ccSDebojyoti Ghosh     } else b->T = NULL;
101949bd79ccSDebojyoti Ghosh 
102049bd79ccSDebojyoti Ghosh     B->ops->mult                = MatMult_SeqKAIJ_N;
102149bd79ccSDebojyoti Ghosh     B->ops->multadd             = MatMultAdd_SeqKAIJ_N;
102249bd79ccSDebojyoti Ghosh     B->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
102349bd79ccSDebojyoti Ghosh     B->ops->getrow              = MatGetRow_SeqKAIJ;
102449bd79ccSDebojyoti Ghosh     B->ops->restorerow          = MatRestoreRow_SeqKAIJ;
102549bd79ccSDebojyoti Ghosh     B->ops->sor                 = MatSOR_SeqKAIJ;
102649bd79ccSDebojyoti Ghosh 
102749bd79ccSDebojyoti Ghosh     /*
102849bd79ccSDebojyoti Ghosh     This is commented out since MATKAIJ will use the basic MatConvert (which uses MatGetRow()).
102949bd79ccSDebojyoti Ghosh     ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqkaij_seqaij_C",MatConvert_SeqKAIJ_SeqAIJ);CHKERRQ(ierr);
103049bd79ccSDebojyoti Ghosh     */
103149bd79ccSDebojyoti Ghosh 
103249bd79ccSDebojyoti Ghosh   } else {
103349bd79ccSDebojyoti Ghosh     Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
103449bd79ccSDebojyoti Ghosh     Mat_MPIKAIJ *b;
103549bd79ccSDebojyoti Ghosh     IS          from,to;
103649bd79ccSDebojyoti Ghosh     Vec         gvec;
103749bd79ccSDebojyoti Ghosh 
103849bd79ccSDebojyoti Ghosh     ierr = MatSetType(B,MATMPIKAIJ);CHKERRQ(ierr);
103949bd79ccSDebojyoti Ghosh 
104049bd79ccSDebojyoti Ghosh     B->ops->setup   = NULL;
104149bd79ccSDebojyoti Ghosh     B->ops->destroy = MatDestroy_MPIKAIJ;
104249bd79ccSDebojyoti Ghosh     B->ops->view    = MatView_MPIKAIJ;
104349bd79ccSDebojyoti Ghosh 
104449bd79ccSDebojyoti Ghosh     b       = (Mat_MPIKAIJ*)B->data;
104549bd79ccSDebojyoti Ghosh     b->p    = p;
104649bd79ccSDebojyoti Ghosh     b->q    = q;
104749bd79ccSDebojyoti Ghosh     b->A    = A;
104849bd79ccSDebojyoti Ghosh     b->isTI = isTI;
104949bd79ccSDebojyoti Ghosh     if (S) {
105049bd79ccSDebojyoti Ghosh       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr);
105149bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
105249bd79ccSDebojyoti Ghosh     } else  b->S = NULL;
105349bd79ccSDebojyoti Ghosh     if (T &&(!isTI)) {
105449bd79ccSDebojyoti Ghosh       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr);
105549bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
105649bd79ccSDebojyoti Ghosh     } else b->T = NULL;
105749bd79ccSDebojyoti Ghosh 
105849bd79ccSDebojyoti Ghosh     ierr = MatCreateKAIJ(mpiaij->A,p,q,S   ,T,&b->AIJ);CHKERRQ(ierr);
105949bd79ccSDebojyoti Ghosh     ierr = MatCreateKAIJ(mpiaij->B,p,q,NULL,T,&b->OAIJ);CHKERRQ(ierr);
106049bd79ccSDebojyoti Ghosh 
106149bd79ccSDebojyoti Ghosh     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
106249bd79ccSDebojyoti Ghosh     ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
106349bd79ccSDebojyoti Ghosh     ierr = VecSetSizes(b->w,n*q,n*q);CHKERRQ(ierr);
106449bd79ccSDebojyoti Ghosh     ierr = VecSetBlockSize(b->w,q);CHKERRQ(ierr);
106549bd79ccSDebojyoti Ghosh     ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
106649bd79ccSDebojyoti Ghosh 
106749bd79ccSDebojyoti Ghosh     /* create two temporary Index sets for build scatter gather */
106849bd79ccSDebojyoti Ghosh     ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
106949bd79ccSDebojyoti Ghosh     ierr = ISCreateStride(PETSC_COMM_SELF,n*q,0,1,&to);CHKERRQ(ierr);
107049bd79ccSDebojyoti Ghosh 
107149bd79ccSDebojyoti Ghosh     /* create temporary global vector to generate scatter context */
107249bd79ccSDebojyoti Ghosh     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),q,q*A->cmap->n,q*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
107349bd79ccSDebojyoti Ghosh 
107449bd79ccSDebojyoti Ghosh     /* generate the scatter context */
1075*b5a089c7SRichard Tran Mills     ierr = VecScatterCreateWithData(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
107649bd79ccSDebojyoti Ghosh 
107749bd79ccSDebojyoti Ghosh     ierr = ISDestroy(&from);CHKERRQ(ierr);
107849bd79ccSDebojyoti Ghosh     ierr = ISDestroy(&to);CHKERRQ(ierr);
107949bd79ccSDebojyoti Ghosh     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
108049bd79ccSDebojyoti Ghosh 
108149bd79ccSDebojyoti Ghosh     B->ops->mult                = MatMult_MPIKAIJ_dof;
108249bd79ccSDebojyoti Ghosh     B->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
108349bd79ccSDebojyoti Ghosh     B->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
108449bd79ccSDebojyoti Ghosh     B->ops->getrow              = MatGetRow_MPIKAIJ;
108549bd79ccSDebojyoti Ghosh     B->ops->restorerow          = MatRestoreRow_MPIKAIJ;
108649bd79ccSDebojyoti Ghosh     ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
108749bd79ccSDebojyoti Ghosh   }
108849bd79ccSDebojyoti Ghosh   B->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
108949bd79ccSDebojyoti Ghosh   B->assembled = PETSC_TRUE;
109049bd79ccSDebojyoti Ghosh   ierr  = MatSetUp(B);CHKERRQ(ierr);
109149bd79ccSDebojyoti Ghosh   *kaij = B;
109249bd79ccSDebojyoti Ghosh   ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
109349bd79ccSDebojyoti Ghosh 
109449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
109549bd79ccSDebojyoti Ghosh }
1096