xref: /petsc/src/mat/impls/kaij/kaij.c (revision a84f80695ca69670cd9a132d82ad57a724a655b8)
149bd79ccSDebojyoti Ghosh 
249bd79ccSDebojyoti Ghosh /*
349bd79ccSDebojyoti Ghosh   Defines the basic matrix operations for the KAIJ  matrix storage format.
449bd79ccSDebojyoti Ghosh   This format is used to evaluate matrices of the form:
549bd79ccSDebojyoti Ghosh 
649bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
749bd79ccSDebojyoti Ghosh 
849bd79ccSDebojyoti Ghosh   where
949bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
1049bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
1149bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
1249bd79ccSDebojyoti Ghosh     I is the identity matrix
1349bd79ccSDebojyoti Ghosh 
1449bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
1549bd79ccSDebojyoti Ghosh 
1649bd79ccSDebojyoti Ghosh   We provide:
1749bd79ccSDebojyoti Ghosh      MatMult()
1849bd79ccSDebojyoti Ghosh      MatMultAdd()
1949bd79ccSDebojyoti Ghosh      MatInvertBlockDiagonal()
2049bd79ccSDebojyoti Ghosh   and
2149bd79ccSDebojyoti Ghosh      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
2249bd79ccSDebojyoti Ghosh 
2349bd79ccSDebojyoti Ghosh   This single directory handles both the sequential and parallel codes
2449bd79ccSDebojyoti Ghosh */
2549bd79ccSDebojyoti Ghosh 
2649bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
2749bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h>
2849bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h>
2949bd79ccSDebojyoti Ghosh 
3049bd79ccSDebojyoti Ghosh /*@C
3149bd79ccSDebojyoti Ghosh    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
3249bd79ccSDebojyoti Ghosh 
3349bd79ccSDebojyoti Ghosh    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
3449bd79ccSDebojyoti Ghosh 
3549bd79ccSDebojyoti Ghosh    Input Parameter:
3649bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
3749bd79ccSDebojyoti Ghosh 
3849bd79ccSDebojyoti Ghosh    Output Parameter:
3949bd79ccSDebojyoti Ghosh .  B - the AIJ matrix
4049bd79ccSDebojyoti Ghosh 
4149bd79ccSDebojyoti Ghosh    Level: advanced
4249bd79ccSDebojyoti Ghosh 
4349bd79ccSDebojyoti Ghosh    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
4449bd79ccSDebojyoti Ghosh 
4549bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ()
4649bd79ccSDebojyoti Ghosh @*/
4749bd79ccSDebojyoti Ghosh PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
4849bd79ccSDebojyoti Ghosh {
4949bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
5049bd79ccSDebojyoti Ghosh   PetscBool      ismpikaij,isseqkaij;
5149bd79ccSDebojyoti Ghosh 
5249bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
5349bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
5449bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
5549bd79ccSDebojyoti Ghosh   if (ismpikaij) {
5649bd79ccSDebojyoti Ghosh     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
5749bd79ccSDebojyoti Ghosh 
5849bd79ccSDebojyoti Ghosh     *B = b->A;
5949bd79ccSDebojyoti Ghosh   } else if (isseqkaij) {
6049bd79ccSDebojyoti Ghosh     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
6149bd79ccSDebojyoti Ghosh 
6249bd79ccSDebojyoti Ghosh     *B = b->AIJ;
6349bd79ccSDebojyoti Ghosh   } else {
6449bd79ccSDebojyoti Ghosh     *B = A;
6549bd79ccSDebojyoti Ghosh   }
6649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
6749bd79ccSDebojyoti Ghosh }
6849bd79ccSDebojyoti Ghosh 
6949bd79ccSDebojyoti Ghosh /*@C
7049bd79ccSDebojyoti Ghosh    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
7149bd79ccSDebojyoti Ghosh 
720567c835SRichard Tran Mills    Not Collective; the entire S is stored and returned independently on all processes.
7349bd79ccSDebojyoti Ghosh 
7449bd79ccSDebojyoti Ghosh    Input Parameter:
7549bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
7649bd79ccSDebojyoti Ghosh 
7749bd79ccSDebojyoti Ghosh    Output Parameter:
7849bd79ccSDebojyoti Ghosh .  S - the S matrix, in form of a scalar array in column-major format
7949bd79ccSDebojyoti Ghosh 
8031a97b9aSRichard Tran Mills    Notes:
8131a97b9aSRichard Tran Mills    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
8231a97b9aSRichard Tran Mills    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S.
8331a97b9aSRichard Tran Mills 
8449bd79ccSDebojyoti Ghosh    Level: advanced
8549bd79ccSDebojyoti Ghosh 
8631a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
8749bd79ccSDebojyoti Ghosh @*/
8849bd79ccSDebojyoti Ghosh PetscErrorCode  MatKAIJGetS(Mat A,const PetscScalar **S)
8949bd79ccSDebojyoti Ghosh {
9049bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
9149bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
9249bd79ccSDebojyoti Ghosh   *S = b->S;
9349bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
9449bd79ccSDebojyoti Ghosh }
9549bd79ccSDebojyoti Ghosh 
9649bd79ccSDebojyoti Ghosh /*@C
9731a97b9aSRichard Tran Mills    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
9849bd79ccSDebojyoti Ghosh 
990567c835SRichard Tran Mills    Not Collective; the entire T is stored and returned independently on all processes
10049bd79ccSDebojyoti Ghosh 
10149bd79ccSDebojyoti Ghosh    Input Parameter:
10249bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
10349bd79ccSDebojyoti Ghosh 
10449bd79ccSDebojyoti Ghosh    Output Parameter:
10549bd79ccSDebojyoti Ghosh .  T - the T matrix, in form of a scalar array in column-major format
10649bd79ccSDebojyoti Ghosh 
10731a97b9aSRichard Tran Mills    Notes:
10831a97b9aSRichard Tran Mills    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
10931a97b9aSRichard Tran Mills    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T.
11031a97b9aSRichard Tran Mills 
11149bd79ccSDebojyoti Ghosh    Level: advanced
11249bd79ccSDebojyoti Ghosh 
11331a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
11449bd79ccSDebojyoti Ghosh @*/
11549bd79ccSDebojyoti Ghosh PetscErrorCode  MatKAIJGetT(Mat A,const PetscScalar **T)
11649bd79ccSDebojyoti Ghosh {
11749bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
11849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
11949bd79ccSDebojyoti Ghosh   *T = b->T;
12049bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
12149bd79ccSDebojyoti Ghosh }
12249bd79ccSDebojyoti Ghosh 
1230567c835SRichard Tran Mills /*@
1240567c835SRichard Tran Mills    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
1250567c835SRichard Tran Mills 
1260567c835SRichard Tran Mills    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
1270567c835SRichard Tran Mills 
1280567c835SRichard Tran Mills    Input Parameters:
1290567c835SRichard Tran Mills +  A - the KAIJ matrix
1300567c835SRichard Tran Mills -  B - the AIJ matrix
1310567c835SRichard Tran Mills 
1320567c835SRichard Tran Mills    Level: advanced
1330567c835SRichard Tran Mills 
1340567c835SRichard Tran Mills .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
1350567c835SRichard Tran Mills @*/
1360567c835SRichard Tran Mills PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
1370567c835SRichard Tran Mills {
1380567c835SRichard Tran Mills   PetscErrorCode ierr;
1390567c835SRichard Tran Mills   PetscMPIInt    size;
1400567c835SRichard Tran Mills 
1410567c835SRichard Tran Mills   PetscFunctionBegin;
1420567c835SRichard Tran Mills   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1430567c835SRichard Tran Mills   if (size == 1) {
1440567c835SRichard Tran Mills     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
1450567c835SRichard Tran Mills     a->AIJ = B;
1460567c835SRichard Tran Mills   } else {
1470567c835SRichard Tran Mills     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
1480567c835SRichard Tran Mills     a->A = B;
1490567c835SRichard Tran Mills   }
1500567c835SRichard Tran Mills   PetscFunctionReturn(0);
1510567c835SRichard Tran Mills }
1520567c835SRichard Tran Mills 
1530567c835SRichard Tran Mills /*@C
1540567c835SRichard Tran Mills    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
1550567c835SRichard Tran Mills 
1560567c835SRichard Tran Mills    Logically Collective; the entire S is stored independently on all processes.
1570567c835SRichard Tran Mills 
1580567c835SRichard Tran Mills    Input Parameters:
1590567c835SRichard Tran Mills +  A - the KAIJ matrix
1600567c835SRichard Tran Mills .  p - the number of rows in S
1610567c835SRichard Tran Mills .  q - the number of columns in S
1620567c835SRichard Tran Mills -  S - the S matrix, in form of a scalar array in column-major format
1630567c835SRichard Tran Mills 
1640567c835SRichard Tran Mills    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
1650567c835SRichard Tran Mills 
1660567c835SRichard Tran Mills    Level: Advanced
1670567c835SRichard Tran Mills 
1680567c835SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
1690567c835SRichard Tran Mills @*/
1700567c835SRichard Tran Mills PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
1710567c835SRichard Tran Mills {
1720567c835SRichard Tran Mills   PetscErrorCode ierr;
1730567c835SRichard Tran Mills   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
1740567c835SRichard Tran Mills 
1750567c835SRichard Tran Mills   PetscFunctionBegin;
1760567c835SRichard Tran Mills   if (a->S) {
1770567c835SRichard Tran Mills     ierr = PetscFree(a->S);CHKERRQ(ierr);
1780567c835SRichard Tran Mills   }
1790567c835SRichard Tran Mills   if (S) {
180*a84f8069SRichard Tran Mills     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
1810567c835SRichard Tran Mills     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1820567c835SRichard Tran Mills   } else  a->S = NULL;
1830567c835SRichard Tran Mills 
1840567c835SRichard Tran Mills   a->p = p;
1850567c835SRichard Tran Mills   a->q = q;
1860567c835SRichard Tran Mills 
1870567c835SRichard Tran Mills   PetscFunctionReturn(0);
1880567c835SRichard Tran Mills }
1890567c835SRichard Tran Mills 
1900567c835SRichard Tran Mills /*@C
1910567c835SRichard Tran Mills    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
1920567c835SRichard Tran Mills 
1930567c835SRichard Tran Mills    Logically Collective; the entire T is stored independently on all processes.
1940567c835SRichard Tran Mills 
1950567c835SRichard Tran Mills    Input Parameters:
1960567c835SRichard Tran Mills +  A - the KAIJ matrix
1970567c835SRichard Tran Mills .  p - the number of rows in S
1980567c835SRichard Tran Mills .  q - the number of columns in S
1990567c835SRichard Tran Mills -  T - the T matrix, in form of a scalar array in column-major format
2000567c835SRichard Tran Mills 
2010567c835SRichard Tran Mills    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
2020567c835SRichard Tran Mills 
2030567c835SRichard Tran Mills    Level: Advanced
2040567c835SRichard Tran Mills 
2050567c835SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
2060567c835SRichard Tran Mills @*/
2070567c835SRichard Tran Mills PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
2080567c835SRichard Tran Mills {
2090567c835SRichard Tran Mills   PetscErrorCode ierr;
2100567c835SRichard Tran Mills   PetscInt       i,j;
2110567c835SRichard Tran Mills   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
2120567c835SRichard Tran Mills   PetscBool      isTI = PETSC_FALSE;
2130567c835SRichard Tran Mills 
2140567c835SRichard Tran Mills   PetscFunctionBegin;
2150567c835SRichard Tran Mills   /* check if T is an identity matrix */
2160567c835SRichard Tran Mills   if (T && (p == q)) {
2170567c835SRichard Tran Mills     isTI = PETSC_TRUE;
2180567c835SRichard Tran Mills     for (i=0; i<p; i++) {
2190567c835SRichard Tran Mills       for (j=0; j<q; j++) {
2200567c835SRichard Tran Mills         if (i == j) {
2210567c835SRichard Tran Mills           /* diagonal term must be 1 */
2220567c835SRichard Tran Mills           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
2230567c835SRichard Tran Mills         } else {
2240567c835SRichard Tran Mills           /* off-diagonal term must be 0 */
2250567c835SRichard Tran Mills           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
2260567c835SRichard Tran Mills         }
2270567c835SRichard Tran Mills       }
2280567c835SRichard Tran Mills     }
2290567c835SRichard Tran Mills   }
2300567c835SRichard Tran Mills   a->isTI = isTI;
2310567c835SRichard Tran Mills 
2320567c835SRichard Tran Mills   if (a->T) {
2330567c835SRichard Tran Mills     ierr = PetscFree(a->T);CHKERRQ(ierr);
2340567c835SRichard Tran Mills   }
2350567c835SRichard Tran Mills   if (T && (!isTI)) {
236*a84f8069SRichard Tran Mills     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
2370567c835SRichard Tran Mills     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
23850d19d74SRichard Tran Mills   } else a->T = NULL;
2390567c835SRichard Tran Mills 
2400567c835SRichard Tran Mills   a->p = p;
2410567c835SRichard Tran Mills   a->q = q;
2420567c835SRichard Tran Mills   PetscFunctionReturn(0);
2430567c835SRichard Tran Mills }
2440567c835SRichard Tran Mills 
24549bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
24649bd79ccSDebojyoti Ghosh {
24749bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
24849bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
24949bd79ccSDebojyoti Ghosh 
25049bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
25149bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
252*a84f8069SRichard Tran Mills   ierr = PetscFree(b->S);CHKERRQ(ierr);
253*a84f8069SRichard Tran Mills   ierr = PetscFree(b->T);CHKERRQ(ierr);
254*a84f8069SRichard Tran Mills   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
25549bd79ccSDebojyoti Ghosh   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
25649bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
25749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
25849bd79ccSDebojyoti Ghosh }
25949bd79ccSDebojyoti Ghosh 
26049bd79ccSDebojyoti Ghosh PetscErrorCode MatSetUp_KAIJ(Mat A)
26149bd79ccSDebojyoti Ghosh {
2620567c835SRichard Tran Mills   PetscErrorCode ierr;
2630567c835SRichard Tran Mills   PetscInt       n;
2640567c835SRichard Tran Mills   PetscMPIInt    size;
2650567c835SRichard Tran Mills   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
2660567c835SRichard Tran Mills 
26749bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
2680567c835SRichard Tran Mills   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2690567c835SRichard Tran Mills   if (size == 1) {
2700567c835SRichard Tran Mills     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
2710567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
2720567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
2730567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
2740567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
2750567c835SRichard Tran Mills     ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr);
2760567c835SRichard Tran Mills   } else {
2770567c835SRichard Tran Mills     Mat_MPIKAIJ *a;
2780567c835SRichard Tran Mills     Mat_MPIAIJ  *mpiaij;
2790567c835SRichard Tran Mills     IS          from,to;
2800567c835SRichard Tran Mills     Vec         gvec;
2810567c835SRichard Tran Mills     PetscScalar *T;
2820567c835SRichard Tran Mills     PetscInt    i,j;
2830567c835SRichard Tran Mills 
2840567c835SRichard Tran Mills     a = (Mat_MPIKAIJ*)A->data;
285d3f912faSRichard Tran Mills     mpiaij = (Mat_MPIAIJ*)a->A->data;
2860567c835SRichard Tran Mills     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
2870567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
2880567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
2890567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
2900567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
2910567c835SRichard Tran Mills     ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr);
2920567c835SRichard Tran Mills 
2930567c835SRichard Tran Mills     if (a->isTI) {
2940567c835SRichard Tran Mills       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
2950567c835SRichard Tran Mills        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
2960567c835SRichard Tran Mills        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
2970567c835SRichard Tran Mills        * to pass in. */
298*a84f8069SRichard Tran Mills       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
2990567c835SRichard Tran Mills       for (i=0; i<a->p; i++) {
3000567c835SRichard Tran Mills         for (j=0; j<a->q; j++) {
3010567c835SRichard Tran Mills           if (i==j) T[i+j*a->p] = 1.0;
3020567c835SRichard Tran Mills           else      T[i+j*a->p] = 0.0;
3030567c835SRichard Tran Mills         }
3040567c835SRichard Tran Mills       }
3050567c835SRichard Tran Mills     } else T = a->T;
3060567c835SRichard Tran Mills     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
3070567c835SRichard Tran Mills     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
3080567c835SRichard Tran Mills     if (a->isTI) {
3090567c835SRichard Tran Mills       ierr = PetscFree(T);CHKERRQ(ierr);
3100567c835SRichard Tran Mills     }
3110567c835SRichard Tran Mills 
3120567c835SRichard Tran Mills     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3130567c835SRichard Tran Mills     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
3140567c835SRichard Tran Mills     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
3150567c835SRichard Tran Mills     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
3160567c835SRichard Tran Mills     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
3170567c835SRichard Tran Mills 
3180567c835SRichard Tran Mills     /* create two temporary Index sets for build scatter gather */
3190567c835SRichard Tran Mills     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3200567c835SRichard Tran Mills     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
3210567c835SRichard Tran Mills 
3220567c835SRichard Tran Mills     /* create temporary global vector to generate scatter context */
3230567c835SRichard Tran Mills     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3240567c835SRichard Tran Mills 
3250567c835SRichard Tran Mills     /* generate the scatter context */
3264589b4e5SRichard Tran Mills     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
3270567c835SRichard Tran Mills 
3280567c835SRichard Tran Mills     ierr = ISDestroy(&from);CHKERRQ(ierr);
3290567c835SRichard Tran Mills     ierr = ISDestroy(&to);CHKERRQ(ierr);
3300567c835SRichard Tran Mills     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3310567c835SRichard Tran Mills   }
3320567c835SRichard Tran Mills 
3330567c835SRichard Tran Mills   A->assembled = PETSC_TRUE;
33449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
33549bd79ccSDebojyoti Ghosh }
33649bd79ccSDebojyoti Ghosh 
33749bd79ccSDebojyoti Ghosh PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
33849bd79ccSDebojyoti Ghosh {
33949bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
34049bd79ccSDebojyoti Ghosh   Mat            B;
34149bd79ccSDebojyoti Ghosh 
34249bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
34349bd79ccSDebojyoti Ghosh   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
34449bd79ccSDebojyoti Ghosh   ierr = MatView(B,viewer);CHKERRQ(ierr);
34549bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&B);CHKERRQ(ierr);
34649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
34749bd79ccSDebojyoti Ghosh }
34849bd79ccSDebojyoti Ghosh 
34949bd79ccSDebojyoti Ghosh PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
35049bd79ccSDebojyoti Ghosh {
35149bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
35249bd79ccSDebojyoti Ghosh   Mat            B;
35349bd79ccSDebojyoti Ghosh 
35449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
35549bd79ccSDebojyoti Ghosh   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
35649bd79ccSDebojyoti Ghosh   ierr = MatView(B,viewer);CHKERRQ(ierr);
35749bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&B);CHKERRQ(ierr);
35849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
35949bd79ccSDebojyoti Ghosh }
36049bd79ccSDebojyoti Ghosh 
36149bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
36249bd79ccSDebojyoti Ghosh {
36349bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
36449bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
36549bd79ccSDebojyoti Ghosh 
36649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
36749bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
36849bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
36949bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
37049bd79ccSDebojyoti Ghosh   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
37149bd79ccSDebojyoti Ghosh   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
372*a84f8069SRichard Tran Mills   ierr = PetscFree(b->S);CHKERRQ(ierr);
373*a84f8069SRichard Tran Mills   ierr = PetscFree(b->T);CHKERRQ(ierr);
374*a84f8069SRichard Tran Mills   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
37549bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
37649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
37749bd79ccSDebojyoti Ghosh }
37849bd79ccSDebojyoti Ghosh 
37949bd79ccSDebojyoti Ghosh 
38049bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/
38149bd79ccSDebojyoti Ghosh 
38249bd79ccSDebojyoti Ghosh /* zz = yy + Axx */
38349bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
38449bd79ccSDebojyoti Ghosh {
38549bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
38649bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
38749bd79ccSDebojyoti Ghosh   const PetscScalar *s = b->S, *t = b->T;
38849bd79ccSDebojyoti Ghosh   const PetscScalar *x,*v,*bx;
38949bd79ccSDebojyoti Ghosh   PetscScalar       *y,*sums;
39049bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
39149bd79ccSDebojyoti Ghosh   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
39249bd79ccSDebojyoti Ghosh   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
39349bd79ccSDebojyoti Ghosh 
39449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
39549bd79ccSDebojyoti Ghosh   if (!yy) {
39649bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
39749bd79ccSDebojyoti Ghosh   } else {
39849bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
39949bd79ccSDebojyoti Ghosh   }
40049bd79ccSDebojyoti Ghosh   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
40149bd79ccSDebojyoti Ghosh 
40249bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
40349bd79ccSDebojyoti Ghosh   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
40449bd79ccSDebojyoti Ghosh   idx  = a->j;
40549bd79ccSDebojyoti Ghosh   v    = a->a;
40649bd79ccSDebojyoti Ghosh   ii   = a->i;
40749bd79ccSDebojyoti Ghosh 
40849bd79ccSDebojyoti Ghosh   if (b->isTI) {
40949bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
41049bd79ccSDebojyoti Ghosh       jrow = ii[i];
41149bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
41249bd79ccSDebojyoti Ghosh       sums = y + p*i;
41349bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
41449bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
41549bd79ccSDebojyoti Ghosh           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
41649bd79ccSDebojyoti Ghosh         }
41749bd79ccSDebojyoti Ghosh       }
41849bd79ccSDebojyoti Ghosh     }
41949bd79ccSDebojyoti Ghosh   } else if (t) {
42049bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
42149bd79ccSDebojyoti Ghosh       jrow = ii[i];
42249bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
42349bd79ccSDebojyoti Ghosh       sums = y + p*i;
42449bd79ccSDebojyoti Ghosh       bx   = x + q*i;
42549bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
42649bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
42749bd79ccSDebojyoti Ghosh           for (l=0; l<q; l++) {
42849bd79ccSDebojyoti Ghosh             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
42949bd79ccSDebojyoti Ghosh           }
43049bd79ccSDebojyoti Ghosh         }
43149bd79ccSDebojyoti Ghosh       }
43249bd79ccSDebojyoti Ghosh     }
43349bd79ccSDebojyoti Ghosh   }
43449bd79ccSDebojyoti Ghosh   if (s) {
43549bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
43649bd79ccSDebojyoti Ghosh       jrow = ii[i];
43749bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
43849bd79ccSDebojyoti Ghosh       sums = y + p*i;
43949bd79ccSDebojyoti Ghosh       bx   = x + q*i;
44049bd79ccSDebojyoti Ghosh       if (i < b->AIJ->cmap->n) {
44149bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
44249bd79ccSDebojyoti Ghosh           for (k=0; k<p; k++) {
44349bd79ccSDebojyoti Ghosh             sums[k] += s[k+j*p]*bx[j];
44449bd79ccSDebojyoti Ghosh           }
44549bd79ccSDebojyoti Ghosh         }
44649bd79ccSDebojyoti Ghosh       }
44749bd79ccSDebojyoti Ghosh     }
44849bd79ccSDebojyoti Ghosh   }
44949bd79ccSDebojyoti Ghosh 
45049bd79ccSDebojyoti Ghosh   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
45149bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
45249bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
45349bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
45449bd79ccSDebojyoti Ghosh }
45549bd79ccSDebojyoti Ghosh 
45649bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
45749bd79ccSDebojyoti Ghosh {
45849bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
45949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
46049bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
46149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
46249bd79ccSDebojyoti Ghosh }
46349bd79ccSDebojyoti Ghosh 
46449bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
46549bd79ccSDebojyoti Ghosh {
46649bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
46749bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
46849bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
46949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
47049bd79ccSDebojyoti Ghosh }
47149bd79ccSDebojyoti Ghosh 
47249bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h>
47349bd79ccSDebojyoti Ghosh 
47449bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
47549bd79ccSDebojyoti Ghosh {
47649bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
47749bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
47849bd79ccSDebojyoti Ghosh   const PetscScalar *S  = b->S;
47949bd79ccSDebojyoti Ghosh   const PetscScalar *T  = b->T;
48049bd79ccSDebojyoti Ghosh   const PetscScalar *v  = a->a;
48149bd79ccSDebojyoti Ghosh   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
48249bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
48349bd79ccSDebojyoti Ghosh   PetscInt          i,j,*v_pivots,dof,dof2;
48449bd79ccSDebojyoti Ghosh   PetscScalar       *diag,aval,*v_work;
48549bd79ccSDebojyoti Ghosh 
48649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
48749bd79ccSDebojyoti Ghosh   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
48831a97b9aSRichard Tran Mills   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
48949bd79ccSDebojyoti Ghosh 
49049bd79ccSDebojyoti Ghosh   dof  = p;
49149bd79ccSDebojyoti Ghosh   dof2 = dof*dof;
49249bd79ccSDebojyoti Ghosh 
49349bd79ccSDebojyoti Ghosh   if (b->ibdiagvalid) {
49449bd79ccSDebojyoti Ghosh     if (values) *values = b->ibdiag;
49549bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
49649bd79ccSDebojyoti Ghosh   }
49749bd79ccSDebojyoti Ghosh   if (!b->ibdiag) {
498*a84f8069SRichard Tran Mills     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
49949bd79ccSDebojyoti Ghosh     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
50049bd79ccSDebojyoti Ghosh   }
50149bd79ccSDebojyoti Ghosh   if (values) *values = b->ibdiag;
50249bd79ccSDebojyoti Ghosh   diag = b->ibdiag;
50349bd79ccSDebojyoti Ghosh 
50449bd79ccSDebojyoti Ghosh   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
50549bd79ccSDebojyoti Ghosh   for (i=0; i<m; i++) {
50649bd79ccSDebojyoti Ghosh     if (S) {
50749bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
50849bd79ccSDebojyoti Ghosh     } else {
50949bd79ccSDebojyoti Ghosh       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
51049bd79ccSDebojyoti Ghosh     }
51149bd79ccSDebojyoti Ghosh     if (b->isTI) {
51249bd79ccSDebojyoti Ghosh       aval = 0;
51349bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
51449bd79ccSDebojyoti Ghosh       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
51549bd79ccSDebojyoti Ghosh     } else if (T) {
51649bd79ccSDebojyoti Ghosh       aval = 0;
51749bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
51849bd79ccSDebojyoti Ghosh       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
51949bd79ccSDebojyoti Ghosh     }
52049bd79ccSDebojyoti Ghosh     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
52149bd79ccSDebojyoti Ghosh     diag += dof2;
52249bd79ccSDebojyoti Ghosh   }
52349bd79ccSDebojyoti Ghosh   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
52449bd79ccSDebojyoti Ghosh 
52549bd79ccSDebojyoti Ghosh   b->ibdiagvalid = PETSC_TRUE;
52649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
52749bd79ccSDebojyoti Ghosh }
52849bd79ccSDebojyoti Ghosh 
52949bd79ccSDebojyoti Ghosh static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
53049bd79ccSDebojyoti Ghosh {
53149bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
53249bd79ccSDebojyoti Ghosh 
53349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
53449bd79ccSDebojyoti Ghosh   *B = kaij->AIJ;
53549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
53649bd79ccSDebojyoti Ghosh }
53749bd79ccSDebojyoti Ghosh 
53849bd79ccSDebojyoti Ghosh PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
53949bd79ccSDebojyoti Ghosh {
54049bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
54149bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
54249bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
54349bd79ccSDebojyoti Ghosh   const PetscScalar *aa = a->a, *T = kaij->T, *v;
54449bd79ccSDebojyoti Ghosh   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
54549bd79ccSDebojyoti Ghosh   const PetscScalar *b, *xb, *idiag;
54649bd79ccSDebojyoti Ghosh   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
54749bd79ccSDebojyoti Ghosh   PetscInt          i, j, k, i2, bs, bs2, nz;
54849bd79ccSDebojyoti Ghosh 
54949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
55049bd79ccSDebojyoti Ghosh   its = its*lits;
55149bd79ccSDebojyoti Ghosh   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
55249bd79ccSDebojyoti Ghosh   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
55349bd79ccSDebojyoti Ghosh   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
55431a97b9aSRichard Tran Mills   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
55549bd79ccSDebojyoti Ghosh   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
55649bd79ccSDebojyoti Ghosh   else        {bs = p; bs2 = bs*bs; }
55749bd79ccSDebojyoti Ghosh 
55849bd79ccSDebojyoti Ghosh   if (!m) PetscFunctionReturn(0);
55949bd79ccSDebojyoti Ghosh 
56049bd79ccSDebojyoti Ghosh   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
56149bd79ccSDebojyoti Ghosh   idiag = kaij->ibdiag;
56249bd79ccSDebojyoti Ghosh   diag  = a->diag;
56349bd79ccSDebojyoti Ghosh 
56449bd79ccSDebojyoti Ghosh   if (!kaij->sor.setup) {
56549bd79ccSDebojyoti Ghosh     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
56649bd79ccSDebojyoti Ghosh     kaij->sor.setup = PETSC_TRUE;
56749bd79ccSDebojyoti Ghosh   }
56849bd79ccSDebojyoti Ghosh   y     = kaij->sor.y;
56949bd79ccSDebojyoti Ghosh   w     = kaij->sor.w;
57049bd79ccSDebojyoti Ghosh   work  = kaij->sor.work;
57149bd79ccSDebojyoti Ghosh   t     = kaij->sor.t;
57249bd79ccSDebojyoti Ghosh   arr   = kaij->sor.arr;
57349bd79ccSDebojyoti Ghosh 
57449bd79ccSDebojyoti Ghosh   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
57549bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
57649bd79ccSDebojyoti Ghosh 
57749bd79ccSDebojyoti Ghosh   if (flag & SOR_ZERO_INITIAL_GUESS) {
57849bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
57949bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
58049bd79ccSDebojyoti Ghosh       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
58149bd79ccSDebojyoti Ghosh       i2     =  bs;
58249bd79ccSDebojyoti Ghosh       idiag  += bs2;
58349bd79ccSDebojyoti Ghosh       for (i=1; i<m; i++) {
58449bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
58549bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
58649bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
58749bd79ccSDebojyoti Ghosh 
58849bd79ccSDebojyoti Ghosh         if (T) {                /* b - T (Arow * x) */
58949bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) w[k] = 0;
59049bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
59149bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
59249bd79ccSDebojyoti Ghosh           }
59349bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
59449bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
59549bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
59649bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
59749bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
59849bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
59949bd79ccSDebojyoti Ghosh           }
60049bd79ccSDebojyoti Ghosh         } else {
60149bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
60249bd79ccSDebojyoti Ghosh         }
60349bd79ccSDebojyoti Ghosh 
60449bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
60549bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
60649bd79ccSDebojyoti Ghosh 
60749bd79ccSDebojyoti Ghosh         idiag += bs2;
60849bd79ccSDebojyoti Ghosh         i2    += bs;
60949bd79ccSDebojyoti Ghosh       }
61049bd79ccSDebojyoti Ghosh       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
61149bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
61249bd79ccSDebojyoti Ghosh       xb = t;
61349bd79ccSDebojyoti Ghosh     } else xb = b;
61449bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
61549bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
61649bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
61749bd79ccSDebojyoti Ghosh       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
61849bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
61949bd79ccSDebojyoti Ghosh       i2    -= bs;
62049bd79ccSDebojyoti Ghosh       idiag -= bs2;
62149bd79ccSDebojyoti Ghosh       for (i=m-2; i>=0; i--) {
62249bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1 ;
62349bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
62449bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
62549bd79ccSDebojyoti Ghosh 
62649bd79ccSDebojyoti Ghosh         if (T) {                /* FIXME: This branch untested */
62749bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
62849bd79ccSDebojyoti Ghosh           /* copy all rows of x that are needed into contiguous space */
62949bd79ccSDebojyoti Ghosh           workt = work;
63049bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
63149bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
63249bd79ccSDebojyoti Ghosh             workt += bs;
63349bd79ccSDebojyoti Ghosh           }
63449bd79ccSDebojyoti Ghosh           arrt = arr;
63549bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
63649bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
63749bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
63849bd79ccSDebojyoti Ghosh             arrt += bs2;
63949bd79ccSDebojyoti Ghosh           }
64049bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
64149bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
64249bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) w[k] = t[i2+k];
64349bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
64449bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
64549bd79ccSDebojyoti Ghosh           }
64649bd79ccSDebojyoti Ghosh         }
64749bd79ccSDebojyoti Ghosh 
64849bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
64949bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
65049bd79ccSDebojyoti Ghosh 
65149bd79ccSDebojyoti Ghosh         idiag -= bs2;
65249bd79ccSDebojyoti Ghosh         i2    -= bs;
65349bd79ccSDebojyoti Ghosh       }
65449bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
65549bd79ccSDebojyoti Ghosh     }
65649bd79ccSDebojyoti Ghosh     its--;
65749bd79ccSDebojyoti Ghosh   }
65849bd79ccSDebojyoti Ghosh   while (its--) {               /* FIXME: This branch not updated */
65949bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
66049bd79ccSDebojyoti Ghosh       i2     =  0;
66149bd79ccSDebojyoti Ghosh       idiag  = kaij->ibdiag;
66249bd79ccSDebojyoti Ghosh       for (i=0; i<m; i++) {
66349bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
66449bd79ccSDebojyoti Ghosh 
66549bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
66649bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
66749bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
66849bd79ccSDebojyoti Ghosh         workt = work;
66949bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
67049bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
67149bd79ccSDebojyoti Ghosh           workt += bs;
67249bd79ccSDebojyoti Ghosh         }
67349bd79ccSDebojyoti Ghosh         arrt = arr;
67449bd79ccSDebojyoti Ghosh         if (T) {
67549bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
67649bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
67749bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
67849bd79ccSDebojyoti Ghosh             arrt += bs2;
67949bd79ccSDebojyoti Ghosh           }
68049bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
68149bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
68249bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
68349bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
68449bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
68549bd79ccSDebojyoti Ghosh             arrt += bs2;
68649bd79ccSDebojyoti Ghosh           }
68749bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
68849bd79ccSDebojyoti Ghosh         }
68949bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
69049bd79ccSDebojyoti Ghosh 
69149bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1;
69249bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
69349bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
69449bd79ccSDebojyoti Ghosh         workt = work;
69549bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
69649bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
69749bd79ccSDebojyoti Ghosh           workt += bs;
69849bd79ccSDebojyoti Ghosh         }
69949bd79ccSDebojyoti Ghosh         arrt = arr;
70049bd79ccSDebojyoti Ghosh         if (T) {
70149bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
70249bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
70349bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
70449bd79ccSDebojyoti Ghosh             arrt += bs2;
70549bd79ccSDebojyoti Ghosh           }
70649bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
70749bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
70849bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
70949bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
71049bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
71149bd79ccSDebojyoti Ghosh             arrt += bs2;
71249bd79ccSDebojyoti Ghosh           }
71349bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
71449bd79ccSDebojyoti Ghosh         }
71549bd79ccSDebojyoti Ghosh 
71649bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
71749bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
71849bd79ccSDebojyoti Ghosh 
71949bd79ccSDebojyoti Ghosh         idiag += bs2;
72049bd79ccSDebojyoti Ghosh         i2    += bs;
72149bd79ccSDebojyoti Ghosh       }
72249bd79ccSDebojyoti Ghosh       xb = t;
72349bd79ccSDebojyoti Ghosh     }
72449bd79ccSDebojyoti Ghosh     else xb = b;
72549bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
72649bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
72749bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
72849bd79ccSDebojyoti Ghosh       if (xb == b) {
72949bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
73049bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
73149bd79ccSDebojyoti Ghosh 
73249bd79ccSDebojyoti Ghosh           v  = aa + ai[i];
73349bd79ccSDebojyoti Ghosh           vi = aj + ai[i];
73449bd79ccSDebojyoti Ghosh           nz = diag[i] - ai[i];
73549bd79ccSDebojyoti Ghosh           workt = work;
73649bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
73749bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
73849bd79ccSDebojyoti Ghosh             workt += bs;
73949bd79ccSDebojyoti Ghosh           }
74049bd79ccSDebojyoti Ghosh           arrt = arr;
74149bd79ccSDebojyoti Ghosh           if (T) {
74249bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
74349bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
74449bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
74549bd79ccSDebojyoti Ghosh               arrt += bs2;
74649bd79ccSDebojyoti Ghosh             }
74749bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
74849bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
74949bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
75049bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
75149bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
75249bd79ccSDebojyoti Ghosh               arrt += bs2;
75349bd79ccSDebojyoti Ghosh             }
75449bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
75549bd79ccSDebojyoti Ghosh           }
75649bd79ccSDebojyoti Ghosh 
75749bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
75849bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
75949bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
76049bd79ccSDebojyoti Ghosh           workt = work;
76149bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
76249bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
76349bd79ccSDebojyoti Ghosh             workt += bs;
76449bd79ccSDebojyoti Ghosh           }
76549bd79ccSDebojyoti Ghosh           arrt = arr;
76649bd79ccSDebojyoti Ghosh           if (T) {
76749bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
76849bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
76949bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
77049bd79ccSDebojyoti Ghosh               arrt += bs2;
77149bd79ccSDebojyoti Ghosh             }
77249bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
77349bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
77449bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
77549bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
77649bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
77749bd79ccSDebojyoti Ghosh               arrt += bs2;
77849bd79ccSDebojyoti Ghosh             }
77949bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
78049bd79ccSDebojyoti Ghosh           }
78149bd79ccSDebojyoti Ghosh 
78249bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
78349bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
78449bd79ccSDebojyoti Ghosh         }
78549bd79ccSDebojyoti Ghosh       } else {
78649bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
78749bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
78849bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
78949bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
79049bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
79149bd79ccSDebojyoti Ghosh           workt = work;
79249bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
79349bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
79449bd79ccSDebojyoti Ghosh             workt += bs;
79549bd79ccSDebojyoti Ghosh           }
79649bd79ccSDebojyoti Ghosh           arrt = arr;
79749bd79ccSDebojyoti Ghosh           if (T) {
79849bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
79949bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
80049bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
80149bd79ccSDebojyoti Ghosh               arrt += bs2;
80249bd79ccSDebojyoti Ghosh             }
80349bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
80449bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
80549bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
80649bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
80749bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
80849bd79ccSDebojyoti Ghosh               arrt += bs2;
80949bd79ccSDebojyoti Ghosh             }
81049bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
81149bd79ccSDebojyoti Ghosh           }
81249bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
81349bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
81449bd79ccSDebojyoti Ghosh         }
81549bd79ccSDebojyoti Ghosh         idiag -= bs2;
81649bd79ccSDebojyoti Ghosh         i2    -= bs;
81749bd79ccSDebojyoti Ghosh       }
81849bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
81949bd79ccSDebojyoti Ghosh     }
82049bd79ccSDebojyoti Ghosh   }
82149bd79ccSDebojyoti Ghosh 
82249bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
82349bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
82449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
82549bd79ccSDebojyoti Ghosh }
82649bd79ccSDebojyoti Ghosh 
82749bd79ccSDebojyoti Ghosh /*===================================================================================*/
82849bd79ccSDebojyoti Ghosh 
82949bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
83049bd79ccSDebojyoti Ghosh {
83149bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
83249bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
83349bd79ccSDebojyoti Ghosh 
83449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
83549bd79ccSDebojyoti Ghosh   if (!yy) {
83649bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
83749bd79ccSDebojyoti Ghosh   } else {
83849bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
83949bd79ccSDebojyoti Ghosh   }
84049bd79ccSDebojyoti Ghosh   /* start the scatter */
84149bd79ccSDebojyoti Ghosh   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
84249bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
84349bd79ccSDebojyoti Ghosh   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
84449bd79ccSDebojyoti Ghosh   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
84549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
84649bd79ccSDebojyoti Ghosh }
84749bd79ccSDebojyoti Ghosh 
84849bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
84949bd79ccSDebojyoti Ghosh {
85049bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
85149bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
85249bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
85349bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
85449bd79ccSDebojyoti Ghosh }
85549bd79ccSDebojyoti Ghosh 
85649bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
85749bd79ccSDebojyoti Ghosh {
85849bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
85949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
86049bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
86149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
86249bd79ccSDebojyoti Ghosh }
86349bd79ccSDebojyoti Ghosh 
86449bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
86549bd79ccSDebojyoti Ghosh {
86649bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
86749bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
86849bd79ccSDebojyoti Ghosh 
86949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
87049bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
87149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
87249bd79ccSDebojyoti Ghosh }
87349bd79ccSDebojyoti Ghosh 
87449bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/
87549bd79ccSDebojyoti Ghosh 
87649bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
87749bd79ccSDebojyoti Ghosh {
87849bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
8791ca5ffdbSRichard Tran Mills   PetscErrorCode  diag = PETSC_FALSE;
8801ca5ffdbSRichard Tran Mills   PetscErrorCode  ierr;
88149bd79ccSDebojyoti Ghosh   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
88249bd79ccSDebojyoti Ghosh   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
88349bd79ccSDebojyoti Ghosh 
88449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
88549bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
88649bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
88749bd79ccSDebojyoti Ghosh   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
88849bd79ccSDebojyoti Ghosh 
88949bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
89049bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
89149bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
89249bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
89349bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
89449bd79ccSDebojyoti Ghosh   }
89549bd79ccSDebojyoti Ghosh 
89649bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
89749bd79ccSDebojyoti Ghosh     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
89849bd79ccSDebojyoti Ghosh     c     = nzaij;
89949bd79ccSDebojyoti Ghosh     for (i=0; i<nzaij; i++) {
90049bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
90149bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
90249bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
90349bd79ccSDebojyoti Ghosh         c = i;
90449bd79ccSDebojyoti Ghosh       }
90549bd79ccSDebojyoti Ghosh     }
90649bd79ccSDebojyoti Ghosh   } else nzaij = c = 0;
90749bd79ccSDebojyoti Ghosh 
90849bd79ccSDebojyoti Ghosh   /* calculate size of row */
90949bd79ccSDebojyoti Ghosh   nz = 0;
91049bd79ccSDebojyoti Ghosh   if (S)            nz += q;
91149bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
91249bd79ccSDebojyoti Ghosh 
91349bd79ccSDebojyoti Ghosh   if (cols || values) {
91449bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
91538822f9dSRichard Tran Mills     for (i=0; i<q; i++) {
91638822f9dSRichard Tran Mills       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
91738822f9dSRichard Tran Mills       v[i] = 0.0;
91838822f9dSRichard Tran Mills     }
91949bd79ccSDebojyoti Ghosh     if (b->isTI) {
92049bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
92149bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
92249bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
92349bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vaij[i] : 0);
92449bd79ccSDebojyoti Ghosh         }
92549bd79ccSDebojyoti Ghosh       }
92649bd79ccSDebojyoti Ghosh     } else if (T) {
92749bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
92849bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
92949bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
93049bd79ccSDebojyoti Ghosh           v[i*q+j]   = vaij[i]*T[s+j*p];
93149bd79ccSDebojyoti Ghosh         }
93249bd79ccSDebojyoti Ghosh       }
93349bd79ccSDebojyoti Ghosh     }
93449bd79ccSDebojyoti Ghosh     if (S) {
93549bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
93649bd79ccSDebojyoti Ghosh         idx[c*q+j] = r*q+j;
93749bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
93849bd79ccSDebojyoti Ghosh       }
93949bd79ccSDebojyoti Ghosh     }
94049bd79ccSDebojyoti Ghosh   }
94149bd79ccSDebojyoti Ghosh 
94249bd79ccSDebojyoti Ghosh   if (ncols)    *ncols  = nz;
94349bd79ccSDebojyoti Ghosh   if (cols)     *cols   = idx;
94449bd79ccSDebojyoti Ghosh   if (values)   *values = v;
94549bd79ccSDebojyoti Ghosh 
94649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
94749bd79ccSDebojyoti Ghosh }
94849bd79ccSDebojyoti Ghosh 
94949bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
95049bd79ccSDebojyoti Ghosh {
95149bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
95249bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
95349bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
95449bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
95549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
95649bd79ccSDebojyoti Ghosh }
95749bd79ccSDebojyoti Ghosh 
95849bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
95949bd79ccSDebojyoti Ghosh {
96049bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
96149bd79ccSDebojyoti Ghosh   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
96249bd79ccSDebojyoti Ghosh   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
96349bd79ccSDebojyoti Ghosh   Mat             AIJ     = b->A;
964fc64b2cfSRichard Tran Mills   PetscBool       diag    = PETSC_FALSE;
96549bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
96649bd79ccSDebojyoti Ghosh   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
96749bd79ccSDebojyoti Ghosh   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
96849bd79ccSDebojyoti Ghosh   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
96949bd79ccSDebojyoti Ghosh 
97049bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
97149bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
97249bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
97349bd79ccSDebojyoti Ghosh   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
97449bd79ccSDebojyoti Ghosh   lrow = row - rstart;
97549bd79ccSDebojyoti Ghosh 
97649bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
97749bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
97849bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
97949bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
98049bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
98149bd79ccSDebojyoti Ghosh   }
98249bd79ccSDebojyoti Ghosh 
98349bd79ccSDebojyoti Ghosh   r = lrow/p;
98449bd79ccSDebojyoti Ghosh   s = lrow%p;
98549bd79ccSDebojyoti Ghosh 
98649bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
98749bd79ccSDebojyoti Ghosh     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
98849bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
98949bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
99049bd79ccSDebojyoti Ghosh 
99149bd79ccSDebojyoti Ghosh     c     = ncolsaij + ncolsoaij;
99249bd79ccSDebojyoti Ghosh     for (i=0; i<ncolsaij; i++) {
99349bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
99449bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
99549bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
99649bd79ccSDebojyoti Ghosh         c = i;
99749bd79ccSDebojyoti Ghosh       }
99849bd79ccSDebojyoti Ghosh     }
99949bd79ccSDebojyoti Ghosh   } else c = 0;
100049bd79ccSDebojyoti Ghosh 
100149bd79ccSDebojyoti Ghosh   /* calculate size of row */
100249bd79ccSDebojyoti Ghosh   nz = 0;
100349bd79ccSDebojyoti Ghosh   if (S)            nz += q;
100449bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
100549bd79ccSDebojyoti Ghosh 
100649bd79ccSDebojyoti Ghosh   if (cols || values) {
100749bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1008a437a796SRichard Tran Mills     for (i=0; i<q; i++) {
1009a437a796SRichard Tran Mills       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1010a437a796SRichard Tran Mills       v[i] = 0.0;
1011a437a796SRichard Tran Mills     }
101249bd79ccSDebojyoti Ghosh     if (b->isTI) {
101349bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
101449bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
101549bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
101649bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vals[i] : 0.0);
101749bd79ccSDebojyoti Ghosh         }
101849bd79ccSDebojyoti Ghosh       }
101949bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
102049bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
102149bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
102249bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
102349bd79ccSDebojyoti Ghosh         }
102449bd79ccSDebojyoti Ghosh       }
102549bd79ccSDebojyoti Ghosh     } else if (T) {
102649bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
102749bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
102849bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
102949bd79ccSDebojyoti Ghosh           v[i*q+j]   = vals[i]*T[s+j*p];
103049bd79ccSDebojyoti Ghosh         }
103149bd79ccSDebojyoti Ghosh       }
103249bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
103349bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
103449bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
103549bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
103649bd79ccSDebojyoti Ghosh         }
103749bd79ccSDebojyoti Ghosh       }
103849bd79ccSDebojyoti Ghosh     }
103949bd79ccSDebojyoti Ghosh     if (S) {
104049bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
104149bd79ccSDebojyoti Ghosh         idx[c*q+j] = (r+rstart/p)*q+j;
104249bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
104349bd79ccSDebojyoti Ghosh       }
104449bd79ccSDebojyoti Ghosh     }
104549bd79ccSDebojyoti Ghosh   }
104649bd79ccSDebojyoti Ghosh 
104749bd79ccSDebojyoti Ghosh   if (ncols)  *ncols  = nz;
104849bd79ccSDebojyoti Ghosh   if (cols)   *cols   = idx;
104949bd79ccSDebojyoti Ghosh   if (values) *values = v;
105049bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
105149bd79ccSDebojyoti Ghosh }
105249bd79ccSDebojyoti Ghosh 
105349bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
105449bd79ccSDebojyoti Ghosh {
105549bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
105649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
105749bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
105849bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
105949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
106049bd79ccSDebojyoti Ghosh }
106149bd79ccSDebojyoti Ghosh 
106249bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
106349bd79ccSDebojyoti Ghosh {
106449bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
106549bd79ccSDebojyoti Ghosh   Mat            A;
106649bd79ccSDebojyoti Ghosh 
106749bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
106849bd79ccSDebojyoti Ghosh   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
106949bd79ccSDebojyoti Ghosh   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
107049bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&A);CHKERRQ(ierr);
107149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
107249bd79ccSDebojyoti Ghosh }
107349bd79ccSDebojyoti Ghosh 
107449bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */
107549bd79ccSDebojyoti Ghosh /*@C
107649bd79ccSDebojyoti Ghosh   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
107749bd79ccSDebojyoti Ghosh 
107849bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
107949bd79ccSDebojyoti Ghosh 
108049bd79ccSDebojyoti Ghosh   where
108149bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
108249bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
108349bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
108449bd79ccSDebojyoti Ghosh     I is the identity matrix
108549bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
108649bd79ccSDebojyoti Ghosh 
108749bd79ccSDebojyoti Ghosh   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
108849bd79ccSDebojyoti Ghosh   S is always stored independently on all processes as a PetscScalar array in column-major format.
108949bd79ccSDebojyoti Ghosh 
109049bd79ccSDebojyoti Ghosh   Collective
109149bd79ccSDebojyoti Ghosh 
109249bd79ccSDebojyoti Ghosh   Input Parameters:
109349bd79ccSDebojyoti Ghosh + A - the AIJ matrix
109449bd79ccSDebojyoti Ghosh . S - the S matrix, stored as a PetscScalar array (column-major)
109549bd79ccSDebojyoti Ghosh . T - the T matrix, stored as a PetscScalar array (column-major)
109649bd79ccSDebojyoti Ghosh . p - number of rows in S and T
109749bd79ccSDebojyoti Ghosh - q - number of columns in S and T
109849bd79ccSDebojyoti Ghosh 
109949bd79ccSDebojyoti Ghosh   Output Parameter:
110049bd79ccSDebojyoti Ghosh . kaij - the new KAIJ matrix
110149bd79ccSDebojyoti Ghosh 
110249bd79ccSDebojyoti Ghosh   Operations provided:
110349bd79ccSDebojyoti Ghosh + MatMult
110449bd79ccSDebojyoti Ghosh . MatMultAdd
110549bd79ccSDebojyoti Ghosh . MatInvertBlockDiagonal
110649bd79ccSDebojyoti Ghosh - MatView
110749bd79ccSDebojyoti Ghosh 
110849bd79ccSDebojyoti Ghosh   Level: advanced
110949bd79ccSDebojyoti Ghosh 
11100567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
111149bd79ccSDebojyoti Ghosh @*/
111249bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
111349bd79ccSDebojyoti Ghosh {
111449bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
111549bd79ccSDebojyoti Ghosh   PetscMPIInt    size;
111649bd79ccSDebojyoti Ghosh 
111749bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
11180567c835SRichard Tran Mills   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
111949bd79ccSDebojyoti Ghosh   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
112049bd79ccSDebojyoti Ghosh   if (size == 1) {
11210567c835SRichard Tran Mills     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
112249bd79ccSDebojyoti Ghosh   } else {
11230567c835SRichard Tran Mills     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
112449bd79ccSDebojyoti Ghosh   }
11250567c835SRichard Tran Mills   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
11260567c835SRichard Tran Mills   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
11270567c835SRichard Tran Mills   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
11280567c835SRichard Tran Mills   ierr = MatSetUp(*kaij);
11290567c835SRichard Tran Mills   PetscFunctionReturn(0);
11300567c835SRichard Tran Mills }
113149bd79ccSDebojyoti Ghosh 
11320567c835SRichard Tran Mills /*MC
11330567c835SRichard Tran Mills   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
11340567c835SRichard Tran Mills 
11350567c835SRichard Tran Mills     [I \otimes S + A \otimes T]
11360567c835SRichard Tran Mills 
11370567c835SRichard Tran Mills   where
11380567c835SRichard Tran Mills     S is a dense (p \times q) matrix
11390567c835SRichard Tran Mills     T is a dense (p \times q) matrix
11400567c835SRichard Tran Mills     A is an AIJ  (n \times n) matrix
11410567c835SRichard Tran Mills     I is the identity matrix
11420567c835SRichard Tran Mills   The resulting matrix is (np \times nq)
11430567c835SRichard Tran Mills 
11440567c835SRichard Tran Mills   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
11450567c835SRichard Tran Mills   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
11460567c835SRichard Tran Mills 
11470567c835SRichard Tran Mills   Operations provided:
11480567c835SRichard Tran Mills + MatMult
11490567c835SRichard Tran Mills . MatMultAdd
11500567c835SRichard Tran Mills . MatInvertBlockDiagonal
11510567c835SRichard Tran Mills - MatView
11520567c835SRichard Tran Mills 
11530567c835SRichard Tran Mills   Level: advanced
11540567c835SRichard Tran Mills 
11550567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
11560567c835SRichard Tran Mills M*/
11570567c835SRichard Tran Mills 
11580567c835SRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
11590567c835SRichard Tran Mills {
11600567c835SRichard Tran Mills   PetscErrorCode ierr;
11610567c835SRichard Tran Mills   Mat_MPIKAIJ    *b;
11620567c835SRichard Tran Mills   PetscMPIInt    size;
11630567c835SRichard Tran Mills 
11640567c835SRichard Tran Mills   PetscFunctionBegin;
11650567c835SRichard Tran Mills   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
11660567c835SRichard Tran Mills   A->data  = (void*)b;
11670567c835SRichard Tran Mills 
11680567c835SRichard Tran Mills   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
11690567c835SRichard Tran Mills 
11700567c835SRichard Tran Mills   A->ops->setup = MatSetUp_KAIJ;
11710567c835SRichard Tran Mills 
11720567c835SRichard Tran Mills   b->w    = 0;
11730567c835SRichard Tran Mills   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
11740567c835SRichard Tran Mills   if (size == 1) {
11750567c835SRichard Tran Mills     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
11760567c835SRichard Tran Mills 
11770567c835SRichard Tran Mills     A->ops->setup               = MatSetUp_KAIJ;
11780567c835SRichard Tran Mills     A->ops->destroy             = MatDestroy_SeqKAIJ;
11790567c835SRichard Tran Mills     A->ops->view                = MatView_SeqKAIJ;
11800567c835SRichard Tran Mills     A->ops->mult                = MatMult_SeqKAIJ_N;
11810567c835SRichard Tran Mills     A->ops->multadd             = MatMultAdd_SeqKAIJ_N;
11820567c835SRichard Tran Mills     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
11830567c835SRichard Tran Mills     A->ops->getrow              = MatGetRow_SeqKAIJ;
11840567c835SRichard Tran Mills     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
11850567c835SRichard Tran Mills     A->ops->sor                 = MatSOR_SeqKAIJ;
11860567c835SRichard Tran Mills   } else {
11870567c835SRichard Tran Mills     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
11880567c835SRichard Tran Mills     A->ops->setup               = MatSetUp_KAIJ;
11890567c835SRichard Tran Mills     A->ops->destroy             = MatDestroy_MPIKAIJ;
11900567c835SRichard Tran Mills     A->ops->view                = MatView_MPIKAIJ;
11910567c835SRichard Tran Mills     A->ops->mult                = MatMult_MPIKAIJ_dof;
11920567c835SRichard Tran Mills     A->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
11930567c835SRichard Tran Mills     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
11940567c835SRichard Tran Mills     A->ops->getrow              = MatGetRow_MPIKAIJ;
11950567c835SRichard Tran Mills     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
11960567c835SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
11970567c835SRichard Tran Mills   }
11980567c835SRichard Tran Mills   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
119949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
120049bd79ccSDebojyoti Ghosh }
1201