149bd79ccSDebojyoti Ghosh 249bd79ccSDebojyoti Ghosh /* 349bd79ccSDebojyoti Ghosh Defines the basic matrix operations for the KAIJ matrix storage format. 449bd79ccSDebojyoti Ghosh This format is used to evaluate matrices of the form: 549bd79ccSDebojyoti Ghosh 649bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 749bd79ccSDebojyoti Ghosh 849bd79ccSDebojyoti Ghosh where 949bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 1049bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 1149bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 1249bd79ccSDebojyoti Ghosh I is the identity matrix 1349bd79ccSDebojyoti Ghosh 1449bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 1549bd79ccSDebojyoti Ghosh 1649bd79ccSDebojyoti Ghosh We provide: 1749bd79ccSDebojyoti Ghosh MatMult() 1849bd79ccSDebojyoti Ghosh MatMultAdd() 1949bd79ccSDebojyoti Ghosh MatInvertBlockDiagonal() 2049bd79ccSDebojyoti Ghosh and 2149bd79ccSDebojyoti Ghosh MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 2249bd79ccSDebojyoti Ghosh 2349bd79ccSDebojyoti Ghosh This single directory handles both the sequential and parallel codes 2449bd79ccSDebojyoti Ghosh */ 2549bd79ccSDebojyoti Ghosh 2649bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 2749bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h> 2849bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h> 2949bd79ccSDebojyoti Ghosh 3049bd79ccSDebojyoti Ghosh /*@C 3149bd79ccSDebojyoti Ghosh MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 3249bd79ccSDebojyoti Ghosh 3349bd79ccSDebojyoti Ghosh Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 3449bd79ccSDebojyoti Ghosh 3549bd79ccSDebojyoti Ghosh Input Parameter: 3649bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 3749bd79ccSDebojyoti Ghosh 3849bd79ccSDebojyoti Ghosh Output Parameter: 3949bd79ccSDebojyoti Ghosh . B - the AIJ matrix 4049bd79ccSDebojyoti Ghosh 4149bd79ccSDebojyoti Ghosh Level: advanced 4249bd79ccSDebojyoti Ghosh 4349bd79ccSDebojyoti Ghosh Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 4449bd79ccSDebojyoti Ghosh 4549bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ() 4649bd79ccSDebojyoti Ghosh @*/ 4749bd79ccSDebojyoti Ghosh PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 4849bd79ccSDebojyoti Ghosh { 4949bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 5049bd79ccSDebojyoti Ghosh PetscBool ismpikaij,isseqkaij; 5149bd79ccSDebojyoti Ghosh 5249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 5349bd79ccSDebojyoti Ghosh ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 5449bd79ccSDebojyoti Ghosh ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 5549bd79ccSDebojyoti Ghosh if (ismpikaij) { 5649bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 5749bd79ccSDebojyoti Ghosh 5849bd79ccSDebojyoti Ghosh *B = b->A; 5949bd79ccSDebojyoti Ghosh } else if (isseqkaij) { 6049bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 6149bd79ccSDebojyoti Ghosh 6249bd79ccSDebojyoti Ghosh *B = b->AIJ; 63*b04351cbSRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 6449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 6549bd79ccSDebojyoti Ghosh } 6649bd79ccSDebojyoti Ghosh 6749bd79ccSDebojyoti Ghosh /*@C 6849bd79ccSDebojyoti Ghosh MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 6949bd79ccSDebojyoti Ghosh 700567c835SRichard Tran Mills Not Collective; the entire S is stored and returned independently on all processes. 7149bd79ccSDebojyoti Ghosh 7249bd79ccSDebojyoti Ghosh Input Parameter: 7349bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 7449bd79ccSDebojyoti Ghosh 7549bd79ccSDebojyoti Ghosh Output Parameter: 7649bd79ccSDebojyoti Ghosh . S - the S matrix, in form of a scalar array in column-major format 7749bd79ccSDebojyoti Ghosh 7831a97b9aSRichard Tran Mills Notes: 7931a97b9aSRichard Tran Mills The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 8031a97b9aSRichard Tran Mills The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S. 8131a97b9aSRichard Tran Mills 8249bd79ccSDebojyoti Ghosh Level: advanced 8349bd79ccSDebojyoti Ghosh 8431a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes() 8549bd79ccSDebojyoti Ghosh @*/ 8649bd79ccSDebojyoti Ghosh PetscErrorCode MatKAIJGetS(Mat A,const PetscScalar **S) 8749bd79ccSDebojyoti Ghosh { 8849bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 8949bd79ccSDebojyoti Ghosh PetscFunctionBegin; 9049bd79ccSDebojyoti Ghosh *S = b->S; 9149bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 9249bd79ccSDebojyoti Ghosh } 9349bd79ccSDebojyoti Ghosh 9449bd79ccSDebojyoti Ghosh /*@C 9531a97b9aSRichard Tran Mills MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 9649bd79ccSDebojyoti Ghosh 970567c835SRichard Tran Mills Not Collective; the entire T is stored and returned independently on all processes 9849bd79ccSDebojyoti Ghosh 9949bd79ccSDebojyoti Ghosh Input Parameter: 10049bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 10149bd79ccSDebojyoti Ghosh 10249bd79ccSDebojyoti Ghosh Output Parameter: 10349bd79ccSDebojyoti Ghosh . T - the T matrix, in form of a scalar array in column-major format 10449bd79ccSDebojyoti Ghosh 10531a97b9aSRichard Tran Mills Notes: 10631a97b9aSRichard Tran Mills The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix. 10731a97b9aSRichard Tran Mills The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T. 10831a97b9aSRichard Tran Mills 10949bd79ccSDebojyoti Ghosh Level: advanced 11049bd79ccSDebojyoti Ghosh 11131a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes() 11249bd79ccSDebojyoti Ghosh @*/ 11349bd79ccSDebojyoti Ghosh PetscErrorCode MatKAIJGetT(Mat A,const PetscScalar **T) 11449bd79ccSDebojyoti Ghosh { 11549bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 11649bd79ccSDebojyoti Ghosh PetscFunctionBegin; 11749bd79ccSDebojyoti Ghosh *T = b->T; 11849bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 11949bd79ccSDebojyoti Ghosh } 12049bd79ccSDebojyoti Ghosh 1210567c835SRichard Tran Mills /*@ 1220567c835SRichard Tran Mills MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 1230567c835SRichard Tran Mills 1240567c835SRichard Tran Mills Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 1250567c835SRichard Tran Mills 1260567c835SRichard Tran Mills Input Parameters: 1270567c835SRichard Tran Mills + A - the KAIJ matrix 1280567c835SRichard Tran Mills - B - the AIJ matrix 1290567c835SRichard Tran Mills 1300567c835SRichard Tran Mills Level: advanced 1310567c835SRichard Tran Mills 1320567c835SRichard Tran Mills .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 1330567c835SRichard Tran Mills @*/ 1340567c835SRichard Tran Mills PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 1350567c835SRichard Tran Mills { 1360567c835SRichard Tran Mills PetscErrorCode ierr; 1370567c835SRichard Tran Mills PetscMPIInt size; 1380567c835SRichard Tran Mills 1390567c835SRichard Tran Mills PetscFunctionBegin; 1400567c835SRichard Tran Mills ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1410567c835SRichard Tran Mills if (size == 1) { 1420567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 1430567c835SRichard Tran Mills a->AIJ = B; 1440567c835SRichard Tran Mills } else { 1450567c835SRichard Tran Mills Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 1460567c835SRichard Tran Mills a->A = B; 1470567c835SRichard Tran Mills } 1480567c835SRichard Tran Mills PetscFunctionReturn(0); 1490567c835SRichard Tran Mills } 1500567c835SRichard Tran Mills 1510567c835SRichard Tran Mills /*@C 1520567c835SRichard Tran Mills MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 1530567c835SRichard Tran Mills 1540567c835SRichard Tran Mills Logically Collective; the entire S is stored independently on all processes. 1550567c835SRichard Tran Mills 1560567c835SRichard Tran Mills Input Parameters: 1570567c835SRichard Tran Mills + A - the KAIJ matrix 1580567c835SRichard Tran Mills . p - the number of rows in S 1590567c835SRichard Tran Mills . q - the number of columns in S 1600567c835SRichard Tran Mills - S - the S matrix, in form of a scalar array in column-major format 1610567c835SRichard Tran Mills 1620567c835SRichard Tran Mills Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 1630567c835SRichard Tran Mills 1640567c835SRichard Tran Mills Level: Advanced 1650567c835SRichard Tran Mills 1660567c835SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 1670567c835SRichard Tran Mills @*/ 1680567c835SRichard Tran Mills PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 1690567c835SRichard Tran Mills { 1700567c835SRichard Tran Mills PetscErrorCode ierr; 1710567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 1720567c835SRichard Tran Mills 1730567c835SRichard Tran Mills PetscFunctionBegin; 1740567c835SRichard Tran Mills if (a->S) { 1750567c835SRichard Tran Mills ierr = PetscFree(a->S);CHKERRQ(ierr); 1760567c835SRichard Tran Mills } 1770567c835SRichard Tran Mills if (S) { 178a84f8069SRichard Tran Mills ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 1790567c835SRichard Tran Mills ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 1800567c835SRichard Tran Mills } else a->S = NULL; 1810567c835SRichard Tran Mills 1820567c835SRichard Tran Mills a->p = p; 1830567c835SRichard Tran Mills a->q = q; 1840567c835SRichard Tran Mills 1850567c835SRichard Tran Mills PetscFunctionReturn(0); 1860567c835SRichard Tran Mills } 1870567c835SRichard Tran Mills 1880567c835SRichard Tran Mills /*@C 1890567c835SRichard Tran Mills MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 1900567c835SRichard Tran Mills 1910567c835SRichard Tran Mills Logically Collective; the entire T is stored independently on all processes. 1920567c835SRichard Tran Mills 1930567c835SRichard Tran Mills Input Parameters: 1940567c835SRichard Tran Mills + A - the KAIJ matrix 1950567c835SRichard Tran Mills . p - the number of rows in S 1960567c835SRichard Tran Mills . q - the number of columns in S 1970567c835SRichard Tran Mills - T - the T matrix, in form of a scalar array in column-major format 1980567c835SRichard Tran Mills 1990567c835SRichard Tran Mills Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 2000567c835SRichard Tran Mills 2010567c835SRichard Tran Mills Level: Advanced 2020567c835SRichard Tran Mills 2030567c835SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 2040567c835SRichard Tran Mills @*/ 2050567c835SRichard Tran Mills PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 2060567c835SRichard Tran Mills { 2070567c835SRichard Tran Mills PetscErrorCode ierr; 2080567c835SRichard Tran Mills PetscInt i,j; 2090567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 2100567c835SRichard Tran Mills PetscBool isTI = PETSC_FALSE; 2110567c835SRichard Tran Mills 2120567c835SRichard Tran Mills PetscFunctionBegin; 2130567c835SRichard Tran Mills /* check if T is an identity matrix */ 2140567c835SRichard Tran Mills if (T && (p == q)) { 2150567c835SRichard Tran Mills isTI = PETSC_TRUE; 2160567c835SRichard Tran Mills for (i=0; i<p; i++) { 2170567c835SRichard Tran Mills for (j=0; j<q; j++) { 2180567c835SRichard Tran Mills if (i == j) { 2190567c835SRichard Tran Mills /* diagonal term must be 1 */ 2200567c835SRichard Tran Mills if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 2210567c835SRichard Tran Mills } else { 2220567c835SRichard Tran Mills /* off-diagonal term must be 0 */ 2230567c835SRichard Tran Mills if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 2240567c835SRichard Tran Mills } 2250567c835SRichard Tran Mills } 2260567c835SRichard Tran Mills } 2270567c835SRichard Tran Mills } 2280567c835SRichard Tran Mills a->isTI = isTI; 2290567c835SRichard Tran Mills 2300567c835SRichard Tran Mills if (a->T) { 2310567c835SRichard Tran Mills ierr = PetscFree(a->T);CHKERRQ(ierr); 2320567c835SRichard Tran Mills } 2330567c835SRichard Tran Mills if (T && (!isTI)) { 234a84f8069SRichard Tran Mills ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 2350567c835SRichard Tran Mills ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 23650d19d74SRichard Tran Mills } else a->T = NULL; 2370567c835SRichard Tran Mills 2380567c835SRichard Tran Mills a->p = p; 2390567c835SRichard Tran Mills a->q = q; 2400567c835SRichard Tran Mills PetscFunctionReturn(0); 2410567c835SRichard Tran Mills } 2420567c835SRichard Tran Mills 24349bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 24449bd79ccSDebojyoti Ghosh { 24549bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 24649bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 24749bd79ccSDebojyoti Ghosh 24849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 24949bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 250a84f8069SRichard Tran Mills ierr = PetscFree(b->S);CHKERRQ(ierr); 251a84f8069SRichard Tran Mills ierr = PetscFree(b->T);CHKERRQ(ierr); 252a84f8069SRichard Tran Mills ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 25349bd79ccSDebojyoti Ghosh ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 25449bd79ccSDebojyoti Ghosh ierr = PetscFree(A->data);CHKERRQ(ierr); 25549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 25649bd79ccSDebojyoti Ghosh } 25749bd79ccSDebojyoti Ghosh 25849bd79ccSDebojyoti Ghosh PetscErrorCode MatSetUp_KAIJ(Mat A) 25949bd79ccSDebojyoti Ghosh { 2600567c835SRichard Tran Mills PetscErrorCode ierr; 2610567c835SRichard Tran Mills PetscInt n; 2620567c835SRichard Tran Mills PetscMPIInt size; 2630567c835SRichard Tran Mills Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 2640567c835SRichard Tran Mills 26549bd79ccSDebojyoti Ghosh PetscFunctionBegin; 2660567c835SRichard Tran Mills ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2670567c835SRichard Tran Mills if (size == 1) { 2680567c835SRichard Tran Mills ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 2690567c835SRichard Tran Mills ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 2700567c835SRichard Tran Mills ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 2710567c835SRichard Tran Mills ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 2720567c835SRichard Tran Mills ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 2730567c835SRichard Tran Mills ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr); 2740567c835SRichard Tran Mills } else { 2750567c835SRichard Tran Mills Mat_MPIKAIJ *a; 2760567c835SRichard Tran Mills Mat_MPIAIJ *mpiaij; 2770567c835SRichard Tran Mills IS from,to; 2780567c835SRichard Tran Mills Vec gvec; 2790567c835SRichard Tran Mills PetscScalar *T; 2800567c835SRichard Tran Mills PetscInt i,j; 2810567c835SRichard Tran Mills 2820567c835SRichard Tran Mills a = (Mat_MPIKAIJ*)A->data; 283d3f912faSRichard Tran Mills mpiaij = (Mat_MPIAIJ*)a->A->data; 2840567c835SRichard Tran Mills ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 2850567c835SRichard Tran Mills ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 2860567c835SRichard Tran Mills ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 2870567c835SRichard Tran Mills ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 2880567c835SRichard Tran Mills ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 2890567c835SRichard Tran Mills ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 2900567c835SRichard Tran Mills 2910567c835SRichard Tran Mills if (a->isTI) { 2920567c835SRichard Tran Mills /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 2930567c835SRichard Tran Mills * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 2940567c835SRichard Tran Mills * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 2950567c835SRichard Tran Mills * to pass in. */ 296a84f8069SRichard Tran Mills ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 2970567c835SRichard Tran Mills for (i=0; i<a->p; i++) { 2980567c835SRichard Tran Mills for (j=0; j<a->q; j++) { 2990567c835SRichard Tran Mills if (i==j) T[i+j*a->p] = 1.0; 3000567c835SRichard Tran Mills else T[i+j*a->p] = 0.0; 3010567c835SRichard Tran Mills } 3020567c835SRichard Tran Mills } 3030567c835SRichard Tran Mills } else T = a->T; 3040567c835SRichard Tran Mills ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 3050567c835SRichard Tran Mills ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 3060567c835SRichard Tran Mills if (a->isTI) { 3070567c835SRichard Tran Mills ierr = PetscFree(T);CHKERRQ(ierr); 3080567c835SRichard Tran Mills } 3090567c835SRichard Tran Mills 3100567c835SRichard Tran Mills ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3110567c835SRichard Tran Mills ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 3120567c835SRichard Tran Mills ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 3130567c835SRichard Tran Mills ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 3140567c835SRichard Tran Mills ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 3150567c835SRichard Tran Mills 3160567c835SRichard Tran Mills /* create two temporary Index sets for build scatter gather */ 3170567c835SRichard Tran Mills ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3180567c835SRichard Tran Mills ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 3190567c835SRichard Tran Mills 3200567c835SRichard Tran Mills /* create temporary global vector to generate scatter context */ 3210567c835SRichard Tran Mills ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3220567c835SRichard Tran Mills 3230567c835SRichard Tran Mills /* generate the scatter context */ 3244589b4e5SRichard Tran Mills ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 3250567c835SRichard Tran Mills 3260567c835SRichard Tran Mills ierr = ISDestroy(&from);CHKERRQ(ierr); 3270567c835SRichard Tran Mills ierr = ISDestroy(&to);CHKERRQ(ierr); 3280567c835SRichard Tran Mills ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3290567c835SRichard Tran Mills } 3300567c835SRichard Tran Mills 3310567c835SRichard Tran Mills A->assembled = PETSC_TRUE; 33249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 33349bd79ccSDebojyoti Ghosh } 33449bd79ccSDebojyoti Ghosh 33549bd79ccSDebojyoti Ghosh PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 33649bd79ccSDebojyoti Ghosh { 33749bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 33849bd79ccSDebojyoti Ghosh Mat B; 33949bd79ccSDebojyoti Ghosh 34049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 34149bd79ccSDebojyoti Ghosh ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 34249bd79ccSDebojyoti Ghosh ierr = MatView(B,viewer);CHKERRQ(ierr); 34349bd79ccSDebojyoti Ghosh ierr = MatDestroy(&B);CHKERRQ(ierr); 34449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 34549bd79ccSDebojyoti Ghosh } 34649bd79ccSDebojyoti Ghosh 34749bd79ccSDebojyoti Ghosh PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 34849bd79ccSDebojyoti Ghosh { 34949bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 35049bd79ccSDebojyoti Ghosh Mat B; 35149bd79ccSDebojyoti Ghosh 35249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 35349bd79ccSDebojyoti Ghosh ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 35449bd79ccSDebojyoti Ghosh ierr = MatView(B,viewer);CHKERRQ(ierr); 35549bd79ccSDebojyoti Ghosh ierr = MatDestroy(&B);CHKERRQ(ierr); 35649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 35749bd79ccSDebojyoti Ghosh } 35849bd79ccSDebojyoti Ghosh 35949bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 36049bd79ccSDebojyoti Ghosh { 36149bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 36249bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 36349bd79ccSDebojyoti Ghosh 36449bd79ccSDebojyoti Ghosh PetscFunctionBegin; 36549bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 36649bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 36749bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->A);CHKERRQ(ierr); 36849bd79ccSDebojyoti Ghosh ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 36949bd79ccSDebojyoti Ghosh ierr = VecDestroy(&b->w);CHKERRQ(ierr); 370a84f8069SRichard Tran Mills ierr = PetscFree(b->S);CHKERRQ(ierr); 371a84f8069SRichard Tran Mills ierr = PetscFree(b->T);CHKERRQ(ierr); 372a84f8069SRichard Tran Mills ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 37349bd79ccSDebojyoti Ghosh ierr = PetscFree(A->data);CHKERRQ(ierr); 37449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 37549bd79ccSDebojyoti Ghosh } 37649bd79ccSDebojyoti Ghosh 37749bd79ccSDebojyoti Ghosh 37849bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/ 37949bd79ccSDebojyoti Ghosh 38049bd79ccSDebojyoti Ghosh /* zz = yy + Axx */ 38149bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz) 38249bd79ccSDebojyoti Ghosh { 38349bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 38449bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 38549bd79ccSDebojyoti Ghosh const PetscScalar *s = b->S, *t = b->T; 38649bd79ccSDebojyoti Ghosh const PetscScalar *x,*v,*bx; 38749bd79ccSDebojyoti Ghosh PetscScalar *y,*sums; 38849bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 38949bd79ccSDebojyoti Ghosh const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 39049bd79ccSDebojyoti Ghosh PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 39149bd79ccSDebojyoti Ghosh 39249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 39349bd79ccSDebojyoti Ghosh if (!yy) { 39449bd79ccSDebojyoti Ghosh ierr = VecSet(zz,0.0);CHKERRQ(ierr); 39549bd79ccSDebojyoti Ghosh } else { 39649bd79ccSDebojyoti Ghosh ierr = VecCopy(yy,zz);CHKERRQ(ierr); 39749bd79ccSDebojyoti Ghosh } 39849bd79ccSDebojyoti Ghosh if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 39949bd79ccSDebojyoti Ghosh 40049bd79ccSDebojyoti Ghosh ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 40149bd79ccSDebojyoti Ghosh ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 40249bd79ccSDebojyoti Ghosh idx = a->j; 40349bd79ccSDebojyoti Ghosh v = a->a; 40449bd79ccSDebojyoti Ghosh ii = a->i; 40549bd79ccSDebojyoti Ghosh 40649bd79ccSDebojyoti Ghosh if (b->isTI) { 40749bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 40849bd79ccSDebojyoti Ghosh jrow = ii[i]; 40949bd79ccSDebojyoti Ghosh n = ii[i+1] - jrow; 41049bd79ccSDebojyoti Ghosh sums = y + p*i; 41149bd79ccSDebojyoti Ghosh for (j=0; j<n; j++) { 41249bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 41349bd79ccSDebojyoti Ghosh sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 41449bd79ccSDebojyoti Ghosh } 41549bd79ccSDebojyoti Ghosh } 41649bd79ccSDebojyoti Ghosh } 41749bd79ccSDebojyoti Ghosh } else if (t) { 41849bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 41949bd79ccSDebojyoti Ghosh jrow = ii[i]; 42049bd79ccSDebojyoti Ghosh n = ii[i+1] - jrow; 42149bd79ccSDebojyoti Ghosh sums = y + p*i; 42249bd79ccSDebojyoti Ghosh bx = x + q*i; 42349bd79ccSDebojyoti Ghosh for (j=0; j<n; j++) { 42449bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 42549bd79ccSDebojyoti Ghosh for (l=0; l<q; l++) { 42649bd79ccSDebojyoti Ghosh sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 42749bd79ccSDebojyoti Ghosh } 42849bd79ccSDebojyoti Ghosh } 42949bd79ccSDebojyoti Ghosh } 43049bd79ccSDebojyoti Ghosh } 43149bd79ccSDebojyoti Ghosh } 43249bd79ccSDebojyoti Ghosh if (s) { 43349bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 43449bd79ccSDebojyoti Ghosh jrow = ii[i]; 43549bd79ccSDebojyoti Ghosh n = ii[i+1] - jrow; 43649bd79ccSDebojyoti Ghosh sums = y + p*i; 43749bd79ccSDebojyoti Ghosh bx = x + q*i; 43849bd79ccSDebojyoti Ghosh if (i < b->AIJ->cmap->n) { 43949bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 44049bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 44149bd79ccSDebojyoti Ghosh sums[k] += s[k+j*p]*bx[j]; 44249bd79ccSDebojyoti Ghosh } 44349bd79ccSDebojyoti Ghosh } 44449bd79ccSDebojyoti Ghosh } 44549bd79ccSDebojyoti Ghosh } 44649bd79ccSDebojyoti Ghosh } 44749bd79ccSDebojyoti Ghosh 44849bd79ccSDebojyoti Ghosh ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 44949bd79ccSDebojyoti Ghosh ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 45049bd79ccSDebojyoti Ghosh ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 45149bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 45249bd79ccSDebojyoti Ghosh } 45349bd79ccSDebojyoti Ghosh 45449bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy) 45549bd79ccSDebojyoti Ghosh { 45649bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 45749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 45849bd79ccSDebojyoti Ghosh ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 45949bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 46049bd79ccSDebojyoti Ghosh } 46149bd79ccSDebojyoti Ghosh 46249bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 46349bd79ccSDebojyoti Ghosh { 46449bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 46549bd79ccSDebojyoti Ghosh PetscFunctionBegin; 46649bd79ccSDebojyoti Ghosh ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr); 46749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 46849bd79ccSDebojyoti Ghosh } 46949bd79ccSDebojyoti Ghosh 47049bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h> 47149bd79ccSDebojyoti Ghosh 47249bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values) 47349bd79ccSDebojyoti Ghosh { 47449bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 47549bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 47649bd79ccSDebojyoti Ghosh const PetscScalar *S = b->S; 47749bd79ccSDebojyoti Ghosh const PetscScalar *T = b->T; 47849bd79ccSDebojyoti Ghosh const PetscScalar *v = a->a; 47949bd79ccSDebojyoti Ghosh const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 48049bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 48149bd79ccSDebojyoti Ghosh PetscInt i,j,*v_pivots,dof,dof2; 48249bd79ccSDebojyoti Ghosh PetscScalar *diag,aval,*v_work; 48349bd79ccSDebojyoti Ghosh 48449bd79ccSDebojyoti Ghosh PetscFunctionBegin; 48549bd79ccSDebojyoti Ghosh if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 48631a97b9aSRichard Tran Mills if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 48749bd79ccSDebojyoti Ghosh 48849bd79ccSDebojyoti Ghosh dof = p; 48949bd79ccSDebojyoti Ghosh dof2 = dof*dof; 49049bd79ccSDebojyoti Ghosh 49149bd79ccSDebojyoti Ghosh if (b->ibdiagvalid) { 49249bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 49349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 49449bd79ccSDebojyoti Ghosh } 49549bd79ccSDebojyoti Ghosh if (!b->ibdiag) { 496a84f8069SRichard Tran Mills ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 49749bd79ccSDebojyoti Ghosh ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 49849bd79ccSDebojyoti Ghosh } 49949bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 50049bd79ccSDebojyoti Ghosh diag = b->ibdiag; 50149bd79ccSDebojyoti Ghosh 50249bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 50349bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 50449bd79ccSDebojyoti Ghosh if (S) { 50549bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 50649bd79ccSDebojyoti Ghosh } else { 50749bd79ccSDebojyoti Ghosh ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 50849bd79ccSDebojyoti Ghosh } 50949bd79ccSDebojyoti Ghosh if (b->isTI) { 51049bd79ccSDebojyoti Ghosh aval = 0; 51149bd79ccSDebojyoti Ghosh for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 51249bd79ccSDebojyoti Ghosh for (j=0; j<dof; j++) diag[j+dof*j] += aval; 51349bd79ccSDebojyoti Ghosh } else if (T) { 51449bd79ccSDebojyoti Ghosh aval = 0; 51549bd79ccSDebojyoti Ghosh for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 51649bd79ccSDebojyoti Ghosh for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 51749bd79ccSDebojyoti Ghosh } 51849bd79ccSDebojyoti Ghosh ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 51949bd79ccSDebojyoti Ghosh diag += dof2; 52049bd79ccSDebojyoti Ghosh } 52149bd79ccSDebojyoti Ghosh ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 52249bd79ccSDebojyoti Ghosh 52349bd79ccSDebojyoti Ghosh b->ibdiagvalid = PETSC_TRUE; 52449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 52549bd79ccSDebojyoti Ghosh } 52649bd79ccSDebojyoti Ghosh 52749bd79ccSDebojyoti Ghosh static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 52849bd79ccSDebojyoti Ghosh { 52949bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 53049bd79ccSDebojyoti Ghosh 53149bd79ccSDebojyoti Ghosh PetscFunctionBegin; 53249bd79ccSDebojyoti Ghosh *B = kaij->AIJ; 53349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 53449bd79ccSDebojyoti Ghosh } 53549bd79ccSDebojyoti Ghosh 53649bd79ccSDebojyoti Ghosh PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 53749bd79ccSDebojyoti Ghosh { 53849bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 53949bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 54049bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 54149bd79ccSDebojyoti Ghosh const PetscScalar *aa = a->a, *T = kaij->T, *v; 54249bd79ccSDebojyoti Ghosh const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 54349bd79ccSDebojyoti Ghosh const PetscScalar *b, *xb, *idiag; 54449bd79ccSDebojyoti Ghosh PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 54549bd79ccSDebojyoti Ghosh PetscInt i, j, k, i2, bs, bs2, nz; 54649bd79ccSDebojyoti Ghosh 54749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 54849bd79ccSDebojyoti Ghosh its = its*lits; 54949bd79ccSDebojyoti Ghosh if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 55049bd79ccSDebojyoti Ghosh if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 55149bd79ccSDebojyoti Ghosh if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 55231a97b9aSRichard Tran Mills if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 55349bd79ccSDebojyoti Ghosh if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 55449bd79ccSDebojyoti Ghosh else {bs = p; bs2 = bs*bs; } 55549bd79ccSDebojyoti Ghosh 55649bd79ccSDebojyoti Ghosh if (!m) PetscFunctionReturn(0); 55749bd79ccSDebojyoti Ghosh 55849bd79ccSDebojyoti Ghosh if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); } 55949bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 56049bd79ccSDebojyoti Ghosh diag = a->diag; 56149bd79ccSDebojyoti Ghosh 56249bd79ccSDebojyoti Ghosh if (!kaij->sor.setup) { 56349bd79ccSDebojyoti Ghosh ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 56449bd79ccSDebojyoti Ghosh kaij->sor.setup = PETSC_TRUE; 56549bd79ccSDebojyoti Ghosh } 56649bd79ccSDebojyoti Ghosh y = kaij->sor.y; 56749bd79ccSDebojyoti Ghosh w = kaij->sor.w; 56849bd79ccSDebojyoti Ghosh work = kaij->sor.work; 56949bd79ccSDebojyoti Ghosh t = kaij->sor.t; 57049bd79ccSDebojyoti Ghosh arr = kaij->sor.arr; 57149bd79ccSDebojyoti Ghosh 57249bd79ccSDebojyoti Ghosh ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 57349bd79ccSDebojyoti Ghosh ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 57449bd79ccSDebojyoti Ghosh 57549bd79ccSDebojyoti Ghosh if (flag & SOR_ZERO_INITIAL_GUESS) { 57649bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 57749bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 57849bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 57949bd79ccSDebojyoti Ghosh i2 = bs; 58049bd79ccSDebojyoti Ghosh idiag += bs2; 58149bd79ccSDebojyoti Ghosh for (i=1; i<m; i++) { 58249bd79ccSDebojyoti Ghosh v = aa + ai[i]; 58349bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 58449bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 58549bd79ccSDebojyoti Ghosh 58649bd79ccSDebojyoti Ghosh if (T) { /* b - T (Arow * x) */ 58749bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] = 0; 58849bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 58949bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 59049bd79ccSDebojyoti Ghosh } 59149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 59249bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 59349bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 59449bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 59549bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 59649bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 59749bd79ccSDebojyoti Ghosh } 59849bd79ccSDebojyoti Ghosh } else { 59949bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 60049bd79ccSDebojyoti Ghosh } 60149bd79ccSDebojyoti Ghosh 60249bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 60349bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 60449bd79ccSDebojyoti Ghosh 60549bd79ccSDebojyoti Ghosh idiag += bs2; 60649bd79ccSDebojyoti Ghosh i2 += bs; 60749bd79ccSDebojyoti Ghosh } 60849bd79ccSDebojyoti Ghosh /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 60949bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 61049bd79ccSDebojyoti Ghosh xb = t; 61149bd79ccSDebojyoti Ghosh } else xb = b; 61249bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 61349bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag+bs2*(m-1); 61449bd79ccSDebojyoti Ghosh i2 = bs * (m-1); 61549bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 61649bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 61749bd79ccSDebojyoti Ghosh i2 -= bs; 61849bd79ccSDebojyoti Ghosh idiag -= bs2; 61949bd79ccSDebojyoti Ghosh for (i=m-2; i>=0; i--) { 62049bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1 ; 62149bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 62249bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 62349bd79ccSDebojyoti Ghosh 62449bd79ccSDebojyoti Ghosh if (T) { /* FIXME: This branch untested */ 62549bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 62649bd79ccSDebojyoti Ghosh /* copy all rows of x that are needed into contiguous space */ 62749bd79ccSDebojyoti Ghosh workt = work; 62849bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 62949bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 63049bd79ccSDebojyoti Ghosh workt += bs; 63149bd79ccSDebojyoti Ghosh } 63249bd79ccSDebojyoti Ghosh arrt = arr; 63349bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 63449bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 63549bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 63649bd79ccSDebojyoti Ghosh arrt += bs2; 63749bd79ccSDebojyoti Ghosh } 63849bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 63949bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 64049bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] = t[i2+k]; 64149bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 64249bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 64349bd79ccSDebojyoti Ghosh } 64449bd79ccSDebojyoti Ghosh } 64549bd79ccSDebojyoti Ghosh 64649bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 64749bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 64849bd79ccSDebojyoti Ghosh 64949bd79ccSDebojyoti Ghosh idiag -= bs2; 65049bd79ccSDebojyoti Ghosh i2 -= bs; 65149bd79ccSDebojyoti Ghosh } 65249bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 65349bd79ccSDebojyoti Ghosh } 65449bd79ccSDebojyoti Ghosh its--; 65549bd79ccSDebojyoti Ghosh } 65649bd79ccSDebojyoti Ghosh while (its--) { /* FIXME: This branch not updated */ 65749bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 65849bd79ccSDebojyoti Ghosh i2 = 0; 65949bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 66049bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 66149bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 66249bd79ccSDebojyoti Ghosh 66349bd79ccSDebojyoti Ghosh v = aa + ai[i]; 66449bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 66549bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 66649bd79ccSDebojyoti Ghosh workt = work; 66749bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 66849bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 66949bd79ccSDebojyoti Ghosh workt += bs; 67049bd79ccSDebojyoti Ghosh } 67149bd79ccSDebojyoti Ghosh arrt = arr; 67249bd79ccSDebojyoti Ghosh if (T) { 67349bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 67449bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 67549bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 67649bd79ccSDebojyoti Ghosh arrt += bs2; 67749bd79ccSDebojyoti Ghosh } 67849bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 67949bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 68049bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 68149bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 68249bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 68349bd79ccSDebojyoti Ghosh arrt += bs2; 68449bd79ccSDebojyoti Ghosh } 68549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 68649bd79ccSDebojyoti Ghosh } 68749bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 68849bd79ccSDebojyoti Ghosh 68949bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 69049bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 69149bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 69249bd79ccSDebojyoti Ghosh workt = work; 69349bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 69449bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 69549bd79ccSDebojyoti Ghosh workt += bs; 69649bd79ccSDebojyoti Ghosh } 69749bd79ccSDebojyoti Ghosh arrt = arr; 69849bd79ccSDebojyoti Ghosh if (T) { 69949bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 70049bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 70149bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 70249bd79ccSDebojyoti Ghosh arrt += bs2; 70349bd79ccSDebojyoti Ghosh } 70449bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 70549bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 70649bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 70749bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 70849bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 70949bd79ccSDebojyoti Ghosh arrt += bs2; 71049bd79ccSDebojyoti Ghosh } 71149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 71249bd79ccSDebojyoti Ghosh } 71349bd79ccSDebojyoti Ghosh 71449bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 71549bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 71649bd79ccSDebojyoti Ghosh 71749bd79ccSDebojyoti Ghosh idiag += bs2; 71849bd79ccSDebojyoti Ghosh i2 += bs; 71949bd79ccSDebojyoti Ghosh } 72049bd79ccSDebojyoti Ghosh xb = t; 72149bd79ccSDebojyoti Ghosh } 72249bd79ccSDebojyoti Ghosh else xb = b; 72349bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 72449bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag+bs2*(m-1); 72549bd79ccSDebojyoti Ghosh i2 = bs * (m-1); 72649bd79ccSDebojyoti Ghosh if (xb == b) { 72749bd79ccSDebojyoti Ghosh for (i=m-1; i>=0; i--) { 72849bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 72949bd79ccSDebojyoti Ghosh 73049bd79ccSDebojyoti Ghosh v = aa + ai[i]; 73149bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 73249bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 73349bd79ccSDebojyoti Ghosh workt = work; 73449bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 73549bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 73649bd79ccSDebojyoti Ghosh workt += bs; 73749bd79ccSDebojyoti Ghosh } 73849bd79ccSDebojyoti Ghosh arrt = arr; 73949bd79ccSDebojyoti Ghosh if (T) { 74049bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 74149bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 74249bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 74349bd79ccSDebojyoti Ghosh arrt += bs2; 74449bd79ccSDebojyoti Ghosh } 74549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 74649bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 74749bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 74849bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 74949bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 75049bd79ccSDebojyoti Ghosh arrt += bs2; 75149bd79ccSDebojyoti Ghosh } 75249bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 75349bd79ccSDebojyoti Ghosh } 75449bd79ccSDebojyoti Ghosh 75549bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 75649bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 75749bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 75849bd79ccSDebojyoti Ghosh workt = work; 75949bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 76049bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 76149bd79ccSDebojyoti Ghosh workt += bs; 76249bd79ccSDebojyoti Ghosh } 76349bd79ccSDebojyoti Ghosh arrt = arr; 76449bd79ccSDebojyoti Ghosh if (T) { 76549bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 76649bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 76749bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 76849bd79ccSDebojyoti Ghosh arrt += bs2; 76949bd79ccSDebojyoti Ghosh } 77049bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 77149bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 77249bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 77349bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 77449bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 77549bd79ccSDebojyoti Ghosh arrt += bs2; 77649bd79ccSDebojyoti Ghosh } 77749bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 77849bd79ccSDebojyoti Ghosh } 77949bd79ccSDebojyoti Ghosh 78049bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 78149bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 78249bd79ccSDebojyoti Ghosh } 78349bd79ccSDebojyoti Ghosh } else { 78449bd79ccSDebojyoti Ghosh for (i=m-1; i>=0; i--) { 78549bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 78649bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 78749bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 78849bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 78949bd79ccSDebojyoti Ghosh workt = work; 79049bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 79149bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 79249bd79ccSDebojyoti Ghosh workt += bs; 79349bd79ccSDebojyoti Ghosh } 79449bd79ccSDebojyoti Ghosh arrt = arr; 79549bd79ccSDebojyoti Ghosh if (T) { 79649bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 79749bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 79849bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 79949bd79ccSDebojyoti Ghosh arrt += bs2; 80049bd79ccSDebojyoti Ghosh } 80149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 80249bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 80349bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 80449bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 80549bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 80649bd79ccSDebojyoti Ghosh arrt += bs2; 80749bd79ccSDebojyoti Ghosh } 80849bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 80949bd79ccSDebojyoti Ghosh } 81049bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 81149bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 81249bd79ccSDebojyoti Ghosh } 81349bd79ccSDebojyoti Ghosh idiag -= bs2; 81449bd79ccSDebojyoti Ghosh i2 -= bs; 81549bd79ccSDebojyoti Ghosh } 81649bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 81749bd79ccSDebojyoti Ghosh } 81849bd79ccSDebojyoti Ghosh } 81949bd79ccSDebojyoti Ghosh 82049bd79ccSDebojyoti Ghosh ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 82149bd79ccSDebojyoti Ghosh ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 82249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 82349bd79ccSDebojyoti Ghosh } 82449bd79ccSDebojyoti Ghosh 82549bd79ccSDebojyoti Ghosh /*===================================================================================*/ 82649bd79ccSDebojyoti Ghosh 82749bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz) 82849bd79ccSDebojyoti Ghosh { 82949bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 83049bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 83149bd79ccSDebojyoti Ghosh 83249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 83349bd79ccSDebojyoti Ghosh if (!yy) { 83449bd79ccSDebojyoti Ghosh ierr = VecSet(zz,0.0);CHKERRQ(ierr); 83549bd79ccSDebojyoti Ghosh } else { 83649bd79ccSDebojyoti Ghosh ierr = VecCopy(yy,zz);CHKERRQ(ierr); 83749bd79ccSDebojyoti Ghosh } 83849bd79ccSDebojyoti Ghosh /* start the scatter */ 83949bd79ccSDebojyoti Ghosh ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 84049bd79ccSDebojyoti Ghosh ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 84149bd79ccSDebojyoti Ghosh ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 84249bd79ccSDebojyoti Ghosh ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 84349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 84449bd79ccSDebojyoti Ghosh } 84549bd79ccSDebojyoti Ghosh 84649bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy) 84749bd79ccSDebojyoti Ghosh { 84849bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 84949bd79ccSDebojyoti Ghosh PetscFunctionBegin; 85049bd79ccSDebojyoti Ghosh ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 85149bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 85249bd79ccSDebojyoti Ghosh } 85349bd79ccSDebojyoti Ghosh 85449bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz) 85549bd79ccSDebojyoti Ghosh { 85649bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 85749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 85849bd79ccSDebojyoti Ghosh ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr); 85949bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 86049bd79ccSDebojyoti Ghosh } 86149bd79ccSDebojyoti Ghosh 86249bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values) 86349bd79ccSDebojyoti Ghosh { 86449bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 86549bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 86649bd79ccSDebojyoti Ghosh 86749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 86849bd79ccSDebojyoti Ghosh ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 86949bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 87049bd79ccSDebojyoti Ghosh } 87149bd79ccSDebojyoti Ghosh 87249bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/ 87349bd79ccSDebojyoti Ghosh 87449bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 87549bd79ccSDebojyoti Ghosh { 87649bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 8771ca5ffdbSRichard Tran Mills PetscErrorCode diag = PETSC_FALSE; 8781ca5ffdbSRichard Tran Mills PetscErrorCode ierr; 87949bd79ccSDebojyoti Ghosh PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 88049bd79ccSDebojyoti Ghosh PetscScalar *vaij,*v,*S=b->S,*T=b->T; 88149bd79ccSDebojyoti Ghosh 88249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 88349bd79ccSDebojyoti Ghosh if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 88449bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 88549bd79ccSDebojyoti Ghosh if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 88649bd79ccSDebojyoti Ghosh 88749bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 88849bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 88949bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 89049bd79ccSDebojyoti Ghosh if (values) *values = NULL; 89149bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 89249bd79ccSDebojyoti Ghosh } 89349bd79ccSDebojyoti Ghosh 89449bd79ccSDebojyoti Ghosh if (T || b->isTI) { 89549bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 89649bd79ccSDebojyoti Ghosh c = nzaij; 89749bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 89849bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 89949bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 90049bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 90149bd79ccSDebojyoti Ghosh c = i; 90249bd79ccSDebojyoti Ghosh } 90349bd79ccSDebojyoti Ghosh } 90449bd79ccSDebojyoti Ghosh } else nzaij = c = 0; 90549bd79ccSDebojyoti Ghosh 90649bd79ccSDebojyoti Ghosh /* calculate size of row */ 90749bd79ccSDebojyoti Ghosh nz = 0; 90849bd79ccSDebojyoti Ghosh if (S) nz += q; 90949bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 91049bd79ccSDebojyoti Ghosh 91149bd79ccSDebojyoti Ghosh if (cols || values) { 91249bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 91338822f9dSRichard Tran Mills for (i=0; i<q; i++) { 91438822f9dSRichard Tran Mills /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 91538822f9dSRichard Tran Mills v[i] = 0.0; 91638822f9dSRichard Tran Mills } 91749bd79ccSDebojyoti Ghosh if (b->isTI) { 91849bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 91949bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 92049bd79ccSDebojyoti Ghosh idx[i*q+j] = colsaij[i]*q+j; 92149bd79ccSDebojyoti Ghosh v[i*q+j] = (j==s ? vaij[i] : 0); 92249bd79ccSDebojyoti Ghosh } 92349bd79ccSDebojyoti Ghosh } 92449bd79ccSDebojyoti Ghosh } else if (T) { 92549bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 92649bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 92749bd79ccSDebojyoti Ghosh idx[i*q+j] = colsaij[i]*q+j; 92849bd79ccSDebojyoti Ghosh v[i*q+j] = vaij[i]*T[s+j*p]; 92949bd79ccSDebojyoti Ghosh } 93049bd79ccSDebojyoti Ghosh } 93149bd79ccSDebojyoti Ghosh } 93249bd79ccSDebojyoti Ghosh if (S) { 93349bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 93449bd79ccSDebojyoti Ghosh idx[c*q+j] = r*q+j; 93549bd79ccSDebojyoti Ghosh v[c*q+j] += S[s+j*p]; 93649bd79ccSDebojyoti Ghosh } 93749bd79ccSDebojyoti Ghosh } 93849bd79ccSDebojyoti Ghosh } 93949bd79ccSDebojyoti Ghosh 94049bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 94149bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 94249bd79ccSDebojyoti Ghosh if (values) *values = v; 94349bd79ccSDebojyoti Ghosh 94449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 94549bd79ccSDebojyoti Ghosh } 94649bd79ccSDebojyoti Ghosh 94749bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 94849bd79ccSDebojyoti Ghosh { 94949bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 95049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 95149bd79ccSDebojyoti Ghosh ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 95249bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 95349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 95449bd79ccSDebojyoti Ghosh } 95549bd79ccSDebojyoti Ghosh 95649bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 95749bd79ccSDebojyoti Ghosh { 95849bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 95949bd79ccSDebojyoti Ghosh Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 96049bd79ccSDebojyoti Ghosh Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 96149bd79ccSDebojyoti Ghosh Mat AIJ = b->A; 962fc64b2cfSRichard Tran Mills PetscBool diag = PETSC_FALSE; 96349bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 96449bd79ccSDebojyoti Ghosh const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 96549bd79ccSDebojyoti Ghosh PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 96649bd79ccSDebojyoti Ghosh PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 96749bd79ccSDebojyoti Ghosh 96849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 96949bd79ccSDebojyoti Ghosh if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 97049bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 97149bd79ccSDebojyoti Ghosh if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 97249bd79ccSDebojyoti Ghosh lrow = row - rstart; 97349bd79ccSDebojyoti Ghosh 97449bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 97549bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 97649bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 97749bd79ccSDebojyoti Ghosh if (values) *values = NULL; 97849bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 97949bd79ccSDebojyoti Ghosh } 98049bd79ccSDebojyoti Ghosh 98149bd79ccSDebojyoti Ghosh r = lrow/p; 98249bd79ccSDebojyoti Ghosh s = lrow%p; 98349bd79ccSDebojyoti Ghosh 98449bd79ccSDebojyoti Ghosh if (T || b->isTI) { 98549bd79ccSDebojyoti Ghosh ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 98649bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 98749bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 98849bd79ccSDebojyoti Ghosh 98949bd79ccSDebojyoti Ghosh c = ncolsaij + ncolsoaij; 99049bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 99149bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 99249bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 99349bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 99449bd79ccSDebojyoti Ghosh c = i; 99549bd79ccSDebojyoti Ghosh } 99649bd79ccSDebojyoti Ghosh } 99749bd79ccSDebojyoti Ghosh } else c = 0; 99849bd79ccSDebojyoti Ghosh 99949bd79ccSDebojyoti Ghosh /* calculate size of row */ 100049bd79ccSDebojyoti Ghosh nz = 0; 100149bd79ccSDebojyoti Ghosh if (S) nz += q; 100249bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 100349bd79ccSDebojyoti Ghosh 100449bd79ccSDebojyoti Ghosh if (cols || values) { 100549bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1006a437a796SRichard Tran Mills for (i=0; i<q; i++) { 1007a437a796SRichard Tran Mills /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1008a437a796SRichard Tran Mills v[i] = 0.0; 1009a437a796SRichard Tran Mills } 101049bd79ccSDebojyoti Ghosh if (b->isTI) { 101149bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 101249bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 101349bd79ccSDebojyoti Ghosh idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 101449bd79ccSDebojyoti Ghosh v[i*q+j] = (j==s ? vals[i] : 0.0); 101549bd79ccSDebojyoti Ghosh } 101649bd79ccSDebojyoti Ghosh } 101749bd79ccSDebojyoti Ghosh for (i=0; i<ncolsoaij; i++) { 101849bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 101949bd79ccSDebojyoti Ghosh idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 102049bd79ccSDebojyoti Ghosh v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 102149bd79ccSDebojyoti Ghosh } 102249bd79ccSDebojyoti Ghosh } 102349bd79ccSDebojyoti Ghosh } else if (T) { 102449bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 102549bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 102649bd79ccSDebojyoti Ghosh idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 102749bd79ccSDebojyoti Ghosh v[i*q+j] = vals[i]*T[s+j*p]; 102849bd79ccSDebojyoti Ghosh } 102949bd79ccSDebojyoti Ghosh } 103049bd79ccSDebojyoti Ghosh for (i=0; i<ncolsoaij; i++) { 103149bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 103249bd79ccSDebojyoti Ghosh idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 103349bd79ccSDebojyoti Ghosh v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 103449bd79ccSDebojyoti Ghosh } 103549bd79ccSDebojyoti Ghosh } 103649bd79ccSDebojyoti Ghosh } 103749bd79ccSDebojyoti Ghosh if (S) { 103849bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 103949bd79ccSDebojyoti Ghosh idx[c*q+j] = (r+rstart/p)*q+j; 104049bd79ccSDebojyoti Ghosh v[c*q+j] += S[s+j*p]; 104149bd79ccSDebojyoti Ghosh } 104249bd79ccSDebojyoti Ghosh } 104349bd79ccSDebojyoti Ghosh } 104449bd79ccSDebojyoti Ghosh 104549bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 104649bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 104749bd79ccSDebojyoti Ghosh if (values) *values = v; 104849bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 104949bd79ccSDebojyoti Ghosh } 105049bd79ccSDebojyoti Ghosh 105149bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 105249bd79ccSDebojyoti Ghosh { 105349bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 105449bd79ccSDebojyoti Ghosh PetscFunctionBegin; 105549bd79ccSDebojyoti Ghosh ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 105649bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 105749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 105849bd79ccSDebojyoti Ghosh } 105949bd79ccSDebojyoti Ghosh 106049bd79ccSDebojyoti Ghosh PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 106149bd79ccSDebojyoti Ghosh { 106249bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 106349bd79ccSDebojyoti Ghosh Mat A; 106449bd79ccSDebojyoti Ghosh 106549bd79ccSDebojyoti Ghosh PetscFunctionBegin; 106649bd79ccSDebojyoti Ghosh ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 106749bd79ccSDebojyoti Ghosh ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 106849bd79ccSDebojyoti Ghosh ierr = MatDestroy(&A);CHKERRQ(ierr); 106949bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 107049bd79ccSDebojyoti Ghosh } 107149bd79ccSDebojyoti Ghosh 107249bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */ 107349bd79ccSDebojyoti Ghosh /*@C 107449bd79ccSDebojyoti Ghosh MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 107549bd79ccSDebojyoti Ghosh 107649bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 107749bd79ccSDebojyoti Ghosh 107849bd79ccSDebojyoti Ghosh where 107949bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 108049bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 108149bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 108249bd79ccSDebojyoti Ghosh I is the identity matrix 108349bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 108449bd79ccSDebojyoti Ghosh 108549bd79ccSDebojyoti Ghosh The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 108649bd79ccSDebojyoti Ghosh S is always stored independently on all processes as a PetscScalar array in column-major format. 108749bd79ccSDebojyoti Ghosh 108849bd79ccSDebojyoti Ghosh Collective 108949bd79ccSDebojyoti Ghosh 109049bd79ccSDebojyoti Ghosh Input Parameters: 109149bd79ccSDebojyoti Ghosh + A - the AIJ matrix 109249bd79ccSDebojyoti Ghosh . S - the S matrix, stored as a PetscScalar array (column-major) 109349bd79ccSDebojyoti Ghosh . T - the T matrix, stored as a PetscScalar array (column-major) 109449bd79ccSDebojyoti Ghosh . p - number of rows in S and T 109549bd79ccSDebojyoti Ghosh - q - number of columns in S and T 109649bd79ccSDebojyoti Ghosh 109749bd79ccSDebojyoti Ghosh Output Parameter: 109849bd79ccSDebojyoti Ghosh . kaij - the new KAIJ matrix 109949bd79ccSDebojyoti Ghosh 110049bd79ccSDebojyoti Ghosh Operations provided: 110149bd79ccSDebojyoti Ghosh + MatMult 110249bd79ccSDebojyoti Ghosh . MatMultAdd 110349bd79ccSDebojyoti Ghosh . MatInvertBlockDiagonal 110449bd79ccSDebojyoti Ghosh - MatView 110549bd79ccSDebojyoti Ghosh 110649bd79ccSDebojyoti Ghosh Level: advanced 110749bd79ccSDebojyoti Ghosh 11080567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 110949bd79ccSDebojyoti Ghosh @*/ 111049bd79ccSDebojyoti Ghosh PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 111149bd79ccSDebojyoti Ghosh { 111249bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 111349bd79ccSDebojyoti Ghosh PetscMPIInt size; 111449bd79ccSDebojyoti Ghosh 111549bd79ccSDebojyoti Ghosh PetscFunctionBegin; 11160567c835SRichard Tran Mills ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 111749bd79ccSDebojyoti Ghosh ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 111849bd79ccSDebojyoti Ghosh if (size == 1) { 11190567c835SRichard Tran Mills ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 112049bd79ccSDebojyoti Ghosh } else { 11210567c835SRichard Tran Mills ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 112249bd79ccSDebojyoti Ghosh } 11230567c835SRichard Tran Mills ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 11240567c835SRichard Tran Mills ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 11250567c835SRichard Tran Mills ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 11260567c835SRichard Tran Mills ierr = MatSetUp(*kaij); 11270567c835SRichard Tran Mills PetscFunctionReturn(0); 11280567c835SRichard Tran Mills } 112949bd79ccSDebojyoti Ghosh 11300567c835SRichard Tran Mills /*MC 11310567c835SRichard Tran Mills MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 11320567c835SRichard Tran Mills 11330567c835SRichard Tran Mills [I \otimes S + A \otimes T] 11340567c835SRichard Tran Mills 11350567c835SRichard Tran Mills where 11360567c835SRichard Tran Mills S is a dense (p \times q) matrix 11370567c835SRichard Tran Mills T is a dense (p \times q) matrix 11380567c835SRichard Tran Mills A is an AIJ (n \times n) matrix 11390567c835SRichard Tran Mills I is the identity matrix 11400567c835SRichard Tran Mills The resulting matrix is (np \times nq) 11410567c835SRichard Tran Mills 11420567c835SRichard Tran Mills The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 11430567c835SRichard Tran Mills S and T are always stored independently on all processes as a PetscScalar array in column-major format. 11440567c835SRichard Tran Mills 11450567c835SRichard Tran Mills Operations provided: 11460567c835SRichard Tran Mills + MatMult 11470567c835SRichard Tran Mills . MatMultAdd 11480567c835SRichard Tran Mills . MatInvertBlockDiagonal 11490567c835SRichard Tran Mills - MatView 11500567c835SRichard Tran Mills 11510567c835SRichard Tran Mills Level: advanced 11520567c835SRichard Tran Mills 11530567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 11540567c835SRichard Tran Mills M*/ 11550567c835SRichard Tran Mills 11560567c835SRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 11570567c835SRichard Tran Mills { 11580567c835SRichard Tran Mills PetscErrorCode ierr; 11590567c835SRichard Tran Mills Mat_MPIKAIJ *b; 11600567c835SRichard Tran Mills PetscMPIInt size; 11610567c835SRichard Tran Mills 11620567c835SRichard Tran Mills PetscFunctionBegin; 11630567c835SRichard Tran Mills ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 11640567c835SRichard Tran Mills A->data = (void*)b; 11650567c835SRichard Tran Mills 11660567c835SRichard Tran Mills ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 11670567c835SRichard Tran Mills 11680567c835SRichard Tran Mills A->ops->setup = MatSetUp_KAIJ; 11690567c835SRichard Tran Mills 11700567c835SRichard Tran Mills b->w = 0; 11710567c835SRichard Tran Mills ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 11720567c835SRichard Tran Mills if (size == 1) { 11730567c835SRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 11740567c835SRichard Tran Mills 11750567c835SRichard Tran Mills A->ops->setup = MatSetUp_KAIJ; 11760567c835SRichard Tran Mills A->ops->destroy = MatDestroy_SeqKAIJ; 11770567c835SRichard Tran Mills A->ops->view = MatView_SeqKAIJ; 11780567c835SRichard Tran Mills A->ops->mult = MatMult_SeqKAIJ_N; 11790567c835SRichard Tran Mills A->ops->multadd = MatMultAdd_SeqKAIJ_N; 11800567c835SRichard Tran Mills A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N; 11810567c835SRichard Tran Mills A->ops->getrow = MatGetRow_SeqKAIJ; 11820567c835SRichard Tran Mills A->ops->restorerow = MatRestoreRow_SeqKAIJ; 11830567c835SRichard Tran Mills A->ops->sor = MatSOR_SeqKAIJ; 11840567c835SRichard Tran Mills } else { 11850567c835SRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 11860567c835SRichard Tran Mills A->ops->setup = MatSetUp_KAIJ; 11870567c835SRichard Tran Mills A->ops->destroy = MatDestroy_MPIKAIJ; 11880567c835SRichard Tran Mills A->ops->view = MatView_MPIKAIJ; 11890567c835SRichard Tran Mills A->ops->mult = MatMult_MPIKAIJ_dof; 11900567c835SRichard Tran Mills A->ops->multadd = MatMultAdd_MPIKAIJ_dof; 11910567c835SRichard Tran Mills A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof; 11920567c835SRichard Tran Mills A->ops->getrow = MatGetRow_MPIKAIJ; 11930567c835SRichard Tran Mills A->ops->restorerow = MatRestoreRow_MPIKAIJ; 11940567c835SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 11950567c835SRichard Tran Mills } 11960567c835SRichard Tran Mills A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 119749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 119849bd79ccSDebojyoti Ghosh } 1199