149bd79ccSDebojyoti Ghosh 249bd79ccSDebojyoti Ghosh /* 349bd79ccSDebojyoti Ghosh Defines the basic matrix operations for the KAIJ matrix storage format. 449bd79ccSDebojyoti Ghosh This format is used to evaluate matrices of the form: 549bd79ccSDebojyoti Ghosh 649bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 749bd79ccSDebojyoti Ghosh 849bd79ccSDebojyoti Ghosh where 949bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 1049bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 1149bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 1249bd79ccSDebojyoti Ghosh I is the identity matrix 1349bd79ccSDebojyoti Ghosh 1449bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 1549bd79ccSDebojyoti Ghosh 1649bd79ccSDebojyoti Ghosh We provide: 1749bd79ccSDebojyoti Ghosh MatMult() 1849bd79ccSDebojyoti Ghosh MatMultAdd() 1949bd79ccSDebojyoti Ghosh MatInvertBlockDiagonal() 2049bd79ccSDebojyoti Ghosh and 2149bd79ccSDebojyoti Ghosh MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 2249bd79ccSDebojyoti Ghosh 2349bd79ccSDebojyoti Ghosh This single directory handles both the sequential and parallel codes 2449bd79ccSDebojyoti Ghosh */ 2549bd79ccSDebojyoti Ghosh 2649bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 2749bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h> 2849bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h> 2949bd79ccSDebojyoti Ghosh 3049bd79ccSDebojyoti Ghosh /*@C 3149bd79ccSDebojyoti Ghosh MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 3249bd79ccSDebojyoti Ghosh 3349bd79ccSDebojyoti Ghosh Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 3449bd79ccSDebojyoti Ghosh 3549bd79ccSDebojyoti Ghosh Input Parameter: 3649bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 3749bd79ccSDebojyoti Ghosh 3849bd79ccSDebojyoti Ghosh Output Parameter: 3949bd79ccSDebojyoti Ghosh . B - the AIJ matrix 4049bd79ccSDebojyoti Ghosh 4149bd79ccSDebojyoti Ghosh Level: advanced 4249bd79ccSDebojyoti Ghosh 4349bd79ccSDebojyoti Ghosh Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 4449bd79ccSDebojyoti Ghosh 45db781477SPatrick Sanan .seealso: `MatCreateKAIJ()` 4649bd79ccSDebojyoti Ghosh @*/ 47*9371c9d4SSatish Balay PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B) { 4849bd79ccSDebojyoti Ghosh PetscBool ismpikaij, isseqkaij; 4949bd79ccSDebojyoti Ghosh 5049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 519566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 529566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij)); 5349bd79ccSDebojyoti Ghosh if (ismpikaij) { 5449bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 5549bd79ccSDebojyoti Ghosh 5649bd79ccSDebojyoti Ghosh *B = b->A; 5749bd79ccSDebojyoti Ghosh } else if (isseqkaij) { 5849bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 5949bd79ccSDebojyoti Ghosh 6049bd79ccSDebojyoti Ghosh *B = b->AIJ; 61b04351cbSRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ"); 6249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 6349bd79ccSDebojyoti Ghosh } 6449bd79ccSDebojyoti Ghosh 6549bd79ccSDebojyoti Ghosh /*@C 6649bd79ccSDebojyoti Ghosh MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 6749bd79ccSDebojyoti Ghosh 680567c835SRichard Tran Mills Not Collective; the entire S is stored and returned independently on all processes. 6949bd79ccSDebojyoti Ghosh 7049bd79ccSDebojyoti Ghosh Input Parameter: 7149bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 7249bd79ccSDebojyoti Ghosh 73a5b5c723SRichard Tran Mills Output Parameters: 74a5b5c723SRichard Tran Mills + m - the number of rows in S 75a5b5c723SRichard Tran Mills . n - the number of columns in S 76a5b5c723SRichard Tran Mills - S - the S matrix, in form of a scalar array in column-major format 7749bd79ccSDebojyoti Ghosh 78a5b5c723SRichard Tran Mills Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 7931a97b9aSRichard Tran Mills 8049bd79ccSDebojyoti Ghosh Level: advanced 8149bd79ccSDebojyoti Ghosh 82db781477SPatrick Sanan .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()` 8349bd79ccSDebojyoti Ghosh @*/ 84*9371c9d4SSatish Balay PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S) { 8549bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 8649bd79ccSDebojyoti Ghosh PetscFunctionBegin; 87a5b5c723SRichard Tran Mills if (m) *m = b->p; 88a5b5c723SRichard Tran Mills if (n) *n = b->q; 89a5b5c723SRichard Tran Mills if (S) *S = b->S; 90a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 91a5b5c723SRichard Tran Mills } 92a5b5c723SRichard Tran Mills 93a5b5c723SRichard Tran Mills /*@C 94a5b5c723SRichard Tran Mills MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 95a5b5c723SRichard Tran Mills 96a5b5c723SRichard Tran Mills Not Collective; the entire S is stored and returned independently on all processes. 97a5b5c723SRichard Tran Mills 98a5b5c723SRichard Tran Mills Input Parameter: 99a5b5c723SRichard Tran Mills . A - the KAIJ matrix 100a5b5c723SRichard Tran Mills 101a5b5c723SRichard Tran Mills Output Parameters: 102a5b5c723SRichard Tran Mills + m - the number of rows in S 103a5b5c723SRichard Tran Mills . n - the number of columns in S 104a5b5c723SRichard Tran Mills - S - the S matrix, in form of a scalar array in column-major format 105a5b5c723SRichard Tran Mills 106a5b5c723SRichard Tran Mills Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 107a5b5c723SRichard Tran Mills 108a5b5c723SRichard Tran Mills Level: advanced 109a5b5c723SRichard Tran Mills 110db781477SPatrick Sanan .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()` 111a5b5c723SRichard Tran Mills @*/ 112*9371c9d4SSatish Balay PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S) { 113a5b5c723SRichard Tran Mills Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 114a5b5c723SRichard Tran Mills PetscFunctionBegin; 115a5b5c723SRichard Tran Mills if (m) *m = b->p; 116a5b5c723SRichard Tran Mills if (n) *n = b->q; 117a5b5c723SRichard Tran Mills if (S) *S = b->S; 118a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 119a5b5c723SRichard Tran Mills } 120a5b5c723SRichard Tran Mills 121a5b5c723SRichard Tran Mills /*@C 122a5b5c723SRichard Tran Mills MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 123a5b5c723SRichard Tran Mills 124a5b5c723SRichard Tran Mills Not collective 125a5b5c723SRichard Tran Mills 126a5b5c723SRichard Tran Mills Input Parameter: 127a5b5c723SRichard Tran Mills . A - the KAIJ matrix 128a5b5c723SRichard Tran Mills 129a5b5c723SRichard Tran Mills Output Parameter: 130a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS() 131a5b5c723SRichard Tran Mills 132a5b5c723SRichard Tran Mills Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 133a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 134a5b5c723SRichard Tran Mills 135a5b5c723SRichard Tran Mills Level: advanced 136db781477SPatrick Sanan .seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 137a5b5c723SRichard Tran Mills @*/ 138*9371c9d4SSatish Balay PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S) { 139a5b5c723SRichard Tran Mills PetscFunctionBegin; 140a5b5c723SRichard Tran Mills if (S) *S = NULL; 1419566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)A)); 142a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 143a5b5c723SRichard Tran Mills } 144a5b5c723SRichard Tran Mills 145a5b5c723SRichard Tran Mills /*@C 146a5b5c723SRichard Tran Mills MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 147a5b5c723SRichard Tran Mills 148a5b5c723SRichard Tran Mills Not collective 149a5b5c723SRichard Tran Mills 150a5b5c723SRichard Tran Mills Input Parameter: 151a5b5c723SRichard Tran Mills . A - the KAIJ matrix 152a5b5c723SRichard Tran Mills 153a5b5c723SRichard Tran Mills Output Parameter: 154a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS() 155a5b5c723SRichard Tran Mills 156a5b5c723SRichard Tran Mills Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 157a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 158a5b5c723SRichard Tran Mills 159a5b5c723SRichard Tran Mills Level: advanced 160db781477SPatrick Sanan .seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 161a5b5c723SRichard Tran Mills @*/ 162*9371c9d4SSatish Balay PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S) { 163a5b5c723SRichard Tran Mills PetscFunctionBegin; 164a5b5c723SRichard Tran Mills if (S) *S = NULL; 16549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 16649bd79ccSDebojyoti Ghosh } 16749bd79ccSDebojyoti Ghosh 16849bd79ccSDebojyoti Ghosh /*@C 16931a97b9aSRichard Tran Mills MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 17049bd79ccSDebojyoti Ghosh 1710567c835SRichard Tran Mills Not Collective; the entire T is stored and returned independently on all processes 17249bd79ccSDebojyoti Ghosh 17349bd79ccSDebojyoti Ghosh Input Parameter: 17449bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 17549bd79ccSDebojyoti Ghosh 176d8d19677SJose E. Roman Output Parameters: 177a5b5c723SRichard Tran Mills + m - the number of rows in T 178a5b5c723SRichard Tran Mills . n - the number of columns in T 179a5b5c723SRichard Tran Mills - T - the T matrix, in form of a scalar array in column-major format 18049bd79ccSDebojyoti Ghosh 181a5b5c723SRichard Tran Mills Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 18231a97b9aSRichard Tran Mills 18349bd79ccSDebojyoti Ghosh Level: advanced 18449bd79ccSDebojyoti Ghosh 185db781477SPatrick Sanan .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()` 18649bd79ccSDebojyoti Ghosh @*/ 187*9371c9d4SSatish Balay PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T) { 18849bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 18949bd79ccSDebojyoti Ghosh PetscFunctionBegin; 190a5b5c723SRichard Tran Mills if (m) *m = b->p; 191a5b5c723SRichard Tran Mills if (n) *n = b->q; 192a5b5c723SRichard Tran Mills if (T) *T = b->T; 193a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 194a5b5c723SRichard Tran Mills } 195a5b5c723SRichard Tran Mills 196a5b5c723SRichard Tran Mills /*@C 197a5b5c723SRichard Tran Mills MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 198a5b5c723SRichard Tran Mills 199a5b5c723SRichard Tran Mills Not Collective; the entire T is stored and returned independently on all processes 200a5b5c723SRichard Tran Mills 201a5b5c723SRichard Tran Mills Input Parameter: 202a5b5c723SRichard Tran Mills . A - the KAIJ matrix 203a5b5c723SRichard Tran Mills 204d8d19677SJose E. Roman Output Parameters: 205a5b5c723SRichard Tran Mills + m - the number of rows in T 206a5b5c723SRichard Tran Mills . n - the number of columns in T 207a5b5c723SRichard Tran Mills - T - the T matrix, in form of a scalar array in column-major format 208a5b5c723SRichard Tran Mills 209a5b5c723SRichard Tran Mills Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 210a5b5c723SRichard Tran Mills 211a5b5c723SRichard Tran Mills Level: advanced 212a5b5c723SRichard Tran Mills 213db781477SPatrick Sanan .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()` 214a5b5c723SRichard Tran Mills @*/ 215*9371c9d4SSatish Balay PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T) { 216a5b5c723SRichard Tran Mills Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 217a5b5c723SRichard Tran Mills PetscFunctionBegin; 218a5b5c723SRichard Tran Mills if (m) *m = b->p; 219a5b5c723SRichard Tran Mills if (n) *n = b->q; 220a5b5c723SRichard Tran Mills if (T) *T = b->T; 221a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 222a5b5c723SRichard Tran Mills } 223a5b5c723SRichard Tran Mills 224a5b5c723SRichard Tran Mills /*@C 225a5b5c723SRichard Tran Mills MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 226a5b5c723SRichard Tran Mills 227a5b5c723SRichard Tran Mills Not collective 228a5b5c723SRichard Tran Mills 229a5b5c723SRichard Tran Mills Input Parameter: 230a5b5c723SRichard Tran Mills . A - the KAIJ matrix 231a5b5c723SRichard Tran Mills 232a5b5c723SRichard Tran Mills Output Parameter: 233a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS() 234a5b5c723SRichard Tran Mills 235a5b5c723SRichard Tran Mills Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 236a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 237a5b5c723SRichard Tran Mills 238a5b5c723SRichard Tran Mills Level: advanced 239db781477SPatrick Sanan .seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 240a5b5c723SRichard Tran Mills @*/ 241*9371c9d4SSatish Balay PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T) { 242a5b5c723SRichard Tran Mills PetscFunctionBegin; 243a5b5c723SRichard Tran Mills if (T) *T = NULL; 2449566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)A)); 245a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 246a5b5c723SRichard Tran Mills } 247a5b5c723SRichard Tran Mills 248a5b5c723SRichard Tran Mills /*@C 249a5b5c723SRichard Tran Mills MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 250a5b5c723SRichard Tran Mills 251a5b5c723SRichard Tran Mills Not collective 252a5b5c723SRichard Tran Mills 253a5b5c723SRichard Tran Mills Input Parameter: 254a5b5c723SRichard Tran Mills . A - the KAIJ matrix 255a5b5c723SRichard Tran Mills 256a5b5c723SRichard Tran Mills Output Parameter: 257a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS() 258a5b5c723SRichard Tran Mills 259a5b5c723SRichard Tran Mills Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 260a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 261a5b5c723SRichard Tran Mills 262a5b5c723SRichard Tran Mills Level: advanced 263db781477SPatrick Sanan .seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 264a5b5c723SRichard Tran Mills @*/ 265*9371c9d4SSatish Balay PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T) { 266a5b5c723SRichard Tran Mills PetscFunctionBegin; 267a5b5c723SRichard Tran Mills if (T) *T = NULL; 26849bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 26949bd79ccSDebojyoti Ghosh } 27049bd79ccSDebojyoti Ghosh 2710567c835SRichard Tran Mills /*@ 2720567c835SRichard Tran Mills MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 2730567c835SRichard Tran Mills 2740567c835SRichard Tran Mills Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 2750567c835SRichard Tran Mills 2760567c835SRichard Tran Mills Input Parameters: 2770567c835SRichard Tran Mills + A - the KAIJ matrix 2780567c835SRichard Tran Mills - B - the AIJ matrix 2790567c835SRichard Tran Mills 28015b9d025SRichard Tran Mills Notes: 28115b9d025SRichard Tran Mills This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 28215b9d025SRichard Tran Mills Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 28315b9d025SRichard Tran Mills 2840567c835SRichard Tran Mills Level: advanced 2850567c835SRichard Tran Mills 286db781477SPatrick Sanan .seealso: `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()` 2870567c835SRichard Tran Mills @*/ 288*9371c9d4SSatish Balay PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B) { 2890567c835SRichard Tran Mills PetscMPIInt size; 29006d61a37SPierre Jolivet PetscBool flg; 2910567c835SRichard Tran Mills 2920567c835SRichard Tran Mills PetscFunctionBegin; 2939566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2940567c835SRichard Tran Mills if (size == 1) { 2959566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg)); 29628b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name); 2970567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 2980567c835SRichard Tran Mills a->AIJ = B; 2990567c835SRichard Tran Mills } else { 3000567c835SRichard Tran Mills Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data; 3010567c835SRichard Tran Mills a->A = B; 3020567c835SRichard Tran Mills } 3039566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)B)); 3040567c835SRichard Tran Mills PetscFunctionReturn(0); 3050567c835SRichard Tran Mills } 3060567c835SRichard Tran Mills 3070567c835SRichard Tran Mills /*@C 3080567c835SRichard Tran Mills MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 3090567c835SRichard Tran Mills 3100567c835SRichard Tran Mills Logically Collective; the entire S is stored independently on all processes. 3110567c835SRichard Tran Mills 3120567c835SRichard Tran Mills Input Parameters: 3130567c835SRichard Tran Mills + A - the KAIJ matrix 3140567c835SRichard Tran Mills . p - the number of rows in S 3150567c835SRichard Tran Mills . q - the number of columns in S 3160567c835SRichard Tran Mills - S - the S matrix, in form of a scalar array in column-major format 3170567c835SRichard Tran Mills 3180567c835SRichard Tran Mills Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 31988f48298SRichard Tran Mills The S matrix is copied, so the user can destroy this array. 3200567c835SRichard Tran Mills 3210567c835SRichard Tran Mills Level: Advanced 3220567c835SRichard Tran Mills 323db781477SPatrick Sanan .seealso: `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()` 3240567c835SRichard Tran Mills @*/ 325*9371c9d4SSatish Balay PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[]) { 3260567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 3270567c835SRichard Tran Mills 3280567c835SRichard Tran Mills PetscFunctionBegin; 3299566063dSJacob Faibussowitsch PetscCall(PetscFree(a->S)); 3300567c835SRichard Tran Mills if (S) { 3319566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(p * q, &a->S)); 3329566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar))); 3330567c835SRichard Tran Mills } else a->S = NULL; 3340567c835SRichard Tran Mills 3350567c835SRichard Tran Mills a->p = p; 3360567c835SRichard Tran Mills a->q = q; 3370567c835SRichard Tran Mills PetscFunctionReturn(0); 3380567c835SRichard Tran Mills } 3390567c835SRichard Tran Mills 3400567c835SRichard Tran Mills /*@C 341910cf402Sprj- MatKAIJGetScaledIdentity - Check if both S and T are scaled identities. 342910cf402Sprj- 343910cf402Sprj- Logically Collective. 344910cf402Sprj- 345910cf402Sprj- Input Parameter: 346910cf402Sprj- . A - the KAIJ matrix 347910cf402Sprj- 348910cf402Sprj- Output Parameter: 349910cf402Sprj- . identity - the Boolean value 350910cf402Sprj- 351910cf402Sprj- Level: Advanced 352910cf402Sprj- 353db781477SPatrick Sanan .seealso: `MatKAIJGetS()`, `MatKAIJGetT()` 354910cf402Sprj- @*/ 355*9371c9d4SSatish Balay PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity) { 356910cf402Sprj- Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 357910cf402Sprj- PetscInt i, j; 358910cf402Sprj- 359910cf402Sprj- PetscFunctionBegin; 360910cf402Sprj- if (a->p != a->q) { 361910cf402Sprj- *identity = PETSC_FALSE; 362910cf402Sprj- PetscFunctionReturn(0); 363910cf402Sprj- } else *identity = PETSC_TRUE; 364910cf402Sprj- if (!a->isTI || a->S) { 365910cf402Sprj- for (i = 0; i < a->p && *identity; i++) { 366910cf402Sprj- for (j = 0; j < a->p && *identity; j++) { 367910cf402Sprj- if (i != j) { 368910cf402Sprj- if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 369910cf402Sprj- if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 370910cf402Sprj- } else { 371910cf402Sprj- if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 372910cf402Sprj- if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 373910cf402Sprj- } 374910cf402Sprj- } 375910cf402Sprj- } 376910cf402Sprj- } 377910cf402Sprj- PetscFunctionReturn(0); 378910cf402Sprj- } 379910cf402Sprj- 380910cf402Sprj- /*@C 3810567c835SRichard Tran Mills MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 3820567c835SRichard Tran Mills 3830567c835SRichard Tran Mills Logically Collective; the entire T is stored independently on all processes. 3840567c835SRichard Tran Mills 3850567c835SRichard Tran Mills Input Parameters: 3860567c835SRichard Tran Mills + A - the KAIJ matrix 3870567c835SRichard Tran Mills . p - the number of rows in S 3880567c835SRichard Tran Mills . q - the number of columns in S 3890567c835SRichard Tran Mills - T - the T matrix, in form of a scalar array in column-major format 3900567c835SRichard Tran Mills 3910567c835SRichard Tran Mills Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 39288f48298SRichard Tran Mills The T matrix is copied, so the user can destroy this array. 3930567c835SRichard Tran Mills 3940567c835SRichard Tran Mills Level: Advanced 3950567c835SRichard Tran Mills 396db781477SPatrick Sanan .seealso: `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()` 3970567c835SRichard Tran Mills @*/ 398*9371c9d4SSatish Balay PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[]) { 3990567c835SRichard Tran Mills PetscInt i, j; 4000567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 4010567c835SRichard Tran Mills PetscBool isTI = PETSC_FALSE; 4020567c835SRichard Tran Mills 4030567c835SRichard Tran Mills PetscFunctionBegin; 4040567c835SRichard Tran Mills /* check if T is an identity matrix */ 4050567c835SRichard Tran Mills if (T && (p == q)) { 4060567c835SRichard Tran Mills isTI = PETSC_TRUE; 4070567c835SRichard Tran Mills for (i = 0; i < p; i++) { 4080567c835SRichard Tran Mills for (j = 0; j < q; j++) { 4090567c835SRichard Tran Mills if (i == j) { 4100567c835SRichard Tran Mills /* diagonal term must be 1 */ 4110567c835SRichard Tran Mills if (T[i + j * p] != 1.0) isTI = PETSC_FALSE; 4120567c835SRichard Tran Mills } else { 4130567c835SRichard Tran Mills /* off-diagonal term must be 0 */ 4140567c835SRichard Tran Mills if (T[i + j * p] != 0.0) isTI = PETSC_FALSE; 4150567c835SRichard Tran Mills } 4160567c835SRichard Tran Mills } 4170567c835SRichard Tran Mills } 4180567c835SRichard Tran Mills } 4190567c835SRichard Tran Mills a->isTI = isTI; 4200567c835SRichard Tran Mills 4219566063dSJacob Faibussowitsch PetscCall(PetscFree(a->T)); 4220567c835SRichard Tran Mills if (T && (!isTI)) { 4239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(p * q, &a->T)); 4249566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar))); 42550d19d74SRichard Tran Mills } else a->T = NULL; 4260567c835SRichard Tran Mills 4270567c835SRichard Tran Mills a->p = p; 4280567c835SRichard Tran Mills a->q = q; 4290567c835SRichard Tran Mills PetscFunctionReturn(0); 4300567c835SRichard Tran Mills } 4310567c835SRichard Tran Mills 432*9371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqKAIJ(Mat A) { 43349bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 43449bd79ccSDebojyoti Ghosh 43549bd79ccSDebojyoti Ghosh PetscFunctionBegin; 4369566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 4379566063dSJacob Faibussowitsch PetscCall(PetscFree(b->S)); 4389566063dSJacob Faibussowitsch PetscCall(PetscFree(b->T)); 4399566063dSJacob Faibussowitsch PetscCall(PetscFree(b->ibdiag)); 4409566063dSJacob Faibussowitsch PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr)); 4419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL)); 4429566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 44349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 44449bd79ccSDebojyoti Ghosh } 44549bd79ccSDebojyoti Ghosh 446*9371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) { 447e0e5a793SRichard Tran Mills Mat_MPIKAIJ *a; 448e0e5a793SRichard Tran Mills Mat_MPIAIJ *mpiaij; 449e0e5a793SRichard Tran Mills PetscScalar *T; 450e0e5a793SRichard Tran Mills PetscInt i, j; 451e0e5a793SRichard Tran Mills PetscObjectState state; 452e0e5a793SRichard Tran Mills 453e0e5a793SRichard Tran Mills PetscFunctionBegin; 454e0e5a793SRichard Tran Mills a = (Mat_MPIKAIJ *)A->data; 455e0e5a793SRichard Tran Mills mpiaij = (Mat_MPIAIJ *)a->A->data; 456e0e5a793SRichard Tran Mills 4579566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)a->A, &state)); 458e0e5a793SRichard Tran Mills if (state == a->state) { 459e0e5a793SRichard Tran Mills /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */ 460e0e5a793SRichard Tran Mills PetscFunctionReturn(0); 461e0e5a793SRichard Tran Mills } else { 4629566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->AIJ)); 4639566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->OAIJ)); 464e0e5a793SRichard Tran Mills if (a->isTI) { 465e0e5a793SRichard Tran Mills /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 466e0e5a793SRichard Tran Mills * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 467e0e5a793SRichard Tran Mills * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 468e0e5a793SRichard Tran Mills * to pass in. */ 4699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->p * a->q, &T)); 470e0e5a793SRichard Tran Mills for (i = 0; i < a->p; i++) { 471e0e5a793SRichard Tran Mills for (j = 0; j < a->q; j++) { 472e0e5a793SRichard Tran Mills if (i == j) T[i + j * a->p] = 1.0; 473e0e5a793SRichard Tran Mills else T[i + j * a->p] = 0.0; 474e0e5a793SRichard Tran Mills } 475e0e5a793SRichard Tran Mills } 476e0e5a793SRichard Tran Mills } else T = a->T; 4779566063dSJacob Faibussowitsch PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ)); 4789566063dSJacob Faibussowitsch PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ)); 4791baa6e33SBarry Smith if (a->isTI) PetscCall(PetscFree(T)); 480e0e5a793SRichard Tran Mills a->state = state; 481e0e5a793SRichard Tran Mills } 482e0e5a793SRichard Tran Mills 483e0e5a793SRichard Tran Mills PetscFunctionReturn(0); 484e0e5a793SRichard Tran Mills } 485e0e5a793SRichard Tran Mills 486*9371c9d4SSatish Balay PetscErrorCode MatSetUp_KAIJ(Mat A) { 4870567c835SRichard Tran Mills PetscInt n; 4880567c835SRichard Tran Mills PetscMPIInt size; 4890567c835SRichard Tran Mills Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data; 4900567c835SRichard Tran Mills 49149bd79ccSDebojyoti Ghosh PetscFunctionBegin; 4929566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4930567c835SRichard Tran Mills if (size == 1) { 4949566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N)); 4959566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p)); 4969566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q)); 4979566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 4989566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 4990567c835SRichard Tran Mills } else { 5000567c835SRichard Tran Mills Mat_MPIKAIJ *a; 5010567c835SRichard Tran Mills Mat_MPIAIJ *mpiaij; 5020567c835SRichard Tran Mills IS from, to; 5030567c835SRichard Tran Mills Vec gvec; 5040567c835SRichard Tran Mills 5050567c835SRichard Tran Mills a = (Mat_MPIKAIJ *)A->data; 506d3f912faSRichard Tran Mills mpiaij = (Mat_MPIAIJ *)a->A->data; 5079566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N)); 5089566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p)); 5099566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q)); 5109566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 5119566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 5120567c835SRichard Tran Mills 5139566063dSJacob Faibussowitsch PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); 5140567c835SRichard Tran Mills 5159566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec, &n)); 5169566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF, &a->w)); 5179566063dSJacob Faibussowitsch PetscCall(VecSetSizes(a->w, n * a->q, n * a->q)); 5189566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(a->w, a->q)); 5199566063dSJacob Faibussowitsch PetscCall(VecSetType(a->w, VECSEQ)); 5200567c835SRichard Tran Mills 5210567c835SRichard Tran Mills /* create two temporary Index sets for build scatter gather */ 5229566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 5239566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to)); 5240567c835SRichard Tran Mills 5250567c835SRichard Tran Mills /* create temporary global vector to generate scatter context */ 5269566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec)); 5270567c835SRichard Tran Mills 5280567c835SRichard Tran Mills /* generate the scatter context */ 5299566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx)); 5300567c835SRichard Tran Mills 5319566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 5329566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 5339566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 5340567c835SRichard Tran Mills } 5350567c835SRichard Tran Mills 5360567c835SRichard Tran Mills A->assembled = PETSC_TRUE; 53749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 53849bd79ccSDebojyoti Ghosh } 53949bd79ccSDebojyoti Ghosh 540*9371c9d4SSatish Balay PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer) { 541e6985dafSRichard Tran Mills PetscViewerFormat format; 542e6985dafSRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 54349bd79ccSDebojyoti Ghosh Mat B; 544e6985dafSRichard Tran Mills PetscInt i; 545e6985dafSRichard Tran Mills PetscBool ismpikaij; 54649bd79ccSDebojyoti Ghosh 54749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 5489566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 5499566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 550e6985dafSRichard Tran Mills if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 5519566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q)); 552e6985dafSRichard Tran Mills 553e6985dafSRichard Tran Mills /* Print appropriate details for S. */ 554e6985dafSRichard Tran Mills if (!a->S) { 5559566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n")); 556e6985dafSRichard Tran Mills } else if (format == PETSC_VIEWER_ASCII_IMPL) { 5579566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are ")); 558e6985dafSRichard Tran Mills for (i = 0; i < (a->p * a->q); i++) { 559e6985dafSRichard Tran Mills #if defined(PETSC_USE_COMPLEX) 5609566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i]))); 561e6985dafSRichard Tran Mills #else 5629566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i]))); 563e6985dafSRichard Tran Mills #endif 564e6985dafSRichard Tran Mills } 5659566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 56649bd79ccSDebojyoti Ghosh } 56749bd79ccSDebojyoti Ghosh 568e6985dafSRichard Tran Mills /* Print appropriate details for T. */ 569e6985dafSRichard Tran Mills if (a->isTI) { 5709566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n")); 571e6985dafSRichard Tran Mills } else if (!a->T) { 5729566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n")); 573e6985dafSRichard Tran Mills } else if (format == PETSC_VIEWER_ASCII_IMPL) { 5749566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are ")); 575e6985dafSRichard Tran Mills for (i = 0; i < (a->p * a->q); i++) { 576e6985dafSRichard Tran Mills #if defined(PETSC_USE_COMPLEX) 5779566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i]))); 578e6985dafSRichard Tran Mills #else 5799566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i]))); 580e6985dafSRichard Tran Mills #endif 581e6985dafSRichard Tran Mills } 5829566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 583e6985dafSRichard Tran Mills } 58449bd79ccSDebojyoti Ghosh 585e6985dafSRichard Tran Mills /* Now print details for the AIJ matrix, using the AIJ viewer. */ 5869566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n")); 587e6985dafSRichard Tran Mills if (ismpikaij) { 588e6985dafSRichard Tran Mills Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 5899566063dSJacob Faibussowitsch PetscCall(MatView(b->A, viewer)); 590e6985dafSRichard Tran Mills } else { 5919566063dSJacob Faibussowitsch PetscCall(MatView(a->AIJ, viewer)); 592e6985dafSRichard Tran Mills } 593e6985dafSRichard Tran Mills 594e6985dafSRichard Tran Mills } else { 595e6985dafSRichard Tran Mills /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 5969566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B)); 5979566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 5989566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 599e6985dafSRichard Tran Mills } 60049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 60149bd79ccSDebojyoti Ghosh } 60249bd79ccSDebojyoti Ghosh 603*9371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIKAIJ(Mat A) { 60449bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 60549bd79ccSDebojyoti Ghosh 60649bd79ccSDebojyoti Ghosh PetscFunctionBegin; 6079566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 6089566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 6099566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 6109566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 6119566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 6129566063dSJacob Faibussowitsch PetscCall(PetscFree(b->S)); 6139566063dSJacob Faibussowitsch PetscCall(PetscFree(b->T)); 6149566063dSJacob Faibussowitsch PetscCall(PetscFree(b->ibdiag)); 6159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL)); 6169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL)); 6179566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 61849bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 61949bd79ccSDebojyoti Ghosh } 62049bd79ccSDebojyoti Ghosh 62149bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/ 62249bd79ccSDebojyoti Ghosh 62349bd79ccSDebojyoti Ghosh /* zz = yy + Axx */ 624*9371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 62549bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 62649bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 62749bd79ccSDebojyoti Ghosh const PetscScalar *s = b->S, *t = b->T; 62849bd79ccSDebojyoti Ghosh const PetscScalar *x, *v, *bx; 62949bd79ccSDebojyoti Ghosh PetscScalar *y, *sums; 63049bd79ccSDebojyoti Ghosh const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 63149bd79ccSDebojyoti Ghosh PetscInt n, i, jrow, j, l, p = b->p, q = b->q, k; 63249bd79ccSDebojyoti Ghosh 63349bd79ccSDebojyoti Ghosh PetscFunctionBegin; 63449bd79ccSDebojyoti Ghosh if (!yy) { 6359566063dSJacob Faibussowitsch PetscCall(VecSet(zz, 0.0)); 63649bd79ccSDebojyoti Ghosh } else { 6379566063dSJacob Faibussowitsch PetscCall(VecCopy(yy, zz)); 63849bd79ccSDebojyoti Ghosh } 63949bd79ccSDebojyoti Ghosh if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 64049bd79ccSDebojyoti Ghosh 6419566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6429566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 64349bd79ccSDebojyoti Ghosh idx = a->j; 64449bd79ccSDebojyoti Ghosh v = a->a; 64549bd79ccSDebojyoti Ghosh ii = a->i; 64649bd79ccSDebojyoti Ghosh 64749bd79ccSDebojyoti Ghosh if (b->isTI) { 64849bd79ccSDebojyoti Ghosh for (i = 0; i < m; i++) { 64949bd79ccSDebojyoti Ghosh jrow = ii[i]; 65049bd79ccSDebojyoti Ghosh n = ii[i + 1] - jrow; 65149bd79ccSDebojyoti Ghosh sums = y + p * i; 65249bd79ccSDebojyoti Ghosh for (j = 0; j < n; j++) { 653*9371c9d4SSatish Balay for (k = 0; k < p; k++) { sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k]; } 65449bd79ccSDebojyoti Ghosh } 65549bd79ccSDebojyoti Ghosh } 6569566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(3.0 * (a->nz) * p)); 65749bd79ccSDebojyoti Ghosh } else if (t) { 65849bd79ccSDebojyoti Ghosh for (i = 0; i < m; i++) { 65949bd79ccSDebojyoti Ghosh jrow = ii[i]; 66049bd79ccSDebojyoti Ghosh n = ii[i + 1] - jrow; 66149bd79ccSDebojyoti Ghosh sums = y + p * i; 66249bd79ccSDebojyoti Ghosh for (j = 0; j < n; j++) { 66349bd79ccSDebojyoti Ghosh for (k = 0; k < p; k++) { 664*9371c9d4SSatish Balay for (l = 0; l < q; l++) { sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l]; } 66549bd79ccSDebojyoti Ghosh } 66649bd79ccSDebojyoti Ghosh } 66749bd79ccSDebojyoti Ghosh } 668234d9204SRichard Tran Mills /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 669234d9204SRichard Tran Mills * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 670234d9204SRichard Tran Mills * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 671234d9204SRichard Tran Mills * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 6729566063dSJacob Faibussowitsch PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz)); 67349bd79ccSDebojyoti Ghosh } 67449bd79ccSDebojyoti Ghosh if (s) { 67549bd79ccSDebojyoti Ghosh for (i = 0; i < m; i++) { 67649bd79ccSDebojyoti Ghosh sums = y + p * i; 67749bd79ccSDebojyoti Ghosh bx = x + q * i; 67849bd79ccSDebojyoti Ghosh if (i < b->AIJ->cmap->n) { 67949bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 680*9371c9d4SSatish Balay for (k = 0; k < p; k++) { sums[k] += s[k + j * p] * bx[j]; } 68149bd79ccSDebojyoti Ghosh } 68249bd79ccSDebojyoti Ghosh } 68349bd79ccSDebojyoti Ghosh } 6849566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * m * p * q)); 68549bd79ccSDebojyoti Ghosh } 68649bd79ccSDebojyoti Ghosh 6879566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 68949bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 69049bd79ccSDebojyoti Ghosh } 69149bd79ccSDebojyoti Ghosh 692*9371c9d4SSatish Balay PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy) { 69349bd79ccSDebojyoti Ghosh PetscFunctionBegin; 6949566063dSJacob Faibussowitsch PetscCall(MatMultAdd_SeqKAIJ(A, xx, PETSC_NULL, yy)); 69549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 69649bd79ccSDebojyoti Ghosh } 69749bd79ccSDebojyoti Ghosh 69849bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h> 69949bd79ccSDebojyoti Ghosh 700*9371c9d4SSatish Balay PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values) { 70149bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 70249bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 70349bd79ccSDebojyoti Ghosh const PetscScalar *S = b->S; 70449bd79ccSDebojyoti Ghosh const PetscScalar *T = b->T; 70549bd79ccSDebojyoti Ghosh const PetscScalar *v = a->a; 70649bd79ccSDebojyoti Ghosh const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 70749bd79ccSDebojyoti Ghosh PetscInt i, j, *v_pivots, dof, dof2; 70849bd79ccSDebojyoti Ghosh PetscScalar *diag, aval, *v_work; 70949bd79ccSDebojyoti Ghosh 71049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 71108401ef6SPierre Jolivet PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse."); 712aed4548fSBarry Smith PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix."); 71349bd79ccSDebojyoti Ghosh 71449bd79ccSDebojyoti Ghosh dof = p; 71549bd79ccSDebojyoti Ghosh dof2 = dof * dof; 71649bd79ccSDebojyoti Ghosh 71749bd79ccSDebojyoti Ghosh if (b->ibdiagvalid) { 71849bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 71949bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 72049bd79ccSDebojyoti Ghosh } 72149bd79ccSDebojyoti Ghosh if (!b->ibdiag) { 7229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag)); 7239566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, dof2 * m * sizeof(PetscScalar))); 72449bd79ccSDebojyoti Ghosh } 72549bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 72649bd79ccSDebojyoti Ghosh diag = b->ibdiag; 72749bd79ccSDebojyoti Ghosh 7289566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots)); 72949bd79ccSDebojyoti Ghosh for (i = 0; i < m; i++) { 73049bd79ccSDebojyoti Ghosh if (S) { 7319566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar))); 73249bd79ccSDebojyoti Ghosh } else { 7339566063dSJacob Faibussowitsch PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar))); 73449bd79ccSDebojyoti Ghosh } 73549bd79ccSDebojyoti Ghosh if (b->isTI) { 73649bd79ccSDebojyoti Ghosh aval = 0; 737*9371c9d4SSatish Balay for (j = ii[i]; j < ii[i + 1]; j++) 738*9371c9d4SSatish Balay if (idx[j] == i) aval = v[j]; 73949bd79ccSDebojyoti Ghosh for (j = 0; j < dof; j++) diag[j + dof * j] += aval; 74049bd79ccSDebojyoti Ghosh } else if (T) { 74149bd79ccSDebojyoti Ghosh aval = 0; 742*9371c9d4SSatish Balay for (j = ii[i]; j < ii[i + 1]; j++) 743*9371c9d4SSatish Balay if (idx[j] == i) aval = v[j]; 74449bd79ccSDebojyoti Ghosh for (j = 0; j < dof2; j++) diag[j] += aval * T[j]; 74549bd79ccSDebojyoti Ghosh } 7469566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL)); 74749bd79ccSDebojyoti Ghosh diag += dof2; 74849bd79ccSDebojyoti Ghosh } 7499566063dSJacob Faibussowitsch PetscCall(PetscFree2(v_work, v_pivots)); 75049bd79ccSDebojyoti Ghosh 75149bd79ccSDebojyoti Ghosh b->ibdiagvalid = PETSC_TRUE; 75249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 75349bd79ccSDebojyoti Ghosh } 75449bd79ccSDebojyoti Ghosh 755*9371c9d4SSatish Balay static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B) { 75649bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data; 75749bd79ccSDebojyoti Ghosh 75849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 75949bd79ccSDebojyoti Ghosh *B = kaij->AIJ; 76049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 76149bd79ccSDebojyoti Ghosh } 76249bd79ccSDebojyoti Ghosh 763*9371c9d4SSatish Balay static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 764fac53fa1SPierre Jolivet Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 765fac53fa1SPierre Jolivet Mat AIJ, OAIJ, B; 766fac53fa1SPierre Jolivet PetscInt *d_nnz, *o_nnz = NULL, nz, i, j, m, d; 767fac53fa1SPierre Jolivet const PetscInt p = a->p, q = a->q; 768fac53fa1SPierre Jolivet PetscBool ismpikaij, missing; 769fac53fa1SPierre Jolivet 770fac53fa1SPierre Jolivet PetscFunctionBegin; 771fac53fa1SPierre Jolivet if (reuse != MAT_REUSE_MATRIX) { 7729566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 773fac53fa1SPierre Jolivet if (ismpikaij) { 774fac53fa1SPierre Jolivet Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 775fac53fa1SPierre Jolivet AIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 776fac53fa1SPierre Jolivet OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 777fac53fa1SPierre Jolivet } else { 778fac53fa1SPierre Jolivet AIJ = a->AIJ; 779fac53fa1SPierre Jolivet OAIJ = NULL; 780fac53fa1SPierre Jolivet } 7819566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 7829566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 7839566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATAIJ)); 7849566063dSJacob Faibussowitsch PetscCall(MatGetSize(AIJ, &m, NULL)); 7859566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */ 786fac53fa1SPierre Jolivet if (!missing || !a->S) d = m; 7879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m * p, &d_nnz)); 788fac53fa1SPierre Jolivet for (i = 0; i < m; ++i) { 7899566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL)); 790fac53fa1SPierre Jolivet for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q; 7919566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL)); 792fac53fa1SPierre Jolivet } 793fac53fa1SPierre Jolivet if (OAIJ) { 7949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m * p, &o_nnz)); 795fac53fa1SPierre Jolivet for (i = 0; i < m; ++i) { 7969566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL)); 797fac53fa1SPierre Jolivet for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q; 7989566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL)); 799fac53fa1SPierre Jolivet } 8009566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 801fac53fa1SPierre Jolivet } else { 8029566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz)); 803fac53fa1SPierre Jolivet } 8049566063dSJacob Faibussowitsch PetscCall(PetscFree(d_nnz)); 8059566063dSJacob Faibussowitsch PetscCall(PetscFree(o_nnz)); 806fac53fa1SPierre Jolivet } else B = *newmat; 8079566063dSJacob Faibussowitsch PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B)); 808fac53fa1SPierre Jolivet if (reuse == MAT_INPLACE_MATRIX) { 8099566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 810fac53fa1SPierre Jolivet } else *newmat = B; 811fac53fa1SPierre Jolivet PetscFunctionReturn(0); 812fac53fa1SPierre Jolivet } 813fac53fa1SPierre Jolivet 814*9371c9d4SSatish Balay PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) { 81549bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ *)A->data; 81649bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ *)kaij->AIJ->data; 81749bd79ccSDebojyoti Ghosh const PetscScalar *aa = a->a, *T = kaij->T, *v; 81849bd79ccSDebojyoti Ghosh const PetscInt m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi; 81949bd79ccSDebojyoti Ghosh const PetscScalar *b, *xb, *idiag; 82049bd79ccSDebojyoti Ghosh PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 82149bd79ccSDebojyoti Ghosh PetscInt i, j, k, i2, bs, bs2, nz; 82249bd79ccSDebojyoti Ghosh 82349bd79ccSDebojyoti Ghosh PetscFunctionBegin; 82449bd79ccSDebojyoti Ghosh its = its * lits; 825aed4548fSBarry Smith PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 82608401ef6SPierre Jolivet PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 82728b400f6SJacob Faibussowitsch PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 82808401ef6SPierre Jolivet PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 82908401ef6SPierre Jolivet PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks"); 830f7d195e4SLawrence Mitchell bs = p; 831f7d195e4SLawrence Mitchell bs2 = bs * bs; 83249bd79ccSDebojyoti Ghosh 83349bd79ccSDebojyoti Ghosh if (!m) PetscFunctionReturn(0); 83449bd79ccSDebojyoti Ghosh 8359566063dSJacob Faibussowitsch if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL)); 83649bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 83749bd79ccSDebojyoti Ghosh diag = a->diag; 83849bd79ccSDebojyoti Ghosh 83949bd79ccSDebojyoti Ghosh if (!kaij->sor.setup) { 8409566063dSJacob Faibussowitsch PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr)); 84149bd79ccSDebojyoti Ghosh kaij->sor.setup = PETSC_TRUE; 84249bd79ccSDebojyoti Ghosh } 84349bd79ccSDebojyoti Ghosh y = kaij->sor.y; 84449bd79ccSDebojyoti Ghosh w = kaij->sor.w; 84549bd79ccSDebojyoti Ghosh work = kaij->sor.work; 84649bd79ccSDebojyoti Ghosh t = kaij->sor.t; 84749bd79ccSDebojyoti Ghosh arr = kaij->sor.arr; 84849bd79ccSDebojyoti Ghosh 849d0609cedSBarry Smith PetscCall(VecGetArray(xx, &x)); 8509566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 85149bd79ccSDebojyoti Ghosh 85249bd79ccSDebojyoti Ghosh if (flag & SOR_ZERO_INITIAL_GUESS) { 85349bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 85449bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */ 8559566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar))); 85649bd79ccSDebojyoti Ghosh i2 = bs; 85749bd79ccSDebojyoti Ghosh idiag += bs2; 85849bd79ccSDebojyoti Ghosh for (i = 1; i < m; i++) { 85949bd79ccSDebojyoti Ghosh v = aa + ai[i]; 86049bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 86149bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 86249bd79ccSDebojyoti Ghosh 86349bd79ccSDebojyoti Ghosh if (T) { /* b - T (Arow * x) */ 8649566063dSJacob Faibussowitsch PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar))); 86549bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 86649bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 86749bd79ccSDebojyoti Ghosh } 86849bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]); 86949bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k]; 87049bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 8719566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 87249bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 87349bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k]; 87449bd79ccSDebojyoti Ghosh } 87549bd79ccSDebojyoti Ghosh } else { 8769566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 87749bd79ccSDebojyoti Ghosh } 87849bd79ccSDebojyoti Ghosh 87949bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y); 88049bd79ccSDebojyoti Ghosh for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j]; 88149bd79ccSDebojyoti Ghosh 88249bd79ccSDebojyoti Ghosh idiag += bs2; 88349bd79ccSDebojyoti Ghosh i2 += bs; 88449bd79ccSDebojyoti Ghosh } 88549bd79ccSDebojyoti Ghosh /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 8869566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 88749bd79ccSDebojyoti Ghosh xb = t; 88849bd79ccSDebojyoti Ghosh } else xb = b; 88949bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 89049bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag + bs2 * (m - 1); 89149bd79ccSDebojyoti Ghosh i2 = bs * (m - 1); 8929566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 89349bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 89449bd79ccSDebojyoti Ghosh i2 -= bs; 89549bd79ccSDebojyoti Ghosh idiag -= bs2; 89649bd79ccSDebojyoti Ghosh for (i = m - 2; i >= 0; i--) { 89749bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 89849bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 89949bd79ccSDebojyoti Ghosh nz = ai[i + 1] - diag[i] - 1; 90049bd79ccSDebojyoti Ghosh 90149bd79ccSDebojyoti Ghosh if (T) { /* FIXME: This branch untested */ 9029566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 90349bd79ccSDebojyoti Ghosh /* copy all rows of x that are needed into contiguous space */ 90449bd79ccSDebojyoti Ghosh workt = work; 90549bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9069566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 90749bd79ccSDebojyoti Ghosh workt += bs; 90849bd79ccSDebojyoti Ghosh } 90949bd79ccSDebojyoti Ghosh arrt = arr; 91049bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9119566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 91249bd79ccSDebojyoti Ghosh for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 91349bd79ccSDebojyoti Ghosh arrt += bs2; 91449bd79ccSDebojyoti Ghosh } 91549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 91649bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 9179566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar))); 91849bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 91949bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 92049bd79ccSDebojyoti Ghosh } 92149bd79ccSDebojyoti Ghosh } 92249bd79ccSDebojyoti Ghosh 92349bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */ 92449bd79ccSDebojyoti Ghosh for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j]; 92549bd79ccSDebojyoti Ghosh 92649bd79ccSDebojyoti Ghosh idiag -= bs2; 92749bd79ccSDebojyoti Ghosh i2 -= bs; 92849bd79ccSDebojyoti Ghosh } 9299566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 93049bd79ccSDebojyoti Ghosh } 93149bd79ccSDebojyoti Ghosh its--; 93249bd79ccSDebojyoti Ghosh } 93349bd79ccSDebojyoti Ghosh while (its--) { /* FIXME: This branch not updated */ 93449bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 93549bd79ccSDebojyoti Ghosh i2 = 0; 93649bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 93749bd79ccSDebojyoti Ghosh for (i = 0; i < m; i++) { 9389566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 93949bd79ccSDebojyoti Ghosh 94049bd79ccSDebojyoti Ghosh v = aa + ai[i]; 94149bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 94249bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 94349bd79ccSDebojyoti Ghosh workt = work; 94449bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9459566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 94649bd79ccSDebojyoti Ghosh workt += bs; 94749bd79ccSDebojyoti Ghosh } 94849bd79ccSDebojyoti Ghosh arrt = arr; 94949bd79ccSDebojyoti Ghosh if (T) { 95049bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9519566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 95249bd79ccSDebojyoti Ghosh for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 95349bd79ccSDebojyoti Ghosh arrt += bs2; 95449bd79ccSDebojyoti Ghosh } 95549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 95649bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 95749bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9589566063dSJacob Faibussowitsch PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 95949bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 96049bd79ccSDebojyoti Ghosh arrt += bs2; 96149bd79ccSDebojyoti Ghosh } 96249bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 96349bd79ccSDebojyoti Ghosh } 9649566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar))); 96549bd79ccSDebojyoti Ghosh 96649bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 96749bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 96849bd79ccSDebojyoti Ghosh nz = ai[i + 1] - diag[i] - 1; 96949bd79ccSDebojyoti Ghosh workt = work; 97049bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9719566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 97249bd79ccSDebojyoti Ghosh workt += bs; 97349bd79ccSDebojyoti Ghosh } 97449bd79ccSDebojyoti Ghosh arrt = arr; 97549bd79ccSDebojyoti Ghosh if (T) { 97649bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9779566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 97849bd79ccSDebojyoti Ghosh for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 97949bd79ccSDebojyoti Ghosh arrt += bs2; 98049bd79ccSDebojyoti Ghosh } 98149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 98249bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 98349bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9849566063dSJacob Faibussowitsch PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 98549bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 98649bd79ccSDebojyoti Ghosh arrt += bs2; 98749bd79ccSDebojyoti Ghosh } 98849bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 98949bd79ccSDebojyoti Ghosh } 99049bd79ccSDebojyoti Ghosh 99149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 99249bd79ccSDebojyoti Ghosh for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 99349bd79ccSDebojyoti Ghosh 99449bd79ccSDebojyoti Ghosh idiag += bs2; 99549bd79ccSDebojyoti Ghosh i2 += bs; 99649bd79ccSDebojyoti Ghosh } 99749bd79ccSDebojyoti Ghosh xb = t; 998*9371c9d4SSatish Balay } else xb = b; 99949bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 100049bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag + bs2 * (m - 1); 100149bd79ccSDebojyoti Ghosh i2 = bs * (m - 1); 100249bd79ccSDebojyoti Ghosh if (xb == b) { 100349bd79ccSDebojyoti Ghosh for (i = m - 1; i >= 0; i--) { 10049566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 100549bd79ccSDebojyoti Ghosh 100649bd79ccSDebojyoti Ghosh v = aa + ai[i]; 100749bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 100849bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 100949bd79ccSDebojyoti Ghosh workt = work; 101049bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10119566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 101249bd79ccSDebojyoti Ghosh workt += bs; 101349bd79ccSDebojyoti Ghosh } 101449bd79ccSDebojyoti Ghosh arrt = arr; 101549bd79ccSDebojyoti Ghosh if (T) { 101649bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10179566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 101849bd79ccSDebojyoti Ghosh for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 101949bd79ccSDebojyoti Ghosh arrt += bs2; 102049bd79ccSDebojyoti Ghosh } 102149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 102249bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 102349bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10249566063dSJacob Faibussowitsch PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 102549bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 102649bd79ccSDebojyoti Ghosh arrt += bs2; 102749bd79ccSDebojyoti Ghosh } 102849bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 102949bd79ccSDebojyoti Ghosh } 103049bd79ccSDebojyoti Ghosh 103149bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 103249bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 103349bd79ccSDebojyoti Ghosh nz = ai[i + 1] - diag[i] - 1; 103449bd79ccSDebojyoti Ghosh workt = work; 103549bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10369566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 103749bd79ccSDebojyoti Ghosh workt += bs; 103849bd79ccSDebojyoti Ghosh } 103949bd79ccSDebojyoti Ghosh arrt = arr; 104049bd79ccSDebojyoti Ghosh if (T) { 104149bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10429566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 104349bd79ccSDebojyoti Ghosh for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 104449bd79ccSDebojyoti Ghosh arrt += bs2; 104549bd79ccSDebojyoti Ghosh } 104649bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 104749bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 104849bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10499566063dSJacob Faibussowitsch PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 105049bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 105149bd79ccSDebojyoti Ghosh arrt += bs2; 105249bd79ccSDebojyoti Ghosh } 105349bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 105449bd79ccSDebojyoti Ghosh } 105549bd79ccSDebojyoti Ghosh 105649bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 105749bd79ccSDebojyoti Ghosh for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 105849bd79ccSDebojyoti Ghosh } 105949bd79ccSDebojyoti Ghosh } else { 106049bd79ccSDebojyoti Ghosh for (i = m - 1; i >= 0; i--) { 10619566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 106249bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 106349bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 106449bd79ccSDebojyoti Ghosh nz = ai[i + 1] - diag[i] - 1; 106549bd79ccSDebojyoti Ghosh workt = work; 106649bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10679566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 106849bd79ccSDebojyoti Ghosh workt += bs; 106949bd79ccSDebojyoti Ghosh } 107049bd79ccSDebojyoti Ghosh arrt = arr; 107149bd79ccSDebojyoti Ghosh if (T) { 107249bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10739566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 107449bd79ccSDebojyoti Ghosh for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 107549bd79ccSDebojyoti Ghosh arrt += bs2; 107649bd79ccSDebojyoti Ghosh } 107749bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 107849bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 107949bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10809566063dSJacob Faibussowitsch PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 108149bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 108249bd79ccSDebojyoti Ghosh arrt += bs2; 108349bd79ccSDebojyoti Ghosh } 108449bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 108549bd79ccSDebojyoti Ghosh } 108649bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 108749bd79ccSDebojyoti Ghosh for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 108849bd79ccSDebojyoti Ghosh } 108949bd79ccSDebojyoti Ghosh } 10909566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 109149bd79ccSDebojyoti Ghosh } 109249bd79ccSDebojyoti Ghosh } 109349bd79ccSDebojyoti Ghosh 1094d0609cedSBarry Smith PetscCall(VecRestoreArray(xx, &x)); 10959566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 109649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 109749bd79ccSDebojyoti Ghosh } 109849bd79ccSDebojyoti Ghosh 109949bd79ccSDebojyoti Ghosh /*===================================================================================*/ 110049bd79ccSDebojyoti Ghosh 1101*9371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 110249bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 110349bd79ccSDebojyoti Ghosh 110449bd79ccSDebojyoti Ghosh PetscFunctionBegin; 110549bd79ccSDebojyoti Ghosh if (!yy) { 11069566063dSJacob Faibussowitsch PetscCall(VecSet(zz, 0.0)); 110749bd79ccSDebojyoti Ghosh } else { 11089566063dSJacob Faibussowitsch PetscCall(VecCopy(yy, zz)); 110949bd79ccSDebojyoti Ghosh } 11109566063dSJacob Faibussowitsch PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 111149bd79ccSDebojyoti Ghosh /* start the scatter */ 11129566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 11139566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz)); 11149566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 11159566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 111649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 111749bd79ccSDebojyoti Ghosh } 111849bd79ccSDebojyoti Ghosh 1119*9371c9d4SSatish Balay PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy) { 112049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 11219566063dSJacob Faibussowitsch PetscCall(MatMultAdd_MPIKAIJ(A, xx, PETSC_NULL, yy)); 112249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 112349bd79ccSDebojyoti Ghosh } 112449bd79ccSDebojyoti Ghosh 1125*9371c9d4SSatish Balay PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values) { 112649bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 112749bd79ccSDebojyoti Ghosh 112849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 11299566063dSJacob Faibussowitsch PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */ 11309566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values)); 113149bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 113249bd79ccSDebojyoti Ghosh } 113349bd79ccSDebojyoti Ghosh 113449bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/ 113549bd79ccSDebojyoti Ghosh 1136*9371c9d4SSatish Balay PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) { 113749bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 11381ca5ffdbSRichard Tran Mills PetscErrorCode diag = PETSC_FALSE; 113949bd79ccSDebojyoti Ghosh PetscInt nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c; 114049bd79ccSDebojyoti Ghosh PetscScalar *vaij, *v, *S = b->S, *T = b->T; 114149bd79ccSDebojyoti Ghosh 114249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 114328b400f6SJacob Faibussowitsch PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 114449bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 1145aed4548fSBarry Smith PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 114649bd79ccSDebojyoti Ghosh 114749bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 114849bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 114949bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 115049bd79ccSDebojyoti Ghosh if (values) *values = NULL; 115149bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 115249bd79ccSDebojyoti Ghosh } 115349bd79ccSDebojyoti Ghosh 115449bd79ccSDebojyoti Ghosh if (T || b->isTI) { 11559566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij)); 115649bd79ccSDebojyoti Ghosh c = nzaij; 115749bd79ccSDebojyoti Ghosh for (i = 0; i < nzaij; i++) { 115849bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 115949bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 116049bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 116149bd79ccSDebojyoti Ghosh c = i; 116249bd79ccSDebojyoti Ghosh } 116349bd79ccSDebojyoti Ghosh } 116449bd79ccSDebojyoti Ghosh } else nzaij = c = 0; 116549bd79ccSDebojyoti Ghosh 116649bd79ccSDebojyoti Ghosh /* calculate size of row */ 116749bd79ccSDebojyoti Ghosh nz = 0; 116849bd79ccSDebojyoti Ghosh if (S) nz += q; 116949bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q); 117049bd79ccSDebojyoti Ghosh 117149bd79ccSDebojyoti Ghosh if (cols || values) { 11729566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 117338822f9dSRichard Tran Mills for (i = 0; i < q; i++) { 117438822f9dSRichard Tran Mills /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 117538822f9dSRichard Tran Mills v[i] = 0.0; 117638822f9dSRichard Tran Mills } 117749bd79ccSDebojyoti Ghosh if (b->isTI) { 117849bd79ccSDebojyoti Ghosh for (i = 0; i < nzaij; i++) { 117949bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 118049bd79ccSDebojyoti Ghosh idx[i * q + j] = colsaij[i] * q + j; 118149bd79ccSDebojyoti Ghosh v[i * q + j] = (j == s ? vaij[i] : 0); 118249bd79ccSDebojyoti Ghosh } 118349bd79ccSDebojyoti Ghosh } 118449bd79ccSDebojyoti Ghosh } else if (T) { 118549bd79ccSDebojyoti Ghosh for (i = 0; i < nzaij; i++) { 118649bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 118749bd79ccSDebojyoti Ghosh idx[i * q + j] = colsaij[i] * q + j; 118849bd79ccSDebojyoti Ghosh v[i * q + j] = vaij[i] * T[s + j * p]; 118949bd79ccSDebojyoti Ghosh } 119049bd79ccSDebojyoti Ghosh } 119149bd79ccSDebojyoti Ghosh } 119249bd79ccSDebojyoti Ghosh if (S) { 119349bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 119449bd79ccSDebojyoti Ghosh idx[c * q + j] = r * q + j; 119549bd79ccSDebojyoti Ghosh v[c * q + j] += S[s + j * p]; 119649bd79ccSDebojyoti Ghosh } 119749bd79ccSDebojyoti Ghosh } 119849bd79ccSDebojyoti Ghosh } 119949bd79ccSDebojyoti Ghosh 120049bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 120149bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 120249bd79ccSDebojyoti Ghosh if (values) *values = v; 120349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 120449bd79ccSDebojyoti Ghosh } 120549bd79ccSDebojyoti Ghosh 1206*9371c9d4SSatish Balay PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 120749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 1208cb4a9cd9SHong Zhang if (nz) *nz = 0; 12099566063dSJacob Faibussowitsch PetscCall(PetscFree2(*idx, *v)); 121049bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 121149bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 121249bd79ccSDebojyoti Ghosh } 121349bd79ccSDebojyoti Ghosh 1214*9371c9d4SSatish Balay PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) { 121549bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 121649bd79ccSDebojyoti Ghosh Mat AIJ = b->A; 1217fc64b2cfSRichard Tran Mills PetscBool diag = PETSC_FALSE; 1218761d359dSRichard Tran Mills Mat MatAIJ, MatOAIJ; 121949bd79ccSDebojyoti Ghosh const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray; 1220389eba51SJed Brown PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow; 122149bd79ccSDebojyoti Ghosh PetscScalar *v, *vals, *ovals, *S = b->S, *T = b->T; 122249bd79ccSDebojyoti Ghosh 122349bd79ccSDebojyoti Ghosh PetscFunctionBegin; 12249566063dSJacob Faibussowitsch PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1225761d359dSRichard Tran Mills MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 1226761d359dSRichard Tran Mills MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 122728b400f6SJacob Faibussowitsch PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 122849bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 1229aed4548fSBarry Smith PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 123049bd79ccSDebojyoti Ghosh lrow = row - rstart; 123149bd79ccSDebojyoti Ghosh 123249bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 123349bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 123449bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 123549bd79ccSDebojyoti Ghosh if (values) *values = NULL; 123649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 123749bd79ccSDebojyoti Ghosh } 123849bd79ccSDebojyoti Ghosh 123949bd79ccSDebojyoti Ghosh r = lrow / p; 124049bd79ccSDebojyoti Ghosh s = lrow % p; 124149bd79ccSDebojyoti Ghosh 124249bd79ccSDebojyoti Ghosh if (T || b->isTI) { 12439566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray)); 12449566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals)); 12459566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals)); 124649bd79ccSDebojyoti Ghosh 124749bd79ccSDebojyoti Ghosh c = ncolsaij + ncolsoaij; 124849bd79ccSDebojyoti Ghosh for (i = 0; i < ncolsaij; i++) { 124949bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 125049bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 125149bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 125249bd79ccSDebojyoti Ghosh c = i; 125349bd79ccSDebojyoti Ghosh } 125449bd79ccSDebojyoti Ghosh } 125549bd79ccSDebojyoti Ghosh } else c = 0; 125649bd79ccSDebojyoti Ghosh 125749bd79ccSDebojyoti Ghosh /* calculate size of row */ 125849bd79ccSDebojyoti Ghosh nz = 0; 125949bd79ccSDebojyoti Ghosh if (S) nz += q; 126049bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q); 126149bd79ccSDebojyoti Ghosh 126249bd79ccSDebojyoti Ghosh if (cols || values) { 12639566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1264a437a796SRichard Tran Mills for (i = 0; i < q; i++) { 1265a437a796SRichard Tran Mills /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1266a437a796SRichard Tran Mills v[i] = 0.0; 1267a437a796SRichard Tran Mills } 126849bd79ccSDebojyoti Ghosh if (b->isTI) { 126949bd79ccSDebojyoti Ghosh for (i = 0; i < ncolsaij; i++) { 127049bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 127149bd79ccSDebojyoti Ghosh idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 127249bd79ccSDebojyoti Ghosh v[i * q + j] = (j == s ? vals[i] : 0.0); 127349bd79ccSDebojyoti Ghosh } 127449bd79ccSDebojyoti Ghosh } 127549bd79ccSDebojyoti Ghosh for (i = 0; i < ncolsoaij; i++) { 127649bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 127749bd79ccSDebojyoti Ghosh idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 127849bd79ccSDebojyoti Ghosh v[(i + ncolsaij) * q + j] = (j == s ? ovals[i] : 0.0); 127949bd79ccSDebojyoti Ghosh } 128049bd79ccSDebojyoti Ghosh } 128149bd79ccSDebojyoti Ghosh } else if (T) { 128249bd79ccSDebojyoti Ghosh for (i = 0; i < ncolsaij; i++) { 128349bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 128449bd79ccSDebojyoti Ghosh idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 128549bd79ccSDebojyoti Ghosh v[i * q + j] = vals[i] * T[s + j * p]; 128649bd79ccSDebojyoti Ghosh } 128749bd79ccSDebojyoti Ghosh } 128849bd79ccSDebojyoti Ghosh for (i = 0; i < ncolsoaij; i++) { 128949bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 129049bd79ccSDebojyoti Ghosh idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 129149bd79ccSDebojyoti Ghosh v[(i + ncolsaij) * q + j] = ovals[i] * T[s + j * p]; 129249bd79ccSDebojyoti Ghosh } 129349bd79ccSDebojyoti Ghosh } 129449bd79ccSDebojyoti Ghosh } 129549bd79ccSDebojyoti Ghosh if (S) { 129649bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 129749bd79ccSDebojyoti Ghosh idx[c * q + j] = (r + rstart / p) * q + j; 129849bd79ccSDebojyoti Ghosh v[c * q + j] += S[s + j * p]; 129949bd79ccSDebojyoti Ghosh } 130049bd79ccSDebojyoti Ghosh } 130149bd79ccSDebojyoti Ghosh } 130249bd79ccSDebojyoti Ghosh 130349bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 130449bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 130549bd79ccSDebojyoti Ghosh if (values) *values = v; 130649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 130749bd79ccSDebojyoti Ghosh } 130849bd79ccSDebojyoti Ghosh 1309*9371c9d4SSatish Balay PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 131049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 13119566063dSJacob Faibussowitsch PetscCall(PetscFree2(*idx, *v)); 131249bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 131349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 131449bd79ccSDebojyoti Ghosh } 131549bd79ccSDebojyoti Ghosh 1316*9371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) { 131749bd79ccSDebojyoti Ghosh Mat A; 131849bd79ccSDebojyoti Ghosh 131949bd79ccSDebojyoti Ghosh PetscFunctionBegin; 13209566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 13219566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 13229566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 132349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 132449bd79ccSDebojyoti Ghosh } 132549bd79ccSDebojyoti Ghosh 132649bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */ 132749bd79ccSDebojyoti Ghosh /*@C 132849bd79ccSDebojyoti Ghosh MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 132949bd79ccSDebojyoti Ghosh 133049bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 133149bd79ccSDebojyoti Ghosh 133249bd79ccSDebojyoti Ghosh where 133349bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 133449bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 133549bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 133649bd79ccSDebojyoti Ghosh I is the identity matrix 133749bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 133849bd79ccSDebojyoti Ghosh 1339d3b90ce1SRichard Tran Mills S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 134049bd79ccSDebojyoti Ghosh 134149bd79ccSDebojyoti Ghosh Collective 134249bd79ccSDebojyoti Ghosh 134349bd79ccSDebojyoti Ghosh Input Parameters: 134449bd79ccSDebojyoti Ghosh + A - the AIJ matrix 134549bd79ccSDebojyoti Ghosh . p - number of rows in S and T 1346d3b90ce1SRichard Tran Mills . q - number of columns in S and T 1347d3b90ce1SRichard Tran Mills . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1348d3b90ce1SRichard Tran Mills - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 134949bd79ccSDebojyoti Ghosh 135049bd79ccSDebojyoti Ghosh Output Parameter: 135149bd79ccSDebojyoti Ghosh . kaij - the new KAIJ matrix 135249bd79ccSDebojyoti Ghosh 1353d3b90ce1SRichard Tran Mills Notes: 1354d3b90ce1SRichard Tran Mills This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1355d3b90ce1SRichard Tran Mills Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 135649bd79ccSDebojyoti Ghosh 1357761d359dSRichard Tran Mills Developer Notes: 1358761d359dSRichard Tran Mills In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state 1359761d359dSRichard Tran Mills of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily 1360761d359dSRichard Tran Mills rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added, 1361761d359dSRichard Tran Mills routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine). 1362761d359dSRichard Tran Mills 136349bd79ccSDebojyoti Ghosh Level: advanced 136449bd79ccSDebojyoti Ghosh 1365db781477SPatrick Sanan .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ` 136649bd79ccSDebojyoti Ghosh @*/ 1367*9371c9d4SSatish Balay PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij) { 136849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 13699566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij)); 13709566063dSJacob Faibussowitsch PetscCall(MatSetType(*kaij, MATKAIJ)); 13719566063dSJacob Faibussowitsch PetscCall(MatKAIJSetAIJ(*kaij, A)); 13729566063dSJacob Faibussowitsch PetscCall(MatKAIJSetS(*kaij, p, q, S)); 13739566063dSJacob Faibussowitsch PetscCall(MatKAIJSetT(*kaij, p, q, T)); 13749566063dSJacob Faibussowitsch PetscCall(MatSetUp(*kaij)); 13750567c835SRichard Tran Mills PetscFunctionReturn(0); 13760567c835SRichard Tran Mills } 137749bd79ccSDebojyoti Ghosh 13780567c835SRichard Tran Mills /*MC 13795881e567SRichard Tran Mills MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 13805881e567SRichard Tran Mills [I \otimes S + A \otimes T], 13810567c835SRichard Tran Mills where 13825881e567SRichard Tran Mills S is a dense (p \times q) matrix, 13835881e567SRichard Tran Mills T is a dense (p \times q) matrix, 13845881e567SRichard Tran Mills A is an AIJ (n \times n) matrix, 13855881e567SRichard Tran Mills and I is the identity matrix. 13865881e567SRichard Tran Mills The resulting matrix is (np \times nq). 13870567c835SRichard Tran Mills 1388d3b90ce1SRichard Tran Mills S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 13890567c835SRichard Tran Mills 13905881e567SRichard Tran Mills Notes: 13915881e567SRichard Tran Mills A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 13925881e567SRichard Tran Mills where x and b are column vectors containing the row-major representations of X and B. 13935881e567SRichard Tran Mills 13940567c835SRichard Tran Mills Level: advanced 13950567c835SRichard Tran Mills 1396db781477SPatrick Sanan .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()` 13970567c835SRichard Tran Mills M*/ 13980567c835SRichard Tran Mills 1399*9371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) { 14000567c835SRichard Tran Mills Mat_MPIKAIJ *b; 14010567c835SRichard Tran Mills PetscMPIInt size; 14020567c835SRichard Tran Mills 14030567c835SRichard Tran Mills PetscFunctionBegin; 14049566063dSJacob Faibussowitsch PetscCall(PetscNewLog(A, &b)); 14050567c835SRichard Tran Mills A->data = (void *)b; 14060567c835SRichard Tran Mills 14079566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 14080567c835SRichard Tran Mills 1409f4259b30SLisandro Dalcin b->w = NULL; 14109566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 14110567c835SRichard Tran Mills if (size == 1) { 14129566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ)); 14130567c835SRichard Tran Mills A->ops->destroy = MatDestroy_SeqKAIJ; 1414bb6fb833SRichard Tran Mills A->ops->mult = MatMult_SeqKAIJ; 1415bb6fb833SRichard Tran Mills A->ops->multadd = MatMultAdd_SeqKAIJ; 1416bb6fb833SRichard Tran Mills A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 14170567c835SRichard Tran Mills A->ops->getrow = MatGetRow_SeqKAIJ; 14180567c835SRichard Tran Mills A->ops->restorerow = MatRestoreRow_SeqKAIJ; 14190567c835SRichard Tran Mills A->ops->sor = MatSOR_SeqKAIJ; 14209566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ)); 14210567c835SRichard Tran Mills } else { 14229566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ)); 14230567c835SRichard Tran Mills A->ops->destroy = MatDestroy_MPIKAIJ; 1424bb6fb833SRichard Tran Mills A->ops->mult = MatMult_MPIKAIJ; 1425bb6fb833SRichard Tran Mills A->ops->multadd = MatMultAdd_MPIKAIJ; 1426bb6fb833SRichard Tran Mills A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 14270567c835SRichard Tran Mills A->ops->getrow = MatGetRow_MPIKAIJ; 14280567c835SRichard Tran Mills A->ops->restorerow = MatRestoreRow_MPIKAIJ; 14299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ)); 14309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ)); 14310567c835SRichard Tran Mills } 143206d61a37SPierre Jolivet A->ops->setup = MatSetUp_KAIJ; 143306d61a37SPierre Jolivet A->ops->view = MatView_KAIJ; 14340567c835SRichard Tran Mills A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 143549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 143649bd79ccSDebojyoti Ghosh } 1437