149bd79ccSDebojyoti Ghosh 249bd79ccSDebojyoti Ghosh /* 349bd79ccSDebojyoti Ghosh Defines the basic matrix operations for the KAIJ matrix storage format. 449bd79ccSDebojyoti Ghosh This format is used to evaluate matrices of the form: 549bd79ccSDebojyoti Ghosh 649bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 749bd79ccSDebojyoti Ghosh 849bd79ccSDebojyoti Ghosh where 949bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 1049bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 1149bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 1249bd79ccSDebojyoti Ghosh I is the identity matrix 1349bd79ccSDebojyoti Ghosh 1449bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 1549bd79ccSDebojyoti Ghosh 1649bd79ccSDebojyoti Ghosh We provide: 1749bd79ccSDebojyoti Ghosh MatMult() 1849bd79ccSDebojyoti Ghosh MatMultAdd() 1949bd79ccSDebojyoti Ghosh MatInvertBlockDiagonal() 2049bd79ccSDebojyoti Ghosh and 2149bd79ccSDebojyoti Ghosh MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 2249bd79ccSDebojyoti Ghosh 2349bd79ccSDebojyoti Ghosh This single directory handles both the sequential and parallel codes 2449bd79ccSDebojyoti Ghosh */ 2549bd79ccSDebojyoti Ghosh 2649bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 2749bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h> 2849bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h> 2949bd79ccSDebojyoti Ghosh 3049bd79ccSDebojyoti Ghosh /*@C 3111a5261eSBarry Smith MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 3249bd79ccSDebojyoti Ghosh 3311a5261eSBarry Smith Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 3449bd79ccSDebojyoti Ghosh 3549bd79ccSDebojyoti Ghosh Input Parameter: 3611a5261eSBarry Smith . A - the `MATKAIJ` matrix 3749bd79ccSDebojyoti Ghosh 3849bd79ccSDebojyoti Ghosh Output Parameter: 3911a5261eSBarry Smith . B - the `MATAIJ` matrix 4049bd79ccSDebojyoti Ghosh 4149bd79ccSDebojyoti Ghosh Level: advanced 4249bd79ccSDebojyoti Ghosh 4311a5261eSBarry Smith Note: 4411a5261eSBarry Smith The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 4549bd79ccSDebojyoti Ghosh 4611a5261eSBarry Smith .seealso: `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ` 4749bd79ccSDebojyoti Ghosh @*/ 489371c9d4SSatish Balay PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B) { 4949bd79ccSDebojyoti Ghosh PetscBool ismpikaij, isseqkaij; 5049bd79ccSDebojyoti Ghosh 5149bd79ccSDebojyoti Ghosh PetscFunctionBegin; 529566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 539566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij)); 5449bd79ccSDebojyoti Ghosh if (ismpikaij) { 5549bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 5649bd79ccSDebojyoti Ghosh 5749bd79ccSDebojyoti Ghosh *B = b->A; 5849bd79ccSDebojyoti Ghosh } else if (isseqkaij) { 5949bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 6049bd79ccSDebojyoti Ghosh 6149bd79ccSDebojyoti Ghosh *B = b->AIJ; 62b04351cbSRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ"); 6349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 6449bd79ccSDebojyoti Ghosh } 6549bd79ccSDebojyoti Ghosh 6649bd79ccSDebojyoti Ghosh /*@C 6711a5261eSBarry Smith MatKAIJGetS - Get the S matrix describing the shift action of the `MATKAIJ` matrix 6849bd79ccSDebojyoti Ghosh 690567c835SRichard Tran Mills Not Collective; the entire S is stored and returned independently on all processes. 7049bd79ccSDebojyoti Ghosh 7149bd79ccSDebojyoti Ghosh Input Parameter: 7211a5261eSBarry Smith . A - the `MATKAIJ` matrix 7349bd79ccSDebojyoti Ghosh 74a5b5c723SRichard Tran Mills Output Parameters: 75a5b5c723SRichard Tran Mills + m - the number of rows in S 76a5b5c723SRichard Tran Mills . n - the number of columns in S 77a5b5c723SRichard Tran Mills - S - the S matrix, in form of a scalar array in column-major format 7849bd79ccSDebojyoti Ghosh 7911a5261eSBarry Smith Note: 8011a5261eSBarry Smith All output parameters are optional (pass NULL if not desired) 8131a97b9aSRichard Tran Mills 8249bd79ccSDebojyoti Ghosh Level: advanced 8349bd79ccSDebojyoti Ghosh 8411a5261eSBarry Smith .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 8549bd79ccSDebojyoti Ghosh @*/ 869371c9d4SSatish Balay PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S) { 8749bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 8849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 89a5b5c723SRichard Tran Mills if (m) *m = b->p; 90a5b5c723SRichard Tran Mills if (n) *n = b->q; 91a5b5c723SRichard Tran Mills if (S) *S = b->S; 92a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 93a5b5c723SRichard Tran Mills } 94a5b5c723SRichard Tran Mills 95a5b5c723SRichard Tran Mills /*@C 9611a5261eSBarry Smith MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the `MATKAIJ` matrix 97a5b5c723SRichard Tran Mills 98a5b5c723SRichard Tran Mills Not Collective; the entire S is stored and returned independently on all processes. 99a5b5c723SRichard Tran Mills 100a5b5c723SRichard Tran Mills Input Parameter: 10111a5261eSBarry Smith . A - the `MATKAIJ` matrix 102a5b5c723SRichard Tran Mills 103a5b5c723SRichard Tran Mills Output Parameters: 104a5b5c723SRichard Tran Mills + m - the number of rows in S 105a5b5c723SRichard Tran Mills . n - the number of columns in S 106a5b5c723SRichard Tran Mills - S - the S matrix, in form of a scalar array in column-major format 107a5b5c723SRichard Tran Mills 10811a5261eSBarry Smith Note: 10911a5261eSBarry Smith All output parameters are optional (pass NULL if not desired) 110a5b5c723SRichard Tran Mills 111a5b5c723SRichard Tran Mills Level: advanced 112a5b5c723SRichard Tran Mills 11311a5261eSBarry Smith .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 114a5b5c723SRichard Tran Mills @*/ 1159371c9d4SSatish Balay PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S) { 116a5b5c723SRichard Tran Mills Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 117a5b5c723SRichard Tran Mills PetscFunctionBegin; 118a5b5c723SRichard Tran Mills if (m) *m = b->p; 119a5b5c723SRichard Tran Mills if (n) *n = b->q; 120a5b5c723SRichard Tran Mills if (S) *S = b->S; 121a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 122a5b5c723SRichard Tran Mills } 123a5b5c723SRichard Tran Mills 124a5b5c723SRichard Tran Mills /*@C 12511a5261eSBarry Smith MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()` 126a5b5c723SRichard Tran Mills 127a5b5c723SRichard Tran Mills Not collective 128a5b5c723SRichard Tran Mills 129a5b5c723SRichard Tran Mills Input Parameter: 13011a5261eSBarry Smith . A - the `MATKAIJ` matrix 131a5b5c723SRichard Tran Mills 132a5b5c723SRichard Tran Mills Output Parameter: 13311a5261eSBarry Smith . S - location of pointer to array obtained with `MatKAIJGetS()` 134a5b5c723SRichard Tran Mills 13511a5261eSBarry Smith Note: 13611a5261eSBarry Smith This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 138a5b5c723SRichard Tran Mills 139a5b5c723SRichard Tran Mills Level: advanced 14011a5261eSBarry Smith 14111a5261eSBarry Smith .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 142a5b5c723SRichard Tran Mills @*/ 1439371c9d4SSatish Balay PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S) { 144a5b5c723SRichard Tran Mills PetscFunctionBegin; 145a5b5c723SRichard Tran Mills if (S) *S = NULL; 1469566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)A)); 147a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 148a5b5c723SRichard Tran Mills } 149a5b5c723SRichard Tran Mills 150a5b5c723SRichard Tran Mills /*@C 15111a5261eSBarry Smith MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()` 152a5b5c723SRichard Tran Mills 153a5b5c723SRichard Tran Mills Not collective 154a5b5c723SRichard Tran Mills 155a5b5c723SRichard Tran Mills Input Parameter: 15611a5261eSBarry Smith . A - the `MATKAIJ` matrix 157a5b5c723SRichard Tran Mills 158a5b5c723SRichard Tran Mills Output Parameter: 15911a5261eSBarry Smith . S - location of pointer to array obtained with `MatKAIJGetS()` 160a5b5c723SRichard Tran Mills 16111a5261eSBarry Smith Note: 16211a5261eSBarry Smith This routine zeros the array pointer to prevent accidental reuse after it has been restored. 163a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 164a5b5c723SRichard Tran Mills 165a5b5c723SRichard Tran Mills Level: advanced 16611a5261eSBarry Smith 16711a5261eSBarry Smith .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 168a5b5c723SRichard Tran Mills @*/ 1699371c9d4SSatish Balay PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S) { 170a5b5c723SRichard Tran Mills PetscFunctionBegin; 171a5b5c723SRichard Tran Mills if (S) *S = NULL; 17249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 17349bd79ccSDebojyoti Ghosh } 17449bd79ccSDebojyoti Ghosh 17549bd79ccSDebojyoti Ghosh /*@C 17611a5261eSBarry Smith MatKAIJGetT - Get the transformation matrix T associated with the `MATKAIJ` matrix 17749bd79ccSDebojyoti Ghosh 1780567c835SRichard Tran Mills Not Collective; the entire T is stored and returned independently on all processes 17949bd79ccSDebojyoti Ghosh 18049bd79ccSDebojyoti Ghosh Input Parameter: 18111a5261eSBarry Smith . A - the `MATKAIJ` matrix 18249bd79ccSDebojyoti Ghosh 183d8d19677SJose E. Roman Output Parameters: 184a5b5c723SRichard Tran Mills + m - the number of rows in T 185a5b5c723SRichard Tran Mills . n - the number of columns in T 186a5b5c723SRichard Tran Mills - T - the T matrix, in form of a scalar array in column-major format 18749bd79ccSDebojyoti Ghosh 18811a5261eSBarry Smith Note: 18911a5261eSBarry Smith All output parameters are optional (pass NULL if not desired) 19031a97b9aSRichard Tran Mills 19149bd79ccSDebojyoti Ghosh Level: advanced 19249bd79ccSDebojyoti Ghosh 19311a5261eSBarry Smith .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 19449bd79ccSDebojyoti Ghosh @*/ 1959371c9d4SSatish Balay PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T) { 19649bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 19749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 198a5b5c723SRichard Tran Mills if (m) *m = b->p; 199a5b5c723SRichard Tran Mills if (n) *n = b->q; 200a5b5c723SRichard Tran Mills if (T) *T = b->T; 201a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 202a5b5c723SRichard Tran Mills } 203a5b5c723SRichard Tran Mills 204a5b5c723SRichard Tran Mills /*@C 20511a5261eSBarry Smith MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the `MATKAIJ` matrix 206a5b5c723SRichard Tran Mills 207a5b5c723SRichard Tran Mills Not Collective; the entire T is stored and returned independently on all processes 208a5b5c723SRichard Tran Mills 209a5b5c723SRichard Tran Mills Input Parameter: 21011a5261eSBarry Smith . A - the `MATKAIJ` matrix 211a5b5c723SRichard Tran Mills 212d8d19677SJose E. Roman Output Parameters: 213a5b5c723SRichard Tran Mills + m - the number of rows in T 214a5b5c723SRichard Tran Mills . n - the number of columns in T 215a5b5c723SRichard Tran Mills - T - the T matrix, in form of a scalar array in column-major format 216a5b5c723SRichard Tran Mills 21711a5261eSBarry Smith Note: All output parameters are optional (pass NULL if not desired) 218a5b5c723SRichard Tran Mills 219a5b5c723SRichard Tran Mills Level: advanced 220a5b5c723SRichard Tran Mills 22111a5261eSBarry Smith .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 222a5b5c723SRichard Tran Mills @*/ 2239371c9d4SSatish Balay PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T) { 224a5b5c723SRichard Tran Mills Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 225a5b5c723SRichard Tran Mills PetscFunctionBegin; 226a5b5c723SRichard Tran Mills if (m) *m = b->p; 227a5b5c723SRichard Tran Mills if (n) *n = b->q; 228a5b5c723SRichard Tran Mills if (T) *T = b->T; 229a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 230a5b5c723SRichard Tran Mills } 231a5b5c723SRichard Tran Mills 232a5b5c723SRichard Tran Mills /*@C 23311a5261eSBarry Smith MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()` 234a5b5c723SRichard Tran Mills 235a5b5c723SRichard Tran Mills Not collective 236a5b5c723SRichard Tran Mills 237a5b5c723SRichard Tran Mills Input Parameter: 23811a5261eSBarry Smith . A - the `MATKAIJ` matrix 239a5b5c723SRichard Tran Mills 240a5b5c723SRichard Tran Mills Output Parameter: 24111a5261eSBarry Smith . T - location of pointer to array obtained with `MatKAIJGetS()` 242a5b5c723SRichard Tran Mills 24311a5261eSBarry Smith Note: 24411a5261eSBarry Smith This routine zeros the array pointer to prevent accidental reuse after it has been restored. 245a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 246a5b5c723SRichard Tran Mills 247a5b5c723SRichard Tran Mills Level: advanced 24811a5261eSBarry Smith 24911a5261eSBarry Smith .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 250a5b5c723SRichard Tran Mills @*/ 2519371c9d4SSatish Balay PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T) { 252a5b5c723SRichard Tran Mills PetscFunctionBegin; 253a5b5c723SRichard Tran Mills if (T) *T = NULL; 2549566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)A)); 255a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 256a5b5c723SRichard Tran Mills } 257a5b5c723SRichard Tran Mills 258a5b5c723SRichard Tran Mills /*@C 25911a5261eSBarry Smith MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()` 260a5b5c723SRichard Tran Mills 261a5b5c723SRichard Tran Mills Not collective 262a5b5c723SRichard Tran Mills 263a5b5c723SRichard Tran Mills Input Parameter: 26411a5261eSBarry Smith . A - the `MATKAIJ` matrix 265a5b5c723SRichard Tran Mills 266a5b5c723SRichard Tran Mills Output Parameter: 26711a5261eSBarry Smith . T - location of pointer to array obtained with `MatKAIJGetS()` 268a5b5c723SRichard Tran Mills 26911a5261eSBarry Smith Note: 27011a5261eSBarry Smith This routine zeros the array pointer to prevent accidental reuse after it has been restored. 271a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 272a5b5c723SRichard Tran Mills 273a5b5c723SRichard Tran Mills Level: advanced 27411a5261eSBarry Smith 27511a5261eSBarry Smith .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 276a5b5c723SRichard Tran Mills @*/ 2779371c9d4SSatish Balay PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T) { 278a5b5c723SRichard Tran Mills PetscFunctionBegin; 279a5b5c723SRichard Tran Mills if (T) *T = NULL; 28049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 28149bd79ccSDebojyoti Ghosh } 28249bd79ccSDebojyoti Ghosh 2830567c835SRichard Tran Mills /*@ 28411a5261eSBarry Smith MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 2850567c835SRichard Tran Mills 28611a5261eSBarry Smith Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel 2870567c835SRichard Tran Mills 2880567c835SRichard Tran Mills Input Parameters: 28911a5261eSBarry Smith + A - the `MATKAIJ` matrix 29011a5261eSBarry Smith - B - the `MATAIJ` matrix 2910567c835SRichard Tran Mills 29215b9d025SRichard Tran Mills Notes: 29311a5261eSBarry Smith This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 29411a5261eSBarry Smith 29511a5261eSBarry Smith Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 29615b9d025SRichard Tran Mills 2970567c835SRichard Tran Mills Level: advanced 2980567c835SRichard Tran Mills 29911a5261eSBarry Smith .seealso: `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()` 3000567c835SRichard Tran Mills @*/ 3019371c9d4SSatish Balay PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B) { 3020567c835SRichard Tran Mills PetscMPIInt size; 30306d61a37SPierre Jolivet PetscBool flg; 3040567c835SRichard Tran Mills 3050567c835SRichard Tran Mills PetscFunctionBegin; 3069566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3070567c835SRichard Tran Mills if (size == 1) { 3089566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg)); 30928b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name); 3100567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 3110567c835SRichard Tran Mills a->AIJ = B; 3120567c835SRichard Tran Mills } else { 3130567c835SRichard Tran Mills Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data; 3140567c835SRichard Tran Mills a->A = B; 3150567c835SRichard Tran Mills } 3169566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)B)); 3170567c835SRichard Tran Mills PetscFunctionReturn(0); 3180567c835SRichard Tran Mills } 3190567c835SRichard Tran Mills 3200567c835SRichard Tran Mills /*@C 32111a5261eSBarry Smith MatKAIJSetS - Set the S matrix describing the shift action of the `MATKAIJ` matrix 3220567c835SRichard Tran Mills 3230567c835SRichard Tran Mills Logically Collective; the entire S is stored independently on all processes. 3240567c835SRichard Tran Mills 3250567c835SRichard Tran Mills Input Parameters: 32611a5261eSBarry Smith + A - the `MATKAIJ` matrix 3270567c835SRichard Tran Mills . p - the number of rows in S 3280567c835SRichard Tran Mills . q - the number of columns in S 3290567c835SRichard Tran Mills - S - the S matrix, in form of a scalar array in column-major format 3300567c835SRichard Tran Mills 33111a5261eSBarry Smith Notes: 33211a5261eSBarry Smith The dimensions p and q must match those of the transformation matrix T associated with the `MATKAIJ` matrix. 33311a5261eSBarry Smith 33488f48298SRichard Tran Mills The S matrix is copied, so the user can destroy this array. 3350567c835SRichard Tran Mills 33611a5261eSBarry Smith Level: advanced 3370567c835SRichard Tran Mills 33811a5261eSBarry Smith .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()` 3390567c835SRichard Tran Mills @*/ 3409371c9d4SSatish Balay PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[]) { 3410567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 3420567c835SRichard Tran Mills 3430567c835SRichard Tran Mills PetscFunctionBegin; 3449566063dSJacob Faibussowitsch PetscCall(PetscFree(a->S)); 3450567c835SRichard Tran Mills if (S) { 3469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(p * q, &a->S)); 3479566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar))); 3480567c835SRichard Tran Mills } else a->S = NULL; 3490567c835SRichard Tran Mills 3500567c835SRichard Tran Mills a->p = p; 3510567c835SRichard Tran Mills a->q = q; 3520567c835SRichard Tran Mills PetscFunctionReturn(0); 3530567c835SRichard Tran Mills } 3540567c835SRichard Tran Mills 3550567c835SRichard Tran Mills /*@C 356910cf402Sprj- MatKAIJGetScaledIdentity - Check if both S and T are scaled identities. 357910cf402Sprj- 358910cf402Sprj- Logically Collective. 359910cf402Sprj- 360910cf402Sprj- Input Parameter: 36111a5261eSBarry Smith . A - the `MATKAIJ` matrix 362910cf402Sprj- 363910cf402Sprj- Output Parameter: 364910cf402Sprj- . identity - the Boolean value 365910cf402Sprj- 366910cf402Sprj- Level: Advanced 367910cf402Sprj- 36811a5261eSBarry Smith .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()` 369910cf402Sprj- @*/ 3709371c9d4SSatish Balay PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity) { 371910cf402Sprj- Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 372910cf402Sprj- PetscInt i, j; 373910cf402Sprj- 374910cf402Sprj- PetscFunctionBegin; 375910cf402Sprj- if (a->p != a->q) { 376910cf402Sprj- *identity = PETSC_FALSE; 377910cf402Sprj- PetscFunctionReturn(0); 378910cf402Sprj- } else *identity = PETSC_TRUE; 379910cf402Sprj- if (!a->isTI || a->S) { 380910cf402Sprj- for (i = 0; i < a->p && *identity; i++) { 381910cf402Sprj- for (j = 0; j < a->p && *identity; j++) { 382910cf402Sprj- if (i != j) { 383910cf402Sprj- if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 384910cf402Sprj- if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 385910cf402Sprj- } else { 386910cf402Sprj- if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 387910cf402Sprj- if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 388910cf402Sprj- } 389910cf402Sprj- } 390910cf402Sprj- } 391910cf402Sprj- } 392910cf402Sprj- PetscFunctionReturn(0); 393910cf402Sprj- } 394910cf402Sprj- 395910cf402Sprj- /*@C 39611a5261eSBarry Smith MatKAIJSetT - Set the transformation matrix T associated with the `MATKAIJ` matrix 3970567c835SRichard Tran Mills 3980567c835SRichard Tran Mills Logically Collective; the entire T is stored independently on all processes. 3990567c835SRichard Tran Mills 4000567c835SRichard Tran Mills Input Parameters: 40111a5261eSBarry Smith + A - the `MATKAIJ` matrix 4020567c835SRichard Tran Mills . p - the number of rows in S 4030567c835SRichard Tran Mills . q - the number of columns in S 4040567c835SRichard Tran Mills - T - the T matrix, in form of a scalar array in column-major format 4050567c835SRichard Tran Mills 40611a5261eSBarry Smith Notes: 40711a5261eSBarry Smith The dimensions p and q must match those of the shift matrix S associated with the `MATKAIJ` matrix. 40811a5261eSBarry Smith 40988f48298SRichard Tran Mills The T matrix is copied, so the user can destroy this array. 4100567c835SRichard Tran Mills 4110567c835SRichard Tran Mills Level: Advanced 4120567c835SRichard Tran Mills 41311a5261eSBarry Smith .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()` 4140567c835SRichard Tran Mills @*/ 4159371c9d4SSatish Balay PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[]) { 4160567c835SRichard Tran Mills PetscInt i, j; 4170567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 4180567c835SRichard Tran Mills PetscBool isTI = PETSC_FALSE; 4190567c835SRichard Tran Mills 4200567c835SRichard Tran Mills PetscFunctionBegin; 4210567c835SRichard Tran Mills /* check if T is an identity matrix */ 4220567c835SRichard Tran Mills if (T && (p == q)) { 4230567c835SRichard Tran Mills isTI = PETSC_TRUE; 4240567c835SRichard Tran Mills for (i = 0; i < p; i++) { 4250567c835SRichard Tran Mills for (j = 0; j < q; j++) { 4260567c835SRichard Tran Mills if (i == j) { 4270567c835SRichard Tran Mills /* diagonal term must be 1 */ 4280567c835SRichard Tran Mills if (T[i + j * p] != 1.0) isTI = PETSC_FALSE; 4290567c835SRichard Tran Mills } else { 4300567c835SRichard Tran Mills /* off-diagonal term must be 0 */ 4310567c835SRichard Tran Mills if (T[i + j * p] != 0.0) isTI = PETSC_FALSE; 4320567c835SRichard Tran Mills } 4330567c835SRichard Tran Mills } 4340567c835SRichard Tran Mills } 4350567c835SRichard Tran Mills } 4360567c835SRichard Tran Mills a->isTI = isTI; 4370567c835SRichard Tran Mills 4389566063dSJacob Faibussowitsch PetscCall(PetscFree(a->T)); 4390567c835SRichard Tran Mills if (T && (!isTI)) { 4409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(p * q, &a->T)); 4419566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar))); 44250d19d74SRichard Tran Mills } else a->T = NULL; 4430567c835SRichard Tran Mills 4440567c835SRichard Tran Mills a->p = p; 4450567c835SRichard Tran Mills a->q = q; 4460567c835SRichard Tran Mills PetscFunctionReturn(0); 4470567c835SRichard Tran Mills } 4480567c835SRichard Tran Mills 4499371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqKAIJ(Mat A) { 45049bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 45149bd79ccSDebojyoti Ghosh 45249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 4539566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 4549566063dSJacob Faibussowitsch PetscCall(PetscFree(b->S)); 4559566063dSJacob Faibussowitsch PetscCall(PetscFree(b->T)); 4569566063dSJacob Faibussowitsch PetscCall(PetscFree(b->ibdiag)); 4579566063dSJacob Faibussowitsch PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr)); 4589566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL)); 4599566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 46049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 46149bd79ccSDebojyoti Ghosh } 46249bd79ccSDebojyoti Ghosh 4639371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) { 464e0e5a793SRichard Tran Mills Mat_MPIKAIJ *a; 465e0e5a793SRichard Tran Mills Mat_MPIAIJ *mpiaij; 466e0e5a793SRichard Tran Mills PetscScalar *T; 467e0e5a793SRichard Tran Mills PetscInt i, j; 468e0e5a793SRichard Tran Mills PetscObjectState state; 469e0e5a793SRichard Tran Mills 470e0e5a793SRichard Tran Mills PetscFunctionBegin; 471e0e5a793SRichard Tran Mills a = (Mat_MPIKAIJ *)A->data; 472e0e5a793SRichard Tran Mills mpiaij = (Mat_MPIAIJ *)a->A->data; 473e0e5a793SRichard Tran Mills 4749566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)a->A, &state)); 475e0e5a793SRichard Tran Mills if (state == a->state) { 476e0e5a793SRichard Tran Mills /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */ 477e0e5a793SRichard Tran Mills PetscFunctionReturn(0); 478e0e5a793SRichard Tran Mills } else { 4799566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->AIJ)); 4809566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->OAIJ)); 481e0e5a793SRichard Tran Mills if (a->isTI) { 482e0e5a793SRichard Tran Mills /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 483e0e5a793SRichard Tran Mills * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 484e0e5a793SRichard Tran Mills * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 485e0e5a793SRichard Tran Mills * to pass in. */ 4869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->p * a->q, &T)); 487e0e5a793SRichard Tran Mills for (i = 0; i < a->p; i++) { 488e0e5a793SRichard Tran Mills for (j = 0; j < a->q; j++) { 489e0e5a793SRichard Tran Mills if (i == j) T[i + j * a->p] = 1.0; 490e0e5a793SRichard Tran Mills else T[i + j * a->p] = 0.0; 491e0e5a793SRichard Tran Mills } 492e0e5a793SRichard Tran Mills } 493e0e5a793SRichard Tran Mills } else T = a->T; 4949566063dSJacob Faibussowitsch PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ)); 4959566063dSJacob Faibussowitsch PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ)); 4961baa6e33SBarry Smith if (a->isTI) PetscCall(PetscFree(T)); 497e0e5a793SRichard Tran Mills a->state = state; 498e0e5a793SRichard Tran Mills } 499e0e5a793SRichard Tran Mills 500e0e5a793SRichard Tran Mills PetscFunctionReturn(0); 501e0e5a793SRichard Tran Mills } 502e0e5a793SRichard Tran Mills 5039371c9d4SSatish Balay PetscErrorCode MatSetUp_KAIJ(Mat A) { 5040567c835SRichard Tran Mills PetscInt n; 5050567c835SRichard Tran Mills PetscMPIInt size; 5060567c835SRichard Tran Mills Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data; 5070567c835SRichard Tran Mills 50849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 5099566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5100567c835SRichard Tran Mills if (size == 1) { 5119566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N)); 5129566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p)); 5139566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q)); 5149566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 5159566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 5160567c835SRichard Tran Mills } else { 5170567c835SRichard Tran Mills Mat_MPIKAIJ *a; 5180567c835SRichard Tran Mills Mat_MPIAIJ *mpiaij; 5190567c835SRichard Tran Mills IS from, to; 5200567c835SRichard Tran Mills Vec gvec; 5210567c835SRichard Tran Mills 5220567c835SRichard Tran Mills a = (Mat_MPIKAIJ *)A->data; 523d3f912faSRichard Tran Mills mpiaij = (Mat_MPIAIJ *)a->A->data; 5249566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N)); 5259566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p)); 5269566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q)); 5279566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 5289566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 5290567c835SRichard Tran Mills 5309566063dSJacob Faibussowitsch PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); 5310567c835SRichard Tran Mills 5329566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec, &n)); 5339566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF, &a->w)); 5349566063dSJacob Faibussowitsch PetscCall(VecSetSizes(a->w, n * a->q, n * a->q)); 5359566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(a->w, a->q)); 5369566063dSJacob Faibussowitsch PetscCall(VecSetType(a->w, VECSEQ)); 5370567c835SRichard Tran Mills 5380567c835SRichard Tran Mills /* create two temporary Index sets for build scatter gather */ 5399566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 5409566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to)); 5410567c835SRichard Tran Mills 5420567c835SRichard Tran Mills /* create temporary global vector to generate scatter context */ 5439566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec)); 5440567c835SRichard Tran Mills 5450567c835SRichard Tran Mills /* generate the scatter context */ 5469566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx)); 5470567c835SRichard Tran Mills 5489566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 5499566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 5509566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 5510567c835SRichard Tran Mills } 5520567c835SRichard Tran Mills 5530567c835SRichard Tran Mills A->assembled = PETSC_TRUE; 55449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 55549bd79ccSDebojyoti Ghosh } 55649bd79ccSDebojyoti Ghosh 5579371c9d4SSatish Balay PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer) { 558e6985dafSRichard Tran Mills PetscViewerFormat format; 559e6985dafSRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 56049bd79ccSDebojyoti Ghosh Mat B; 561e6985dafSRichard Tran Mills PetscInt i; 562e6985dafSRichard Tran Mills PetscBool ismpikaij; 56349bd79ccSDebojyoti Ghosh 56449bd79ccSDebojyoti Ghosh PetscFunctionBegin; 5659566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 5669566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 567e6985dafSRichard Tran Mills if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 5689566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q)); 569e6985dafSRichard Tran Mills 570e6985dafSRichard Tran Mills /* Print appropriate details for S. */ 571e6985dafSRichard Tran Mills if (!a->S) { 5729566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n")); 573e6985dafSRichard Tran Mills } else if (format == PETSC_VIEWER_ASCII_IMPL) { 5749566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are ")); 575e6985dafSRichard Tran Mills for (i = 0; i < (a->p * a->q); i++) { 576e6985dafSRichard Tran Mills #if defined(PETSC_USE_COMPLEX) 5779566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i]))); 578e6985dafSRichard Tran Mills #else 5799566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i]))); 580e6985dafSRichard Tran Mills #endif 581e6985dafSRichard Tran Mills } 5829566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 58349bd79ccSDebojyoti Ghosh } 58449bd79ccSDebojyoti Ghosh 585e6985dafSRichard Tran Mills /* Print appropriate details for T. */ 586e6985dafSRichard Tran Mills if (a->isTI) { 5879566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n")); 588e6985dafSRichard Tran Mills } else if (!a->T) { 5899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n")); 590e6985dafSRichard Tran Mills } else if (format == PETSC_VIEWER_ASCII_IMPL) { 5919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are ")); 592e6985dafSRichard Tran Mills for (i = 0; i < (a->p * a->q); i++) { 593e6985dafSRichard Tran Mills #if defined(PETSC_USE_COMPLEX) 5949566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i]))); 595e6985dafSRichard Tran Mills #else 5969566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i]))); 597e6985dafSRichard Tran Mills #endif 598e6985dafSRichard Tran Mills } 5999566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 600e6985dafSRichard Tran Mills } 60149bd79ccSDebojyoti Ghosh 602e6985dafSRichard Tran Mills /* Now print details for the AIJ matrix, using the AIJ viewer. */ 6039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n")); 604e6985dafSRichard Tran Mills if (ismpikaij) { 605e6985dafSRichard Tran Mills Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 6069566063dSJacob Faibussowitsch PetscCall(MatView(b->A, viewer)); 607e6985dafSRichard Tran Mills } else { 6089566063dSJacob Faibussowitsch PetscCall(MatView(a->AIJ, viewer)); 609e6985dafSRichard Tran Mills } 610e6985dafSRichard Tran Mills 611e6985dafSRichard Tran Mills } else { 612e6985dafSRichard Tran Mills /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 6139566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B)); 6149566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 6159566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 616e6985dafSRichard Tran Mills } 61749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 61849bd79ccSDebojyoti Ghosh } 61949bd79ccSDebojyoti Ghosh 6209371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIKAIJ(Mat A) { 62149bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 62249bd79ccSDebojyoti Ghosh 62349bd79ccSDebojyoti Ghosh PetscFunctionBegin; 6249566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 6259566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 6269566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 6279566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 6289566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 6299566063dSJacob Faibussowitsch PetscCall(PetscFree(b->S)); 6309566063dSJacob Faibussowitsch PetscCall(PetscFree(b->T)); 6319566063dSJacob Faibussowitsch PetscCall(PetscFree(b->ibdiag)); 6329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL)); 6339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL)); 6349566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 63549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 63649bd79ccSDebojyoti Ghosh } 63749bd79ccSDebojyoti Ghosh 63849bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/ 63949bd79ccSDebojyoti Ghosh 64049bd79ccSDebojyoti Ghosh /* zz = yy + Axx */ 6419371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 64249bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 64349bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 64449bd79ccSDebojyoti Ghosh const PetscScalar *s = b->S, *t = b->T; 64549bd79ccSDebojyoti Ghosh const PetscScalar *x, *v, *bx; 64649bd79ccSDebojyoti Ghosh PetscScalar *y, *sums; 64749bd79ccSDebojyoti Ghosh const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 64849bd79ccSDebojyoti Ghosh PetscInt n, i, jrow, j, l, p = b->p, q = b->q, k; 64949bd79ccSDebojyoti Ghosh 65049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 65149bd79ccSDebojyoti Ghosh if (!yy) { 6529566063dSJacob Faibussowitsch PetscCall(VecSet(zz, 0.0)); 65349bd79ccSDebojyoti Ghosh } else { 6549566063dSJacob Faibussowitsch PetscCall(VecCopy(yy, zz)); 65549bd79ccSDebojyoti Ghosh } 65649bd79ccSDebojyoti Ghosh if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 65749bd79ccSDebojyoti Ghosh 6589566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6599566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 66049bd79ccSDebojyoti Ghosh idx = a->j; 66149bd79ccSDebojyoti Ghosh v = a->a; 66249bd79ccSDebojyoti Ghosh ii = a->i; 66349bd79ccSDebojyoti Ghosh 66449bd79ccSDebojyoti Ghosh if (b->isTI) { 66549bd79ccSDebojyoti Ghosh for (i = 0; i < m; i++) { 66649bd79ccSDebojyoti Ghosh jrow = ii[i]; 66749bd79ccSDebojyoti Ghosh n = ii[i + 1] - jrow; 66849bd79ccSDebojyoti Ghosh sums = y + p * i; 66949bd79ccSDebojyoti Ghosh for (j = 0; j < n; j++) { 670ad540459SPierre Jolivet for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k]; 67149bd79ccSDebojyoti Ghosh } 67249bd79ccSDebojyoti Ghosh } 6739566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(3.0 * (a->nz) * p)); 67449bd79ccSDebojyoti Ghosh } else if (t) { 67549bd79ccSDebojyoti Ghosh for (i = 0; i < m; i++) { 67649bd79ccSDebojyoti Ghosh jrow = ii[i]; 67749bd79ccSDebojyoti Ghosh n = ii[i + 1] - jrow; 67849bd79ccSDebojyoti Ghosh sums = y + p * i; 67949bd79ccSDebojyoti Ghosh for (j = 0; j < n; j++) { 68049bd79ccSDebojyoti Ghosh for (k = 0; k < p; k++) { 681ad540459SPierre Jolivet for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l]; 68249bd79ccSDebojyoti Ghosh } 68349bd79ccSDebojyoti Ghosh } 68449bd79ccSDebojyoti Ghosh } 685234d9204SRichard Tran Mills /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 686234d9204SRichard Tran Mills * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 687234d9204SRichard Tran Mills * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 688234d9204SRichard Tran Mills * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 6899566063dSJacob Faibussowitsch PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz)); 69049bd79ccSDebojyoti Ghosh } 69149bd79ccSDebojyoti Ghosh if (s) { 69249bd79ccSDebojyoti Ghosh for (i = 0; i < m; i++) { 69349bd79ccSDebojyoti Ghosh sums = y + p * i; 69449bd79ccSDebojyoti Ghosh bx = x + q * i; 69549bd79ccSDebojyoti Ghosh if (i < b->AIJ->cmap->n) { 69649bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 697ad540459SPierre Jolivet for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j]; 69849bd79ccSDebojyoti Ghosh } 69949bd79ccSDebojyoti Ghosh } 70049bd79ccSDebojyoti Ghosh } 7019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * m * p * q)); 70249bd79ccSDebojyoti Ghosh } 70349bd79ccSDebojyoti Ghosh 7049566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7059566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 70649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 70749bd79ccSDebojyoti Ghosh } 70849bd79ccSDebojyoti Ghosh 7099371c9d4SSatish Balay PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy) { 71049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 7119566063dSJacob Faibussowitsch PetscCall(MatMultAdd_SeqKAIJ(A, xx, PETSC_NULL, yy)); 71249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 71349bd79ccSDebojyoti Ghosh } 71449bd79ccSDebojyoti Ghosh 71549bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h> 71649bd79ccSDebojyoti Ghosh 7179371c9d4SSatish Balay PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values) { 71849bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 71949bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 72049bd79ccSDebojyoti Ghosh const PetscScalar *S = b->S; 72149bd79ccSDebojyoti Ghosh const PetscScalar *T = b->T; 72249bd79ccSDebojyoti Ghosh const PetscScalar *v = a->a; 72349bd79ccSDebojyoti Ghosh const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 72449bd79ccSDebojyoti Ghosh PetscInt i, j, *v_pivots, dof, dof2; 72549bd79ccSDebojyoti Ghosh PetscScalar *diag, aval, *v_work; 72649bd79ccSDebojyoti Ghosh 72749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 72808401ef6SPierre Jolivet PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse."); 729aed4548fSBarry Smith PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix."); 73049bd79ccSDebojyoti Ghosh 73149bd79ccSDebojyoti Ghosh dof = p; 73249bd79ccSDebojyoti Ghosh dof2 = dof * dof; 73349bd79ccSDebojyoti Ghosh 73449bd79ccSDebojyoti Ghosh if (b->ibdiagvalid) { 73549bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 73649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 73749bd79ccSDebojyoti Ghosh } 738*4dfa11a4SJacob Faibussowitsch if (!b->ibdiag) { PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag)); } 73949bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 74049bd79ccSDebojyoti Ghosh diag = b->ibdiag; 74149bd79ccSDebojyoti Ghosh 7429566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots)); 74349bd79ccSDebojyoti Ghosh for (i = 0; i < m; i++) { 74449bd79ccSDebojyoti Ghosh if (S) { 7459566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar))); 74649bd79ccSDebojyoti Ghosh } else { 7479566063dSJacob Faibussowitsch PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar))); 74849bd79ccSDebojyoti Ghosh } 74949bd79ccSDebojyoti Ghosh if (b->isTI) { 75049bd79ccSDebojyoti Ghosh aval = 0; 7519371c9d4SSatish Balay for (j = ii[i]; j < ii[i + 1]; j++) 7529371c9d4SSatish Balay if (idx[j] == i) aval = v[j]; 75349bd79ccSDebojyoti Ghosh for (j = 0; j < dof; j++) diag[j + dof * j] += aval; 75449bd79ccSDebojyoti Ghosh } else if (T) { 75549bd79ccSDebojyoti Ghosh aval = 0; 7569371c9d4SSatish Balay for (j = ii[i]; j < ii[i + 1]; j++) 7579371c9d4SSatish Balay if (idx[j] == i) aval = v[j]; 75849bd79ccSDebojyoti Ghosh for (j = 0; j < dof2; j++) diag[j] += aval * T[j]; 75949bd79ccSDebojyoti Ghosh } 7609566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL)); 76149bd79ccSDebojyoti Ghosh diag += dof2; 76249bd79ccSDebojyoti Ghosh } 7639566063dSJacob Faibussowitsch PetscCall(PetscFree2(v_work, v_pivots)); 76449bd79ccSDebojyoti Ghosh 76549bd79ccSDebojyoti Ghosh b->ibdiagvalid = PETSC_TRUE; 76649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 76749bd79ccSDebojyoti Ghosh } 76849bd79ccSDebojyoti Ghosh 7699371c9d4SSatish Balay static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B) { 77049bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data; 77149bd79ccSDebojyoti Ghosh 77249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 77349bd79ccSDebojyoti Ghosh *B = kaij->AIJ; 77449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 77549bd79ccSDebojyoti Ghosh } 77649bd79ccSDebojyoti Ghosh 7779371c9d4SSatish Balay static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 778fac53fa1SPierre Jolivet Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 779fac53fa1SPierre Jolivet Mat AIJ, OAIJ, B; 780fac53fa1SPierre Jolivet PetscInt *d_nnz, *o_nnz = NULL, nz, i, j, m, d; 781fac53fa1SPierre Jolivet const PetscInt p = a->p, q = a->q; 782fac53fa1SPierre Jolivet PetscBool ismpikaij, missing; 783fac53fa1SPierre Jolivet 784fac53fa1SPierre Jolivet PetscFunctionBegin; 785fac53fa1SPierre Jolivet if (reuse != MAT_REUSE_MATRIX) { 7869566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 787fac53fa1SPierre Jolivet if (ismpikaij) { 788fac53fa1SPierre Jolivet Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 789fac53fa1SPierre Jolivet AIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 790fac53fa1SPierre Jolivet OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 791fac53fa1SPierre Jolivet } else { 792fac53fa1SPierre Jolivet AIJ = a->AIJ; 793fac53fa1SPierre Jolivet OAIJ = NULL; 794fac53fa1SPierre Jolivet } 7959566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 7969566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 7979566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATAIJ)); 7989566063dSJacob Faibussowitsch PetscCall(MatGetSize(AIJ, &m, NULL)); 7999566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */ 800fac53fa1SPierre Jolivet if (!missing || !a->S) d = m; 8019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m * p, &d_nnz)); 802fac53fa1SPierre Jolivet for (i = 0; i < m; ++i) { 8039566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL)); 804fac53fa1SPierre Jolivet for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q; 8059566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL)); 806fac53fa1SPierre Jolivet } 807fac53fa1SPierre Jolivet if (OAIJ) { 8089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m * p, &o_nnz)); 809fac53fa1SPierre Jolivet for (i = 0; i < m; ++i) { 8109566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL)); 811fac53fa1SPierre Jolivet for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q; 8129566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL)); 813fac53fa1SPierre Jolivet } 8149566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 815fac53fa1SPierre Jolivet } else { 8169566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz)); 817fac53fa1SPierre Jolivet } 8189566063dSJacob Faibussowitsch PetscCall(PetscFree(d_nnz)); 8199566063dSJacob Faibussowitsch PetscCall(PetscFree(o_nnz)); 820fac53fa1SPierre Jolivet } else B = *newmat; 8219566063dSJacob Faibussowitsch PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B)); 822fac53fa1SPierre Jolivet if (reuse == MAT_INPLACE_MATRIX) { 8239566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 824fac53fa1SPierre Jolivet } else *newmat = B; 825fac53fa1SPierre Jolivet PetscFunctionReturn(0); 826fac53fa1SPierre Jolivet } 827fac53fa1SPierre Jolivet 8289371c9d4SSatish Balay PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) { 82949bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ *)A->data; 83049bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ *)kaij->AIJ->data; 83149bd79ccSDebojyoti Ghosh const PetscScalar *aa = a->a, *T = kaij->T, *v; 83249bd79ccSDebojyoti Ghosh const PetscInt m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi; 83349bd79ccSDebojyoti Ghosh const PetscScalar *b, *xb, *idiag; 83449bd79ccSDebojyoti Ghosh PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 83549bd79ccSDebojyoti Ghosh PetscInt i, j, k, i2, bs, bs2, nz; 83649bd79ccSDebojyoti Ghosh 83749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 83849bd79ccSDebojyoti Ghosh its = its * lits; 839aed4548fSBarry Smith PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 84008401ef6SPierre Jolivet PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 84128b400f6SJacob Faibussowitsch PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 84208401ef6SPierre Jolivet PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 84308401ef6SPierre Jolivet PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks"); 844f7d195e4SLawrence Mitchell bs = p; 845f7d195e4SLawrence Mitchell bs2 = bs * bs; 84649bd79ccSDebojyoti Ghosh 84749bd79ccSDebojyoti Ghosh if (!m) PetscFunctionReturn(0); 84849bd79ccSDebojyoti Ghosh 8499566063dSJacob Faibussowitsch if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL)); 85049bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 85149bd79ccSDebojyoti Ghosh diag = a->diag; 85249bd79ccSDebojyoti Ghosh 85349bd79ccSDebojyoti Ghosh if (!kaij->sor.setup) { 8549566063dSJacob Faibussowitsch PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr)); 85549bd79ccSDebojyoti Ghosh kaij->sor.setup = PETSC_TRUE; 85649bd79ccSDebojyoti Ghosh } 85749bd79ccSDebojyoti Ghosh y = kaij->sor.y; 85849bd79ccSDebojyoti Ghosh w = kaij->sor.w; 85949bd79ccSDebojyoti Ghosh work = kaij->sor.work; 86049bd79ccSDebojyoti Ghosh t = kaij->sor.t; 86149bd79ccSDebojyoti Ghosh arr = kaij->sor.arr; 86249bd79ccSDebojyoti Ghosh 863d0609cedSBarry Smith PetscCall(VecGetArray(xx, &x)); 8649566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 86549bd79ccSDebojyoti Ghosh 86649bd79ccSDebojyoti Ghosh if (flag & SOR_ZERO_INITIAL_GUESS) { 86749bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 86849bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */ 8699566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar))); 87049bd79ccSDebojyoti Ghosh i2 = bs; 87149bd79ccSDebojyoti Ghosh idiag += bs2; 87249bd79ccSDebojyoti Ghosh for (i = 1; i < m; i++) { 87349bd79ccSDebojyoti Ghosh v = aa + ai[i]; 87449bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 87549bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 87649bd79ccSDebojyoti Ghosh 87749bd79ccSDebojyoti Ghosh if (T) { /* b - T (Arow * x) */ 8789566063dSJacob Faibussowitsch PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar))); 87949bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 88049bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 88149bd79ccSDebojyoti Ghosh } 88249bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]); 88349bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k]; 88449bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 8859566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 88649bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 88749bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k]; 88849bd79ccSDebojyoti Ghosh } 88949bd79ccSDebojyoti Ghosh } else { 8909566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 89149bd79ccSDebojyoti Ghosh } 89249bd79ccSDebojyoti Ghosh 89349bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y); 89449bd79ccSDebojyoti Ghosh for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j]; 89549bd79ccSDebojyoti Ghosh 89649bd79ccSDebojyoti Ghosh idiag += bs2; 89749bd79ccSDebojyoti Ghosh i2 += bs; 89849bd79ccSDebojyoti Ghosh } 89949bd79ccSDebojyoti Ghosh /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 9009566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 90149bd79ccSDebojyoti Ghosh xb = t; 90249bd79ccSDebojyoti Ghosh } else xb = b; 90349bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 90449bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag + bs2 * (m - 1); 90549bd79ccSDebojyoti Ghosh i2 = bs * (m - 1); 9069566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 90749bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 90849bd79ccSDebojyoti Ghosh i2 -= bs; 90949bd79ccSDebojyoti Ghosh idiag -= bs2; 91049bd79ccSDebojyoti Ghosh for (i = m - 2; i >= 0; i--) { 91149bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 91249bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 91349bd79ccSDebojyoti Ghosh nz = ai[i + 1] - diag[i] - 1; 91449bd79ccSDebojyoti Ghosh 91549bd79ccSDebojyoti Ghosh if (T) { /* FIXME: This branch untested */ 9169566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 91749bd79ccSDebojyoti Ghosh /* copy all rows of x that are needed into contiguous space */ 91849bd79ccSDebojyoti Ghosh workt = work; 91949bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9209566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 92149bd79ccSDebojyoti Ghosh workt += bs; 92249bd79ccSDebojyoti Ghosh } 92349bd79ccSDebojyoti Ghosh arrt = arr; 92449bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9259566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 92649bd79ccSDebojyoti Ghosh for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 92749bd79ccSDebojyoti Ghosh arrt += bs2; 92849bd79ccSDebojyoti Ghosh } 92949bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 93049bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 9319566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar))); 93249bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 93349bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 93449bd79ccSDebojyoti Ghosh } 93549bd79ccSDebojyoti Ghosh } 93649bd79ccSDebojyoti Ghosh 93749bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */ 93849bd79ccSDebojyoti Ghosh for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j]; 93949bd79ccSDebojyoti Ghosh 94049bd79ccSDebojyoti Ghosh idiag -= bs2; 94149bd79ccSDebojyoti Ghosh i2 -= bs; 94249bd79ccSDebojyoti Ghosh } 9439566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 94449bd79ccSDebojyoti Ghosh } 94549bd79ccSDebojyoti Ghosh its--; 94649bd79ccSDebojyoti Ghosh } 94749bd79ccSDebojyoti Ghosh while (its--) { /* FIXME: This branch not updated */ 94849bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 94949bd79ccSDebojyoti Ghosh i2 = 0; 95049bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 95149bd79ccSDebojyoti Ghosh for (i = 0; i < m; i++) { 9529566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 95349bd79ccSDebojyoti Ghosh 95449bd79ccSDebojyoti Ghosh v = aa + ai[i]; 95549bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 95649bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 95749bd79ccSDebojyoti Ghosh workt = work; 95849bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9599566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 96049bd79ccSDebojyoti Ghosh workt += bs; 96149bd79ccSDebojyoti Ghosh } 96249bd79ccSDebojyoti Ghosh arrt = arr; 96349bd79ccSDebojyoti Ghosh if (T) { 96449bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9659566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 96649bd79ccSDebojyoti Ghosh for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 96749bd79ccSDebojyoti Ghosh arrt += bs2; 96849bd79ccSDebojyoti Ghosh } 96949bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 97049bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 97149bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9729566063dSJacob Faibussowitsch PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 97349bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 97449bd79ccSDebojyoti Ghosh arrt += bs2; 97549bd79ccSDebojyoti Ghosh } 97649bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 97749bd79ccSDebojyoti Ghosh } 9789566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar))); 97949bd79ccSDebojyoti Ghosh 98049bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 98149bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 98249bd79ccSDebojyoti Ghosh nz = ai[i + 1] - diag[i] - 1; 98349bd79ccSDebojyoti Ghosh workt = work; 98449bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9859566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 98649bd79ccSDebojyoti Ghosh workt += bs; 98749bd79ccSDebojyoti Ghosh } 98849bd79ccSDebojyoti Ghosh arrt = arr; 98949bd79ccSDebojyoti Ghosh if (T) { 99049bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9919566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 99249bd79ccSDebojyoti Ghosh for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 99349bd79ccSDebojyoti Ghosh arrt += bs2; 99449bd79ccSDebojyoti Ghosh } 99549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 99649bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 99749bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 9989566063dSJacob Faibussowitsch PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 99949bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 100049bd79ccSDebojyoti Ghosh arrt += bs2; 100149bd79ccSDebojyoti Ghosh } 100249bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 100349bd79ccSDebojyoti Ghosh } 100449bd79ccSDebojyoti Ghosh 100549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 100649bd79ccSDebojyoti Ghosh for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 100749bd79ccSDebojyoti Ghosh 100849bd79ccSDebojyoti Ghosh idiag += bs2; 100949bd79ccSDebojyoti Ghosh i2 += bs; 101049bd79ccSDebojyoti Ghosh } 101149bd79ccSDebojyoti Ghosh xb = t; 10129371c9d4SSatish Balay } else xb = b; 101349bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 101449bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag + bs2 * (m - 1); 101549bd79ccSDebojyoti Ghosh i2 = bs * (m - 1); 101649bd79ccSDebojyoti Ghosh if (xb == b) { 101749bd79ccSDebojyoti Ghosh for (i = m - 1; i >= 0; i--) { 10189566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 101949bd79ccSDebojyoti Ghosh 102049bd79ccSDebojyoti Ghosh v = aa + ai[i]; 102149bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 102249bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 102349bd79ccSDebojyoti Ghosh workt = work; 102449bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10259566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 102649bd79ccSDebojyoti Ghosh workt += bs; 102749bd79ccSDebojyoti Ghosh } 102849bd79ccSDebojyoti Ghosh arrt = arr; 102949bd79ccSDebojyoti Ghosh if (T) { 103049bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10319566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 103249bd79ccSDebojyoti Ghosh for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 103349bd79ccSDebojyoti Ghosh arrt += bs2; 103449bd79ccSDebojyoti Ghosh } 103549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 103649bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 103749bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10389566063dSJacob Faibussowitsch PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 103949bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 104049bd79ccSDebojyoti Ghosh arrt += bs2; 104149bd79ccSDebojyoti Ghosh } 104249bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 104349bd79ccSDebojyoti Ghosh } 104449bd79ccSDebojyoti Ghosh 104549bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 104649bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 104749bd79ccSDebojyoti Ghosh nz = ai[i + 1] - diag[i] - 1; 104849bd79ccSDebojyoti Ghosh workt = work; 104949bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10509566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 105149bd79ccSDebojyoti Ghosh workt += bs; 105249bd79ccSDebojyoti Ghosh } 105349bd79ccSDebojyoti Ghosh arrt = arr; 105449bd79ccSDebojyoti Ghosh if (T) { 105549bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10569566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 105749bd79ccSDebojyoti Ghosh for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 105849bd79ccSDebojyoti Ghosh arrt += bs2; 105949bd79ccSDebojyoti Ghosh } 106049bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 106149bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 106249bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10639566063dSJacob Faibussowitsch PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 106449bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 106549bd79ccSDebojyoti Ghosh arrt += bs2; 106649bd79ccSDebojyoti Ghosh } 106749bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 106849bd79ccSDebojyoti Ghosh } 106949bd79ccSDebojyoti Ghosh 107049bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 107149bd79ccSDebojyoti Ghosh for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 107249bd79ccSDebojyoti Ghosh } 107349bd79ccSDebojyoti Ghosh } else { 107449bd79ccSDebojyoti Ghosh for (i = m - 1; i >= 0; i--) { 10759566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 107649bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 107749bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 107849bd79ccSDebojyoti Ghosh nz = ai[i + 1] - diag[i] - 1; 107949bd79ccSDebojyoti Ghosh workt = work; 108049bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10819566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 108249bd79ccSDebojyoti Ghosh workt += bs; 108349bd79ccSDebojyoti Ghosh } 108449bd79ccSDebojyoti Ghosh arrt = arr; 108549bd79ccSDebojyoti Ghosh if (T) { 108649bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10879566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 108849bd79ccSDebojyoti Ghosh for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 108949bd79ccSDebojyoti Ghosh arrt += bs2; 109049bd79ccSDebojyoti Ghosh } 109149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 109249bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 109349bd79ccSDebojyoti Ghosh for (j = 0; j < nz; j++) { 10949566063dSJacob Faibussowitsch PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 109549bd79ccSDebojyoti Ghosh for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 109649bd79ccSDebojyoti Ghosh arrt += bs2; 109749bd79ccSDebojyoti Ghosh } 109849bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 109949bd79ccSDebojyoti Ghosh } 110049bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 110149bd79ccSDebojyoti Ghosh for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 110249bd79ccSDebojyoti Ghosh } 110349bd79ccSDebojyoti Ghosh } 11049566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 110549bd79ccSDebojyoti Ghosh } 110649bd79ccSDebojyoti Ghosh } 110749bd79ccSDebojyoti Ghosh 1108d0609cedSBarry Smith PetscCall(VecRestoreArray(xx, &x)); 11099566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 111049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 111149bd79ccSDebojyoti Ghosh } 111249bd79ccSDebojyoti Ghosh 111349bd79ccSDebojyoti Ghosh /*===================================================================================*/ 111449bd79ccSDebojyoti Ghosh 11159371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 111649bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 111749bd79ccSDebojyoti Ghosh 111849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 111949bd79ccSDebojyoti Ghosh if (!yy) { 11209566063dSJacob Faibussowitsch PetscCall(VecSet(zz, 0.0)); 112149bd79ccSDebojyoti Ghosh } else { 11229566063dSJacob Faibussowitsch PetscCall(VecCopy(yy, zz)); 112349bd79ccSDebojyoti Ghosh } 11249566063dSJacob Faibussowitsch PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 112549bd79ccSDebojyoti Ghosh /* start the scatter */ 11269566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 11279566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz)); 11289566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 11299566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 113049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 113149bd79ccSDebojyoti Ghosh } 113249bd79ccSDebojyoti Ghosh 11339371c9d4SSatish Balay PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy) { 113449bd79ccSDebojyoti Ghosh PetscFunctionBegin; 11359566063dSJacob Faibussowitsch PetscCall(MatMultAdd_MPIKAIJ(A, xx, PETSC_NULL, yy)); 113649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 113749bd79ccSDebojyoti Ghosh } 113849bd79ccSDebojyoti Ghosh 11399371c9d4SSatish Balay PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values) { 114049bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 114149bd79ccSDebojyoti Ghosh 114249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 11439566063dSJacob Faibussowitsch PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */ 11449566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values)); 114549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 114649bd79ccSDebojyoti Ghosh } 114749bd79ccSDebojyoti Ghosh 114849bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/ 114949bd79ccSDebojyoti Ghosh 11509371c9d4SSatish Balay PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) { 115149bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 11521ca5ffdbSRichard Tran Mills PetscErrorCode diag = PETSC_FALSE; 115349bd79ccSDebojyoti Ghosh PetscInt nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c; 115449bd79ccSDebojyoti Ghosh PetscScalar *vaij, *v, *S = b->S, *T = b->T; 115549bd79ccSDebojyoti Ghosh 115649bd79ccSDebojyoti Ghosh PetscFunctionBegin; 115728b400f6SJacob Faibussowitsch PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 115849bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 1159aed4548fSBarry Smith PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 116049bd79ccSDebojyoti Ghosh 116149bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 116249bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 116349bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 116449bd79ccSDebojyoti Ghosh if (values) *values = NULL; 116549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 116649bd79ccSDebojyoti Ghosh } 116749bd79ccSDebojyoti Ghosh 116849bd79ccSDebojyoti Ghosh if (T || b->isTI) { 11699566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij)); 117049bd79ccSDebojyoti Ghosh c = nzaij; 117149bd79ccSDebojyoti Ghosh for (i = 0; i < nzaij; i++) { 117249bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 117349bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 117449bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 117549bd79ccSDebojyoti Ghosh c = i; 117649bd79ccSDebojyoti Ghosh } 117749bd79ccSDebojyoti Ghosh } 117849bd79ccSDebojyoti Ghosh } else nzaij = c = 0; 117949bd79ccSDebojyoti Ghosh 118049bd79ccSDebojyoti Ghosh /* calculate size of row */ 118149bd79ccSDebojyoti Ghosh nz = 0; 118249bd79ccSDebojyoti Ghosh if (S) nz += q; 118349bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q); 118449bd79ccSDebojyoti Ghosh 118549bd79ccSDebojyoti Ghosh if (cols || values) { 11869566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 118738822f9dSRichard Tran Mills for (i = 0; i < q; i++) { 118838822f9dSRichard Tran Mills /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 118938822f9dSRichard Tran Mills v[i] = 0.0; 119038822f9dSRichard Tran Mills } 119149bd79ccSDebojyoti Ghosh if (b->isTI) { 119249bd79ccSDebojyoti Ghosh for (i = 0; i < nzaij; i++) { 119349bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 119449bd79ccSDebojyoti Ghosh idx[i * q + j] = colsaij[i] * q + j; 119549bd79ccSDebojyoti Ghosh v[i * q + j] = (j == s ? vaij[i] : 0); 119649bd79ccSDebojyoti Ghosh } 119749bd79ccSDebojyoti Ghosh } 119849bd79ccSDebojyoti Ghosh } else if (T) { 119949bd79ccSDebojyoti Ghosh for (i = 0; i < nzaij; i++) { 120049bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 120149bd79ccSDebojyoti Ghosh idx[i * q + j] = colsaij[i] * q + j; 120249bd79ccSDebojyoti Ghosh v[i * q + j] = vaij[i] * T[s + j * p]; 120349bd79ccSDebojyoti Ghosh } 120449bd79ccSDebojyoti Ghosh } 120549bd79ccSDebojyoti Ghosh } 120649bd79ccSDebojyoti Ghosh if (S) { 120749bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 120849bd79ccSDebojyoti Ghosh idx[c * q + j] = r * q + j; 120949bd79ccSDebojyoti Ghosh v[c * q + j] += S[s + j * p]; 121049bd79ccSDebojyoti Ghosh } 121149bd79ccSDebojyoti Ghosh } 121249bd79ccSDebojyoti Ghosh } 121349bd79ccSDebojyoti Ghosh 121449bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 121549bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 121649bd79ccSDebojyoti Ghosh if (values) *values = v; 121749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 121849bd79ccSDebojyoti Ghosh } 121949bd79ccSDebojyoti Ghosh 12209371c9d4SSatish Balay PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 122149bd79ccSDebojyoti Ghosh PetscFunctionBegin; 1222cb4a9cd9SHong Zhang if (nz) *nz = 0; 12239566063dSJacob Faibussowitsch PetscCall(PetscFree2(*idx, *v)); 122449bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 122549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 122649bd79ccSDebojyoti Ghosh } 122749bd79ccSDebojyoti Ghosh 12289371c9d4SSatish Balay PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) { 122949bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 123049bd79ccSDebojyoti Ghosh Mat AIJ = b->A; 1231fc64b2cfSRichard Tran Mills PetscBool diag = PETSC_FALSE; 1232761d359dSRichard Tran Mills Mat MatAIJ, MatOAIJ; 123349bd79ccSDebojyoti Ghosh const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray; 1234389eba51SJed Brown PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow; 123549bd79ccSDebojyoti Ghosh PetscScalar *v, *vals, *ovals, *S = b->S, *T = b->T; 123649bd79ccSDebojyoti Ghosh 123749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 12389566063dSJacob Faibussowitsch PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1239761d359dSRichard Tran Mills MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 1240761d359dSRichard Tran Mills MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 124128b400f6SJacob Faibussowitsch PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 124249bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 1243aed4548fSBarry Smith PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 124449bd79ccSDebojyoti Ghosh lrow = row - rstart; 124549bd79ccSDebojyoti Ghosh 124649bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 124749bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 124849bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 124949bd79ccSDebojyoti Ghosh if (values) *values = NULL; 125049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 125149bd79ccSDebojyoti Ghosh } 125249bd79ccSDebojyoti Ghosh 125349bd79ccSDebojyoti Ghosh r = lrow / p; 125449bd79ccSDebojyoti Ghosh s = lrow % p; 125549bd79ccSDebojyoti Ghosh 125649bd79ccSDebojyoti Ghosh if (T || b->isTI) { 12579566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray)); 12589566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals)); 12599566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals)); 126049bd79ccSDebojyoti Ghosh 126149bd79ccSDebojyoti Ghosh c = ncolsaij + ncolsoaij; 126249bd79ccSDebojyoti Ghosh for (i = 0; i < ncolsaij; i++) { 126349bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 126449bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 126549bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 126649bd79ccSDebojyoti Ghosh c = i; 126749bd79ccSDebojyoti Ghosh } 126849bd79ccSDebojyoti Ghosh } 126949bd79ccSDebojyoti Ghosh } else c = 0; 127049bd79ccSDebojyoti Ghosh 127149bd79ccSDebojyoti Ghosh /* calculate size of row */ 127249bd79ccSDebojyoti Ghosh nz = 0; 127349bd79ccSDebojyoti Ghosh if (S) nz += q; 127449bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q); 127549bd79ccSDebojyoti Ghosh 127649bd79ccSDebojyoti Ghosh if (cols || values) { 12779566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1278a437a796SRichard Tran Mills for (i = 0; i < q; i++) { 1279a437a796SRichard Tran Mills /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1280a437a796SRichard Tran Mills v[i] = 0.0; 1281a437a796SRichard Tran Mills } 128249bd79ccSDebojyoti Ghosh if (b->isTI) { 128349bd79ccSDebojyoti Ghosh for (i = 0; i < ncolsaij; i++) { 128449bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 128549bd79ccSDebojyoti Ghosh idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 128649bd79ccSDebojyoti Ghosh v[i * q + j] = (j == s ? vals[i] : 0.0); 128749bd79ccSDebojyoti Ghosh } 128849bd79ccSDebojyoti Ghosh } 128949bd79ccSDebojyoti Ghosh for (i = 0; i < ncolsoaij; i++) { 129049bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 129149bd79ccSDebojyoti Ghosh idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 129249bd79ccSDebojyoti Ghosh v[(i + ncolsaij) * q + j] = (j == s ? ovals[i] : 0.0); 129349bd79ccSDebojyoti Ghosh } 129449bd79ccSDebojyoti Ghosh } 129549bd79ccSDebojyoti Ghosh } else if (T) { 129649bd79ccSDebojyoti Ghosh for (i = 0; i < ncolsaij; i++) { 129749bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 129849bd79ccSDebojyoti Ghosh idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 129949bd79ccSDebojyoti Ghosh v[i * q + j] = vals[i] * T[s + j * p]; 130049bd79ccSDebojyoti Ghosh } 130149bd79ccSDebojyoti Ghosh } 130249bd79ccSDebojyoti Ghosh for (i = 0; i < ncolsoaij; i++) { 130349bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 130449bd79ccSDebojyoti Ghosh idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 130549bd79ccSDebojyoti Ghosh v[(i + ncolsaij) * q + j] = ovals[i] * T[s + j * p]; 130649bd79ccSDebojyoti Ghosh } 130749bd79ccSDebojyoti Ghosh } 130849bd79ccSDebojyoti Ghosh } 130949bd79ccSDebojyoti Ghosh if (S) { 131049bd79ccSDebojyoti Ghosh for (j = 0; j < q; j++) { 131149bd79ccSDebojyoti Ghosh idx[c * q + j] = (r + rstart / p) * q + j; 131249bd79ccSDebojyoti Ghosh v[c * q + j] += S[s + j * p]; 131349bd79ccSDebojyoti Ghosh } 131449bd79ccSDebojyoti Ghosh } 131549bd79ccSDebojyoti Ghosh } 131649bd79ccSDebojyoti Ghosh 131749bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 131849bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 131949bd79ccSDebojyoti Ghosh if (values) *values = v; 132049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 132149bd79ccSDebojyoti Ghosh } 132249bd79ccSDebojyoti Ghosh 13239371c9d4SSatish Balay PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 132449bd79ccSDebojyoti Ghosh PetscFunctionBegin; 13259566063dSJacob Faibussowitsch PetscCall(PetscFree2(*idx, *v)); 132649bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 132749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 132849bd79ccSDebojyoti Ghosh } 132949bd79ccSDebojyoti Ghosh 13309371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) { 133149bd79ccSDebojyoti Ghosh Mat A; 133249bd79ccSDebojyoti Ghosh 133349bd79ccSDebojyoti Ghosh PetscFunctionBegin; 13349566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 13359566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 13369566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 133749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 133849bd79ccSDebojyoti Ghosh } 133949bd79ccSDebojyoti Ghosh 134049bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */ 134149bd79ccSDebojyoti Ghosh /*@C 134211a5261eSBarry Smith MatCreateKAIJ - Creates a matrix of type `MATKAIJ` to be used for matrices of the following form: 134349bd79ccSDebojyoti Ghosh 134449bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 134549bd79ccSDebojyoti Ghosh 134649bd79ccSDebojyoti Ghosh where 134749bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 134849bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 134911a5261eSBarry Smith A is an `MATAIJ` (n \times n) matrix 135049bd79ccSDebojyoti Ghosh I is the identity matrix 135149bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 135249bd79ccSDebojyoti Ghosh 135311a5261eSBarry Smith S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format. 135449bd79ccSDebojyoti Ghosh 135549bd79ccSDebojyoti Ghosh Collective 135649bd79ccSDebojyoti Ghosh 135749bd79ccSDebojyoti Ghosh Input Parameters: 135811a5261eSBarry Smith + A - the `MATAIJ` matrix 135949bd79ccSDebojyoti Ghosh . p - number of rows in S and T 1360d3b90ce1SRichard Tran Mills . q - number of columns in S and T 136111a5261eSBarry Smith . S - the S matrix (can be NULL), stored as a `PetscScalar` array (column-major) 136211a5261eSBarry Smith - T - the T matrix (can be NULL), stored as a `PetscScalar` array (column-major) 136349bd79ccSDebojyoti Ghosh 136449bd79ccSDebojyoti Ghosh Output Parameter: 136511a5261eSBarry Smith . kaij - the new `MATKAIJ` matrix 136649bd79ccSDebojyoti Ghosh 1367d3b90ce1SRichard Tran Mills Notes: 136811a5261eSBarry Smith This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 136949bd79ccSDebojyoti Ghosh 137011a5261eSBarry Smith Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 137111a5261eSBarry Smith 137211a5261eSBarry Smith Developer Note: 137311a5261eSBarry Smith In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state 1374761d359dSRichard Tran Mills of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily 1375761d359dSRichard Tran Mills rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added, 1376761d359dSRichard Tran Mills routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine). 1377761d359dSRichard Tran Mills 137849bd79ccSDebojyoti Ghosh Level: advanced 137949bd79ccSDebojyoti Ghosh 1380db781477SPatrick Sanan .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ` 138149bd79ccSDebojyoti Ghosh @*/ 13829371c9d4SSatish Balay PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij) { 138349bd79ccSDebojyoti Ghosh PetscFunctionBegin; 13849566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij)); 13859566063dSJacob Faibussowitsch PetscCall(MatSetType(*kaij, MATKAIJ)); 13869566063dSJacob Faibussowitsch PetscCall(MatKAIJSetAIJ(*kaij, A)); 13879566063dSJacob Faibussowitsch PetscCall(MatKAIJSetS(*kaij, p, q, S)); 13889566063dSJacob Faibussowitsch PetscCall(MatKAIJSetT(*kaij, p, q, T)); 13899566063dSJacob Faibussowitsch PetscCall(MatSetUp(*kaij)); 13900567c835SRichard Tran Mills PetscFunctionReturn(0); 13910567c835SRichard Tran Mills } 139249bd79ccSDebojyoti Ghosh 13930567c835SRichard Tran Mills /*MC 13945881e567SRichard Tran Mills MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 13955881e567SRichard Tran Mills [I \otimes S + A \otimes T], 13960567c835SRichard Tran Mills where 13975881e567SRichard Tran Mills S is a dense (p \times q) matrix, 13985881e567SRichard Tran Mills T is a dense (p \times q) matrix, 13995881e567SRichard Tran Mills A is an AIJ (n \times n) matrix, 14005881e567SRichard Tran Mills and I is the identity matrix. 14015881e567SRichard Tran Mills The resulting matrix is (np \times nq). 14020567c835SRichard Tran Mills 140311a5261eSBarry Smith S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format. 14040567c835SRichard Tran Mills 140511a5261eSBarry Smith Note: 14065881e567SRichard Tran Mills A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 14075881e567SRichard Tran Mills where x and b are column vectors containing the row-major representations of X and B. 14085881e567SRichard Tran Mills 14090567c835SRichard Tran Mills Level: advanced 14100567c835SRichard Tran Mills 1411db781477SPatrick Sanan .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()` 14120567c835SRichard Tran Mills M*/ 14130567c835SRichard Tran Mills 14149371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) { 14150567c835SRichard Tran Mills Mat_MPIKAIJ *b; 14160567c835SRichard Tran Mills PetscMPIInt size; 14170567c835SRichard Tran Mills 14180567c835SRichard Tran Mills PetscFunctionBegin; 1419*4dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 14200567c835SRichard Tran Mills A->data = (void *)b; 14210567c835SRichard Tran Mills 14229566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 14230567c835SRichard Tran Mills 1424f4259b30SLisandro Dalcin b->w = NULL; 14259566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 14260567c835SRichard Tran Mills if (size == 1) { 14279566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ)); 14280567c835SRichard Tran Mills A->ops->destroy = MatDestroy_SeqKAIJ; 1429bb6fb833SRichard Tran Mills A->ops->mult = MatMult_SeqKAIJ; 1430bb6fb833SRichard Tran Mills A->ops->multadd = MatMultAdd_SeqKAIJ; 1431bb6fb833SRichard Tran Mills A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 14320567c835SRichard Tran Mills A->ops->getrow = MatGetRow_SeqKAIJ; 14330567c835SRichard Tran Mills A->ops->restorerow = MatRestoreRow_SeqKAIJ; 14340567c835SRichard Tran Mills A->ops->sor = MatSOR_SeqKAIJ; 14359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ)); 14360567c835SRichard Tran Mills } else { 14379566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ)); 14380567c835SRichard Tran Mills A->ops->destroy = MatDestroy_MPIKAIJ; 1439bb6fb833SRichard Tran Mills A->ops->mult = MatMult_MPIKAIJ; 1440bb6fb833SRichard Tran Mills A->ops->multadd = MatMultAdd_MPIKAIJ; 1441bb6fb833SRichard Tran Mills A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 14420567c835SRichard Tran Mills A->ops->getrow = MatGetRow_MPIKAIJ; 14430567c835SRichard Tran Mills A->ops->restorerow = MatRestoreRow_MPIKAIJ; 14449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ)); 14459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ)); 14460567c835SRichard Tran Mills } 144706d61a37SPierre Jolivet A->ops->setup = MatSetUp_KAIJ; 144806d61a37SPierre Jolivet A->ops->view = MatView_KAIJ; 14490567c835SRichard Tran Mills A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 145049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 145149bd79ccSDebojyoti Ghosh } 1452