149bd79ccSDebojyoti Ghosh 249bd79ccSDebojyoti Ghosh /* 349bd79ccSDebojyoti Ghosh Defines the basic matrix operations for the KAIJ matrix storage format. 449bd79ccSDebojyoti Ghosh This format is used to evaluate matrices of the form: 549bd79ccSDebojyoti Ghosh 649bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 749bd79ccSDebojyoti Ghosh 849bd79ccSDebojyoti Ghosh where 949bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 1049bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 1149bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 1249bd79ccSDebojyoti Ghosh I is the identity matrix 1349bd79ccSDebojyoti Ghosh 1449bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 1549bd79ccSDebojyoti Ghosh 1649bd79ccSDebojyoti Ghosh We provide: 1749bd79ccSDebojyoti Ghosh MatMult() 1849bd79ccSDebojyoti Ghosh MatMultAdd() 1949bd79ccSDebojyoti Ghosh MatInvertBlockDiagonal() 2049bd79ccSDebojyoti Ghosh and 2149bd79ccSDebojyoti Ghosh MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 2249bd79ccSDebojyoti Ghosh 2349bd79ccSDebojyoti Ghosh This single directory handles both the sequential and parallel codes 2449bd79ccSDebojyoti Ghosh */ 2549bd79ccSDebojyoti Ghosh 2649bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 2749bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h> 2849bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h> 2949bd79ccSDebojyoti Ghosh 3049bd79ccSDebojyoti Ghosh /*@C 3149bd79ccSDebojyoti Ghosh MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 3249bd79ccSDebojyoti Ghosh 3349bd79ccSDebojyoti Ghosh Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 3449bd79ccSDebojyoti Ghosh 3549bd79ccSDebojyoti Ghosh Input Parameter: 3649bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 3749bd79ccSDebojyoti Ghosh 3849bd79ccSDebojyoti Ghosh Output Parameter: 3949bd79ccSDebojyoti Ghosh . B - the AIJ matrix 4049bd79ccSDebojyoti Ghosh 4149bd79ccSDebojyoti Ghosh Level: advanced 4249bd79ccSDebojyoti Ghosh 4349bd79ccSDebojyoti Ghosh Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 4449bd79ccSDebojyoti Ghosh 4549bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ() 4649bd79ccSDebojyoti Ghosh @*/ 4749bd79ccSDebojyoti Ghosh PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 4849bd79ccSDebojyoti Ghosh { 4949bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 5049bd79ccSDebojyoti Ghosh PetscBool ismpikaij,isseqkaij; 5149bd79ccSDebojyoti Ghosh 5249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 5349bd79ccSDebojyoti Ghosh ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 5449bd79ccSDebojyoti Ghosh ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 5549bd79ccSDebojyoti Ghosh if (ismpikaij) { 5649bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 5749bd79ccSDebojyoti Ghosh 5849bd79ccSDebojyoti Ghosh *B = b->A; 5949bd79ccSDebojyoti Ghosh } else if (isseqkaij) { 6049bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 6149bd79ccSDebojyoti Ghosh 6249bd79ccSDebojyoti Ghosh *B = b->AIJ; 63b04351cbSRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 6449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 6549bd79ccSDebojyoti Ghosh } 6649bd79ccSDebojyoti Ghosh 6749bd79ccSDebojyoti Ghosh /*@C 6849bd79ccSDebojyoti Ghosh MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 6949bd79ccSDebojyoti Ghosh 700567c835SRichard Tran Mills Not Collective; the entire S is stored and returned independently on all processes. 7149bd79ccSDebojyoti Ghosh 7249bd79ccSDebojyoti Ghosh Input Parameter: 7349bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 7449bd79ccSDebojyoti Ghosh 75a5b5c723SRichard Tran Mills Output Parameters: 76a5b5c723SRichard Tran Mills + m - the number of rows in S 77a5b5c723SRichard Tran Mills . n - the number of columns in S 78a5b5c723SRichard Tran Mills - S - the S matrix, in form of a scalar array in column-major format 7949bd79ccSDebojyoti Ghosh 80a5b5c723SRichard Tran Mills Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 8131a97b9aSRichard Tran Mills 8249bd79ccSDebojyoti Ghosh Level: advanced 8349bd79ccSDebojyoti Ghosh 8431a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes() 8549bd79ccSDebojyoti Ghosh @*/ 86a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 8749bd79ccSDebojyoti Ghosh { 8849bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 8949bd79ccSDebojyoti Ghosh PetscFunctionBegin; 90a5b5c723SRichard Tran Mills if (m) *m = b->p; 91a5b5c723SRichard Tran Mills if (n) *n = b->q; 92a5b5c723SRichard Tran Mills if (S) *S = b->S; 93a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 94a5b5c723SRichard Tran Mills } 95a5b5c723SRichard Tran Mills 96a5b5c723SRichard Tran Mills /*@C 97a5b5c723SRichard Tran Mills MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98a5b5c723SRichard Tran Mills 99a5b5c723SRichard Tran Mills Not Collective; the entire S is stored and returned independently on all processes. 100a5b5c723SRichard Tran Mills 101a5b5c723SRichard Tran Mills Input Parameter: 102a5b5c723SRichard Tran Mills . A - the KAIJ matrix 103a5b5c723SRichard Tran Mills 104a5b5c723SRichard Tran Mills Output Parameters: 105a5b5c723SRichard Tran Mills + m - the number of rows in S 106a5b5c723SRichard Tran Mills . n - the number of columns in S 107a5b5c723SRichard Tran Mills - S - the S matrix, in form of a scalar array in column-major format 108a5b5c723SRichard Tran Mills 109a5b5c723SRichard Tran Mills Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110a5b5c723SRichard Tran Mills 111a5b5c723SRichard Tran Mills Level: advanced 112a5b5c723SRichard Tran Mills 113a5b5c723SRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114a5b5c723SRichard Tran Mills @*/ 115a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116a5b5c723SRichard Tran Mills { 117a5b5c723SRichard Tran Mills Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118a5b5c723SRichard Tran Mills PetscFunctionBegin; 119a5b5c723SRichard Tran Mills if (m) *m = b->p; 120a5b5c723SRichard Tran Mills if (n) *n = b->q; 121a5b5c723SRichard Tran Mills if (S) *S = b->S; 122a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 123a5b5c723SRichard Tran Mills } 124a5b5c723SRichard Tran Mills 125a5b5c723SRichard Tran Mills /*@C 126a5b5c723SRichard Tran Mills MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127a5b5c723SRichard Tran Mills 128a5b5c723SRichard Tran Mills Not collective 129a5b5c723SRichard Tran Mills 130a5b5c723SRichard Tran Mills Input Parameter: 131a5b5c723SRichard Tran Mills . A - the KAIJ matrix 132a5b5c723SRichard Tran Mills 133a5b5c723SRichard Tran Mills Output Parameter: 134a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS() 135a5b5c723SRichard Tran Mills 136a5b5c723SRichard Tran Mills Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 138a5b5c723SRichard Tran Mills 139a5b5c723SRichard Tran Mills Level: advanced 140a5b5c723SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141a5b5c723SRichard Tran Mills @*/ 142a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143a5b5c723SRichard Tran Mills { 14466f58c76SRichard Tran Mills PetscErrorCode ierr; 14566f58c76SRichard Tran Mills 146a5b5c723SRichard Tran Mills PetscFunctionBegin; 147a5b5c723SRichard Tran Mills if (S) *S = NULL; 14866f58c76SRichard Tran Mills ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 149a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 150a5b5c723SRichard Tran Mills } 151a5b5c723SRichard Tran Mills 152a5b5c723SRichard Tran Mills /*@C 153a5b5c723SRichard Tran Mills MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 154a5b5c723SRichard Tran Mills 155a5b5c723SRichard Tran Mills Not collective 156a5b5c723SRichard Tran Mills 157a5b5c723SRichard Tran Mills Input Parameter: 158a5b5c723SRichard Tran Mills . A - the KAIJ matrix 159a5b5c723SRichard Tran Mills 160a5b5c723SRichard Tran Mills Output Parameter: 161a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS() 162a5b5c723SRichard Tran Mills 163a5b5c723SRichard Tran Mills Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 164a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 165a5b5c723SRichard Tran Mills 166a5b5c723SRichard Tran Mills Level: advanced 167a5b5c723SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 168a5b5c723SRichard Tran Mills @*/ 169a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 170a5b5c723SRichard Tran Mills { 171a5b5c723SRichard Tran Mills PetscFunctionBegin; 172a5b5c723SRichard Tran Mills if (S) *S = NULL; 17349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 17449bd79ccSDebojyoti Ghosh } 17549bd79ccSDebojyoti Ghosh 17649bd79ccSDebojyoti Ghosh /*@C 17731a97b9aSRichard Tran Mills MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 17849bd79ccSDebojyoti Ghosh 1790567c835SRichard Tran Mills Not Collective; the entire T is stored and returned independently on all processes 18049bd79ccSDebojyoti Ghosh 18149bd79ccSDebojyoti Ghosh Input Parameter: 18249bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 18349bd79ccSDebojyoti Ghosh 18449bd79ccSDebojyoti Ghosh Output Parameter: 185a5b5c723SRichard Tran Mills + m - the number of rows in T 186a5b5c723SRichard Tran Mills . n - the number of columns in T 187a5b5c723SRichard Tran Mills - T - the T matrix, in form of a scalar array in column-major format 18849bd79ccSDebojyoti Ghosh 189a5b5c723SRichard Tran Mills Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 19031a97b9aSRichard Tran Mills 19149bd79ccSDebojyoti Ghosh Level: advanced 19249bd79ccSDebojyoti Ghosh 19331a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes() 19449bd79ccSDebojyoti Ghosh @*/ 195a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 19649bd79ccSDebojyoti Ghosh { 19749bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 19849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 199a5b5c723SRichard Tran Mills if (m) *m = b->p; 200a5b5c723SRichard Tran Mills if (n) *n = b->q; 201a5b5c723SRichard Tran Mills if (T) *T = b->T; 202a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 203a5b5c723SRichard Tran Mills } 204a5b5c723SRichard Tran Mills 205a5b5c723SRichard Tran Mills /*@C 206a5b5c723SRichard Tran Mills MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 207a5b5c723SRichard Tran Mills 208a5b5c723SRichard Tran Mills Not Collective; the entire T is stored and returned independently on all processes 209a5b5c723SRichard Tran Mills 210a5b5c723SRichard Tran Mills Input Parameter: 211a5b5c723SRichard Tran Mills . A - the KAIJ matrix 212a5b5c723SRichard Tran Mills 213a5b5c723SRichard Tran Mills Output Parameter: 214a5b5c723SRichard Tran Mills + m - the number of rows in T 215a5b5c723SRichard Tran Mills . n - the number of columns in T 216a5b5c723SRichard Tran Mills - T - the T matrix, in form of a scalar array in column-major format 217a5b5c723SRichard Tran Mills 218a5b5c723SRichard Tran Mills Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 219a5b5c723SRichard Tran Mills 220a5b5c723SRichard Tran Mills Level: advanced 221a5b5c723SRichard Tran Mills 222a5b5c723SRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes() 223a5b5c723SRichard Tran Mills @*/ 224a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 225a5b5c723SRichard Tran Mills { 226a5b5c723SRichard Tran Mills Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 227a5b5c723SRichard Tran Mills PetscFunctionBegin; 228a5b5c723SRichard Tran Mills if (m) *m = b->p; 229a5b5c723SRichard Tran Mills if (n) *n = b->q; 230a5b5c723SRichard Tran Mills if (T) *T = b->T; 231a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 232a5b5c723SRichard Tran Mills } 233a5b5c723SRichard Tran Mills 234a5b5c723SRichard Tran Mills /*@C 235a5b5c723SRichard Tran Mills MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 236a5b5c723SRichard Tran Mills 237a5b5c723SRichard Tran Mills Not collective 238a5b5c723SRichard Tran Mills 239a5b5c723SRichard Tran Mills Input Parameter: 240a5b5c723SRichard Tran Mills . A - the KAIJ matrix 241a5b5c723SRichard Tran Mills 242a5b5c723SRichard Tran Mills Output Parameter: 243a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS() 244a5b5c723SRichard Tran Mills 245a5b5c723SRichard Tran Mills Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 246a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 247a5b5c723SRichard Tran Mills 248a5b5c723SRichard Tran Mills Level: advanced 249a5b5c723SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 250a5b5c723SRichard Tran Mills @*/ 251a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 252a5b5c723SRichard Tran Mills { 25366f58c76SRichard Tran Mills PetscErrorCode ierr; 25466f58c76SRichard Tran Mills 255a5b5c723SRichard Tran Mills PetscFunctionBegin; 256a5b5c723SRichard Tran Mills if (T) *T = NULL; 25766f58c76SRichard Tran Mills ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 258a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 259a5b5c723SRichard Tran Mills } 260a5b5c723SRichard Tran Mills 261a5b5c723SRichard Tran Mills /*@C 262a5b5c723SRichard Tran Mills MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 263a5b5c723SRichard Tran Mills 264a5b5c723SRichard Tran Mills Not collective 265a5b5c723SRichard Tran Mills 266a5b5c723SRichard Tran Mills Input Parameter: 267a5b5c723SRichard Tran Mills . A - the KAIJ matrix 268a5b5c723SRichard Tran Mills 269a5b5c723SRichard Tran Mills Output Parameter: 270a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS() 271a5b5c723SRichard Tran Mills 272a5b5c723SRichard Tran Mills Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 273a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 274a5b5c723SRichard Tran Mills 275a5b5c723SRichard Tran Mills Level: advanced 276a5b5c723SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 277a5b5c723SRichard Tran Mills @*/ 278a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 279a5b5c723SRichard Tran Mills { 280a5b5c723SRichard Tran Mills PetscFunctionBegin; 281a5b5c723SRichard Tran Mills if (T) *T = NULL; 28249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 28349bd79ccSDebojyoti Ghosh } 28449bd79ccSDebojyoti Ghosh 2850567c835SRichard Tran Mills /*@ 2860567c835SRichard Tran Mills MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 2870567c835SRichard Tran Mills 2880567c835SRichard Tran Mills Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 2890567c835SRichard Tran Mills 2900567c835SRichard Tran Mills Input Parameters: 2910567c835SRichard Tran Mills + A - the KAIJ matrix 2920567c835SRichard Tran Mills - B - the AIJ matrix 2930567c835SRichard Tran Mills 29415b9d025SRichard Tran Mills Notes: 29515b9d025SRichard Tran Mills This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 29615b9d025SRichard Tran Mills Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 29715b9d025SRichard Tran Mills 2980567c835SRichard Tran Mills Level: advanced 2990567c835SRichard Tran Mills 3000567c835SRichard Tran Mills .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 3010567c835SRichard Tran Mills @*/ 3020567c835SRichard Tran Mills PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 3030567c835SRichard Tran Mills { 3040567c835SRichard Tran Mills PetscErrorCode ierr; 3050567c835SRichard Tran Mills PetscMPIInt size; 3060567c835SRichard Tran Mills 3070567c835SRichard Tran Mills PetscFunctionBegin; 3080567c835SRichard Tran Mills ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3090567c835SRichard Tran Mills if (size == 1) { 3100567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 3110567c835SRichard Tran Mills a->AIJ = B; 3120567c835SRichard Tran Mills } else { 3130567c835SRichard Tran Mills Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 3140567c835SRichard Tran Mills a->A = B; 3150567c835SRichard Tran Mills } 31615b9d025SRichard Tran Mills ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 3170567c835SRichard Tran Mills PetscFunctionReturn(0); 3180567c835SRichard Tran Mills } 3190567c835SRichard Tran Mills 3200567c835SRichard Tran Mills /*@C 3210567c835SRichard Tran Mills MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 3220567c835SRichard Tran Mills 3230567c835SRichard Tran Mills Logically Collective; the entire S is stored independently on all processes. 3240567c835SRichard Tran Mills 3250567c835SRichard Tran Mills Input Parameters: 3260567c835SRichard Tran Mills + A - the KAIJ matrix 3270567c835SRichard Tran Mills . p - the number of rows in S 3280567c835SRichard Tran Mills . q - the number of columns in S 3290567c835SRichard Tran Mills - S - the S matrix, in form of a scalar array in column-major format 3300567c835SRichard Tran Mills 3310567c835SRichard Tran Mills Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 33288f48298SRichard Tran Mills The S matrix is copied, so the user can destroy this array. 3330567c835SRichard Tran Mills 3340567c835SRichard Tran Mills Level: Advanced 3350567c835SRichard Tran Mills 3360567c835SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 3370567c835SRichard Tran Mills @*/ 3380567c835SRichard Tran Mills PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 3390567c835SRichard Tran Mills { 3400567c835SRichard Tran Mills PetscErrorCode ierr; 3410567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 3420567c835SRichard Tran Mills 3430567c835SRichard Tran Mills PetscFunctionBegin; 3440567c835SRichard Tran Mills ierr = PetscFree(a->S);CHKERRQ(ierr); 3450567c835SRichard Tran Mills if (S) { 346a84f8069SRichard Tran Mills ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 3470567c835SRichard Tran Mills ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 3480567c835SRichard Tran Mills } else a->S = NULL; 3490567c835SRichard Tran Mills 3500567c835SRichard Tran Mills a->p = p; 3510567c835SRichard Tran Mills a->q = q; 3520567c835SRichard Tran Mills PetscFunctionReturn(0); 3530567c835SRichard Tran Mills } 3540567c835SRichard Tran Mills 3550567c835SRichard Tran Mills /*@C 3560567c835SRichard Tran Mills MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 3570567c835SRichard Tran Mills 3580567c835SRichard Tran Mills Logically Collective; the entire T is stored independently on all processes. 3590567c835SRichard Tran Mills 3600567c835SRichard Tran Mills Input Parameters: 3610567c835SRichard Tran Mills + A - the KAIJ matrix 3620567c835SRichard Tran Mills . p - the number of rows in S 3630567c835SRichard Tran Mills . q - the number of columns in S 3640567c835SRichard Tran Mills - T - the T matrix, in form of a scalar array in column-major format 3650567c835SRichard Tran Mills 3660567c835SRichard Tran Mills Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 36788f48298SRichard Tran Mills The T matrix is copied, so the user can destroy this array. 3680567c835SRichard Tran Mills 3690567c835SRichard Tran Mills Level: Advanced 3700567c835SRichard Tran Mills 3710567c835SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 3720567c835SRichard Tran Mills @*/ 3730567c835SRichard Tran Mills PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 3740567c835SRichard Tran Mills { 3750567c835SRichard Tran Mills PetscErrorCode ierr; 3760567c835SRichard Tran Mills PetscInt i,j; 3770567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 3780567c835SRichard Tran Mills PetscBool isTI = PETSC_FALSE; 3790567c835SRichard Tran Mills 3800567c835SRichard Tran Mills PetscFunctionBegin; 3810567c835SRichard Tran Mills /* check if T is an identity matrix */ 3820567c835SRichard Tran Mills if (T && (p == q)) { 3830567c835SRichard Tran Mills isTI = PETSC_TRUE; 3840567c835SRichard Tran Mills for (i=0; i<p; i++) { 3850567c835SRichard Tran Mills for (j=0; j<q; j++) { 3860567c835SRichard Tran Mills if (i == j) { 3870567c835SRichard Tran Mills /* diagonal term must be 1 */ 3880567c835SRichard Tran Mills if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 3890567c835SRichard Tran Mills } else { 3900567c835SRichard Tran Mills /* off-diagonal term must be 0 */ 3910567c835SRichard Tran Mills if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 3920567c835SRichard Tran Mills } 3930567c835SRichard Tran Mills } 3940567c835SRichard Tran Mills } 3950567c835SRichard Tran Mills } 3960567c835SRichard Tran Mills a->isTI = isTI; 3970567c835SRichard Tran Mills 3980567c835SRichard Tran Mills ierr = PetscFree(a->T);CHKERRQ(ierr); 3990567c835SRichard Tran Mills if (T && (!isTI)) { 400a84f8069SRichard Tran Mills ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 4010567c835SRichard Tran Mills ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 40250d19d74SRichard Tran Mills } else a->T = NULL; 4030567c835SRichard Tran Mills 4040567c835SRichard Tran Mills a->p = p; 4050567c835SRichard Tran Mills a->q = q; 4060567c835SRichard Tran Mills PetscFunctionReturn(0); 4070567c835SRichard Tran Mills } 4080567c835SRichard Tran Mills 40949bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 41049bd79ccSDebojyoti Ghosh { 41149bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 41249bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 41349bd79ccSDebojyoti Ghosh 41449bd79ccSDebojyoti Ghosh PetscFunctionBegin; 41549bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 416a84f8069SRichard Tran Mills ierr = PetscFree(b->S);CHKERRQ(ierr); 417a84f8069SRichard Tran Mills ierr = PetscFree(b->T);CHKERRQ(ierr); 418a84f8069SRichard Tran Mills ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 41949bd79ccSDebojyoti Ghosh ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 42049bd79ccSDebojyoti Ghosh ierr = PetscFree(A->data);CHKERRQ(ierr); 42149bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 42249bd79ccSDebojyoti Ghosh } 42349bd79ccSDebojyoti Ghosh 42449bd79ccSDebojyoti Ghosh PetscErrorCode MatSetUp_KAIJ(Mat A) 42549bd79ccSDebojyoti Ghosh { 4260567c835SRichard Tran Mills PetscErrorCode ierr; 4270567c835SRichard Tran Mills PetscInt n; 4280567c835SRichard Tran Mills PetscMPIInt size; 4290567c835SRichard Tran Mills Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 4300567c835SRichard Tran Mills 43149bd79ccSDebojyoti Ghosh PetscFunctionBegin; 4320567c835SRichard Tran Mills ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4330567c835SRichard Tran Mills if (size == 1) { 4340567c835SRichard Tran Mills ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 4350567c835SRichard Tran Mills ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 4360567c835SRichard Tran Mills ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 4370567c835SRichard Tran Mills ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 4380567c835SRichard Tran Mills ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 4390567c835SRichard Tran Mills } else { 4400567c835SRichard Tran Mills Mat_MPIKAIJ *a; 4410567c835SRichard Tran Mills Mat_MPIAIJ *mpiaij; 4420567c835SRichard Tran Mills IS from,to; 4430567c835SRichard Tran Mills Vec gvec; 4440567c835SRichard Tran Mills PetscScalar *T; 4450567c835SRichard Tran Mills PetscInt i,j; 4460567c835SRichard Tran Mills 4470567c835SRichard Tran Mills a = (Mat_MPIKAIJ*)A->data; 448d3f912faSRichard Tran Mills mpiaij = (Mat_MPIAIJ*)a->A->data; 4490567c835SRichard Tran Mills ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 4500567c835SRichard Tran Mills ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 4510567c835SRichard Tran Mills ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 4520567c835SRichard Tran Mills ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 4530567c835SRichard Tran Mills ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 4540567c835SRichard Tran Mills 4550567c835SRichard Tran Mills if (a->isTI) { 4560567c835SRichard Tran Mills /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 4570567c835SRichard Tran Mills * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 4580567c835SRichard Tran Mills * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 4590567c835SRichard Tran Mills * to pass in. */ 460a84f8069SRichard Tran Mills ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 4610567c835SRichard Tran Mills for (i=0; i<a->p; i++) { 4620567c835SRichard Tran Mills for (j=0; j<a->q; j++) { 4630567c835SRichard Tran Mills if (i==j) T[i+j*a->p] = 1.0; 4640567c835SRichard Tran Mills else T[i+j*a->p] = 0.0; 4650567c835SRichard Tran Mills } 4660567c835SRichard Tran Mills } 4670567c835SRichard Tran Mills } else T = a->T; 4680567c835SRichard Tran Mills ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 4690567c835SRichard Tran Mills ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 470c138d2acSRichard Tran Mills if (a->isTI) { 4710567c835SRichard Tran Mills ierr = PetscFree(T);CHKERRQ(ierr); 472c138d2acSRichard Tran Mills } 4730567c835SRichard Tran Mills 4740567c835SRichard Tran Mills ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 4750567c835SRichard Tran Mills ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 4760567c835SRichard Tran Mills ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 4770567c835SRichard Tran Mills ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 4780567c835SRichard Tran Mills ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 4790567c835SRichard Tran Mills 4800567c835SRichard Tran Mills /* create two temporary Index sets for build scatter gather */ 4810567c835SRichard Tran Mills ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 4820567c835SRichard Tran Mills ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 4830567c835SRichard Tran Mills 4840567c835SRichard Tran Mills /* create temporary global vector to generate scatter context */ 4850567c835SRichard Tran Mills ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 4860567c835SRichard Tran Mills 4870567c835SRichard Tran Mills /* generate the scatter context */ 4884589b4e5SRichard Tran Mills ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 4890567c835SRichard Tran Mills 4900567c835SRichard Tran Mills ierr = ISDestroy(&from);CHKERRQ(ierr); 4910567c835SRichard Tran Mills ierr = ISDestroy(&to);CHKERRQ(ierr); 4920567c835SRichard Tran Mills ierr = VecDestroy(&gvec);CHKERRQ(ierr); 4930567c835SRichard Tran Mills } 4940567c835SRichard Tran Mills 4950567c835SRichard Tran Mills A->assembled = PETSC_TRUE; 49649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 49749bd79ccSDebojyoti Ghosh } 49849bd79ccSDebojyoti Ghosh 499e6985dafSRichard Tran Mills PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer) 50049bd79ccSDebojyoti Ghosh { 501e6985dafSRichard Tran Mills PetscViewerFormat format; 502e6985dafSRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 50349bd79ccSDebojyoti Ghosh Mat B; 504e6985dafSRichard Tran Mills PetscInt i; 505e6985dafSRichard Tran Mills PetscErrorCode ierr; 506e6985dafSRichard Tran Mills PetscBool ismpikaij; 50749bd79ccSDebojyoti Ghosh 50849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 509e6985dafSRichard Tran Mills ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 510e6985dafSRichard Tran Mills ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 511e6985dafSRichard Tran Mills if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 512e6985dafSRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr); 513e6985dafSRichard Tran Mills 514e6985dafSRichard Tran Mills /* Print appropriate details for S. */ 515e6985dafSRichard Tran Mills if (!a->S) { 5162ae760e3SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr); 517e6985dafSRichard Tran Mills } else if (format == PETSC_VIEWER_ASCII_IMPL) { 518e6985dafSRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr); 519e6985dafSRichard Tran Mills for (i=0; i<(a->p * a->q); i++) { 520e6985dafSRichard Tran Mills #if defined(PETSC_USE_COMPLEX) 521e6985dafSRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr); 522e6985dafSRichard Tran Mills #else 523e6985dafSRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr); 524e6985dafSRichard Tran Mills #endif 525e6985dafSRichard Tran Mills } 526e6985dafSRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 52749bd79ccSDebojyoti Ghosh } 52849bd79ccSDebojyoti Ghosh 529e6985dafSRichard Tran Mills /* Print appropriate details for T. */ 530e6985dafSRichard Tran Mills if (a->isTI) { 5312ae760e3SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr); 532e6985dafSRichard Tran Mills } else if (!a->T) { 5332ae760e3SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr); 534e6985dafSRichard Tran Mills } else if (format == PETSC_VIEWER_ASCII_IMPL) { 535e6985dafSRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr); 536e6985dafSRichard Tran Mills for (i=0; i<(a->p * a->q); i++) { 537e6985dafSRichard Tran Mills #if defined(PETSC_USE_COMPLEX) 538e6985dafSRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr); 539e6985dafSRichard Tran Mills #else 540e6985dafSRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr); 541e6985dafSRichard Tran Mills #endif 542e6985dafSRichard Tran Mills } 543e6985dafSRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 544e6985dafSRichard Tran Mills } 54549bd79ccSDebojyoti Ghosh 546e6985dafSRichard Tran Mills /* Now print details for the AIJ matrix, using the AIJ viewer. */ 547e6985dafSRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr); 548e6985dafSRichard Tran Mills if (ismpikaij) { 549e6985dafSRichard Tran Mills Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 550e6985dafSRichard Tran Mills ierr = MatView(b->A,viewer);CHKERRQ(ierr); 551e6985dafSRichard Tran Mills } else { 552e6985dafSRichard Tran Mills ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr); 553e6985dafSRichard Tran Mills } 554e6985dafSRichard Tran Mills 555e6985dafSRichard Tran Mills } else { 556e6985dafSRichard Tran Mills /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 557e6985dafSRichard Tran Mills if (ismpikaij) { 55849bd79ccSDebojyoti Ghosh ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 559e6985dafSRichard Tran Mills } else { 560e6985dafSRichard Tran Mills ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 561e6985dafSRichard Tran Mills } 56249bd79ccSDebojyoti Ghosh ierr = MatView(B,viewer);CHKERRQ(ierr); 56349bd79ccSDebojyoti Ghosh ierr = MatDestroy(&B);CHKERRQ(ierr); 564e6985dafSRichard Tran Mills } 56549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 56649bd79ccSDebojyoti Ghosh } 56749bd79ccSDebojyoti Ghosh 56849bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 56949bd79ccSDebojyoti Ghosh { 57049bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 57149bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57249bd79ccSDebojyoti Ghosh 57349bd79ccSDebojyoti Ghosh PetscFunctionBegin; 57449bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 57549bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 57649bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->A);CHKERRQ(ierr); 57749bd79ccSDebojyoti Ghosh ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 57849bd79ccSDebojyoti Ghosh ierr = VecDestroy(&b->w);CHKERRQ(ierr); 579a84f8069SRichard Tran Mills ierr = PetscFree(b->S);CHKERRQ(ierr); 580a84f8069SRichard Tran Mills ierr = PetscFree(b->T);CHKERRQ(ierr); 581a84f8069SRichard Tran Mills ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 58249bd79ccSDebojyoti Ghosh ierr = PetscFree(A->data);CHKERRQ(ierr); 58349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 58449bd79ccSDebojyoti Ghosh } 58549bd79ccSDebojyoti Ghosh 58649bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/ 58749bd79ccSDebojyoti Ghosh 58849bd79ccSDebojyoti Ghosh /* zz = yy + Axx */ 589836168d5SRichard Tran Mills PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 59049bd79ccSDebojyoti Ghosh { 59149bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 59249bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 59349bd79ccSDebojyoti Ghosh const PetscScalar *s = b->S, *t = b->T; 59449bd79ccSDebojyoti Ghosh const PetscScalar *x,*v,*bx; 59549bd79ccSDebojyoti Ghosh PetscScalar *y,*sums; 59649bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 59749bd79ccSDebojyoti Ghosh const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 59849bd79ccSDebojyoti Ghosh PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 59949bd79ccSDebojyoti Ghosh 60049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 60149bd79ccSDebojyoti Ghosh if (!yy) { 60249bd79ccSDebojyoti Ghosh ierr = VecSet(zz,0.0);CHKERRQ(ierr); 60349bd79ccSDebojyoti Ghosh } else { 60449bd79ccSDebojyoti Ghosh ierr = VecCopy(yy,zz);CHKERRQ(ierr); 60549bd79ccSDebojyoti Ghosh } 60649bd79ccSDebojyoti Ghosh if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 60749bd79ccSDebojyoti Ghosh 60849bd79ccSDebojyoti Ghosh ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 60949bd79ccSDebojyoti Ghosh ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 61049bd79ccSDebojyoti Ghosh idx = a->j; 61149bd79ccSDebojyoti Ghosh v = a->a; 61249bd79ccSDebojyoti Ghosh ii = a->i; 61349bd79ccSDebojyoti Ghosh 61449bd79ccSDebojyoti Ghosh if (b->isTI) { 61549bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 61649bd79ccSDebojyoti Ghosh jrow = ii[i]; 61749bd79ccSDebojyoti Ghosh n = ii[i+1] - jrow; 61849bd79ccSDebojyoti Ghosh sums = y + p*i; 61949bd79ccSDebojyoti Ghosh for (j=0; j<n; j++) { 62049bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 62149bd79ccSDebojyoti Ghosh sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 62249bd79ccSDebojyoti Ghosh } 62349bd79ccSDebojyoti Ghosh } 62449bd79ccSDebojyoti Ghosh } 625234d9204SRichard Tran Mills ierr = PetscLogFlops((a->nz)*3*p);CHKERRQ(ierr); 62649bd79ccSDebojyoti Ghosh } else if (t) { 62749bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 62849bd79ccSDebojyoti Ghosh jrow = ii[i]; 62949bd79ccSDebojyoti Ghosh n = ii[i+1] - jrow; 63049bd79ccSDebojyoti Ghosh sums = y + p*i; 63149bd79ccSDebojyoti Ghosh for (j=0; j<n; j++) { 63249bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 63349bd79ccSDebojyoti Ghosh for (l=0; l<q; l++) { 63449bd79ccSDebojyoti Ghosh sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 63549bd79ccSDebojyoti Ghosh } 63649bd79ccSDebojyoti Ghosh } 63749bd79ccSDebojyoti Ghosh } 63849bd79ccSDebojyoti Ghosh } 639234d9204SRichard Tran Mills /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 640234d9204SRichard Tran Mills * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 641234d9204SRichard Tran Mills * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 642234d9204SRichard Tran Mills * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 643234d9204SRichard Tran Mills ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 64449bd79ccSDebojyoti Ghosh } 64549bd79ccSDebojyoti Ghosh if (s) { 64649bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 64749bd79ccSDebojyoti Ghosh sums = y + p*i; 64849bd79ccSDebojyoti Ghosh bx = x + q*i; 64949bd79ccSDebojyoti Ghosh if (i < b->AIJ->cmap->n) { 65049bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 65149bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 65249bd79ccSDebojyoti Ghosh sums[k] += s[k+j*p]*bx[j]; 65349bd79ccSDebojyoti Ghosh } 65449bd79ccSDebojyoti Ghosh } 65549bd79ccSDebojyoti Ghosh } 65649bd79ccSDebojyoti Ghosh } 657234d9204SRichard Tran Mills ierr = PetscLogFlops(m*2*p*q);CHKERRQ(ierr); 65849bd79ccSDebojyoti Ghosh } 65949bd79ccSDebojyoti Ghosh 66049bd79ccSDebojyoti Ghosh ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 66149bd79ccSDebojyoti Ghosh ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 66249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 66349bd79ccSDebojyoti Ghosh } 66449bd79ccSDebojyoti Ghosh 665bb6fb833SRichard Tran Mills PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 66649bd79ccSDebojyoti Ghosh { 66749bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 66849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 669836168d5SRichard Tran Mills ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 67049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 67149bd79ccSDebojyoti Ghosh } 67249bd79ccSDebojyoti Ghosh 67349bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h> 67449bd79ccSDebojyoti Ghosh 675bb6fb833SRichard Tran Mills PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 67649bd79ccSDebojyoti Ghosh { 67749bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 67849bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 67949bd79ccSDebojyoti Ghosh const PetscScalar *S = b->S; 68049bd79ccSDebojyoti Ghosh const PetscScalar *T = b->T; 68149bd79ccSDebojyoti Ghosh const PetscScalar *v = a->a; 68249bd79ccSDebojyoti Ghosh const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 68349bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 68449bd79ccSDebojyoti Ghosh PetscInt i,j,*v_pivots,dof,dof2; 68549bd79ccSDebojyoti Ghosh PetscScalar *diag,aval,*v_work; 68649bd79ccSDebojyoti Ghosh 68749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 68849bd79ccSDebojyoti Ghosh if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 68931a97b9aSRichard Tran Mills if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 69049bd79ccSDebojyoti Ghosh 69149bd79ccSDebojyoti Ghosh dof = p; 69249bd79ccSDebojyoti Ghosh dof2 = dof*dof; 69349bd79ccSDebojyoti Ghosh 69449bd79ccSDebojyoti Ghosh if (b->ibdiagvalid) { 69549bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 69649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 69749bd79ccSDebojyoti Ghosh } 69849bd79ccSDebojyoti Ghosh if (!b->ibdiag) { 699a84f8069SRichard Tran Mills ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 70049bd79ccSDebojyoti Ghosh ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 70149bd79ccSDebojyoti Ghosh } 70249bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 70349bd79ccSDebojyoti Ghosh diag = b->ibdiag; 70449bd79ccSDebojyoti Ghosh 70549bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 70649bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 70749bd79ccSDebojyoti Ghosh if (S) { 70849bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 70949bd79ccSDebojyoti Ghosh } else { 71049bd79ccSDebojyoti Ghosh ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 71149bd79ccSDebojyoti Ghosh } 71249bd79ccSDebojyoti Ghosh if (b->isTI) { 71349bd79ccSDebojyoti Ghosh aval = 0; 71449bd79ccSDebojyoti Ghosh for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 71549bd79ccSDebojyoti Ghosh for (j=0; j<dof; j++) diag[j+dof*j] += aval; 71649bd79ccSDebojyoti Ghosh } else if (T) { 71749bd79ccSDebojyoti Ghosh aval = 0; 71849bd79ccSDebojyoti Ghosh for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 71949bd79ccSDebojyoti Ghosh for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 72049bd79ccSDebojyoti Ghosh } 72149bd79ccSDebojyoti Ghosh ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 72249bd79ccSDebojyoti Ghosh diag += dof2; 72349bd79ccSDebojyoti Ghosh } 72449bd79ccSDebojyoti Ghosh ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 72549bd79ccSDebojyoti Ghosh 72649bd79ccSDebojyoti Ghosh b->ibdiagvalid = PETSC_TRUE; 72749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 72849bd79ccSDebojyoti Ghosh } 72949bd79ccSDebojyoti Ghosh 73049bd79ccSDebojyoti Ghosh static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 73149bd79ccSDebojyoti Ghosh { 73249bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 73349bd79ccSDebojyoti Ghosh 73449bd79ccSDebojyoti Ghosh PetscFunctionBegin; 73549bd79ccSDebojyoti Ghosh *B = kaij->AIJ; 73649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 73749bd79ccSDebojyoti Ghosh } 73849bd79ccSDebojyoti Ghosh 73949bd79ccSDebojyoti Ghosh PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 74049bd79ccSDebojyoti Ghosh { 74149bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 74249bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 74349bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 74449bd79ccSDebojyoti Ghosh const PetscScalar *aa = a->a, *T = kaij->T, *v; 74549bd79ccSDebojyoti Ghosh const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 74649bd79ccSDebojyoti Ghosh const PetscScalar *b, *xb, *idiag; 74749bd79ccSDebojyoti Ghosh PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 74849bd79ccSDebojyoti Ghosh PetscInt i, j, k, i2, bs, bs2, nz; 74949bd79ccSDebojyoti Ghosh 75049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 75149bd79ccSDebojyoti Ghosh its = its*lits; 75249bd79ccSDebojyoti Ghosh if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 75349bd79ccSDebojyoti Ghosh if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 7546a375485SRichard Tran Mills if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 7556a375485SRichard Tran Mills if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 7566a375485SRichard Tran Mills if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks"); 75749bd79ccSDebojyoti Ghosh else {bs = p; bs2 = bs*bs; } 75849bd79ccSDebojyoti Ghosh 75949bd79ccSDebojyoti Ghosh if (!m) PetscFunctionReturn(0); 76049bd79ccSDebojyoti Ghosh 761bb6fb833SRichard Tran Mills if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); } 76249bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 76349bd79ccSDebojyoti Ghosh diag = a->diag; 76449bd79ccSDebojyoti Ghosh 76549bd79ccSDebojyoti Ghosh if (!kaij->sor.setup) { 76649bd79ccSDebojyoti Ghosh ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 76749bd79ccSDebojyoti Ghosh kaij->sor.setup = PETSC_TRUE; 76849bd79ccSDebojyoti Ghosh } 76949bd79ccSDebojyoti Ghosh y = kaij->sor.y; 77049bd79ccSDebojyoti Ghosh w = kaij->sor.w; 77149bd79ccSDebojyoti Ghosh work = kaij->sor.work; 77249bd79ccSDebojyoti Ghosh t = kaij->sor.t; 77349bd79ccSDebojyoti Ghosh arr = kaij->sor.arr; 77449bd79ccSDebojyoti Ghosh 77549bd79ccSDebojyoti Ghosh ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 77649bd79ccSDebojyoti Ghosh ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 77749bd79ccSDebojyoti Ghosh 77849bd79ccSDebojyoti Ghosh if (flag & SOR_ZERO_INITIAL_GUESS) { 77949bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 78049bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 78149bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 78249bd79ccSDebojyoti Ghosh i2 = bs; 78349bd79ccSDebojyoti Ghosh idiag += bs2; 78449bd79ccSDebojyoti Ghosh for (i=1; i<m; i++) { 78549bd79ccSDebojyoti Ghosh v = aa + ai[i]; 78649bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 78749bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 78849bd79ccSDebojyoti Ghosh 78949bd79ccSDebojyoti Ghosh if (T) { /* b - T (Arow * x) */ 7902ae760e3SRichard Tran Mills ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 79149bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 79249bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 79349bd79ccSDebojyoti Ghosh } 79449bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 79549bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 79649bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 7979eb573c1SRichard Tran Mills ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 79849bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 79949bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 80049bd79ccSDebojyoti Ghosh } 80149bd79ccSDebojyoti Ghosh } else { 8029eb573c1SRichard Tran Mills ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 80349bd79ccSDebojyoti Ghosh } 80449bd79ccSDebojyoti Ghosh 80549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 80649bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 80749bd79ccSDebojyoti Ghosh 80849bd79ccSDebojyoti Ghosh idiag += bs2; 80949bd79ccSDebojyoti Ghosh i2 += bs; 81049bd79ccSDebojyoti Ghosh } 81149bd79ccSDebojyoti Ghosh /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 81249bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 81349bd79ccSDebojyoti Ghosh xb = t; 81449bd79ccSDebojyoti Ghosh } else xb = b; 81549bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 81649bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag+bs2*(m-1); 81749bd79ccSDebojyoti Ghosh i2 = bs * (m-1); 81849bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 81949bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 82049bd79ccSDebojyoti Ghosh i2 -= bs; 82149bd79ccSDebojyoti Ghosh idiag -= bs2; 82249bd79ccSDebojyoti Ghosh for (i=m-2; i>=0; i--) { 82349bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1 ; 82449bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 82549bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 82649bd79ccSDebojyoti Ghosh 82749bd79ccSDebojyoti Ghosh if (T) { /* FIXME: This branch untested */ 82849bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 82949bd79ccSDebojyoti Ghosh /* copy all rows of x that are needed into contiguous space */ 83049bd79ccSDebojyoti Ghosh workt = work; 83149bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 83249bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 83349bd79ccSDebojyoti Ghosh workt += bs; 83449bd79ccSDebojyoti Ghosh } 83549bd79ccSDebojyoti Ghosh arrt = arr; 83649bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 83749bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 83849bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 83949bd79ccSDebojyoti Ghosh arrt += bs2; 84049bd79ccSDebojyoti Ghosh } 84149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 84249bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 8439eb573c1SRichard Tran Mills ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 84449bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 84549bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 84649bd79ccSDebojyoti Ghosh } 84749bd79ccSDebojyoti Ghosh } 84849bd79ccSDebojyoti Ghosh 84949bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 85049bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 85149bd79ccSDebojyoti Ghosh 85249bd79ccSDebojyoti Ghosh idiag -= bs2; 85349bd79ccSDebojyoti Ghosh i2 -= bs; 85449bd79ccSDebojyoti Ghosh } 85549bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 85649bd79ccSDebojyoti Ghosh } 85749bd79ccSDebojyoti Ghosh its--; 85849bd79ccSDebojyoti Ghosh } 85949bd79ccSDebojyoti Ghosh while (its--) { /* FIXME: This branch not updated */ 86049bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 86149bd79ccSDebojyoti Ghosh i2 = 0; 86249bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 86349bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 86449bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 86549bd79ccSDebojyoti Ghosh 86649bd79ccSDebojyoti Ghosh v = aa + ai[i]; 86749bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 86849bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 86949bd79ccSDebojyoti Ghosh workt = work; 87049bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 87149bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 87249bd79ccSDebojyoti Ghosh workt += bs; 87349bd79ccSDebojyoti Ghosh } 87449bd79ccSDebojyoti Ghosh arrt = arr; 87549bd79ccSDebojyoti Ghosh if (T) { 87649bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 87749bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 87849bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 87949bd79ccSDebojyoti Ghosh arrt += bs2; 88049bd79ccSDebojyoti Ghosh } 88149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 88249bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 88349bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 88449bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 88549bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 88649bd79ccSDebojyoti Ghosh arrt += bs2; 88749bd79ccSDebojyoti Ghosh } 88849bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 88949bd79ccSDebojyoti Ghosh } 89049bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 89149bd79ccSDebojyoti Ghosh 89249bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 89349bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 89449bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 89549bd79ccSDebojyoti Ghosh workt = work; 89649bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 89749bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 89849bd79ccSDebojyoti Ghosh workt += bs; 89949bd79ccSDebojyoti Ghosh } 90049bd79ccSDebojyoti Ghosh arrt = arr; 90149bd79ccSDebojyoti Ghosh if (T) { 90249bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 90349bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 90449bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 90549bd79ccSDebojyoti Ghosh arrt += bs2; 90649bd79ccSDebojyoti Ghosh } 90749bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 90849bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 90949bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 91049bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 91149bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 91249bd79ccSDebojyoti Ghosh arrt += bs2; 91349bd79ccSDebojyoti Ghosh } 91449bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 91549bd79ccSDebojyoti Ghosh } 91649bd79ccSDebojyoti Ghosh 91749bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 91849bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 91949bd79ccSDebojyoti Ghosh 92049bd79ccSDebojyoti Ghosh idiag += bs2; 92149bd79ccSDebojyoti Ghosh i2 += bs; 92249bd79ccSDebojyoti Ghosh } 92349bd79ccSDebojyoti Ghosh xb = t; 92449bd79ccSDebojyoti Ghosh } 92549bd79ccSDebojyoti Ghosh else xb = b; 92649bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 92749bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag+bs2*(m-1); 92849bd79ccSDebojyoti Ghosh i2 = bs * (m-1); 92949bd79ccSDebojyoti Ghosh if (xb == b) { 93049bd79ccSDebojyoti Ghosh for (i=m-1; i>=0; i--) { 93149bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 93249bd79ccSDebojyoti Ghosh 93349bd79ccSDebojyoti Ghosh v = aa + ai[i]; 93449bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 93549bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 93649bd79ccSDebojyoti Ghosh workt = work; 93749bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 93849bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 93949bd79ccSDebojyoti Ghosh workt += bs; 94049bd79ccSDebojyoti Ghosh } 94149bd79ccSDebojyoti Ghosh arrt = arr; 94249bd79ccSDebojyoti Ghosh if (T) { 94349bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 94449bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 94549bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 94649bd79ccSDebojyoti Ghosh arrt += bs2; 94749bd79ccSDebojyoti Ghosh } 94849bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 94949bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 95049bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 95149bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 95249bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 95349bd79ccSDebojyoti Ghosh arrt += bs2; 95449bd79ccSDebojyoti Ghosh } 95549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 95649bd79ccSDebojyoti Ghosh } 95749bd79ccSDebojyoti Ghosh 95849bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 95949bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 96049bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 96149bd79ccSDebojyoti Ghosh workt = work; 96249bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 96349bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 96449bd79ccSDebojyoti Ghosh workt += bs; 96549bd79ccSDebojyoti Ghosh } 96649bd79ccSDebojyoti Ghosh arrt = arr; 96749bd79ccSDebojyoti Ghosh if (T) { 96849bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 96949bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 97049bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 97149bd79ccSDebojyoti Ghosh arrt += bs2; 97249bd79ccSDebojyoti Ghosh } 97349bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 97449bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 97549bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 97649bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 97749bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 97849bd79ccSDebojyoti Ghosh arrt += bs2; 97949bd79ccSDebojyoti Ghosh } 98049bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 98149bd79ccSDebojyoti Ghosh } 98249bd79ccSDebojyoti Ghosh 98349bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 98449bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 98549bd79ccSDebojyoti Ghosh } 98649bd79ccSDebojyoti Ghosh } else { 98749bd79ccSDebojyoti Ghosh for (i=m-1; i>=0; i--) { 98849bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 98949bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 99049bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 99149bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 99249bd79ccSDebojyoti Ghosh workt = work; 99349bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 99449bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 99549bd79ccSDebojyoti Ghosh workt += bs; 99649bd79ccSDebojyoti Ghosh } 99749bd79ccSDebojyoti Ghosh arrt = arr; 99849bd79ccSDebojyoti Ghosh if (T) { 99949bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 100049bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 100149bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 100249bd79ccSDebojyoti Ghosh arrt += bs2; 100349bd79ccSDebojyoti Ghosh } 100449bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 100549bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 100649bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 100749bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 100849bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 100949bd79ccSDebojyoti Ghosh arrt += bs2; 101049bd79ccSDebojyoti Ghosh } 101149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 101249bd79ccSDebojyoti Ghosh } 101349bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 101449bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 101549bd79ccSDebojyoti Ghosh } 101649bd79ccSDebojyoti Ghosh } 101749bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 101849bd79ccSDebojyoti Ghosh } 101949bd79ccSDebojyoti Ghosh } 102049bd79ccSDebojyoti Ghosh 102149bd79ccSDebojyoti Ghosh ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 102249bd79ccSDebojyoti Ghosh ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 102349bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 102449bd79ccSDebojyoti Ghosh } 102549bd79ccSDebojyoti Ghosh 102649bd79ccSDebojyoti Ghosh /*===================================================================================*/ 102749bd79ccSDebojyoti Ghosh 1028836168d5SRichard Tran Mills PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 102949bd79ccSDebojyoti Ghosh { 103049bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 103149bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 103249bd79ccSDebojyoti Ghosh 103349bd79ccSDebojyoti Ghosh PetscFunctionBegin; 103449bd79ccSDebojyoti Ghosh if (!yy) { 103549bd79ccSDebojyoti Ghosh ierr = VecSet(zz,0.0);CHKERRQ(ierr); 103649bd79ccSDebojyoti Ghosh } else { 103749bd79ccSDebojyoti Ghosh ierr = VecCopy(yy,zz);CHKERRQ(ierr); 103849bd79ccSDebojyoti Ghosh } 103949bd79ccSDebojyoti Ghosh /* start the scatter */ 104049bd79ccSDebojyoti Ghosh ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 104149bd79ccSDebojyoti Ghosh ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 104249bd79ccSDebojyoti Ghosh ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 104349bd79ccSDebojyoti Ghosh ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 104449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 104549bd79ccSDebojyoti Ghosh } 104649bd79ccSDebojyoti Ghosh 1047bb6fb833SRichard Tran Mills PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 104849bd79ccSDebojyoti Ghosh { 104949bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 105049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 1051836168d5SRichard Tran Mills ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 105249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 105349bd79ccSDebojyoti Ghosh } 105449bd79ccSDebojyoti Ghosh 1055bb6fb833SRichard Tran Mills PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 105649bd79ccSDebojyoti Ghosh { 105749bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 105849bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 105949bd79ccSDebojyoti Ghosh 106049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 106149bd79ccSDebojyoti Ghosh ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 106249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 106349bd79ccSDebojyoti Ghosh } 106449bd79ccSDebojyoti Ghosh 106549bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/ 106649bd79ccSDebojyoti Ghosh 106749bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 106849bd79ccSDebojyoti Ghosh { 106949bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 10701ca5ffdbSRichard Tran Mills PetscErrorCode diag = PETSC_FALSE; 10711ca5ffdbSRichard Tran Mills PetscErrorCode ierr; 107249bd79ccSDebojyoti Ghosh PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 107349bd79ccSDebojyoti Ghosh PetscScalar *vaij,*v,*S=b->S,*T=b->T; 107449bd79ccSDebojyoti Ghosh 107549bd79ccSDebojyoti Ghosh PetscFunctionBegin; 107649bd79ccSDebojyoti Ghosh if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 107749bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 107849bd79ccSDebojyoti Ghosh if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 107949bd79ccSDebojyoti Ghosh 108049bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 108149bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 108249bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 108349bd79ccSDebojyoti Ghosh if (values) *values = NULL; 108449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 108549bd79ccSDebojyoti Ghosh } 108649bd79ccSDebojyoti Ghosh 108749bd79ccSDebojyoti Ghosh if (T || b->isTI) { 108849bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 108949bd79ccSDebojyoti Ghosh c = nzaij; 109049bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 109149bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 109249bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 109349bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 109449bd79ccSDebojyoti Ghosh c = i; 109549bd79ccSDebojyoti Ghosh } 109649bd79ccSDebojyoti Ghosh } 109749bd79ccSDebojyoti Ghosh } else nzaij = c = 0; 109849bd79ccSDebojyoti Ghosh 109949bd79ccSDebojyoti Ghosh /* calculate size of row */ 110049bd79ccSDebojyoti Ghosh nz = 0; 110149bd79ccSDebojyoti Ghosh if (S) nz += q; 110249bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 110349bd79ccSDebojyoti Ghosh 110449bd79ccSDebojyoti Ghosh if (cols || values) { 110549bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 110638822f9dSRichard Tran Mills for (i=0; i<q; i++) { 110738822f9dSRichard Tran Mills /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 110838822f9dSRichard Tran Mills v[i] = 0.0; 110938822f9dSRichard Tran Mills } 111049bd79ccSDebojyoti Ghosh if (b->isTI) { 111149bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 111249bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 111349bd79ccSDebojyoti Ghosh idx[i*q+j] = colsaij[i]*q+j; 111449bd79ccSDebojyoti Ghosh v[i*q+j] = (j==s ? vaij[i] : 0); 111549bd79ccSDebojyoti Ghosh } 111649bd79ccSDebojyoti Ghosh } 111749bd79ccSDebojyoti Ghosh } else if (T) { 111849bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 111949bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 112049bd79ccSDebojyoti Ghosh idx[i*q+j] = colsaij[i]*q+j; 112149bd79ccSDebojyoti Ghosh v[i*q+j] = vaij[i]*T[s+j*p]; 112249bd79ccSDebojyoti Ghosh } 112349bd79ccSDebojyoti Ghosh } 112449bd79ccSDebojyoti Ghosh } 112549bd79ccSDebojyoti Ghosh if (S) { 112649bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 112749bd79ccSDebojyoti Ghosh idx[c*q+j] = r*q+j; 112849bd79ccSDebojyoti Ghosh v[c*q+j] += S[s+j*p]; 112949bd79ccSDebojyoti Ghosh } 113049bd79ccSDebojyoti Ghosh } 113149bd79ccSDebojyoti Ghosh } 113249bd79ccSDebojyoti Ghosh 113349bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 113449bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 113549bd79ccSDebojyoti Ghosh if (values) *values = v; 113649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 113749bd79ccSDebojyoti Ghosh } 113849bd79ccSDebojyoti Ghosh 113949bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 114049bd79ccSDebojyoti Ghosh { 114149bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 114249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 114349bd79ccSDebojyoti Ghosh ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 114449bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 114549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 114649bd79ccSDebojyoti Ghosh } 114749bd79ccSDebojyoti Ghosh 114849bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 114949bd79ccSDebojyoti Ghosh { 115049bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 115149bd79ccSDebojyoti Ghosh Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 115249bd79ccSDebojyoti Ghosh Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 115349bd79ccSDebojyoti Ghosh Mat AIJ = b->A; 1154fc64b2cfSRichard Tran Mills PetscBool diag = PETSC_FALSE; 115549bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 115649bd79ccSDebojyoti Ghosh const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 115749bd79ccSDebojyoti Ghosh PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 115849bd79ccSDebojyoti Ghosh PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 115949bd79ccSDebojyoti Ghosh 116049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 116149bd79ccSDebojyoti Ghosh if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 116249bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 116349bd79ccSDebojyoti Ghosh if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 116449bd79ccSDebojyoti Ghosh lrow = row - rstart; 116549bd79ccSDebojyoti Ghosh 116649bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 116749bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 116849bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 116949bd79ccSDebojyoti Ghosh if (values) *values = NULL; 117049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 117149bd79ccSDebojyoti Ghosh } 117249bd79ccSDebojyoti Ghosh 117349bd79ccSDebojyoti Ghosh r = lrow/p; 117449bd79ccSDebojyoti Ghosh s = lrow%p; 117549bd79ccSDebojyoti Ghosh 117649bd79ccSDebojyoti Ghosh if (T || b->isTI) { 11772ae760e3SRichard Tran Mills ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr); 117849bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 117949bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 118049bd79ccSDebojyoti Ghosh 118149bd79ccSDebojyoti Ghosh c = ncolsaij + ncolsoaij; 118249bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 118349bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 118449bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 118549bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 118649bd79ccSDebojyoti Ghosh c = i; 118749bd79ccSDebojyoti Ghosh } 118849bd79ccSDebojyoti Ghosh } 118949bd79ccSDebojyoti Ghosh } else c = 0; 119049bd79ccSDebojyoti Ghosh 119149bd79ccSDebojyoti Ghosh /* calculate size of row */ 119249bd79ccSDebojyoti Ghosh nz = 0; 119349bd79ccSDebojyoti Ghosh if (S) nz += q; 119449bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 119549bd79ccSDebojyoti Ghosh 119649bd79ccSDebojyoti Ghosh if (cols || values) { 119749bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1198a437a796SRichard Tran Mills for (i=0; i<q; i++) { 1199a437a796SRichard Tran Mills /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1200a437a796SRichard Tran Mills v[i] = 0.0; 1201a437a796SRichard Tran Mills } 120249bd79ccSDebojyoti Ghosh if (b->isTI) { 120349bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 120449bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 120549bd79ccSDebojyoti Ghosh idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 120649bd79ccSDebojyoti Ghosh v[i*q+j] = (j==s ? vals[i] : 0.0); 120749bd79ccSDebojyoti Ghosh } 120849bd79ccSDebojyoti Ghosh } 120949bd79ccSDebojyoti Ghosh for (i=0; i<ncolsoaij; i++) { 121049bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 121149bd79ccSDebojyoti Ghosh idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 121249bd79ccSDebojyoti Ghosh v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 121349bd79ccSDebojyoti Ghosh } 121449bd79ccSDebojyoti Ghosh } 121549bd79ccSDebojyoti Ghosh } else if (T) { 121649bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 121749bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 121849bd79ccSDebojyoti Ghosh idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 121949bd79ccSDebojyoti Ghosh v[i*q+j] = vals[i]*T[s+j*p]; 122049bd79ccSDebojyoti Ghosh } 122149bd79ccSDebojyoti Ghosh } 122249bd79ccSDebojyoti Ghosh for (i=0; i<ncolsoaij; i++) { 122349bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 122449bd79ccSDebojyoti Ghosh idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 122549bd79ccSDebojyoti Ghosh v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 122649bd79ccSDebojyoti Ghosh } 122749bd79ccSDebojyoti Ghosh } 122849bd79ccSDebojyoti Ghosh } 122949bd79ccSDebojyoti Ghosh if (S) { 123049bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 123149bd79ccSDebojyoti Ghosh idx[c*q+j] = (r+rstart/p)*q+j; 123249bd79ccSDebojyoti Ghosh v[c*q+j] += S[s+j*p]; 123349bd79ccSDebojyoti Ghosh } 123449bd79ccSDebojyoti Ghosh } 123549bd79ccSDebojyoti Ghosh } 123649bd79ccSDebojyoti Ghosh 123749bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 123849bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 123949bd79ccSDebojyoti Ghosh if (values) *values = v; 124049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 124149bd79ccSDebojyoti Ghosh } 124249bd79ccSDebojyoti Ghosh 124349bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 124449bd79ccSDebojyoti Ghosh { 124549bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 124649bd79ccSDebojyoti Ghosh PetscFunctionBegin; 124749bd79ccSDebojyoti Ghosh ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 124849bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 124949bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 125049bd79ccSDebojyoti Ghosh } 125149bd79ccSDebojyoti Ghosh 125249bd79ccSDebojyoti Ghosh PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 125349bd79ccSDebojyoti Ghosh { 125449bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 125549bd79ccSDebojyoti Ghosh Mat A; 125649bd79ccSDebojyoti Ghosh 125749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 125849bd79ccSDebojyoti Ghosh ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 125949bd79ccSDebojyoti Ghosh ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 126049bd79ccSDebojyoti Ghosh ierr = MatDestroy(&A);CHKERRQ(ierr); 126149bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 126249bd79ccSDebojyoti Ghosh } 126349bd79ccSDebojyoti Ghosh 126449bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */ 126549bd79ccSDebojyoti Ghosh /*@C 126649bd79ccSDebojyoti Ghosh MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 126749bd79ccSDebojyoti Ghosh 126849bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 126949bd79ccSDebojyoti Ghosh 127049bd79ccSDebojyoti Ghosh where 127149bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 127249bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 127349bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 127449bd79ccSDebojyoti Ghosh I is the identity matrix 127549bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 127649bd79ccSDebojyoti Ghosh 1277d3b90ce1SRichard Tran Mills S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 127849bd79ccSDebojyoti Ghosh 127949bd79ccSDebojyoti Ghosh Collective 128049bd79ccSDebojyoti Ghosh 128149bd79ccSDebojyoti Ghosh Input Parameters: 128249bd79ccSDebojyoti Ghosh + A - the AIJ matrix 128349bd79ccSDebojyoti Ghosh . p - number of rows in S and T 1284d3b90ce1SRichard Tran Mills . q - number of columns in S and T 1285d3b90ce1SRichard Tran Mills . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1286d3b90ce1SRichard Tran Mills - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 128749bd79ccSDebojyoti Ghosh 128849bd79ccSDebojyoti Ghosh Output Parameter: 128949bd79ccSDebojyoti Ghosh . kaij - the new KAIJ matrix 129049bd79ccSDebojyoti Ghosh 1291d3b90ce1SRichard Tran Mills Notes: 1292d3b90ce1SRichard Tran Mills This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1293d3b90ce1SRichard Tran Mills Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 129449bd79ccSDebojyoti Ghosh 129549bd79ccSDebojyoti Ghosh Level: advanced 129649bd79ccSDebojyoti Ghosh 12970567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 129849bd79ccSDebojyoti Ghosh @*/ 129949bd79ccSDebojyoti Ghosh PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 130049bd79ccSDebojyoti Ghosh { 130149bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 130249bd79ccSDebojyoti Ghosh PetscMPIInt size; 130349bd79ccSDebojyoti Ghosh 130449bd79ccSDebojyoti Ghosh PetscFunctionBegin; 13050567c835SRichard Tran Mills ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 130649bd79ccSDebojyoti Ghosh ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 130749bd79ccSDebojyoti Ghosh if (size == 1) { 13080567c835SRichard Tran Mills ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 130949bd79ccSDebojyoti Ghosh } else { 13100567c835SRichard Tran Mills ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 131149bd79ccSDebojyoti Ghosh } 13120567c835SRichard Tran Mills ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 13130567c835SRichard Tran Mills ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 13140567c835SRichard Tran Mills ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 13152ae760e3SRichard Tran Mills ierr = MatSetUp(*kaij);CHKERRQ(ierr); 13160567c835SRichard Tran Mills PetscFunctionReturn(0); 13170567c835SRichard Tran Mills } 131849bd79ccSDebojyoti Ghosh 13190567c835SRichard Tran Mills /*MC 1320*5881e567SRichard Tran Mills MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 1321*5881e567SRichard Tran Mills [I \otimes S + A \otimes T], 13220567c835SRichard Tran Mills where 1323*5881e567SRichard Tran Mills S is a dense (p \times q) matrix, 1324*5881e567SRichard Tran Mills T is a dense (p \times q) matrix, 1325*5881e567SRichard Tran Mills A is an AIJ (n \times n) matrix, 1326*5881e567SRichard Tran Mills and I is the identity matrix. 1327*5881e567SRichard Tran Mills The resulting matrix is (np \times nq). 13280567c835SRichard Tran Mills 1329d3b90ce1SRichard Tran Mills S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 13300567c835SRichard Tran Mills 1331*5881e567SRichard Tran Mills Notes: 1332*5881e567SRichard Tran Mills A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 1333*5881e567SRichard Tran Mills where x and b are column vectors containing the row-major representations of X and B. 1334*5881e567SRichard Tran Mills 13350567c835SRichard Tran Mills Level: advanced 13360567c835SRichard Tran Mills 13370567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 13380567c835SRichard Tran Mills M*/ 13390567c835SRichard Tran Mills 13400567c835SRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 13410567c835SRichard Tran Mills { 13420567c835SRichard Tran Mills PetscErrorCode ierr; 13430567c835SRichard Tran Mills Mat_MPIKAIJ *b; 13440567c835SRichard Tran Mills PetscMPIInt size; 13450567c835SRichard Tran Mills 13460567c835SRichard Tran Mills PetscFunctionBegin; 13470567c835SRichard Tran Mills ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 13480567c835SRichard Tran Mills A->data = (void*)b; 13490567c835SRichard Tran Mills 13500567c835SRichard Tran Mills ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 13510567c835SRichard Tran Mills 13520567c835SRichard Tran Mills A->ops->setup = MatSetUp_KAIJ; 13530567c835SRichard Tran Mills 13540567c835SRichard Tran Mills b->w = 0; 13550567c835SRichard Tran Mills ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 13560567c835SRichard Tran Mills if (size == 1) { 13570567c835SRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 13580567c835SRichard Tran Mills A->ops->setup = MatSetUp_KAIJ; 13590567c835SRichard Tran Mills A->ops->destroy = MatDestroy_SeqKAIJ; 1360e6985dafSRichard Tran Mills A->ops->view = MatView_KAIJ; 1361bb6fb833SRichard Tran Mills A->ops->mult = MatMult_SeqKAIJ; 1362bb6fb833SRichard Tran Mills A->ops->multadd = MatMultAdd_SeqKAIJ; 1363bb6fb833SRichard Tran Mills A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 13640567c835SRichard Tran Mills A->ops->getrow = MatGetRow_SeqKAIJ; 13650567c835SRichard Tran Mills A->ops->restorerow = MatRestoreRow_SeqKAIJ; 13660567c835SRichard Tran Mills A->ops->sor = MatSOR_SeqKAIJ; 13670567c835SRichard Tran Mills } else { 13680567c835SRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 13690567c835SRichard Tran Mills A->ops->setup = MatSetUp_KAIJ; 13700567c835SRichard Tran Mills A->ops->destroy = MatDestroy_MPIKAIJ; 1371e6985dafSRichard Tran Mills A->ops->view = MatView_KAIJ; 1372bb6fb833SRichard Tran Mills A->ops->mult = MatMult_MPIKAIJ; 1373bb6fb833SRichard Tran Mills A->ops->multadd = MatMultAdd_MPIKAIJ; 1374bb6fb833SRichard Tran Mills A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 13750567c835SRichard Tran Mills A->ops->getrow = MatGetRow_MPIKAIJ; 13760567c835SRichard Tran Mills A->ops->restorerow = MatRestoreRow_MPIKAIJ; 13770567c835SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 13780567c835SRichard Tran Mills } 13790567c835SRichard Tran Mills A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 138049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 138149bd79ccSDebojyoti Ghosh } 1382