149bd79ccSDebojyoti Ghosh 249bd79ccSDebojyoti Ghosh /* 349bd79ccSDebojyoti Ghosh Defines the basic matrix operations for the KAIJ matrix storage format. 449bd79ccSDebojyoti Ghosh This format is used to evaluate matrices of the form: 549bd79ccSDebojyoti Ghosh 649bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 749bd79ccSDebojyoti Ghosh 849bd79ccSDebojyoti Ghosh where 949bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 1049bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 1149bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 1249bd79ccSDebojyoti Ghosh I is the identity matrix 1349bd79ccSDebojyoti Ghosh 1449bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 1549bd79ccSDebojyoti Ghosh 1649bd79ccSDebojyoti Ghosh We provide: 1749bd79ccSDebojyoti Ghosh MatMult() 1849bd79ccSDebojyoti Ghosh MatMultAdd() 1949bd79ccSDebojyoti Ghosh MatInvertBlockDiagonal() 2049bd79ccSDebojyoti Ghosh and 2149bd79ccSDebojyoti Ghosh MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 2249bd79ccSDebojyoti Ghosh 2349bd79ccSDebojyoti Ghosh This single directory handles both the sequential and parallel codes 2449bd79ccSDebojyoti Ghosh */ 2549bd79ccSDebojyoti Ghosh 2649bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 2749bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h> 2849bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h> 2949bd79ccSDebojyoti Ghosh 3049bd79ccSDebojyoti Ghosh /*@C 3149bd79ccSDebojyoti Ghosh MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 3249bd79ccSDebojyoti Ghosh 3349bd79ccSDebojyoti Ghosh Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 3449bd79ccSDebojyoti Ghosh 3549bd79ccSDebojyoti Ghosh Input Parameter: 3649bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 3749bd79ccSDebojyoti Ghosh 3849bd79ccSDebojyoti Ghosh Output Parameter: 3949bd79ccSDebojyoti Ghosh . B - the AIJ matrix 4049bd79ccSDebojyoti Ghosh 4149bd79ccSDebojyoti Ghosh Level: advanced 4249bd79ccSDebojyoti Ghosh 4349bd79ccSDebojyoti Ghosh Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 4449bd79ccSDebojyoti Ghosh 4549bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ() 4649bd79ccSDebojyoti Ghosh @*/ 4749bd79ccSDebojyoti Ghosh PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 4849bd79ccSDebojyoti Ghosh { 4949bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 5049bd79ccSDebojyoti Ghosh PetscBool ismpikaij,isseqkaij; 5149bd79ccSDebojyoti Ghosh 5249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 5349bd79ccSDebojyoti Ghosh ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 5449bd79ccSDebojyoti Ghosh ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 5549bd79ccSDebojyoti Ghosh if (ismpikaij) { 5649bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 5749bd79ccSDebojyoti Ghosh 5849bd79ccSDebojyoti Ghosh *B = b->A; 5949bd79ccSDebojyoti Ghosh } else if (isseqkaij) { 6049bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 6149bd79ccSDebojyoti Ghosh 6249bd79ccSDebojyoti Ghosh *B = b->AIJ; 63b04351cbSRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 6449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 6549bd79ccSDebojyoti Ghosh } 6649bd79ccSDebojyoti Ghosh 6749bd79ccSDebojyoti Ghosh /*@C 6849bd79ccSDebojyoti Ghosh MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 6949bd79ccSDebojyoti Ghosh 700567c835SRichard Tran Mills Not Collective; the entire S is stored and returned independently on all processes. 7149bd79ccSDebojyoti Ghosh 7249bd79ccSDebojyoti Ghosh Input Parameter: 7349bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 7449bd79ccSDebojyoti Ghosh 75a5b5c723SRichard Tran Mills Output Parameters: 76a5b5c723SRichard Tran Mills + m - the number of rows in S 77a5b5c723SRichard Tran Mills . n - the number of columns in S 78a5b5c723SRichard Tran Mills - S - the S matrix, in form of a scalar array in column-major format 7949bd79ccSDebojyoti Ghosh 80a5b5c723SRichard Tran Mills Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 8131a97b9aSRichard Tran Mills 8249bd79ccSDebojyoti Ghosh Level: advanced 8349bd79ccSDebojyoti Ghosh 8431a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes() 8549bd79ccSDebojyoti Ghosh @*/ 86a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 8749bd79ccSDebojyoti Ghosh { 8849bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 8949bd79ccSDebojyoti Ghosh PetscFunctionBegin; 90a5b5c723SRichard Tran Mills if (m) *m = b->p; 91a5b5c723SRichard Tran Mills if (n) *n = b->q; 92a5b5c723SRichard Tran Mills if (S) *S = b->S; 93a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 94a5b5c723SRichard Tran Mills } 95a5b5c723SRichard Tran Mills 96a5b5c723SRichard Tran Mills /*@C 97a5b5c723SRichard Tran Mills MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98a5b5c723SRichard Tran Mills 99a5b5c723SRichard Tran Mills Not Collective; the entire S is stored and returned independently on all processes. 100a5b5c723SRichard Tran Mills 101a5b5c723SRichard Tran Mills Input Parameter: 102a5b5c723SRichard Tran Mills . A - the KAIJ matrix 103a5b5c723SRichard Tran Mills 104a5b5c723SRichard Tran Mills Output Parameters: 105a5b5c723SRichard Tran Mills + m - the number of rows in S 106a5b5c723SRichard Tran Mills . n - the number of columns in S 107a5b5c723SRichard Tran Mills - S - the S matrix, in form of a scalar array in column-major format 108a5b5c723SRichard Tran Mills 109a5b5c723SRichard Tran Mills Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110a5b5c723SRichard Tran Mills 111a5b5c723SRichard Tran Mills Level: advanced 112a5b5c723SRichard Tran Mills 113a5b5c723SRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114a5b5c723SRichard Tran Mills @*/ 115a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116a5b5c723SRichard Tran Mills { 117a5b5c723SRichard Tran Mills Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118a5b5c723SRichard Tran Mills PetscFunctionBegin; 119a5b5c723SRichard Tran Mills if (m) *m = b->p; 120a5b5c723SRichard Tran Mills if (n) *n = b->q; 121a5b5c723SRichard Tran Mills if (S) *S = b->S; 122a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 123a5b5c723SRichard Tran Mills } 124a5b5c723SRichard Tran Mills 125a5b5c723SRichard Tran Mills /*@C 126a5b5c723SRichard Tran Mills MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127a5b5c723SRichard Tran Mills 128a5b5c723SRichard Tran Mills Not collective 129a5b5c723SRichard Tran Mills 130a5b5c723SRichard Tran Mills Input Parameter: 131a5b5c723SRichard Tran Mills . A - the KAIJ matrix 132a5b5c723SRichard Tran Mills 133a5b5c723SRichard Tran Mills Output Parameter: 134a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS() 135a5b5c723SRichard Tran Mills 136a5b5c723SRichard Tran Mills Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 138a5b5c723SRichard Tran Mills 139a5b5c723SRichard Tran Mills Level: advanced 140a5b5c723SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141a5b5c723SRichard Tran Mills @*/ 142a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143a5b5c723SRichard Tran Mills { 144a5b5c723SRichard Tran Mills PetscFunctionBegin; 145a5b5c723SRichard Tran Mills if (S) *S = NULL; 146a5b5c723SRichard Tran Mills PetscObjectStateIncrease((PetscObject)A); 147a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 148a5b5c723SRichard Tran Mills } 149a5b5c723SRichard Tran Mills 150a5b5c723SRichard Tran Mills /*@C 151a5b5c723SRichard Tran Mills MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 152a5b5c723SRichard Tran Mills 153a5b5c723SRichard Tran Mills Not collective 154a5b5c723SRichard Tran Mills 155a5b5c723SRichard Tran Mills Input Parameter: 156a5b5c723SRichard Tran Mills . A - the KAIJ matrix 157a5b5c723SRichard Tran Mills 158a5b5c723SRichard Tran Mills Output Parameter: 159a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS() 160a5b5c723SRichard Tran Mills 161a5b5c723SRichard Tran Mills Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 162a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 163a5b5c723SRichard Tran Mills 164a5b5c723SRichard Tran Mills Level: advanced 165a5b5c723SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 166a5b5c723SRichard Tran Mills @*/ 167a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 168a5b5c723SRichard Tran Mills { 169a5b5c723SRichard Tran Mills PetscFunctionBegin; 170a5b5c723SRichard Tran Mills if (S) *S = NULL; 17149bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 17249bd79ccSDebojyoti Ghosh } 17349bd79ccSDebojyoti Ghosh 17449bd79ccSDebojyoti Ghosh /*@C 17531a97b9aSRichard Tran Mills MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 17649bd79ccSDebojyoti Ghosh 1770567c835SRichard Tran Mills Not Collective; the entire T is stored and returned independently on all processes 17849bd79ccSDebojyoti Ghosh 17949bd79ccSDebojyoti Ghosh Input Parameter: 18049bd79ccSDebojyoti Ghosh . A - the KAIJ matrix 18149bd79ccSDebojyoti Ghosh 18249bd79ccSDebojyoti Ghosh Output Parameter: 183a5b5c723SRichard Tran Mills + m - the number of rows in T 184a5b5c723SRichard Tran Mills . n - the number of columns in T 185a5b5c723SRichard Tran Mills - T - the T matrix, in form of a scalar array in column-major format 18649bd79ccSDebojyoti Ghosh 187a5b5c723SRichard Tran Mills Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 18831a97b9aSRichard Tran Mills 18949bd79ccSDebojyoti Ghosh Level: advanced 19049bd79ccSDebojyoti Ghosh 19131a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes() 19249bd79ccSDebojyoti Ghosh @*/ 193a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 19449bd79ccSDebojyoti Ghosh { 19549bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 19649bd79ccSDebojyoti Ghosh PetscFunctionBegin; 197a5b5c723SRichard Tran Mills if (m) *m = b->p; 198a5b5c723SRichard Tran Mills if (n) *n = b->q; 199a5b5c723SRichard Tran Mills if (T) *T = b->T; 200a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 201a5b5c723SRichard Tran Mills } 202a5b5c723SRichard Tran Mills 203a5b5c723SRichard Tran Mills /*@C 204a5b5c723SRichard Tran Mills MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 205a5b5c723SRichard Tran Mills 206a5b5c723SRichard Tran Mills Not Collective; the entire T is stored and returned independently on all processes 207a5b5c723SRichard Tran Mills 208a5b5c723SRichard Tran Mills Input Parameter: 209a5b5c723SRichard Tran Mills . A - the KAIJ matrix 210a5b5c723SRichard Tran Mills 211a5b5c723SRichard Tran Mills Output Parameter: 212a5b5c723SRichard Tran Mills + m - the number of rows in T 213a5b5c723SRichard Tran Mills . n - the number of columns in T 214a5b5c723SRichard Tran Mills - T - the T matrix, in form of a scalar array in column-major format 215a5b5c723SRichard Tran Mills 216a5b5c723SRichard Tran Mills Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 217a5b5c723SRichard Tran Mills 218a5b5c723SRichard Tran Mills Level: advanced 219a5b5c723SRichard Tran Mills 220a5b5c723SRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes() 221a5b5c723SRichard Tran Mills @*/ 222a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 223a5b5c723SRichard Tran Mills { 224a5b5c723SRichard Tran Mills Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 225a5b5c723SRichard Tran Mills PetscFunctionBegin; 226a5b5c723SRichard Tran Mills if (m) *m = b->p; 227a5b5c723SRichard Tran Mills if (n) *n = b->q; 228a5b5c723SRichard Tran Mills if (T) *T = b->T; 229a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 230a5b5c723SRichard Tran Mills } 231a5b5c723SRichard Tran Mills 232a5b5c723SRichard Tran Mills /*@C 233a5b5c723SRichard Tran Mills MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 234a5b5c723SRichard Tran Mills 235a5b5c723SRichard Tran Mills Not collective 236a5b5c723SRichard Tran Mills 237a5b5c723SRichard Tran Mills Input Parameter: 238a5b5c723SRichard Tran Mills . A - the KAIJ matrix 239a5b5c723SRichard Tran Mills 240a5b5c723SRichard Tran Mills Output Parameter: 241a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS() 242a5b5c723SRichard Tran Mills 243a5b5c723SRichard Tran Mills Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 244a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 245a5b5c723SRichard Tran Mills 246a5b5c723SRichard Tran Mills Level: advanced 247a5b5c723SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 248a5b5c723SRichard Tran Mills @*/ 249a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 250a5b5c723SRichard Tran Mills { 251a5b5c723SRichard Tran Mills PetscFunctionBegin; 252a5b5c723SRichard Tran Mills if (T) *T = NULL; 253a5b5c723SRichard Tran Mills PetscObjectStateIncrease((PetscObject)A); 254a5b5c723SRichard Tran Mills PetscFunctionReturn(0); 255a5b5c723SRichard Tran Mills } 256a5b5c723SRichard Tran Mills 257a5b5c723SRichard Tran Mills /*@C 258a5b5c723SRichard Tran Mills MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 259a5b5c723SRichard Tran Mills 260a5b5c723SRichard Tran Mills Not collective 261a5b5c723SRichard Tran Mills 262a5b5c723SRichard Tran Mills Input Parameter: 263a5b5c723SRichard Tran Mills . A - the KAIJ matrix 264a5b5c723SRichard Tran Mills 265a5b5c723SRichard Tran Mills Output Parameter: 266a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS() 267a5b5c723SRichard Tran Mills 268a5b5c723SRichard Tran Mills Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 269a5b5c723SRichard Tran Mills If NULL is passed, it will not attempt to zero the array pointer. 270a5b5c723SRichard Tran Mills 271a5b5c723SRichard Tran Mills Level: advanced 272a5b5c723SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 273a5b5c723SRichard Tran Mills @*/ 274a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 275a5b5c723SRichard Tran Mills { 276a5b5c723SRichard Tran Mills PetscFunctionBegin; 277a5b5c723SRichard Tran Mills if (T) *T = NULL; 27849bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 27949bd79ccSDebojyoti Ghosh } 28049bd79ccSDebojyoti Ghosh 2810567c835SRichard Tran Mills /*@ 2820567c835SRichard Tran Mills MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 2830567c835SRichard Tran Mills 2840567c835SRichard Tran Mills Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 2850567c835SRichard Tran Mills 2860567c835SRichard Tran Mills Input Parameters: 2870567c835SRichard Tran Mills + A - the KAIJ matrix 2880567c835SRichard Tran Mills - B - the AIJ matrix 2890567c835SRichard Tran Mills 29015b9d025SRichard Tran Mills Notes: 29115b9d025SRichard Tran Mills This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 29215b9d025SRichard Tran Mills Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 29315b9d025SRichard Tran Mills 2940567c835SRichard Tran Mills Level: advanced 2950567c835SRichard Tran Mills 2960567c835SRichard Tran Mills .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 2970567c835SRichard Tran Mills @*/ 2980567c835SRichard Tran Mills PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 2990567c835SRichard Tran Mills { 3000567c835SRichard Tran Mills PetscErrorCode ierr; 3010567c835SRichard Tran Mills PetscMPIInt size; 3020567c835SRichard Tran Mills 3030567c835SRichard Tran Mills PetscFunctionBegin; 3040567c835SRichard Tran Mills ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3050567c835SRichard Tran Mills if (size == 1) { 3060567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 3070567c835SRichard Tran Mills a->AIJ = B; 3080567c835SRichard Tran Mills } else { 3090567c835SRichard Tran Mills Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 3100567c835SRichard Tran Mills a->A = B; 3110567c835SRichard Tran Mills } 31215b9d025SRichard Tran Mills ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 3130567c835SRichard Tran Mills PetscFunctionReturn(0); 3140567c835SRichard Tran Mills } 3150567c835SRichard Tran Mills 3160567c835SRichard Tran Mills /*@C 3170567c835SRichard Tran Mills MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 3180567c835SRichard Tran Mills 3190567c835SRichard Tran Mills Logically Collective; the entire S is stored independently on all processes. 3200567c835SRichard Tran Mills 3210567c835SRichard Tran Mills Input Parameters: 3220567c835SRichard Tran Mills + A - the KAIJ matrix 3230567c835SRichard Tran Mills . p - the number of rows in S 3240567c835SRichard Tran Mills . q - the number of columns in S 3250567c835SRichard Tran Mills - S - the S matrix, in form of a scalar array in column-major format 3260567c835SRichard Tran Mills 3270567c835SRichard Tran Mills Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 32888f48298SRichard Tran Mills The S matrix is copied, so the user can destroy this array. 3290567c835SRichard Tran Mills 3300567c835SRichard Tran Mills Level: Advanced 3310567c835SRichard Tran Mills 3320567c835SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 3330567c835SRichard Tran Mills @*/ 3340567c835SRichard Tran Mills PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 3350567c835SRichard Tran Mills { 3360567c835SRichard Tran Mills PetscErrorCode ierr; 3370567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 3380567c835SRichard Tran Mills 3390567c835SRichard Tran Mills PetscFunctionBegin; 3400567c835SRichard Tran Mills ierr = PetscFree(a->S);CHKERRQ(ierr); 3410567c835SRichard Tran Mills if (S) { 342a84f8069SRichard Tran Mills ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 3430567c835SRichard Tran Mills ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 3440567c835SRichard Tran Mills } else a->S = NULL; 3450567c835SRichard Tran Mills 3460567c835SRichard Tran Mills a->p = p; 3470567c835SRichard Tran Mills a->q = q; 3480567c835SRichard Tran Mills PetscFunctionReturn(0); 3490567c835SRichard Tran Mills } 3500567c835SRichard Tran Mills 3510567c835SRichard Tran Mills /*@C 3520567c835SRichard Tran Mills MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 3530567c835SRichard Tran Mills 3540567c835SRichard Tran Mills Logically Collective; the entire T is stored independently on all processes. 3550567c835SRichard Tran Mills 3560567c835SRichard Tran Mills Input Parameters: 3570567c835SRichard Tran Mills + A - the KAIJ matrix 3580567c835SRichard Tran Mills . p - the number of rows in S 3590567c835SRichard Tran Mills . q - the number of columns in S 3600567c835SRichard Tran Mills - T - the T matrix, in form of a scalar array in column-major format 3610567c835SRichard Tran Mills 3620567c835SRichard Tran Mills Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 36388f48298SRichard Tran Mills The T matrix is copied, so the user can destroy this array. 3640567c835SRichard Tran Mills 3650567c835SRichard Tran Mills Level: Advanced 3660567c835SRichard Tran Mills 3670567c835SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 3680567c835SRichard Tran Mills @*/ 3690567c835SRichard Tran Mills PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 3700567c835SRichard Tran Mills { 3710567c835SRichard Tran Mills PetscErrorCode ierr; 3720567c835SRichard Tran Mills PetscInt i,j; 3730567c835SRichard Tran Mills Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 3740567c835SRichard Tran Mills PetscBool isTI = PETSC_FALSE; 3750567c835SRichard Tran Mills 3760567c835SRichard Tran Mills PetscFunctionBegin; 3770567c835SRichard Tran Mills /* check if T is an identity matrix */ 3780567c835SRichard Tran Mills if (T && (p == q)) { 3790567c835SRichard Tran Mills isTI = PETSC_TRUE; 3800567c835SRichard Tran Mills for (i=0; i<p; i++) { 3810567c835SRichard Tran Mills for (j=0; j<q; j++) { 3820567c835SRichard Tran Mills if (i == j) { 3830567c835SRichard Tran Mills /* diagonal term must be 1 */ 3840567c835SRichard Tran Mills if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 3850567c835SRichard Tran Mills } else { 3860567c835SRichard Tran Mills /* off-diagonal term must be 0 */ 3870567c835SRichard Tran Mills if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 3880567c835SRichard Tran Mills } 3890567c835SRichard Tran Mills } 3900567c835SRichard Tran Mills } 3910567c835SRichard Tran Mills } 3920567c835SRichard Tran Mills a->isTI = isTI; 3930567c835SRichard Tran Mills 3940567c835SRichard Tran Mills ierr = PetscFree(a->T);CHKERRQ(ierr); 3950567c835SRichard Tran Mills if (T && (!isTI)) { 396a84f8069SRichard Tran Mills ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 3970567c835SRichard Tran Mills ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 39850d19d74SRichard Tran Mills } else a->T = NULL; 3990567c835SRichard Tran Mills 4000567c835SRichard Tran Mills a->p = p; 4010567c835SRichard Tran Mills a->q = q; 4020567c835SRichard Tran Mills PetscFunctionReturn(0); 4030567c835SRichard Tran Mills } 4040567c835SRichard Tran Mills 40549bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 40649bd79ccSDebojyoti Ghosh { 40749bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 40849bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 40949bd79ccSDebojyoti Ghosh 41049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 41149bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 412a84f8069SRichard Tran Mills ierr = PetscFree(b->S);CHKERRQ(ierr); 413a84f8069SRichard Tran Mills ierr = PetscFree(b->T);CHKERRQ(ierr); 414a84f8069SRichard Tran Mills ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 41549bd79ccSDebojyoti Ghosh ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 41649bd79ccSDebojyoti Ghosh ierr = PetscFree(A->data);CHKERRQ(ierr); 41749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 41849bd79ccSDebojyoti Ghosh } 41949bd79ccSDebojyoti Ghosh 42049bd79ccSDebojyoti Ghosh PetscErrorCode MatSetUp_KAIJ(Mat A) 42149bd79ccSDebojyoti Ghosh { 4220567c835SRichard Tran Mills PetscErrorCode ierr; 4230567c835SRichard Tran Mills PetscInt n; 4240567c835SRichard Tran Mills PetscMPIInt size; 4250567c835SRichard Tran Mills Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 4260567c835SRichard Tran Mills 42749bd79ccSDebojyoti Ghosh PetscFunctionBegin; 4280567c835SRichard Tran Mills ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4290567c835SRichard Tran Mills if (size == 1) { 4300567c835SRichard Tran Mills ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 4310567c835SRichard Tran Mills ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 4320567c835SRichard Tran Mills ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 4330567c835SRichard Tran Mills ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 4340567c835SRichard Tran Mills ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 4350567c835SRichard Tran Mills } else { 4360567c835SRichard Tran Mills Mat_MPIKAIJ *a; 4370567c835SRichard Tran Mills Mat_MPIAIJ *mpiaij; 4380567c835SRichard Tran Mills IS from,to; 4390567c835SRichard Tran Mills Vec gvec; 4400567c835SRichard Tran Mills PetscScalar *T; 4410567c835SRichard Tran Mills PetscInt i,j; 4420567c835SRichard Tran Mills 4430567c835SRichard Tran Mills a = (Mat_MPIKAIJ*)A->data; 444d3f912faSRichard Tran Mills mpiaij = (Mat_MPIAIJ*)a->A->data; 4450567c835SRichard Tran Mills ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 4460567c835SRichard Tran Mills ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 4470567c835SRichard Tran Mills ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 4480567c835SRichard Tran Mills ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 4490567c835SRichard Tran Mills ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 4500567c835SRichard Tran Mills 4510567c835SRichard Tran Mills if (a->isTI) { 4520567c835SRichard Tran Mills /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 4530567c835SRichard Tran Mills * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 4540567c835SRichard Tran Mills * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 4550567c835SRichard Tran Mills * to pass in. */ 456a84f8069SRichard Tran Mills ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 4570567c835SRichard Tran Mills for (i=0; i<a->p; i++) { 4580567c835SRichard Tran Mills for (j=0; j<a->q; j++) { 4590567c835SRichard Tran Mills if (i==j) T[i+j*a->p] = 1.0; 4600567c835SRichard Tran Mills else T[i+j*a->p] = 0.0; 4610567c835SRichard Tran Mills } 4620567c835SRichard Tran Mills } 4630567c835SRichard Tran Mills } else T = a->T; 4640567c835SRichard Tran Mills ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 4650567c835SRichard Tran Mills ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 4660567c835SRichard Tran Mills ierr = PetscFree(T);CHKERRQ(ierr); 4670567c835SRichard Tran Mills 4680567c835SRichard Tran Mills ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 4690567c835SRichard Tran Mills ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 4700567c835SRichard Tran Mills ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 4710567c835SRichard Tran Mills ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 4720567c835SRichard Tran Mills ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 4730567c835SRichard Tran Mills 4740567c835SRichard Tran Mills /* create two temporary Index sets for build scatter gather */ 4750567c835SRichard Tran Mills ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 4760567c835SRichard Tran Mills ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 4770567c835SRichard Tran Mills 4780567c835SRichard Tran Mills /* create temporary global vector to generate scatter context */ 4790567c835SRichard Tran Mills ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 4800567c835SRichard Tran Mills 4810567c835SRichard Tran Mills /* generate the scatter context */ 4824589b4e5SRichard Tran Mills ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 4830567c835SRichard Tran Mills 4840567c835SRichard Tran Mills ierr = ISDestroy(&from);CHKERRQ(ierr); 4850567c835SRichard Tran Mills ierr = ISDestroy(&to);CHKERRQ(ierr); 4860567c835SRichard Tran Mills ierr = VecDestroy(&gvec);CHKERRQ(ierr); 4870567c835SRichard Tran Mills } 4880567c835SRichard Tran Mills 4890567c835SRichard Tran Mills A->assembled = PETSC_TRUE; 49049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 49149bd79ccSDebojyoti Ghosh } 49249bd79ccSDebojyoti Ghosh 49349bd79ccSDebojyoti Ghosh PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer) 49449bd79ccSDebojyoti Ghosh { 49549bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 49649bd79ccSDebojyoti Ghosh Mat B; 49749bd79ccSDebojyoti Ghosh 49849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 49949bd79ccSDebojyoti Ghosh ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 50049bd79ccSDebojyoti Ghosh ierr = MatView(B,viewer);CHKERRQ(ierr); 50149bd79ccSDebojyoti Ghosh ierr = MatDestroy(&B);CHKERRQ(ierr); 50249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 50349bd79ccSDebojyoti Ghosh } 50449bd79ccSDebojyoti Ghosh 50549bd79ccSDebojyoti Ghosh PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer) 50649bd79ccSDebojyoti Ghosh { 50749bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 50849bd79ccSDebojyoti Ghosh Mat B; 50949bd79ccSDebojyoti Ghosh 51049bd79ccSDebojyoti Ghosh PetscFunctionBegin; 51149bd79ccSDebojyoti Ghosh ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 51249bd79ccSDebojyoti Ghosh ierr = MatView(B,viewer);CHKERRQ(ierr); 51349bd79ccSDebojyoti Ghosh ierr = MatDestroy(&B);CHKERRQ(ierr); 51449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 51549bd79ccSDebojyoti Ghosh } 51649bd79ccSDebojyoti Ghosh 51749bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 51849bd79ccSDebojyoti Ghosh { 51949bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 52049bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 52149bd79ccSDebojyoti Ghosh 52249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 52349bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 52449bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 52549bd79ccSDebojyoti Ghosh ierr = MatDestroy(&b->A);CHKERRQ(ierr); 52649bd79ccSDebojyoti Ghosh ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 52749bd79ccSDebojyoti Ghosh ierr = VecDestroy(&b->w);CHKERRQ(ierr); 528a84f8069SRichard Tran Mills ierr = PetscFree(b->S);CHKERRQ(ierr); 529a84f8069SRichard Tran Mills ierr = PetscFree(b->T);CHKERRQ(ierr); 530a84f8069SRichard Tran Mills ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 53149bd79ccSDebojyoti Ghosh ierr = PetscFree(A->data);CHKERRQ(ierr); 53249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 53349bd79ccSDebojyoti Ghosh } 53449bd79ccSDebojyoti Ghosh 53549bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/ 53649bd79ccSDebojyoti Ghosh 53749bd79ccSDebojyoti Ghosh /* zz = yy + Axx */ 538836168d5SRichard Tran Mills PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 53949bd79ccSDebojyoti Ghosh { 54049bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 54149bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 54249bd79ccSDebojyoti Ghosh const PetscScalar *s = b->S, *t = b->T; 54349bd79ccSDebojyoti Ghosh const PetscScalar *x,*v,*bx; 54449bd79ccSDebojyoti Ghosh PetscScalar *y,*sums; 54549bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 54649bd79ccSDebojyoti Ghosh const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 54749bd79ccSDebojyoti Ghosh PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 54849bd79ccSDebojyoti Ghosh 54949bd79ccSDebojyoti Ghosh PetscFunctionBegin; 55049bd79ccSDebojyoti Ghosh if (!yy) { 55149bd79ccSDebojyoti Ghosh ierr = VecSet(zz,0.0);CHKERRQ(ierr); 55249bd79ccSDebojyoti Ghosh } else { 55349bd79ccSDebojyoti Ghosh ierr = VecCopy(yy,zz);CHKERRQ(ierr); 55449bd79ccSDebojyoti Ghosh } 55549bd79ccSDebojyoti Ghosh if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 55649bd79ccSDebojyoti Ghosh 55749bd79ccSDebojyoti Ghosh ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 55849bd79ccSDebojyoti Ghosh ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 55949bd79ccSDebojyoti Ghosh idx = a->j; 56049bd79ccSDebojyoti Ghosh v = a->a; 56149bd79ccSDebojyoti Ghosh ii = a->i; 56249bd79ccSDebojyoti Ghosh 56349bd79ccSDebojyoti Ghosh if (b->isTI) { 56449bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 56549bd79ccSDebojyoti Ghosh jrow = ii[i]; 56649bd79ccSDebojyoti Ghosh n = ii[i+1] - jrow; 56749bd79ccSDebojyoti Ghosh sums = y + p*i; 56849bd79ccSDebojyoti Ghosh for (j=0; j<n; j++) { 56949bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 57049bd79ccSDebojyoti Ghosh sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 57149bd79ccSDebojyoti Ghosh } 57249bd79ccSDebojyoti Ghosh } 57349bd79ccSDebojyoti Ghosh } 57449bd79ccSDebojyoti Ghosh } else if (t) { 57549bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 57649bd79ccSDebojyoti Ghosh jrow = ii[i]; 57749bd79ccSDebojyoti Ghosh n = ii[i+1] - jrow; 57849bd79ccSDebojyoti Ghosh sums = y + p*i; 57949bd79ccSDebojyoti Ghosh bx = x + q*i; 58049bd79ccSDebojyoti Ghosh for (j=0; j<n; j++) { 58149bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 58249bd79ccSDebojyoti Ghosh for (l=0; l<q; l++) { 58349bd79ccSDebojyoti Ghosh sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 58449bd79ccSDebojyoti Ghosh } 58549bd79ccSDebojyoti Ghosh } 58649bd79ccSDebojyoti Ghosh } 58749bd79ccSDebojyoti Ghosh } 58849bd79ccSDebojyoti Ghosh } 58949bd79ccSDebojyoti Ghosh if (s) { 59049bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 59149bd79ccSDebojyoti Ghosh sums = y + p*i; 59249bd79ccSDebojyoti Ghosh bx = x + q*i; 59349bd79ccSDebojyoti Ghosh if (i < b->AIJ->cmap->n) { 59449bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 59549bd79ccSDebojyoti Ghosh for (k=0; k<p; k++) { 59649bd79ccSDebojyoti Ghosh sums[k] += s[k+j*p]*bx[j]; 59749bd79ccSDebojyoti Ghosh } 59849bd79ccSDebojyoti Ghosh } 59949bd79ccSDebojyoti Ghosh } 60049bd79ccSDebojyoti Ghosh } 60149bd79ccSDebojyoti Ghosh } 60249bd79ccSDebojyoti Ghosh 60349bd79ccSDebojyoti Ghosh ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 60449bd79ccSDebojyoti Ghosh ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 60549bd79ccSDebojyoti Ghosh ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 60649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 60749bd79ccSDebojyoti Ghosh } 60849bd79ccSDebojyoti Ghosh 609*bb6fb833SRichard Tran Mills PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 61049bd79ccSDebojyoti Ghosh { 61149bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 61249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 613836168d5SRichard Tran Mills ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 61449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 61549bd79ccSDebojyoti Ghosh } 61649bd79ccSDebojyoti Ghosh 61749bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h> 61849bd79ccSDebojyoti Ghosh 619*bb6fb833SRichard Tran Mills PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 62049bd79ccSDebojyoti Ghosh { 62149bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 62249bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 62349bd79ccSDebojyoti Ghosh const PetscScalar *S = b->S; 62449bd79ccSDebojyoti Ghosh const PetscScalar *T = b->T; 62549bd79ccSDebojyoti Ghosh const PetscScalar *v = a->a; 62649bd79ccSDebojyoti Ghosh const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 62749bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 62849bd79ccSDebojyoti Ghosh PetscInt i,j,*v_pivots,dof,dof2; 62949bd79ccSDebojyoti Ghosh PetscScalar *diag,aval,*v_work; 63049bd79ccSDebojyoti Ghosh 63149bd79ccSDebojyoti Ghosh PetscFunctionBegin; 63249bd79ccSDebojyoti Ghosh if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 63331a97b9aSRichard Tran Mills if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 63449bd79ccSDebojyoti Ghosh 63549bd79ccSDebojyoti Ghosh dof = p; 63649bd79ccSDebojyoti Ghosh dof2 = dof*dof; 63749bd79ccSDebojyoti Ghosh 63849bd79ccSDebojyoti Ghosh if (b->ibdiagvalid) { 63949bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 64049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 64149bd79ccSDebojyoti Ghosh } 64249bd79ccSDebojyoti Ghosh if (!b->ibdiag) { 643a84f8069SRichard Tran Mills ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 64449bd79ccSDebojyoti Ghosh ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 64549bd79ccSDebojyoti Ghosh } 64649bd79ccSDebojyoti Ghosh if (values) *values = b->ibdiag; 64749bd79ccSDebojyoti Ghosh diag = b->ibdiag; 64849bd79ccSDebojyoti Ghosh 64949bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 65049bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 65149bd79ccSDebojyoti Ghosh if (S) { 65249bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 65349bd79ccSDebojyoti Ghosh } else { 65449bd79ccSDebojyoti Ghosh ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 65549bd79ccSDebojyoti Ghosh } 65649bd79ccSDebojyoti Ghosh if (b->isTI) { 65749bd79ccSDebojyoti Ghosh aval = 0; 65849bd79ccSDebojyoti Ghosh for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 65949bd79ccSDebojyoti Ghosh for (j=0; j<dof; j++) diag[j+dof*j] += aval; 66049bd79ccSDebojyoti Ghosh } else if (T) { 66149bd79ccSDebojyoti Ghosh aval = 0; 66249bd79ccSDebojyoti Ghosh for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 66349bd79ccSDebojyoti Ghosh for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 66449bd79ccSDebojyoti Ghosh } 66549bd79ccSDebojyoti Ghosh ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 66649bd79ccSDebojyoti Ghosh diag += dof2; 66749bd79ccSDebojyoti Ghosh } 66849bd79ccSDebojyoti Ghosh ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 66949bd79ccSDebojyoti Ghosh 67049bd79ccSDebojyoti Ghosh b->ibdiagvalid = PETSC_TRUE; 67149bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 67249bd79ccSDebojyoti Ghosh } 67349bd79ccSDebojyoti Ghosh 67449bd79ccSDebojyoti Ghosh static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 67549bd79ccSDebojyoti Ghosh { 67649bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 67749bd79ccSDebojyoti Ghosh 67849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 67949bd79ccSDebojyoti Ghosh *B = kaij->AIJ; 68049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 68149bd79ccSDebojyoti Ghosh } 68249bd79ccSDebojyoti Ghosh 68349bd79ccSDebojyoti Ghosh PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 68449bd79ccSDebojyoti Ghosh { 68549bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 68649bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 68749bd79ccSDebojyoti Ghosh Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 68849bd79ccSDebojyoti Ghosh const PetscScalar *aa = a->a, *T = kaij->T, *v; 68949bd79ccSDebojyoti Ghosh const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 69049bd79ccSDebojyoti Ghosh const PetscScalar *b, *xb, *idiag; 69149bd79ccSDebojyoti Ghosh PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 69249bd79ccSDebojyoti Ghosh PetscInt i, j, k, i2, bs, bs2, nz; 69349bd79ccSDebojyoti Ghosh 69449bd79ccSDebojyoti Ghosh PetscFunctionBegin; 69549bd79ccSDebojyoti Ghosh its = its*lits; 69649bd79ccSDebojyoti Ghosh if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 69749bd79ccSDebojyoti Ghosh if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 69849bd79ccSDebojyoti Ghosh if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 69931a97b9aSRichard Tran Mills if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 70049bd79ccSDebojyoti Ghosh if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks"); 70149bd79ccSDebojyoti Ghosh else {bs = p; bs2 = bs*bs; } 70249bd79ccSDebojyoti Ghosh 70349bd79ccSDebojyoti Ghosh if (!m) PetscFunctionReturn(0); 70449bd79ccSDebojyoti Ghosh 705*bb6fb833SRichard Tran Mills if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); } 70649bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 70749bd79ccSDebojyoti Ghosh diag = a->diag; 70849bd79ccSDebojyoti Ghosh 70949bd79ccSDebojyoti Ghosh if (!kaij->sor.setup) { 71049bd79ccSDebojyoti Ghosh ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 71149bd79ccSDebojyoti Ghosh kaij->sor.setup = PETSC_TRUE; 71249bd79ccSDebojyoti Ghosh } 71349bd79ccSDebojyoti Ghosh y = kaij->sor.y; 71449bd79ccSDebojyoti Ghosh w = kaij->sor.w; 71549bd79ccSDebojyoti Ghosh work = kaij->sor.work; 71649bd79ccSDebojyoti Ghosh t = kaij->sor.t; 71749bd79ccSDebojyoti Ghosh arr = kaij->sor.arr; 71849bd79ccSDebojyoti Ghosh 71949bd79ccSDebojyoti Ghosh ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 72049bd79ccSDebojyoti Ghosh ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 72149bd79ccSDebojyoti Ghosh 72249bd79ccSDebojyoti Ghosh if (flag & SOR_ZERO_INITIAL_GUESS) { 72349bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 72449bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 72549bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 72649bd79ccSDebojyoti Ghosh i2 = bs; 72749bd79ccSDebojyoti Ghosh idiag += bs2; 72849bd79ccSDebojyoti Ghosh for (i=1; i<m; i++) { 72949bd79ccSDebojyoti Ghosh v = aa + ai[i]; 73049bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 73149bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 73249bd79ccSDebojyoti Ghosh 73349bd79ccSDebojyoti Ghosh if (T) { /* b - T (Arow * x) */ 73449bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] = 0; 73549bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 73649bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 73749bd79ccSDebojyoti Ghosh } 73849bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 73949bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 74049bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 74149bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 74249bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 74349bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 74449bd79ccSDebojyoti Ghosh } 74549bd79ccSDebojyoti Ghosh } else { 74649bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) t[i2+k] = b[i2+k]; 74749bd79ccSDebojyoti Ghosh } 74849bd79ccSDebojyoti Ghosh 74949bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 75049bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 75149bd79ccSDebojyoti Ghosh 75249bd79ccSDebojyoti Ghosh idiag += bs2; 75349bd79ccSDebojyoti Ghosh i2 += bs; 75449bd79ccSDebojyoti Ghosh } 75549bd79ccSDebojyoti Ghosh /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 75649bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 75749bd79ccSDebojyoti Ghosh xb = t; 75849bd79ccSDebojyoti Ghosh } else xb = b; 75949bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 76049bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag+bs2*(m-1); 76149bd79ccSDebojyoti Ghosh i2 = bs * (m-1); 76249bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 76349bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 76449bd79ccSDebojyoti Ghosh i2 -= bs; 76549bd79ccSDebojyoti Ghosh idiag -= bs2; 76649bd79ccSDebojyoti Ghosh for (i=m-2; i>=0; i--) { 76749bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1 ; 76849bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 76949bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 77049bd79ccSDebojyoti Ghosh 77149bd79ccSDebojyoti Ghosh if (T) { /* FIXME: This branch untested */ 77249bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 77349bd79ccSDebojyoti Ghosh /* copy all rows of x that are needed into contiguous space */ 77449bd79ccSDebojyoti Ghosh workt = work; 77549bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 77649bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 77749bd79ccSDebojyoti Ghosh workt += bs; 77849bd79ccSDebojyoti Ghosh } 77949bd79ccSDebojyoti Ghosh arrt = arr; 78049bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 78149bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 78249bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 78349bd79ccSDebojyoti Ghosh arrt += bs2; 78449bd79ccSDebojyoti Ghosh } 78549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 78649bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 78749bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] = t[i2+k]; 78849bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 78949bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 79049bd79ccSDebojyoti Ghosh } 79149bd79ccSDebojyoti Ghosh } 79249bd79ccSDebojyoti Ghosh 79349bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 79449bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 79549bd79ccSDebojyoti Ghosh 79649bd79ccSDebojyoti Ghosh idiag -= bs2; 79749bd79ccSDebojyoti Ghosh i2 -= bs; 79849bd79ccSDebojyoti Ghosh } 79949bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 80049bd79ccSDebojyoti Ghosh } 80149bd79ccSDebojyoti Ghosh its--; 80249bd79ccSDebojyoti Ghosh } 80349bd79ccSDebojyoti Ghosh while (its--) { /* FIXME: This branch not updated */ 80449bd79ccSDebojyoti Ghosh if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 80549bd79ccSDebojyoti Ghosh i2 = 0; 80649bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag; 80749bd79ccSDebojyoti Ghosh for (i=0; i<m; i++) { 80849bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 80949bd79ccSDebojyoti Ghosh 81049bd79ccSDebojyoti Ghosh v = aa + ai[i]; 81149bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 81249bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 81349bd79ccSDebojyoti Ghosh workt = work; 81449bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 81549bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 81649bd79ccSDebojyoti Ghosh workt += bs; 81749bd79ccSDebojyoti Ghosh } 81849bd79ccSDebojyoti Ghosh arrt = arr; 81949bd79ccSDebojyoti Ghosh if (T) { 82049bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 82149bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 82249bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 82349bd79ccSDebojyoti Ghosh arrt += bs2; 82449bd79ccSDebojyoti Ghosh } 82549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 82649bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 82749bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 82849bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 82949bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 83049bd79ccSDebojyoti Ghosh arrt += bs2; 83149bd79ccSDebojyoti Ghosh } 83249bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 83349bd79ccSDebojyoti Ghosh } 83449bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 83549bd79ccSDebojyoti Ghosh 83649bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 83749bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 83849bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 83949bd79ccSDebojyoti Ghosh workt = work; 84049bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 84149bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 84249bd79ccSDebojyoti Ghosh workt += bs; 84349bd79ccSDebojyoti Ghosh } 84449bd79ccSDebojyoti Ghosh arrt = arr; 84549bd79ccSDebojyoti Ghosh if (T) { 84649bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 84749bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 84849bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 84949bd79ccSDebojyoti Ghosh arrt += bs2; 85049bd79ccSDebojyoti Ghosh } 85149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 85249bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 85349bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 85449bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 85549bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 85649bd79ccSDebojyoti Ghosh arrt += bs2; 85749bd79ccSDebojyoti Ghosh } 85849bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 85949bd79ccSDebojyoti Ghosh } 86049bd79ccSDebojyoti Ghosh 86149bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 86249bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 86349bd79ccSDebojyoti Ghosh 86449bd79ccSDebojyoti Ghosh idiag += bs2; 86549bd79ccSDebojyoti Ghosh i2 += bs; 86649bd79ccSDebojyoti Ghosh } 86749bd79ccSDebojyoti Ghosh xb = t; 86849bd79ccSDebojyoti Ghosh } 86949bd79ccSDebojyoti Ghosh else xb = b; 87049bd79ccSDebojyoti Ghosh if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 87149bd79ccSDebojyoti Ghosh idiag = kaij->ibdiag+bs2*(m-1); 87249bd79ccSDebojyoti Ghosh i2 = bs * (m-1); 87349bd79ccSDebojyoti Ghosh if (xb == b) { 87449bd79ccSDebojyoti Ghosh for (i=m-1; i>=0; i--) { 87549bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 87649bd79ccSDebojyoti Ghosh 87749bd79ccSDebojyoti Ghosh v = aa + ai[i]; 87849bd79ccSDebojyoti Ghosh vi = aj + ai[i]; 87949bd79ccSDebojyoti Ghosh nz = diag[i] - ai[i]; 88049bd79ccSDebojyoti Ghosh workt = work; 88149bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 88249bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 88349bd79ccSDebojyoti Ghosh workt += bs; 88449bd79ccSDebojyoti Ghosh } 88549bd79ccSDebojyoti Ghosh arrt = arr; 88649bd79ccSDebojyoti Ghosh if (T) { 88749bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 88849bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 88949bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 89049bd79ccSDebojyoti Ghosh arrt += bs2; 89149bd79ccSDebojyoti Ghosh } 89249bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 89349bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 89449bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 89549bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 89649bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 89749bd79ccSDebojyoti Ghosh arrt += bs2; 89849bd79ccSDebojyoti Ghosh } 89949bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 90049bd79ccSDebojyoti Ghosh } 90149bd79ccSDebojyoti Ghosh 90249bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 90349bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 90449bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 90549bd79ccSDebojyoti Ghosh workt = work; 90649bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 90749bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 90849bd79ccSDebojyoti Ghosh workt += bs; 90949bd79ccSDebojyoti Ghosh } 91049bd79ccSDebojyoti Ghosh arrt = arr; 91149bd79ccSDebojyoti Ghosh if (T) { 91249bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 91349bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 91449bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 91549bd79ccSDebojyoti Ghosh arrt += bs2; 91649bd79ccSDebojyoti Ghosh } 91749bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 91849bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 91949bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 92049bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 92149bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 92249bd79ccSDebojyoti Ghosh arrt += bs2; 92349bd79ccSDebojyoti Ghosh } 92449bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 92549bd79ccSDebojyoti Ghosh } 92649bd79ccSDebojyoti Ghosh 92749bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 92849bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 92949bd79ccSDebojyoti Ghosh } 93049bd79ccSDebojyoti Ghosh } else { 93149bd79ccSDebojyoti Ghosh for (i=m-1; i>=0; i--) { 93249bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 93349bd79ccSDebojyoti Ghosh v = aa + diag[i] + 1; 93449bd79ccSDebojyoti Ghosh vi = aj + diag[i] + 1; 93549bd79ccSDebojyoti Ghosh nz = ai[i+1] - diag[i] - 1; 93649bd79ccSDebojyoti Ghosh workt = work; 93749bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 93849bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 93949bd79ccSDebojyoti Ghosh workt += bs; 94049bd79ccSDebojyoti Ghosh } 94149bd79ccSDebojyoti Ghosh arrt = arr; 94249bd79ccSDebojyoti Ghosh if (T) { 94349bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 94449bd79ccSDebojyoti Ghosh ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 94549bd79ccSDebojyoti Ghosh for (k=0; k<bs2; k++) arrt[k] *= v[j]; 94649bd79ccSDebojyoti Ghosh arrt += bs2; 94749bd79ccSDebojyoti Ghosh } 94849bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 94949bd79ccSDebojyoti Ghosh } else if (kaij->isTI) { 95049bd79ccSDebojyoti Ghosh for (j=0; j<nz; j++) { 95149bd79ccSDebojyoti Ghosh ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 95249bd79ccSDebojyoti Ghosh for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 95349bd79ccSDebojyoti Ghosh arrt += bs2; 95449bd79ccSDebojyoti Ghosh } 95549bd79ccSDebojyoti Ghosh PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 95649bd79ccSDebojyoti Ghosh } 95749bd79ccSDebojyoti Ghosh PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 95849bd79ccSDebojyoti Ghosh for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 95949bd79ccSDebojyoti Ghosh } 96049bd79ccSDebojyoti Ghosh idiag -= bs2; 96149bd79ccSDebojyoti Ghosh i2 -= bs; 96249bd79ccSDebojyoti Ghosh } 96349bd79ccSDebojyoti Ghosh ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 96449bd79ccSDebojyoti Ghosh } 96549bd79ccSDebojyoti Ghosh } 96649bd79ccSDebojyoti Ghosh 96749bd79ccSDebojyoti Ghosh ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 96849bd79ccSDebojyoti Ghosh ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 96949bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 97049bd79ccSDebojyoti Ghosh } 97149bd79ccSDebojyoti Ghosh 97249bd79ccSDebojyoti Ghosh /*===================================================================================*/ 97349bd79ccSDebojyoti Ghosh 974836168d5SRichard Tran Mills PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 97549bd79ccSDebojyoti Ghosh { 97649bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 97749bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 97849bd79ccSDebojyoti Ghosh 97949bd79ccSDebojyoti Ghosh PetscFunctionBegin; 98049bd79ccSDebojyoti Ghosh if (!yy) { 98149bd79ccSDebojyoti Ghosh ierr = VecSet(zz,0.0);CHKERRQ(ierr); 98249bd79ccSDebojyoti Ghosh } else { 98349bd79ccSDebojyoti Ghosh ierr = VecCopy(yy,zz);CHKERRQ(ierr); 98449bd79ccSDebojyoti Ghosh } 98549bd79ccSDebojyoti Ghosh /* start the scatter */ 98649bd79ccSDebojyoti Ghosh ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 98749bd79ccSDebojyoti Ghosh ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 98849bd79ccSDebojyoti Ghosh ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 98949bd79ccSDebojyoti Ghosh ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 99049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 99149bd79ccSDebojyoti Ghosh } 99249bd79ccSDebojyoti Ghosh 993*bb6fb833SRichard Tran Mills PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 99449bd79ccSDebojyoti Ghosh { 99549bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 99649bd79ccSDebojyoti Ghosh PetscFunctionBegin; 997836168d5SRichard Tran Mills ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 99849bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 99949bd79ccSDebojyoti Ghosh } 100049bd79ccSDebojyoti Ghosh 1001*bb6fb833SRichard Tran Mills PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 100249bd79ccSDebojyoti Ghosh { 100349bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 100449bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 100549bd79ccSDebojyoti Ghosh 100649bd79ccSDebojyoti Ghosh PetscFunctionBegin; 100749bd79ccSDebojyoti Ghosh ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 100849bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 100949bd79ccSDebojyoti Ghosh } 101049bd79ccSDebojyoti Ghosh 101149bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/ 101249bd79ccSDebojyoti Ghosh 101349bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 101449bd79ccSDebojyoti Ghosh { 101549bd79ccSDebojyoti Ghosh Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 10161ca5ffdbSRichard Tran Mills PetscErrorCode diag = PETSC_FALSE; 10171ca5ffdbSRichard Tran Mills PetscErrorCode ierr; 101849bd79ccSDebojyoti Ghosh PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 101949bd79ccSDebojyoti Ghosh PetscScalar *vaij,*v,*S=b->S,*T=b->T; 102049bd79ccSDebojyoti Ghosh 102149bd79ccSDebojyoti Ghosh PetscFunctionBegin; 102249bd79ccSDebojyoti Ghosh if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 102349bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 102449bd79ccSDebojyoti Ghosh if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 102549bd79ccSDebojyoti Ghosh 102649bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 102749bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 102849bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 102949bd79ccSDebojyoti Ghosh if (values) *values = NULL; 103049bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 103149bd79ccSDebojyoti Ghosh } 103249bd79ccSDebojyoti Ghosh 103349bd79ccSDebojyoti Ghosh if (T || b->isTI) { 103449bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 103549bd79ccSDebojyoti Ghosh c = nzaij; 103649bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 103749bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 103849bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 103949bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 104049bd79ccSDebojyoti Ghosh c = i; 104149bd79ccSDebojyoti Ghosh } 104249bd79ccSDebojyoti Ghosh } 104349bd79ccSDebojyoti Ghosh } else nzaij = c = 0; 104449bd79ccSDebojyoti Ghosh 104549bd79ccSDebojyoti Ghosh /* calculate size of row */ 104649bd79ccSDebojyoti Ghosh nz = 0; 104749bd79ccSDebojyoti Ghosh if (S) nz += q; 104849bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 104949bd79ccSDebojyoti Ghosh 105049bd79ccSDebojyoti Ghosh if (cols || values) { 105149bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 105238822f9dSRichard Tran Mills for (i=0; i<q; i++) { 105338822f9dSRichard Tran Mills /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 105438822f9dSRichard Tran Mills v[i] = 0.0; 105538822f9dSRichard Tran Mills } 105649bd79ccSDebojyoti Ghosh if (b->isTI) { 105749bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 105849bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 105949bd79ccSDebojyoti Ghosh idx[i*q+j] = colsaij[i]*q+j; 106049bd79ccSDebojyoti Ghosh v[i*q+j] = (j==s ? vaij[i] : 0); 106149bd79ccSDebojyoti Ghosh } 106249bd79ccSDebojyoti Ghosh } 106349bd79ccSDebojyoti Ghosh } else if (T) { 106449bd79ccSDebojyoti Ghosh for (i=0; i<nzaij; i++) { 106549bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 106649bd79ccSDebojyoti Ghosh idx[i*q+j] = colsaij[i]*q+j; 106749bd79ccSDebojyoti Ghosh v[i*q+j] = vaij[i]*T[s+j*p]; 106849bd79ccSDebojyoti Ghosh } 106949bd79ccSDebojyoti Ghosh } 107049bd79ccSDebojyoti Ghosh } 107149bd79ccSDebojyoti Ghosh if (S) { 107249bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 107349bd79ccSDebojyoti Ghosh idx[c*q+j] = r*q+j; 107449bd79ccSDebojyoti Ghosh v[c*q+j] += S[s+j*p]; 107549bd79ccSDebojyoti Ghosh } 107649bd79ccSDebojyoti Ghosh } 107749bd79ccSDebojyoti Ghosh } 107849bd79ccSDebojyoti Ghosh 107949bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 108049bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 108149bd79ccSDebojyoti Ghosh if (values) *values = v; 108249bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 108349bd79ccSDebojyoti Ghosh } 108449bd79ccSDebojyoti Ghosh 108549bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 108649bd79ccSDebojyoti Ghosh { 108749bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 108849bd79ccSDebojyoti Ghosh PetscFunctionBegin; 108949bd79ccSDebojyoti Ghosh ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 109049bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 109149bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 109249bd79ccSDebojyoti Ghosh } 109349bd79ccSDebojyoti Ghosh 109449bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 109549bd79ccSDebojyoti Ghosh { 109649bd79ccSDebojyoti Ghosh Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 109749bd79ccSDebojyoti Ghosh Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 109849bd79ccSDebojyoti Ghosh Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 109949bd79ccSDebojyoti Ghosh Mat AIJ = b->A; 1100fc64b2cfSRichard Tran Mills PetscBool diag = PETSC_FALSE; 110149bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 110249bd79ccSDebojyoti Ghosh const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 110349bd79ccSDebojyoti Ghosh PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 110449bd79ccSDebojyoti Ghosh PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 110549bd79ccSDebojyoti Ghosh 110649bd79ccSDebojyoti Ghosh PetscFunctionBegin; 110749bd79ccSDebojyoti Ghosh if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 110849bd79ccSDebojyoti Ghosh b->getrowactive = PETSC_TRUE; 110949bd79ccSDebojyoti Ghosh if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 111049bd79ccSDebojyoti Ghosh lrow = row - rstart; 111149bd79ccSDebojyoti Ghosh 111249bd79ccSDebojyoti Ghosh if ((!S) && (!T) && (!b->isTI)) { 111349bd79ccSDebojyoti Ghosh if (ncols) *ncols = 0; 111449bd79ccSDebojyoti Ghosh if (cols) *cols = NULL; 111549bd79ccSDebojyoti Ghosh if (values) *values = NULL; 111649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 111749bd79ccSDebojyoti Ghosh } 111849bd79ccSDebojyoti Ghosh 111949bd79ccSDebojyoti Ghosh r = lrow/p; 112049bd79ccSDebojyoti Ghosh s = lrow%p; 112149bd79ccSDebojyoti Ghosh 112249bd79ccSDebojyoti Ghosh if (T || b->isTI) { 112349bd79ccSDebojyoti Ghosh ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray); 112449bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 112549bd79ccSDebojyoti Ghosh ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 112649bd79ccSDebojyoti Ghosh 112749bd79ccSDebojyoti Ghosh c = ncolsaij + ncolsoaij; 112849bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 112949bd79ccSDebojyoti Ghosh /* check if this row contains a diagonal entry */ 113049bd79ccSDebojyoti Ghosh if (colsaij[i] == r) { 113149bd79ccSDebojyoti Ghosh diag = PETSC_TRUE; 113249bd79ccSDebojyoti Ghosh c = i; 113349bd79ccSDebojyoti Ghosh } 113449bd79ccSDebojyoti Ghosh } 113549bd79ccSDebojyoti Ghosh } else c = 0; 113649bd79ccSDebojyoti Ghosh 113749bd79ccSDebojyoti Ghosh /* calculate size of row */ 113849bd79ccSDebojyoti Ghosh nz = 0; 113949bd79ccSDebojyoti Ghosh if (S) nz += q; 114049bd79ccSDebojyoti Ghosh if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 114149bd79ccSDebojyoti Ghosh 114249bd79ccSDebojyoti Ghosh if (cols || values) { 114349bd79ccSDebojyoti Ghosh ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1144a437a796SRichard Tran Mills for (i=0; i<q; i++) { 1145a437a796SRichard Tran Mills /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1146a437a796SRichard Tran Mills v[i] = 0.0; 1147a437a796SRichard Tran Mills } 114849bd79ccSDebojyoti Ghosh if (b->isTI) { 114949bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 115049bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 115149bd79ccSDebojyoti Ghosh idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 115249bd79ccSDebojyoti Ghosh v[i*q+j] = (j==s ? vals[i] : 0.0); 115349bd79ccSDebojyoti Ghosh } 115449bd79ccSDebojyoti Ghosh } 115549bd79ccSDebojyoti Ghosh for (i=0; i<ncolsoaij; i++) { 115649bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 115749bd79ccSDebojyoti Ghosh idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 115849bd79ccSDebojyoti Ghosh v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 115949bd79ccSDebojyoti Ghosh } 116049bd79ccSDebojyoti Ghosh } 116149bd79ccSDebojyoti Ghosh } else if (T) { 116249bd79ccSDebojyoti Ghosh for (i=0; i<ncolsaij; i++) { 116349bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 116449bd79ccSDebojyoti Ghosh idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 116549bd79ccSDebojyoti Ghosh v[i*q+j] = vals[i]*T[s+j*p]; 116649bd79ccSDebojyoti Ghosh } 116749bd79ccSDebojyoti Ghosh } 116849bd79ccSDebojyoti Ghosh for (i=0; i<ncolsoaij; i++) { 116949bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 117049bd79ccSDebojyoti Ghosh idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 117149bd79ccSDebojyoti Ghosh v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 117249bd79ccSDebojyoti Ghosh } 117349bd79ccSDebojyoti Ghosh } 117449bd79ccSDebojyoti Ghosh } 117549bd79ccSDebojyoti Ghosh if (S) { 117649bd79ccSDebojyoti Ghosh for (j=0; j<q; j++) { 117749bd79ccSDebojyoti Ghosh idx[c*q+j] = (r+rstart/p)*q+j; 117849bd79ccSDebojyoti Ghosh v[c*q+j] += S[s+j*p]; 117949bd79ccSDebojyoti Ghosh } 118049bd79ccSDebojyoti Ghosh } 118149bd79ccSDebojyoti Ghosh } 118249bd79ccSDebojyoti Ghosh 118349bd79ccSDebojyoti Ghosh if (ncols) *ncols = nz; 118449bd79ccSDebojyoti Ghosh if (cols) *cols = idx; 118549bd79ccSDebojyoti Ghosh if (values) *values = v; 118649bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 118749bd79ccSDebojyoti Ghosh } 118849bd79ccSDebojyoti Ghosh 118949bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 119049bd79ccSDebojyoti Ghosh { 119149bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 119249bd79ccSDebojyoti Ghosh PetscFunctionBegin; 119349bd79ccSDebojyoti Ghosh ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 119449bd79ccSDebojyoti Ghosh ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 119549bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 119649bd79ccSDebojyoti Ghosh } 119749bd79ccSDebojyoti Ghosh 119849bd79ccSDebojyoti Ghosh PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 119949bd79ccSDebojyoti Ghosh { 120049bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 120149bd79ccSDebojyoti Ghosh Mat A; 120249bd79ccSDebojyoti Ghosh 120349bd79ccSDebojyoti Ghosh PetscFunctionBegin; 120449bd79ccSDebojyoti Ghosh ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 120549bd79ccSDebojyoti Ghosh ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 120649bd79ccSDebojyoti Ghosh ierr = MatDestroy(&A);CHKERRQ(ierr); 120749bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 120849bd79ccSDebojyoti Ghosh } 120949bd79ccSDebojyoti Ghosh 121049bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */ 121149bd79ccSDebojyoti Ghosh /*@C 121249bd79ccSDebojyoti Ghosh MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 121349bd79ccSDebojyoti Ghosh 121449bd79ccSDebojyoti Ghosh [I \otimes S + A \otimes T] 121549bd79ccSDebojyoti Ghosh 121649bd79ccSDebojyoti Ghosh where 121749bd79ccSDebojyoti Ghosh S is a dense (p \times q) matrix 121849bd79ccSDebojyoti Ghosh T is a dense (p \times q) matrix 121949bd79ccSDebojyoti Ghosh A is an AIJ (n \times n) matrix 122049bd79ccSDebojyoti Ghosh I is the identity matrix 122149bd79ccSDebojyoti Ghosh The resulting matrix is (np \times nq) 122249bd79ccSDebojyoti Ghosh 122349bd79ccSDebojyoti Ghosh The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 122449bd79ccSDebojyoti Ghosh S is always stored independently on all processes as a PetscScalar array in column-major format. 122549bd79ccSDebojyoti Ghosh 122649bd79ccSDebojyoti Ghosh Collective 122749bd79ccSDebojyoti Ghosh 122849bd79ccSDebojyoti Ghosh Input Parameters: 122949bd79ccSDebojyoti Ghosh + A - the AIJ matrix 123049bd79ccSDebojyoti Ghosh . S - the S matrix, stored as a PetscScalar array (column-major) 123149bd79ccSDebojyoti Ghosh . T - the T matrix, stored as a PetscScalar array (column-major) 123249bd79ccSDebojyoti Ghosh . p - number of rows in S and T 123349bd79ccSDebojyoti Ghosh - q - number of columns in S and T 123449bd79ccSDebojyoti Ghosh 123549bd79ccSDebojyoti Ghosh Output Parameter: 123649bd79ccSDebojyoti Ghosh . kaij - the new KAIJ matrix 123749bd79ccSDebojyoti Ghosh 123849bd79ccSDebojyoti Ghosh Operations provided: 123949bd79ccSDebojyoti Ghosh + MatMult 124049bd79ccSDebojyoti Ghosh . MatMultAdd 124149bd79ccSDebojyoti Ghosh . MatInvertBlockDiagonal 124249bd79ccSDebojyoti Ghosh - MatView 124349bd79ccSDebojyoti Ghosh 124449bd79ccSDebojyoti Ghosh Level: advanced 124549bd79ccSDebojyoti Ghosh 12460567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 124749bd79ccSDebojyoti Ghosh @*/ 124849bd79ccSDebojyoti Ghosh PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 124949bd79ccSDebojyoti Ghosh { 125049bd79ccSDebojyoti Ghosh PetscErrorCode ierr; 125149bd79ccSDebojyoti Ghosh PetscMPIInt size; 125249bd79ccSDebojyoti Ghosh 125349bd79ccSDebojyoti Ghosh PetscFunctionBegin; 12540567c835SRichard Tran Mills ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 125549bd79ccSDebojyoti Ghosh ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 125649bd79ccSDebojyoti Ghosh if (size == 1) { 12570567c835SRichard Tran Mills ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 125849bd79ccSDebojyoti Ghosh } else { 12590567c835SRichard Tran Mills ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 126049bd79ccSDebojyoti Ghosh } 12610567c835SRichard Tran Mills ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 12620567c835SRichard Tran Mills ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 12630567c835SRichard Tran Mills ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 12640567c835SRichard Tran Mills ierr = MatSetUp(*kaij); 12650567c835SRichard Tran Mills PetscFunctionReturn(0); 12660567c835SRichard Tran Mills } 126749bd79ccSDebojyoti Ghosh 12680567c835SRichard Tran Mills /*MC 12690567c835SRichard Tran Mills MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form: 12700567c835SRichard Tran Mills 12710567c835SRichard Tran Mills [I \otimes S + A \otimes T] 12720567c835SRichard Tran Mills 12730567c835SRichard Tran Mills where 12740567c835SRichard Tran Mills S is a dense (p \times q) matrix 12750567c835SRichard Tran Mills T is a dense (p \times q) matrix 12760567c835SRichard Tran Mills A is an AIJ (n \times n) matrix 12770567c835SRichard Tran Mills I is the identity matrix 12780567c835SRichard Tran Mills The resulting matrix is (np \times nq) 12790567c835SRichard Tran Mills 12800567c835SRichard Tran Mills The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A. 12810567c835SRichard Tran Mills S and T are always stored independently on all processes as a PetscScalar array in column-major format. 12820567c835SRichard Tran Mills 12830567c835SRichard Tran Mills Operations provided: 12840567c835SRichard Tran Mills + MatMult 12850567c835SRichard Tran Mills . MatMultAdd 12860567c835SRichard Tran Mills . MatInvertBlockDiagonal 12870567c835SRichard Tran Mills - MatView 12880567c835SRichard Tran Mills 12890567c835SRichard Tran Mills Level: advanced 12900567c835SRichard Tran Mills 12910567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 12920567c835SRichard Tran Mills M*/ 12930567c835SRichard Tran Mills 12940567c835SRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 12950567c835SRichard Tran Mills { 12960567c835SRichard Tran Mills PetscErrorCode ierr; 12970567c835SRichard Tran Mills Mat_MPIKAIJ *b; 12980567c835SRichard Tran Mills PetscMPIInt size; 12990567c835SRichard Tran Mills 13000567c835SRichard Tran Mills PetscFunctionBegin; 13010567c835SRichard Tran Mills ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 13020567c835SRichard Tran Mills A->data = (void*)b; 13030567c835SRichard Tran Mills 13040567c835SRichard Tran Mills ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 13050567c835SRichard Tran Mills 13060567c835SRichard Tran Mills A->ops->setup = MatSetUp_KAIJ; 13070567c835SRichard Tran Mills 13080567c835SRichard Tran Mills b->w = 0; 13090567c835SRichard Tran Mills ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 13100567c835SRichard Tran Mills if (size == 1) { 13110567c835SRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 13120567c835SRichard Tran Mills A->ops->setup = MatSetUp_KAIJ; 13130567c835SRichard Tran Mills A->ops->destroy = MatDestroy_SeqKAIJ; 13140567c835SRichard Tran Mills A->ops->view = MatView_SeqKAIJ; 1315*bb6fb833SRichard Tran Mills A->ops->mult = MatMult_SeqKAIJ; 1316*bb6fb833SRichard Tran Mills A->ops->multadd = MatMultAdd_SeqKAIJ; 1317*bb6fb833SRichard Tran Mills A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 13180567c835SRichard Tran Mills A->ops->getrow = MatGetRow_SeqKAIJ; 13190567c835SRichard Tran Mills A->ops->restorerow = MatRestoreRow_SeqKAIJ; 13200567c835SRichard Tran Mills A->ops->sor = MatSOR_SeqKAIJ; 13210567c835SRichard Tran Mills } else { 13220567c835SRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 13230567c835SRichard Tran Mills A->ops->setup = MatSetUp_KAIJ; 13240567c835SRichard Tran Mills A->ops->destroy = MatDestroy_MPIKAIJ; 13250567c835SRichard Tran Mills A->ops->view = MatView_MPIKAIJ; 1326*bb6fb833SRichard Tran Mills A->ops->mult = MatMult_MPIKAIJ; 1327*bb6fb833SRichard Tran Mills A->ops->multadd = MatMultAdd_MPIKAIJ; 1328*bb6fb833SRichard Tran Mills A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 13290567c835SRichard Tran Mills A->ops->getrow = MatGetRow_MPIKAIJ; 13300567c835SRichard Tran Mills A->ops->restorerow = MatRestoreRow_MPIKAIJ; 13310567c835SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 13320567c835SRichard Tran Mills } 13330567c835SRichard Tran Mills A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 133449bd79ccSDebojyoti Ghosh PetscFunctionReturn(0); 133549bd79ccSDebojyoti Ghosh } 1336