xref: /petsc/src/mat/impls/kaij/kaij.c (revision e6985dafcb1cbbb98fec7ef672f5d5c74f095d0e)
149bd79ccSDebojyoti Ghosh 
249bd79ccSDebojyoti Ghosh /*
349bd79ccSDebojyoti Ghosh   Defines the basic matrix operations for the KAIJ  matrix storage format.
449bd79ccSDebojyoti Ghosh   This format is used to evaluate matrices of the form:
549bd79ccSDebojyoti Ghosh 
649bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
749bd79ccSDebojyoti Ghosh 
849bd79ccSDebojyoti Ghosh   where
949bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
1049bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
1149bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
1249bd79ccSDebojyoti Ghosh     I is the identity matrix
1349bd79ccSDebojyoti Ghosh 
1449bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
1549bd79ccSDebojyoti Ghosh 
1649bd79ccSDebojyoti Ghosh   We provide:
1749bd79ccSDebojyoti Ghosh      MatMult()
1849bd79ccSDebojyoti Ghosh      MatMultAdd()
1949bd79ccSDebojyoti Ghosh      MatInvertBlockDiagonal()
2049bd79ccSDebojyoti Ghosh   and
2149bd79ccSDebojyoti Ghosh      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
2249bd79ccSDebojyoti Ghosh 
2349bd79ccSDebojyoti Ghosh   This single directory handles both the sequential and parallel codes
2449bd79ccSDebojyoti Ghosh */
2549bd79ccSDebojyoti Ghosh 
2649bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
2749bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h>
2849bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h>
2949bd79ccSDebojyoti Ghosh 
3049bd79ccSDebojyoti Ghosh /*@C
3149bd79ccSDebojyoti Ghosh    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
3249bd79ccSDebojyoti Ghosh 
3349bd79ccSDebojyoti Ghosh    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
3449bd79ccSDebojyoti Ghosh 
3549bd79ccSDebojyoti Ghosh    Input Parameter:
3649bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
3749bd79ccSDebojyoti Ghosh 
3849bd79ccSDebojyoti Ghosh    Output Parameter:
3949bd79ccSDebojyoti Ghosh .  B - the AIJ matrix
4049bd79ccSDebojyoti Ghosh 
4149bd79ccSDebojyoti Ghosh    Level: advanced
4249bd79ccSDebojyoti Ghosh 
4349bd79ccSDebojyoti Ghosh    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
4449bd79ccSDebojyoti Ghosh 
4549bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ()
4649bd79ccSDebojyoti Ghosh @*/
4749bd79ccSDebojyoti Ghosh PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
4849bd79ccSDebojyoti Ghosh {
4949bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
5049bd79ccSDebojyoti Ghosh   PetscBool      ismpikaij,isseqkaij;
5149bd79ccSDebojyoti Ghosh 
5249bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
5349bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
5449bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
5549bd79ccSDebojyoti Ghosh   if (ismpikaij) {
5649bd79ccSDebojyoti Ghosh     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
5749bd79ccSDebojyoti Ghosh 
5849bd79ccSDebojyoti Ghosh     *B = b->A;
5949bd79ccSDebojyoti Ghosh   } else if (isseqkaij) {
6049bd79ccSDebojyoti Ghosh     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
6149bd79ccSDebojyoti Ghosh 
6249bd79ccSDebojyoti Ghosh     *B = b->AIJ;
63b04351cbSRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
6449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
6549bd79ccSDebojyoti Ghosh }
6649bd79ccSDebojyoti Ghosh 
6749bd79ccSDebojyoti Ghosh /*@C
6849bd79ccSDebojyoti Ghosh    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
6949bd79ccSDebojyoti Ghosh 
700567c835SRichard Tran Mills    Not Collective; the entire S is stored and returned independently on all processes.
7149bd79ccSDebojyoti Ghosh 
7249bd79ccSDebojyoti Ghosh    Input Parameter:
7349bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
7449bd79ccSDebojyoti Ghosh 
75a5b5c723SRichard Tran Mills    Output Parameters:
76a5b5c723SRichard Tran Mills +  m - the number of rows in S
77a5b5c723SRichard Tran Mills .  n - the number of columns in S
78a5b5c723SRichard Tran Mills -  S - the S matrix, in form of a scalar array in column-major format
7949bd79ccSDebojyoti Ghosh 
80a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
8131a97b9aSRichard Tran Mills 
8249bd79ccSDebojyoti Ghosh    Level: advanced
8349bd79ccSDebojyoti Ghosh 
8431a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
8549bd79ccSDebojyoti Ghosh @*/
86a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
8749bd79ccSDebojyoti Ghosh {
8849bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
8949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
90a5b5c723SRichard Tran Mills   if (m) *m = b->p;
91a5b5c723SRichard Tran Mills   if (n) *n = b->q;
92a5b5c723SRichard Tran Mills   if (S) *S = b->S;
93a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
94a5b5c723SRichard Tran Mills }
95a5b5c723SRichard Tran Mills 
96a5b5c723SRichard Tran Mills /*@C
97a5b5c723SRichard Tran Mills    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98a5b5c723SRichard Tran Mills 
99a5b5c723SRichard Tran Mills    Not Collective; the entire S is stored and returned independently on all processes.
100a5b5c723SRichard Tran Mills 
101a5b5c723SRichard Tran Mills    Input Parameter:
102a5b5c723SRichard Tran Mills .  A - the KAIJ matrix
103a5b5c723SRichard Tran Mills 
104a5b5c723SRichard Tran Mills    Output Parameters:
105a5b5c723SRichard Tran Mills +  m - the number of rows in S
106a5b5c723SRichard Tran Mills .  n - the number of columns in S
107a5b5c723SRichard Tran Mills -  S - the S matrix, in form of a scalar array in column-major format
108a5b5c723SRichard Tran Mills 
109a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110a5b5c723SRichard Tran Mills 
111a5b5c723SRichard Tran Mills    Level: advanced
112a5b5c723SRichard Tran Mills 
113a5b5c723SRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114a5b5c723SRichard Tran Mills @*/
115a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116a5b5c723SRichard Tran Mills {
117a5b5c723SRichard Tran Mills   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118a5b5c723SRichard Tran Mills   PetscFunctionBegin;
119a5b5c723SRichard Tran Mills   if (m) *m = b->p;
120a5b5c723SRichard Tran Mills   if (n) *n = b->q;
121a5b5c723SRichard Tran Mills   if (S) *S = b->S;
122a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
123a5b5c723SRichard Tran Mills }
124a5b5c723SRichard Tran Mills 
125a5b5c723SRichard Tran Mills /*@C
126a5b5c723SRichard Tran Mills   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127a5b5c723SRichard Tran Mills 
128a5b5c723SRichard Tran Mills   Not collective
129a5b5c723SRichard Tran Mills 
130a5b5c723SRichard Tran Mills   Input Parameter:
131a5b5c723SRichard Tran Mills . A - the KAIJ matrix
132a5b5c723SRichard Tran Mills 
133a5b5c723SRichard Tran Mills   Output Parameter:
134a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS()
135a5b5c723SRichard Tran Mills 
136a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
138a5b5c723SRichard Tran Mills 
139a5b5c723SRichard Tran Mills   Level: advanced
140a5b5c723SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141a5b5c723SRichard Tran Mills @*/
142a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143a5b5c723SRichard Tran Mills {
144a5b5c723SRichard Tran Mills   PetscFunctionBegin;
145a5b5c723SRichard Tran Mills   if (S) *S = NULL;
146a5b5c723SRichard Tran Mills   PetscObjectStateIncrease((PetscObject)A);
147a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
148a5b5c723SRichard Tran Mills }
149a5b5c723SRichard Tran Mills 
150a5b5c723SRichard Tran Mills /*@C
151a5b5c723SRichard Tran Mills   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
152a5b5c723SRichard Tran Mills 
153a5b5c723SRichard Tran Mills   Not collective
154a5b5c723SRichard Tran Mills 
155a5b5c723SRichard Tran Mills   Input Parameter:
156a5b5c723SRichard Tran Mills . A - the KAIJ matrix
157a5b5c723SRichard Tran Mills 
158a5b5c723SRichard Tran Mills   Output Parameter:
159a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS()
160a5b5c723SRichard Tran Mills 
161a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
162a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
163a5b5c723SRichard Tran Mills 
164a5b5c723SRichard Tran Mills   Level: advanced
165a5b5c723SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
166a5b5c723SRichard Tran Mills @*/
167a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
168a5b5c723SRichard Tran Mills {
169a5b5c723SRichard Tran Mills   PetscFunctionBegin;
170a5b5c723SRichard Tran Mills   if (S) *S = NULL;
17149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
17249bd79ccSDebojyoti Ghosh }
17349bd79ccSDebojyoti Ghosh 
17449bd79ccSDebojyoti Ghosh /*@C
17531a97b9aSRichard Tran Mills    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
17649bd79ccSDebojyoti Ghosh 
1770567c835SRichard Tran Mills    Not Collective; the entire T is stored and returned independently on all processes
17849bd79ccSDebojyoti Ghosh 
17949bd79ccSDebojyoti Ghosh    Input Parameter:
18049bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
18149bd79ccSDebojyoti Ghosh 
18249bd79ccSDebojyoti Ghosh    Output Parameter:
183a5b5c723SRichard Tran Mills +  m - the number of rows in T
184a5b5c723SRichard Tran Mills .  n - the number of columns in T
185a5b5c723SRichard Tran Mills -  T - the T matrix, in form of a scalar array in column-major format
18649bd79ccSDebojyoti Ghosh 
187a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
18831a97b9aSRichard Tran Mills 
18949bd79ccSDebojyoti Ghosh    Level: advanced
19049bd79ccSDebojyoti Ghosh 
19131a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
19249bd79ccSDebojyoti Ghosh @*/
193a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
19449bd79ccSDebojyoti Ghosh {
19549bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
19649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
197a5b5c723SRichard Tran Mills   if (m) *m = b->p;
198a5b5c723SRichard Tran Mills   if (n) *n = b->q;
199a5b5c723SRichard Tran Mills   if (T) *T = b->T;
200a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
201a5b5c723SRichard Tran Mills }
202a5b5c723SRichard Tran Mills 
203a5b5c723SRichard Tran Mills /*@C
204a5b5c723SRichard Tran Mills    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
205a5b5c723SRichard Tran Mills 
206a5b5c723SRichard Tran Mills    Not Collective; the entire T is stored and returned independently on all processes
207a5b5c723SRichard Tran Mills 
208a5b5c723SRichard Tran Mills    Input Parameter:
209a5b5c723SRichard Tran Mills .  A - the KAIJ matrix
210a5b5c723SRichard Tran Mills 
211a5b5c723SRichard Tran Mills    Output Parameter:
212a5b5c723SRichard Tran Mills +  m - the number of rows in T
213a5b5c723SRichard Tran Mills .  n - the number of columns in T
214a5b5c723SRichard Tran Mills -  T - the T matrix, in form of a scalar array in column-major format
215a5b5c723SRichard Tran Mills 
216a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
217a5b5c723SRichard Tran Mills 
218a5b5c723SRichard Tran Mills    Level: advanced
219a5b5c723SRichard Tran Mills 
220a5b5c723SRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
221a5b5c723SRichard Tran Mills @*/
222a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
223a5b5c723SRichard Tran Mills {
224a5b5c723SRichard Tran Mills   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
225a5b5c723SRichard Tran Mills   PetscFunctionBegin;
226a5b5c723SRichard Tran Mills   if (m) *m = b->p;
227a5b5c723SRichard Tran Mills   if (n) *n = b->q;
228a5b5c723SRichard Tran Mills   if (T) *T = b->T;
229a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
230a5b5c723SRichard Tran Mills }
231a5b5c723SRichard Tran Mills 
232a5b5c723SRichard Tran Mills /*@C
233a5b5c723SRichard Tran Mills   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
234a5b5c723SRichard Tran Mills 
235a5b5c723SRichard Tran Mills   Not collective
236a5b5c723SRichard Tran Mills 
237a5b5c723SRichard Tran Mills   Input Parameter:
238a5b5c723SRichard Tran Mills . A - the KAIJ matrix
239a5b5c723SRichard Tran Mills 
240a5b5c723SRichard Tran Mills   Output Parameter:
241a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS()
242a5b5c723SRichard Tran Mills 
243a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
244a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
245a5b5c723SRichard Tran Mills 
246a5b5c723SRichard Tran Mills   Level: advanced
247a5b5c723SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
248a5b5c723SRichard Tran Mills @*/
249a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
250a5b5c723SRichard Tran Mills {
251a5b5c723SRichard Tran Mills   PetscFunctionBegin;
252a5b5c723SRichard Tran Mills   if (T) *T = NULL;
253a5b5c723SRichard Tran Mills   PetscObjectStateIncrease((PetscObject)A);
254a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
255a5b5c723SRichard Tran Mills }
256a5b5c723SRichard Tran Mills 
257a5b5c723SRichard Tran Mills /*@C
258a5b5c723SRichard Tran Mills   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
259a5b5c723SRichard Tran Mills 
260a5b5c723SRichard Tran Mills   Not collective
261a5b5c723SRichard Tran Mills 
262a5b5c723SRichard Tran Mills   Input Parameter:
263a5b5c723SRichard Tran Mills . A - the KAIJ matrix
264a5b5c723SRichard Tran Mills 
265a5b5c723SRichard Tran Mills   Output Parameter:
266a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS()
267a5b5c723SRichard Tran Mills 
268a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
269a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
270a5b5c723SRichard Tran Mills 
271a5b5c723SRichard Tran Mills   Level: advanced
272a5b5c723SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
273a5b5c723SRichard Tran Mills @*/
274a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
275a5b5c723SRichard Tran Mills {
276a5b5c723SRichard Tran Mills   PetscFunctionBegin;
277a5b5c723SRichard Tran Mills   if (T) *T = NULL;
27849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
27949bd79ccSDebojyoti Ghosh }
28049bd79ccSDebojyoti Ghosh 
2810567c835SRichard Tran Mills /*@
2820567c835SRichard Tran Mills    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
2830567c835SRichard Tran Mills 
2840567c835SRichard Tran Mills    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
2850567c835SRichard Tran Mills 
2860567c835SRichard Tran Mills    Input Parameters:
2870567c835SRichard Tran Mills +  A - the KAIJ matrix
2880567c835SRichard Tran Mills -  B - the AIJ matrix
2890567c835SRichard Tran Mills 
29015b9d025SRichard Tran Mills    Notes:
29115b9d025SRichard Tran Mills    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
29215b9d025SRichard Tran Mills    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
29315b9d025SRichard Tran Mills 
2940567c835SRichard Tran Mills    Level: advanced
2950567c835SRichard Tran Mills 
2960567c835SRichard Tran Mills .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
2970567c835SRichard Tran Mills @*/
2980567c835SRichard Tran Mills PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
2990567c835SRichard Tran Mills {
3000567c835SRichard Tran Mills   PetscErrorCode ierr;
3010567c835SRichard Tran Mills   PetscMPIInt    size;
3020567c835SRichard Tran Mills 
3030567c835SRichard Tran Mills   PetscFunctionBegin;
3040567c835SRichard Tran Mills   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3050567c835SRichard Tran Mills   if (size == 1) {
3060567c835SRichard Tran Mills     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
3070567c835SRichard Tran Mills     a->AIJ = B;
3080567c835SRichard Tran Mills   } else {
3090567c835SRichard Tran Mills     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
3100567c835SRichard Tran Mills     a->A = B;
3110567c835SRichard Tran Mills   }
31215b9d025SRichard Tran Mills   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
3130567c835SRichard Tran Mills   PetscFunctionReturn(0);
3140567c835SRichard Tran Mills }
3150567c835SRichard Tran Mills 
3160567c835SRichard Tran Mills /*@C
3170567c835SRichard Tran Mills    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
3180567c835SRichard Tran Mills 
3190567c835SRichard Tran Mills    Logically Collective; the entire S is stored independently on all processes.
3200567c835SRichard Tran Mills 
3210567c835SRichard Tran Mills    Input Parameters:
3220567c835SRichard Tran Mills +  A - the KAIJ matrix
3230567c835SRichard Tran Mills .  p - the number of rows in S
3240567c835SRichard Tran Mills .  q - the number of columns in S
3250567c835SRichard Tran Mills -  S - the S matrix, in form of a scalar array in column-major format
3260567c835SRichard Tran Mills 
3270567c835SRichard Tran Mills    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
32888f48298SRichard Tran Mills    The S matrix is copied, so the user can destroy this array.
3290567c835SRichard Tran Mills 
3300567c835SRichard Tran Mills    Level: Advanced
3310567c835SRichard Tran Mills 
3320567c835SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
3330567c835SRichard Tran Mills @*/
3340567c835SRichard Tran Mills PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
3350567c835SRichard Tran Mills {
3360567c835SRichard Tran Mills   PetscErrorCode ierr;
3370567c835SRichard Tran Mills   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
3380567c835SRichard Tran Mills 
3390567c835SRichard Tran Mills   PetscFunctionBegin;
3400567c835SRichard Tran Mills   ierr = PetscFree(a->S);CHKERRQ(ierr);
3410567c835SRichard Tran Mills   if (S) {
342a84f8069SRichard Tran Mills     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
3430567c835SRichard Tran Mills     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
3440567c835SRichard Tran Mills   } else  a->S = NULL;
3450567c835SRichard Tran Mills 
3460567c835SRichard Tran Mills   a->p = p;
3470567c835SRichard Tran Mills   a->q = q;
3480567c835SRichard Tran Mills   PetscFunctionReturn(0);
3490567c835SRichard Tran Mills }
3500567c835SRichard Tran Mills 
3510567c835SRichard Tran Mills /*@C
3520567c835SRichard Tran Mills    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
3530567c835SRichard Tran Mills 
3540567c835SRichard Tran Mills    Logically Collective; the entire T is stored independently on all processes.
3550567c835SRichard Tran Mills 
3560567c835SRichard Tran Mills    Input Parameters:
3570567c835SRichard Tran Mills +  A - the KAIJ matrix
3580567c835SRichard Tran Mills .  p - the number of rows in S
3590567c835SRichard Tran Mills .  q - the number of columns in S
3600567c835SRichard Tran Mills -  T - the T matrix, in form of a scalar array in column-major format
3610567c835SRichard Tran Mills 
3620567c835SRichard Tran Mills    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
36388f48298SRichard Tran Mills    The T matrix is copied, so the user can destroy this array.
3640567c835SRichard Tran Mills 
3650567c835SRichard Tran Mills    Level: Advanced
3660567c835SRichard Tran Mills 
3670567c835SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
3680567c835SRichard Tran Mills @*/
3690567c835SRichard Tran Mills PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
3700567c835SRichard Tran Mills {
3710567c835SRichard Tran Mills   PetscErrorCode ierr;
3720567c835SRichard Tran Mills   PetscInt       i,j;
3730567c835SRichard Tran Mills   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
3740567c835SRichard Tran Mills   PetscBool      isTI = PETSC_FALSE;
3750567c835SRichard Tran Mills 
3760567c835SRichard Tran Mills   PetscFunctionBegin;
3770567c835SRichard Tran Mills   /* check if T is an identity matrix */
3780567c835SRichard Tran Mills   if (T && (p == q)) {
3790567c835SRichard Tran Mills     isTI = PETSC_TRUE;
3800567c835SRichard Tran Mills     for (i=0; i<p; i++) {
3810567c835SRichard Tran Mills       for (j=0; j<q; j++) {
3820567c835SRichard Tran Mills         if (i == j) {
3830567c835SRichard Tran Mills           /* diagonal term must be 1 */
3840567c835SRichard Tran Mills           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
3850567c835SRichard Tran Mills         } else {
3860567c835SRichard Tran Mills           /* off-diagonal term must be 0 */
3870567c835SRichard Tran Mills           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
3880567c835SRichard Tran Mills         }
3890567c835SRichard Tran Mills       }
3900567c835SRichard Tran Mills     }
3910567c835SRichard Tran Mills   }
3920567c835SRichard Tran Mills   a->isTI = isTI;
3930567c835SRichard Tran Mills 
3940567c835SRichard Tran Mills   ierr = PetscFree(a->T);CHKERRQ(ierr);
3950567c835SRichard Tran Mills   if (T && (!isTI)) {
396a84f8069SRichard Tran Mills     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
3970567c835SRichard Tran Mills     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
39850d19d74SRichard Tran Mills   } else a->T = NULL;
3990567c835SRichard Tran Mills 
4000567c835SRichard Tran Mills   a->p = p;
4010567c835SRichard Tran Mills   a->q = q;
4020567c835SRichard Tran Mills   PetscFunctionReturn(0);
4030567c835SRichard Tran Mills }
4040567c835SRichard Tran Mills 
40549bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
40649bd79ccSDebojyoti Ghosh {
40749bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
40849bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
40949bd79ccSDebojyoti Ghosh 
41049bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
41149bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
412a84f8069SRichard Tran Mills   ierr = PetscFree(b->S);CHKERRQ(ierr);
413a84f8069SRichard Tran Mills   ierr = PetscFree(b->T);CHKERRQ(ierr);
414a84f8069SRichard Tran Mills   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
41549bd79ccSDebojyoti Ghosh   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
41649bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
41749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
41849bd79ccSDebojyoti Ghosh }
41949bd79ccSDebojyoti Ghosh 
42049bd79ccSDebojyoti Ghosh PetscErrorCode MatSetUp_KAIJ(Mat A)
42149bd79ccSDebojyoti Ghosh {
4220567c835SRichard Tran Mills   PetscErrorCode ierr;
4230567c835SRichard Tran Mills   PetscInt       n;
4240567c835SRichard Tran Mills   PetscMPIInt    size;
4250567c835SRichard Tran Mills   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
4260567c835SRichard Tran Mills 
42749bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
4280567c835SRichard Tran Mills   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
4290567c835SRichard Tran Mills   if (size == 1) {
4300567c835SRichard Tran Mills     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
4310567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
4320567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
4330567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
4340567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
4350567c835SRichard Tran Mills   } else {
4360567c835SRichard Tran Mills     Mat_MPIKAIJ *a;
4370567c835SRichard Tran Mills     Mat_MPIAIJ  *mpiaij;
4380567c835SRichard Tran Mills     IS          from,to;
4390567c835SRichard Tran Mills     Vec         gvec;
4400567c835SRichard Tran Mills     PetscScalar *T;
4410567c835SRichard Tran Mills     PetscInt    i,j;
4420567c835SRichard Tran Mills 
4430567c835SRichard Tran Mills     a = (Mat_MPIKAIJ*)A->data;
444d3f912faSRichard Tran Mills     mpiaij = (Mat_MPIAIJ*)a->A->data;
4450567c835SRichard Tran Mills     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
4460567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
4470567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
4480567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
4490567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
4500567c835SRichard Tran Mills 
4510567c835SRichard Tran Mills     if (a->isTI) {
4520567c835SRichard Tran Mills       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
4530567c835SRichard Tran Mills        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
4540567c835SRichard Tran Mills        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
4550567c835SRichard Tran Mills        * to pass in. */
456a84f8069SRichard Tran Mills       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
4570567c835SRichard Tran Mills       for (i=0; i<a->p; i++) {
4580567c835SRichard Tran Mills         for (j=0; j<a->q; j++) {
4590567c835SRichard Tran Mills           if (i==j) T[i+j*a->p] = 1.0;
4600567c835SRichard Tran Mills           else      T[i+j*a->p] = 0.0;
4610567c835SRichard Tran Mills         }
4620567c835SRichard Tran Mills       }
4630567c835SRichard Tran Mills     } else T = a->T;
4640567c835SRichard Tran Mills     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
4650567c835SRichard Tran Mills     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
466c138d2acSRichard Tran Mills     if (a->isTI) {
4670567c835SRichard Tran Mills       ierr = PetscFree(T);CHKERRQ(ierr);
468c138d2acSRichard Tran Mills     }
4690567c835SRichard Tran Mills 
4700567c835SRichard Tran Mills     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
4710567c835SRichard Tran Mills     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
4720567c835SRichard Tran Mills     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
4730567c835SRichard Tran Mills     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
4740567c835SRichard Tran Mills     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
4750567c835SRichard Tran Mills 
4760567c835SRichard Tran Mills     /* create two temporary Index sets for build scatter gather */
4770567c835SRichard Tran Mills     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
4780567c835SRichard Tran Mills     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
4790567c835SRichard Tran Mills 
4800567c835SRichard Tran Mills     /* create temporary global vector to generate scatter context */
4810567c835SRichard Tran Mills     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
4820567c835SRichard Tran Mills 
4830567c835SRichard Tran Mills     /* generate the scatter context */
4844589b4e5SRichard Tran Mills     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
4850567c835SRichard Tran Mills 
4860567c835SRichard Tran Mills     ierr = ISDestroy(&from);CHKERRQ(ierr);
4870567c835SRichard Tran Mills     ierr = ISDestroy(&to);CHKERRQ(ierr);
4880567c835SRichard Tran Mills     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
4890567c835SRichard Tran Mills   }
4900567c835SRichard Tran Mills 
4910567c835SRichard Tran Mills   A->assembled = PETSC_TRUE;
49249bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
49349bd79ccSDebojyoti Ghosh }
49449bd79ccSDebojyoti Ghosh 
495*e6985dafSRichard Tran Mills PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
49649bd79ccSDebojyoti Ghosh {
497*e6985dafSRichard Tran Mills   PetscViewerFormat format;
498*e6985dafSRichard Tran Mills   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
49949bd79ccSDebojyoti Ghosh   Mat               B;
500*e6985dafSRichard Tran Mills   PetscInt          i;
501*e6985dafSRichard Tran Mills   PetscErrorCode    ierr;
502*e6985dafSRichard Tran Mills   PetscBool         ismpikaij;
50349bd79ccSDebojyoti Ghosh 
50449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
505*e6985dafSRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
506*e6985dafSRichard Tran Mills   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
507*e6985dafSRichard Tran Mills   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
508*e6985dafSRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr);
509*e6985dafSRichard Tran Mills 
510*e6985dafSRichard Tran Mills     /* Print appropriate details for S. */
511*e6985dafSRichard Tran Mills     if (!a->S) {
512*e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");
513*e6985dafSRichard Tran Mills     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
514*e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr);
515*e6985dafSRichard Tran Mills       for (i=0; i<(a->p * a->q); i++) {
516*e6985dafSRichard Tran Mills #if defined(PETSC_USE_COMPLEX)
517*e6985dafSRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr);
518*e6985dafSRichard Tran Mills #else
519*e6985dafSRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr);
520*e6985dafSRichard Tran Mills #endif
521*e6985dafSRichard Tran Mills       }
522*e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
52349bd79ccSDebojyoti Ghosh     }
52449bd79ccSDebojyoti Ghosh 
525*e6985dafSRichard Tran Mills     /* Print appropriate details for T. */
526*e6985dafSRichard Tran Mills     if (a->isTI) {
527*e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");
528*e6985dafSRichard Tran Mills     } else if (!a->T) {
529*e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");
530*e6985dafSRichard Tran Mills     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
531*e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr);
532*e6985dafSRichard Tran Mills       for (i=0; i<(a->p * a->q); i++) {
533*e6985dafSRichard Tran Mills #if defined(PETSC_USE_COMPLEX)
534*e6985dafSRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr);
535*e6985dafSRichard Tran Mills #else
536*e6985dafSRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr);
537*e6985dafSRichard Tran Mills #endif
538*e6985dafSRichard Tran Mills       }
539*e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
540*e6985dafSRichard Tran Mills     }
54149bd79ccSDebojyoti Ghosh 
542*e6985dafSRichard Tran Mills     /* Now print details for the AIJ matrix, using the AIJ viewer. */
543*e6985dafSRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr);
544*e6985dafSRichard Tran Mills     if (ismpikaij) {
545*e6985dafSRichard Tran Mills       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
546*e6985dafSRichard Tran Mills       ierr = MatView(b->A,viewer);CHKERRQ(ierr);
547*e6985dafSRichard Tran Mills     } else {
548*e6985dafSRichard Tran Mills       ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr);
549*e6985dafSRichard Tran Mills     }
550*e6985dafSRichard Tran Mills 
551*e6985dafSRichard Tran Mills   } else {
552*e6985dafSRichard Tran Mills     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
553*e6985dafSRichard Tran Mills     if (ismpikaij) {
55449bd79ccSDebojyoti Ghosh       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
555*e6985dafSRichard Tran Mills     } else {
556*e6985dafSRichard Tran Mills       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
557*e6985dafSRichard Tran Mills     }
55849bd79ccSDebojyoti Ghosh     ierr = MatView(B,viewer);CHKERRQ(ierr);
55949bd79ccSDebojyoti Ghosh     ierr = MatDestroy(&B);CHKERRQ(ierr);
560*e6985dafSRichard Tran Mills   }
56149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
56249bd79ccSDebojyoti Ghosh }
56349bd79ccSDebojyoti Ghosh 
56449bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
56549bd79ccSDebojyoti Ghosh {
56649bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
56749bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
56849bd79ccSDebojyoti Ghosh 
56949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
57049bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
57149bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
57249bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
57349bd79ccSDebojyoti Ghosh   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
57449bd79ccSDebojyoti Ghosh   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
575a84f8069SRichard Tran Mills   ierr = PetscFree(b->S);CHKERRQ(ierr);
576a84f8069SRichard Tran Mills   ierr = PetscFree(b->T);CHKERRQ(ierr);
577a84f8069SRichard Tran Mills   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
57849bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
57949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
58049bd79ccSDebojyoti Ghosh }
58149bd79ccSDebojyoti Ghosh 
58249bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/
58349bd79ccSDebojyoti Ghosh 
58449bd79ccSDebojyoti Ghosh /* zz = yy + Axx */
585836168d5SRichard Tran Mills PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
58649bd79ccSDebojyoti Ghosh {
58749bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
58849bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
58949bd79ccSDebojyoti Ghosh   const PetscScalar *s = b->S, *t = b->T;
59049bd79ccSDebojyoti Ghosh   const PetscScalar *x,*v,*bx;
59149bd79ccSDebojyoti Ghosh   PetscScalar       *y,*sums;
59249bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
59349bd79ccSDebojyoti Ghosh   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
59449bd79ccSDebojyoti Ghosh   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
59549bd79ccSDebojyoti Ghosh 
59649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
59749bd79ccSDebojyoti Ghosh   if (!yy) {
59849bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
59949bd79ccSDebojyoti Ghosh   } else {
60049bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
60149bd79ccSDebojyoti Ghosh   }
60249bd79ccSDebojyoti Ghosh   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
60349bd79ccSDebojyoti Ghosh 
60449bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
60549bd79ccSDebojyoti Ghosh   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
60649bd79ccSDebojyoti Ghosh   idx  = a->j;
60749bd79ccSDebojyoti Ghosh   v    = a->a;
60849bd79ccSDebojyoti Ghosh   ii   = a->i;
60949bd79ccSDebojyoti Ghosh 
61049bd79ccSDebojyoti Ghosh   if (b->isTI) {
61149bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
61249bd79ccSDebojyoti Ghosh       jrow = ii[i];
61349bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
61449bd79ccSDebojyoti Ghosh       sums = y + p*i;
61549bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
61649bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
61749bd79ccSDebojyoti Ghosh           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
61849bd79ccSDebojyoti Ghosh         }
61949bd79ccSDebojyoti Ghosh       }
62049bd79ccSDebojyoti Ghosh     }
62149bd79ccSDebojyoti Ghosh   } else if (t) {
62249bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
62349bd79ccSDebojyoti Ghosh       jrow = ii[i];
62449bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
62549bd79ccSDebojyoti Ghosh       sums = y + p*i;
62649bd79ccSDebojyoti Ghosh       bx   = x + q*i;
62749bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
62849bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
62949bd79ccSDebojyoti Ghosh           for (l=0; l<q; l++) {
63049bd79ccSDebojyoti Ghosh             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
63149bd79ccSDebojyoti Ghosh           }
63249bd79ccSDebojyoti Ghosh         }
63349bd79ccSDebojyoti Ghosh       }
63449bd79ccSDebojyoti Ghosh     }
63549bd79ccSDebojyoti Ghosh   }
63649bd79ccSDebojyoti Ghosh   if (s) {
63749bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
63849bd79ccSDebojyoti Ghosh       sums = y + p*i;
63949bd79ccSDebojyoti Ghosh       bx   = x + q*i;
64049bd79ccSDebojyoti Ghosh       if (i < b->AIJ->cmap->n) {
64149bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
64249bd79ccSDebojyoti Ghosh           for (k=0; k<p; k++) {
64349bd79ccSDebojyoti Ghosh             sums[k] += s[k+j*p]*bx[j];
64449bd79ccSDebojyoti Ghosh           }
64549bd79ccSDebojyoti Ghosh         }
64649bd79ccSDebojyoti Ghosh       }
64749bd79ccSDebojyoti Ghosh     }
64849bd79ccSDebojyoti Ghosh   }
64949bd79ccSDebojyoti Ghosh 
65049bd79ccSDebojyoti Ghosh   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
65149bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
65249bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
65349bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
65449bd79ccSDebojyoti Ghosh }
65549bd79ccSDebojyoti Ghosh 
656bb6fb833SRichard Tran Mills PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
65749bd79ccSDebojyoti Ghosh {
65849bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
65949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
660836168d5SRichard Tran Mills   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
66149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
66249bd79ccSDebojyoti Ghosh }
66349bd79ccSDebojyoti Ghosh 
66449bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h>
66549bd79ccSDebojyoti Ghosh 
666bb6fb833SRichard Tran Mills PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
66749bd79ccSDebojyoti Ghosh {
66849bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
66949bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
67049bd79ccSDebojyoti Ghosh   const PetscScalar *S  = b->S;
67149bd79ccSDebojyoti Ghosh   const PetscScalar *T  = b->T;
67249bd79ccSDebojyoti Ghosh   const PetscScalar *v  = a->a;
67349bd79ccSDebojyoti Ghosh   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
67449bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
67549bd79ccSDebojyoti Ghosh   PetscInt          i,j,*v_pivots,dof,dof2;
67649bd79ccSDebojyoti Ghosh   PetscScalar       *diag,aval,*v_work;
67749bd79ccSDebojyoti Ghosh 
67849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
67949bd79ccSDebojyoti Ghosh   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
68031a97b9aSRichard Tran Mills   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
68149bd79ccSDebojyoti Ghosh 
68249bd79ccSDebojyoti Ghosh   dof  = p;
68349bd79ccSDebojyoti Ghosh   dof2 = dof*dof;
68449bd79ccSDebojyoti Ghosh 
68549bd79ccSDebojyoti Ghosh   if (b->ibdiagvalid) {
68649bd79ccSDebojyoti Ghosh     if (values) *values = b->ibdiag;
68749bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
68849bd79ccSDebojyoti Ghosh   }
68949bd79ccSDebojyoti Ghosh   if (!b->ibdiag) {
690a84f8069SRichard Tran Mills     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
69149bd79ccSDebojyoti Ghosh     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
69249bd79ccSDebojyoti Ghosh   }
69349bd79ccSDebojyoti Ghosh   if (values) *values = b->ibdiag;
69449bd79ccSDebojyoti Ghosh   diag = b->ibdiag;
69549bd79ccSDebojyoti Ghosh 
69649bd79ccSDebojyoti Ghosh   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
69749bd79ccSDebojyoti Ghosh   for (i=0; i<m; i++) {
69849bd79ccSDebojyoti Ghosh     if (S) {
69949bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
70049bd79ccSDebojyoti Ghosh     } else {
70149bd79ccSDebojyoti Ghosh       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
70249bd79ccSDebojyoti Ghosh     }
70349bd79ccSDebojyoti Ghosh     if (b->isTI) {
70449bd79ccSDebojyoti Ghosh       aval = 0;
70549bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
70649bd79ccSDebojyoti Ghosh       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
70749bd79ccSDebojyoti Ghosh     } else if (T) {
70849bd79ccSDebojyoti Ghosh       aval = 0;
70949bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
71049bd79ccSDebojyoti Ghosh       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
71149bd79ccSDebojyoti Ghosh     }
71249bd79ccSDebojyoti Ghosh     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
71349bd79ccSDebojyoti Ghosh     diag += dof2;
71449bd79ccSDebojyoti Ghosh   }
71549bd79ccSDebojyoti Ghosh   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
71649bd79ccSDebojyoti Ghosh 
71749bd79ccSDebojyoti Ghosh   b->ibdiagvalid = PETSC_TRUE;
71849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
71949bd79ccSDebojyoti Ghosh }
72049bd79ccSDebojyoti Ghosh 
72149bd79ccSDebojyoti Ghosh static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
72249bd79ccSDebojyoti Ghosh {
72349bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
72449bd79ccSDebojyoti Ghosh 
72549bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
72649bd79ccSDebojyoti Ghosh   *B = kaij->AIJ;
72749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
72849bd79ccSDebojyoti Ghosh }
72949bd79ccSDebojyoti Ghosh 
73049bd79ccSDebojyoti Ghosh PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
73149bd79ccSDebojyoti Ghosh {
73249bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
73349bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
73449bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
73549bd79ccSDebojyoti Ghosh   const PetscScalar *aa = a->a, *T = kaij->T, *v;
73649bd79ccSDebojyoti Ghosh   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
73749bd79ccSDebojyoti Ghosh   const PetscScalar *b, *xb, *idiag;
73849bd79ccSDebojyoti Ghosh   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
73949bd79ccSDebojyoti Ghosh   PetscInt          i, j, k, i2, bs, bs2, nz;
74049bd79ccSDebojyoti Ghosh 
74149bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
74249bd79ccSDebojyoti Ghosh   its = its*lits;
74349bd79ccSDebojyoti Ghosh   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
74449bd79ccSDebojyoti Ghosh   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
7456a375485SRichard Tran Mills   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
7466a375485SRichard Tran Mills   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
7476a375485SRichard Tran Mills   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
74849bd79ccSDebojyoti Ghosh   else        {bs = p; bs2 = bs*bs; }
74949bd79ccSDebojyoti Ghosh 
75049bd79ccSDebojyoti Ghosh   if (!m) PetscFunctionReturn(0);
75149bd79ccSDebojyoti Ghosh 
752bb6fb833SRichard Tran Mills   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
75349bd79ccSDebojyoti Ghosh   idiag = kaij->ibdiag;
75449bd79ccSDebojyoti Ghosh   diag  = a->diag;
75549bd79ccSDebojyoti Ghosh 
75649bd79ccSDebojyoti Ghosh   if (!kaij->sor.setup) {
75749bd79ccSDebojyoti Ghosh     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
75849bd79ccSDebojyoti Ghosh     kaij->sor.setup = PETSC_TRUE;
75949bd79ccSDebojyoti Ghosh   }
76049bd79ccSDebojyoti Ghosh   y     = kaij->sor.y;
76149bd79ccSDebojyoti Ghosh   w     = kaij->sor.w;
76249bd79ccSDebojyoti Ghosh   work  = kaij->sor.work;
76349bd79ccSDebojyoti Ghosh   t     = kaij->sor.t;
76449bd79ccSDebojyoti Ghosh   arr   = kaij->sor.arr;
76549bd79ccSDebojyoti Ghosh 
76649bd79ccSDebojyoti Ghosh   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
76749bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
76849bd79ccSDebojyoti Ghosh 
76949bd79ccSDebojyoti Ghosh   if (flag & SOR_ZERO_INITIAL_GUESS) {
77049bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
77149bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
77249bd79ccSDebojyoti Ghosh       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
77349bd79ccSDebojyoti Ghosh       i2     =  bs;
77449bd79ccSDebojyoti Ghosh       idiag  += bs2;
77549bd79ccSDebojyoti Ghosh       for (i=1; i<m; i++) {
77649bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
77749bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
77849bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
77949bd79ccSDebojyoti Ghosh 
78049bd79ccSDebojyoti Ghosh         if (T) {                /* b - T (Arow * x) */
7819eb573c1SRichard Tran Mills           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));
78249bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
78349bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
78449bd79ccSDebojyoti Ghosh           }
78549bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
78649bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
78749bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
7889eb573c1SRichard Tran Mills           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
78949bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
79049bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
79149bd79ccSDebojyoti Ghosh           }
79249bd79ccSDebojyoti Ghosh         } else {
7939eb573c1SRichard Tran Mills           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
79449bd79ccSDebojyoti Ghosh         }
79549bd79ccSDebojyoti Ghosh 
79649bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
79749bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
79849bd79ccSDebojyoti Ghosh 
79949bd79ccSDebojyoti Ghosh         idiag += bs2;
80049bd79ccSDebojyoti Ghosh         i2    += bs;
80149bd79ccSDebojyoti Ghosh       }
80249bd79ccSDebojyoti Ghosh       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
80349bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
80449bd79ccSDebojyoti Ghosh       xb = t;
80549bd79ccSDebojyoti Ghosh     } else xb = b;
80649bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
80749bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
80849bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
80949bd79ccSDebojyoti Ghosh       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
81049bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
81149bd79ccSDebojyoti Ghosh       i2    -= bs;
81249bd79ccSDebojyoti Ghosh       idiag -= bs2;
81349bd79ccSDebojyoti Ghosh       for (i=m-2; i>=0; i--) {
81449bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1 ;
81549bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
81649bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
81749bd79ccSDebojyoti Ghosh 
81849bd79ccSDebojyoti Ghosh         if (T) {                /* FIXME: This branch untested */
81949bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
82049bd79ccSDebojyoti Ghosh           /* copy all rows of x that are needed into contiguous space */
82149bd79ccSDebojyoti Ghosh           workt = work;
82249bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
82349bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
82449bd79ccSDebojyoti Ghosh             workt += bs;
82549bd79ccSDebojyoti Ghosh           }
82649bd79ccSDebojyoti Ghosh           arrt = arr;
82749bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
82849bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
82949bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
83049bd79ccSDebojyoti Ghosh             arrt += bs2;
83149bd79ccSDebojyoti Ghosh           }
83249bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
83349bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
8349eb573c1SRichard Tran Mills           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
83549bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
83649bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
83749bd79ccSDebojyoti Ghosh           }
83849bd79ccSDebojyoti Ghosh         }
83949bd79ccSDebojyoti Ghosh 
84049bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
84149bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
84249bd79ccSDebojyoti Ghosh 
84349bd79ccSDebojyoti Ghosh         idiag -= bs2;
84449bd79ccSDebojyoti Ghosh         i2    -= bs;
84549bd79ccSDebojyoti Ghosh       }
84649bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
84749bd79ccSDebojyoti Ghosh     }
84849bd79ccSDebojyoti Ghosh     its--;
84949bd79ccSDebojyoti Ghosh   }
85049bd79ccSDebojyoti Ghosh   while (its--) {               /* FIXME: This branch not updated */
85149bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
85249bd79ccSDebojyoti Ghosh       i2     =  0;
85349bd79ccSDebojyoti Ghosh       idiag  = kaij->ibdiag;
85449bd79ccSDebojyoti Ghosh       for (i=0; i<m; i++) {
85549bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
85649bd79ccSDebojyoti Ghosh 
85749bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
85849bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
85949bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
86049bd79ccSDebojyoti Ghosh         workt = work;
86149bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
86249bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
86349bd79ccSDebojyoti Ghosh           workt += bs;
86449bd79ccSDebojyoti Ghosh         }
86549bd79ccSDebojyoti Ghosh         arrt = arr;
86649bd79ccSDebojyoti Ghosh         if (T) {
86749bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
86849bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
86949bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
87049bd79ccSDebojyoti Ghosh             arrt += bs2;
87149bd79ccSDebojyoti Ghosh           }
87249bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
87349bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
87449bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
87549bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
87649bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
87749bd79ccSDebojyoti Ghosh             arrt += bs2;
87849bd79ccSDebojyoti Ghosh           }
87949bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
88049bd79ccSDebojyoti Ghosh         }
88149bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
88249bd79ccSDebojyoti Ghosh 
88349bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1;
88449bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
88549bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
88649bd79ccSDebojyoti Ghosh         workt = work;
88749bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
88849bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
88949bd79ccSDebojyoti Ghosh           workt += bs;
89049bd79ccSDebojyoti Ghosh         }
89149bd79ccSDebojyoti Ghosh         arrt = arr;
89249bd79ccSDebojyoti Ghosh         if (T) {
89349bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
89449bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
89549bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
89649bd79ccSDebojyoti Ghosh             arrt += bs2;
89749bd79ccSDebojyoti Ghosh           }
89849bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
89949bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
90049bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
90149bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
90249bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
90349bd79ccSDebojyoti Ghosh             arrt += bs2;
90449bd79ccSDebojyoti Ghosh           }
90549bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
90649bd79ccSDebojyoti Ghosh         }
90749bd79ccSDebojyoti Ghosh 
90849bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
90949bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
91049bd79ccSDebojyoti Ghosh 
91149bd79ccSDebojyoti Ghosh         idiag += bs2;
91249bd79ccSDebojyoti Ghosh         i2    += bs;
91349bd79ccSDebojyoti Ghosh       }
91449bd79ccSDebojyoti Ghosh       xb = t;
91549bd79ccSDebojyoti Ghosh     }
91649bd79ccSDebojyoti Ghosh     else xb = b;
91749bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
91849bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
91949bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
92049bd79ccSDebojyoti Ghosh       if (xb == b) {
92149bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
92249bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
92349bd79ccSDebojyoti Ghosh 
92449bd79ccSDebojyoti Ghosh           v  = aa + ai[i];
92549bd79ccSDebojyoti Ghosh           vi = aj + ai[i];
92649bd79ccSDebojyoti Ghosh           nz = diag[i] - ai[i];
92749bd79ccSDebojyoti Ghosh           workt = work;
92849bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
92949bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
93049bd79ccSDebojyoti Ghosh             workt += bs;
93149bd79ccSDebojyoti Ghosh           }
93249bd79ccSDebojyoti Ghosh           arrt = arr;
93349bd79ccSDebojyoti Ghosh           if (T) {
93449bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
93549bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
93649bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
93749bd79ccSDebojyoti Ghosh               arrt += bs2;
93849bd79ccSDebojyoti Ghosh             }
93949bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
94049bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
94149bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
94249bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
94349bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
94449bd79ccSDebojyoti Ghosh               arrt += bs2;
94549bd79ccSDebojyoti Ghosh             }
94649bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
94749bd79ccSDebojyoti Ghosh           }
94849bd79ccSDebojyoti Ghosh 
94949bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
95049bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
95149bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
95249bd79ccSDebojyoti Ghosh           workt = work;
95349bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
95449bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
95549bd79ccSDebojyoti Ghosh             workt += bs;
95649bd79ccSDebojyoti Ghosh           }
95749bd79ccSDebojyoti Ghosh           arrt = arr;
95849bd79ccSDebojyoti Ghosh           if (T) {
95949bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
96049bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
96149bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
96249bd79ccSDebojyoti Ghosh               arrt += bs2;
96349bd79ccSDebojyoti Ghosh             }
96449bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
96549bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
96649bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
96749bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
96849bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
96949bd79ccSDebojyoti Ghosh               arrt += bs2;
97049bd79ccSDebojyoti Ghosh             }
97149bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
97249bd79ccSDebojyoti Ghosh           }
97349bd79ccSDebojyoti Ghosh 
97449bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
97549bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
97649bd79ccSDebojyoti Ghosh         }
97749bd79ccSDebojyoti Ghosh       } else {
97849bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
97949bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
98049bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
98149bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
98249bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
98349bd79ccSDebojyoti Ghosh           workt = work;
98449bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
98549bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
98649bd79ccSDebojyoti Ghosh             workt += bs;
98749bd79ccSDebojyoti Ghosh           }
98849bd79ccSDebojyoti Ghosh           arrt = arr;
98949bd79ccSDebojyoti Ghosh           if (T) {
99049bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
99149bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
99249bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
99349bd79ccSDebojyoti Ghosh               arrt += bs2;
99449bd79ccSDebojyoti Ghosh             }
99549bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
99649bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
99749bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
99849bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
99949bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
100049bd79ccSDebojyoti Ghosh               arrt += bs2;
100149bd79ccSDebojyoti Ghosh             }
100249bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
100349bd79ccSDebojyoti Ghosh           }
100449bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
100549bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
100649bd79ccSDebojyoti Ghosh         }
100749bd79ccSDebojyoti Ghosh         idiag -= bs2;
100849bd79ccSDebojyoti Ghosh         i2    -= bs;
100949bd79ccSDebojyoti Ghosh       }
101049bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
101149bd79ccSDebojyoti Ghosh     }
101249bd79ccSDebojyoti Ghosh   }
101349bd79ccSDebojyoti Ghosh 
101449bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
101549bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
101649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
101749bd79ccSDebojyoti Ghosh }
101849bd79ccSDebojyoti Ghosh 
101949bd79ccSDebojyoti Ghosh /*===================================================================================*/
102049bd79ccSDebojyoti Ghosh 
1021836168d5SRichard Tran Mills PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
102249bd79ccSDebojyoti Ghosh {
102349bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
102449bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
102549bd79ccSDebojyoti Ghosh 
102649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
102749bd79ccSDebojyoti Ghosh   if (!yy) {
102849bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
102949bd79ccSDebojyoti Ghosh   } else {
103049bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
103149bd79ccSDebojyoti Ghosh   }
103249bd79ccSDebojyoti Ghosh   /* start the scatter */
103349bd79ccSDebojyoti Ghosh   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
103449bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
103549bd79ccSDebojyoti Ghosh   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
103649bd79ccSDebojyoti Ghosh   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
103749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
103849bd79ccSDebojyoti Ghosh }
103949bd79ccSDebojyoti Ghosh 
1040bb6fb833SRichard Tran Mills PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
104149bd79ccSDebojyoti Ghosh {
104249bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
104349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
1044836168d5SRichard Tran Mills   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
104549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
104649bd79ccSDebojyoti Ghosh }
104749bd79ccSDebojyoti Ghosh 
1048bb6fb833SRichard Tran Mills PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
104949bd79ccSDebojyoti Ghosh {
105049bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
105149bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
105249bd79ccSDebojyoti Ghosh 
105349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
105449bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
105549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
105649bd79ccSDebojyoti Ghosh }
105749bd79ccSDebojyoti Ghosh 
105849bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/
105949bd79ccSDebojyoti Ghosh 
106049bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
106149bd79ccSDebojyoti Ghosh {
106249bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
10631ca5ffdbSRichard Tran Mills   PetscErrorCode  diag = PETSC_FALSE;
10641ca5ffdbSRichard Tran Mills   PetscErrorCode  ierr;
106549bd79ccSDebojyoti Ghosh   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
106649bd79ccSDebojyoti Ghosh   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
106749bd79ccSDebojyoti Ghosh 
106849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
106949bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
107049bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
107149bd79ccSDebojyoti Ghosh   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
107249bd79ccSDebojyoti Ghosh 
107349bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
107449bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
107549bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
107649bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
107749bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
107849bd79ccSDebojyoti Ghosh   }
107949bd79ccSDebojyoti Ghosh 
108049bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
108149bd79ccSDebojyoti Ghosh     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
108249bd79ccSDebojyoti Ghosh     c     = nzaij;
108349bd79ccSDebojyoti Ghosh     for (i=0; i<nzaij; i++) {
108449bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
108549bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
108649bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
108749bd79ccSDebojyoti Ghosh         c = i;
108849bd79ccSDebojyoti Ghosh       }
108949bd79ccSDebojyoti Ghosh     }
109049bd79ccSDebojyoti Ghosh   } else nzaij = c = 0;
109149bd79ccSDebojyoti Ghosh 
109249bd79ccSDebojyoti Ghosh   /* calculate size of row */
109349bd79ccSDebojyoti Ghosh   nz = 0;
109449bd79ccSDebojyoti Ghosh   if (S)            nz += q;
109549bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
109649bd79ccSDebojyoti Ghosh 
109749bd79ccSDebojyoti Ghosh   if (cols || values) {
109849bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
109938822f9dSRichard Tran Mills     for (i=0; i<q; i++) {
110038822f9dSRichard Tran Mills       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
110138822f9dSRichard Tran Mills       v[i] = 0.0;
110238822f9dSRichard Tran Mills     }
110349bd79ccSDebojyoti Ghosh     if (b->isTI) {
110449bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
110549bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
110649bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
110749bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vaij[i] : 0);
110849bd79ccSDebojyoti Ghosh         }
110949bd79ccSDebojyoti Ghosh       }
111049bd79ccSDebojyoti Ghosh     } else if (T) {
111149bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
111249bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
111349bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
111449bd79ccSDebojyoti Ghosh           v[i*q+j]   = vaij[i]*T[s+j*p];
111549bd79ccSDebojyoti Ghosh         }
111649bd79ccSDebojyoti Ghosh       }
111749bd79ccSDebojyoti Ghosh     }
111849bd79ccSDebojyoti Ghosh     if (S) {
111949bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
112049bd79ccSDebojyoti Ghosh         idx[c*q+j] = r*q+j;
112149bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
112249bd79ccSDebojyoti Ghosh       }
112349bd79ccSDebojyoti Ghosh     }
112449bd79ccSDebojyoti Ghosh   }
112549bd79ccSDebojyoti Ghosh 
112649bd79ccSDebojyoti Ghosh   if (ncols)    *ncols  = nz;
112749bd79ccSDebojyoti Ghosh   if (cols)     *cols   = idx;
112849bd79ccSDebojyoti Ghosh   if (values)   *values = v;
112949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
113049bd79ccSDebojyoti Ghosh }
113149bd79ccSDebojyoti Ghosh 
113249bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
113349bd79ccSDebojyoti Ghosh {
113449bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
113549bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
113649bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
113749bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
113849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
113949bd79ccSDebojyoti Ghosh }
114049bd79ccSDebojyoti Ghosh 
114149bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
114249bd79ccSDebojyoti Ghosh {
114349bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
114449bd79ccSDebojyoti Ghosh   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
114549bd79ccSDebojyoti Ghosh   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
114649bd79ccSDebojyoti Ghosh   Mat             AIJ     = b->A;
1147fc64b2cfSRichard Tran Mills   PetscBool       diag    = PETSC_FALSE;
114849bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
114949bd79ccSDebojyoti Ghosh   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
115049bd79ccSDebojyoti Ghosh   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
115149bd79ccSDebojyoti Ghosh   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
115249bd79ccSDebojyoti Ghosh 
115349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
115449bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
115549bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
115649bd79ccSDebojyoti Ghosh   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
115749bd79ccSDebojyoti Ghosh   lrow = row - rstart;
115849bd79ccSDebojyoti Ghosh 
115949bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
116049bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
116149bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
116249bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
116349bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
116449bd79ccSDebojyoti Ghosh   }
116549bd79ccSDebojyoti Ghosh 
116649bd79ccSDebojyoti Ghosh   r = lrow/p;
116749bd79ccSDebojyoti Ghosh   s = lrow%p;
116849bd79ccSDebojyoti Ghosh 
116949bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
117049bd79ccSDebojyoti Ghosh     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
117149bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
117249bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
117349bd79ccSDebojyoti Ghosh 
117449bd79ccSDebojyoti Ghosh     c     = ncolsaij + ncolsoaij;
117549bd79ccSDebojyoti Ghosh     for (i=0; i<ncolsaij; i++) {
117649bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
117749bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
117849bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
117949bd79ccSDebojyoti Ghosh         c = i;
118049bd79ccSDebojyoti Ghosh       }
118149bd79ccSDebojyoti Ghosh     }
118249bd79ccSDebojyoti Ghosh   } else c = 0;
118349bd79ccSDebojyoti Ghosh 
118449bd79ccSDebojyoti Ghosh   /* calculate size of row */
118549bd79ccSDebojyoti Ghosh   nz = 0;
118649bd79ccSDebojyoti Ghosh   if (S)            nz += q;
118749bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
118849bd79ccSDebojyoti Ghosh 
118949bd79ccSDebojyoti Ghosh   if (cols || values) {
119049bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1191a437a796SRichard Tran Mills     for (i=0; i<q; i++) {
1192a437a796SRichard Tran Mills       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1193a437a796SRichard Tran Mills       v[i] = 0.0;
1194a437a796SRichard Tran Mills     }
119549bd79ccSDebojyoti Ghosh     if (b->isTI) {
119649bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
119749bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
119849bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
119949bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vals[i] : 0.0);
120049bd79ccSDebojyoti Ghosh         }
120149bd79ccSDebojyoti Ghosh       }
120249bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
120349bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
120449bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
120549bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
120649bd79ccSDebojyoti Ghosh         }
120749bd79ccSDebojyoti Ghosh       }
120849bd79ccSDebojyoti Ghosh     } else if (T) {
120949bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
121049bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
121149bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
121249bd79ccSDebojyoti Ghosh           v[i*q+j]   = vals[i]*T[s+j*p];
121349bd79ccSDebojyoti Ghosh         }
121449bd79ccSDebojyoti Ghosh       }
121549bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
121649bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
121749bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
121849bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
121949bd79ccSDebojyoti Ghosh         }
122049bd79ccSDebojyoti Ghosh       }
122149bd79ccSDebojyoti Ghosh     }
122249bd79ccSDebojyoti Ghosh     if (S) {
122349bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
122449bd79ccSDebojyoti Ghosh         idx[c*q+j] = (r+rstart/p)*q+j;
122549bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
122649bd79ccSDebojyoti Ghosh       }
122749bd79ccSDebojyoti Ghosh     }
122849bd79ccSDebojyoti Ghosh   }
122949bd79ccSDebojyoti Ghosh 
123049bd79ccSDebojyoti Ghosh   if (ncols)  *ncols  = nz;
123149bd79ccSDebojyoti Ghosh   if (cols)   *cols   = idx;
123249bd79ccSDebojyoti Ghosh   if (values) *values = v;
123349bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
123449bd79ccSDebojyoti Ghosh }
123549bd79ccSDebojyoti Ghosh 
123649bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
123749bd79ccSDebojyoti Ghosh {
123849bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
123949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
124049bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
124149bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
124249bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
124349bd79ccSDebojyoti Ghosh }
124449bd79ccSDebojyoti Ghosh 
124549bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
124649bd79ccSDebojyoti Ghosh {
124749bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
124849bd79ccSDebojyoti Ghosh   Mat            A;
124949bd79ccSDebojyoti Ghosh 
125049bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
125149bd79ccSDebojyoti Ghosh   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
125249bd79ccSDebojyoti Ghosh   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
125349bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&A);CHKERRQ(ierr);
125449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
125549bd79ccSDebojyoti Ghosh }
125649bd79ccSDebojyoti Ghosh 
125749bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */
125849bd79ccSDebojyoti Ghosh /*@C
125949bd79ccSDebojyoti Ghosh   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
126049bd79ccSDebojyoti Ghosh 
126149bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
126249bd79ccSDebojyoti Ghosh 
126349bd79ccSDebojyoti Ghosh   where
126449bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
126549bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
126649bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
126749bd79ccSDebojyoti Ghosh     I is the identity matrix
126849bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
126949bd79ccSDebojyoti Ghosh 
1270d3b90ce1SRichard Tran Mills   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
127149bd79ccSDebojyoti Ghosh 
127249bd79ccSDebojyoti Ghosh   Collective
127349bd79ccSDebojyoti Ghosh 
127449bd79ccSDebojyoti Ghosh   Input Parameters:
127549bd79ccSDebojyoti Ghosh + A - the AIJ matrix
127649bd79ccSDebojyoti Ghosh . p - number of rows in S and T
1277d3b90ce1SRichard Tran Mills . q - number of columns in S and T
1278d3b90ce1SRichard Tran Mills . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1279d3b90ce1SRichard Tran Mills - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
128049bd79ccSDebojyoti Ghosh 
128149bd79ccSDebojyoti Ghosh   Output Parameter:
128249bd79ccSDebojyoti Ghosh . kaij - the new KAIJ matrix
128349bd79ccSDebojyoti Ghosh 
1284d3b90ce1SRichard Tran Mills   Notes:
1285d3b90ce1SRichard Tran Mills   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1286d3b90ce1SRichard Tran Mills   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
128749bd79ccSDebojyoti Ghosh 
128849bd79ccSDebojyoti Ghosh   Level: advanced
128949bd79ccSDebojyoti Ghosh 
12900567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
129149bd79ccSDebojyoti Ghosh @*/
129249bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
129349bd79ccSDebojyoti Ghosh {
129449bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
129549bd79ccSDebojyoti Ghosh   PetscMPIInt    size;
129649bd79ccSDebojyoti Ghosh 
129749bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
12980567c835SRichard Tran Mills   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
129949bd79ccSDebojyoti Ghosh   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
130049bd79ccSDebojyoti Ghosh   if (size == 1) {
13010567c835SRichard Tran Mills     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
130249bd79ccSDebojyoti Ghosh   } else {
13030567c835SRichard Tran Mills     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
130449bd79ccSDebojyoti Ghosh   }
13050567c835SRichard Tran Mills   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
13060567c835SRichard Tran Mills   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
13070567c835SRichard Tran Mills   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
13080567c835SRichard Tran Mills   ierr = MatSetUp(*kaij);
13090567c835SRichard Tran Mills   PetscFunctionReturn(0);
13100567c835SRichard Tran Mills }
131149bd79ccSDebojyoti Ghosh 
13120567c835SRichard Tran Mills /*MC
13130567c835SRichard Tran Mills   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
13140567c835SRichard Tran Mills 
13150567c835SRichard Tran Mills     [I \otimes S + A \otimes T]
13160567c835SRichard Tran Mills 
13170567c835SRichard Tran Mills   where
13180567c835SRichard Tran Mills     S is a dense (p \times q) matrix
13190567c835SRichard Tran Mills     T is a dense (p \times q) matrix
13200567c835SRichard Tran Mills     A is an AIJ  (n \times n) matrix
13210567c835SRichard Tran Mills     I is the identity matrix
13220567c835SRichard Tran Mills   The resulting matrix is (np \times nq)
13230567c835SRichard Tran Mills 
1324d3b90ce1SRichard Tran Mills   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
13250567c835SRichard Tran Mills 
13260567c835SRichard Tran Mills   Level: advanced
13270567c835SRichard Tran Mills 
13280567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
13290567c835SRichard Tran Mills M*/
13300567c835SRichard Tran Mills 
13310567c835SRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
13320567c835SRichard Tran Mills {
13330567c835SRichard Tran Mills   PetscErrorCode ierr;
13340567c835SRichard Tran Mills   Mat_MPIKAIJ    *b;
13350567c835SRichard Tran Mills   PetscMPIInt    size;
13360567c835SRichard Tran Mills 
13370567c835SRichard Tran Mills   PetscFunctionBegin;
13380567c835SRichard Tran Mills   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
13390567c835SRichard Tran Mills   A->data  = (void*)b;
13400567c835SRichard Tran Mills 
13410567c835SRichard Tran Mills   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
13420567c835SRichard Tran Mills 
13430567c835SRichard Tran Mills   A->ops->setup = MatSetUp_KAIJ;
13440567c835SRichard Tran Mills 
13450567c835SRichard Tran Mills   b->w    = 0;
13460567c835SRichard Tran Mills   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
13470567c835SRichard Tran Mills   if (size == 1) {
13480567c835SRichard Tran Mills     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
13490567c835SRichard Tran Mills     A->ops->setup               = MatSetUp_KAIJ;
13500567c835SRichard Tran Mills     A->ops->destroy             = MatDestroy_SeqKAIJ;
1351*e6985dafSRichard Tran Mills     A->ops->view                = MatView_KAIJ;
1352bb6fb833SRichard Tran Mills     A->ops->mult                = MatMult_SeqKAIJ;
1353bb6fb833SRichard Tran Mills     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1354bb6fb833SRichard Tran Mills     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
13550567c835SRichard Tran Mills     A->ops->getrow              = MatGetRow_SeqKAIJ;
13560567c835SRichard Tran Mills     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
13570567c835SRichard Tran Mills     A->ops->sor                 = MatSOR_SeqKAIJ;
13580567c835SRichard Tran Mills   } else {
13590567c835SRichard Tran Mills     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
13600567c835SRichard Tran Mills     A->ops->setup               = MatSetUp_KAIJ;
13610567c835SRichard Tran Mills     A->ops->destroy             = MatDestroy_MPIKAIJ;
1362*e6985dafSRichard Tran Mills     A->ops->view                = MatView_KAIJ;
1363bb6fb833SRichard Tran Mills     A->ops->mult                = MatMult_MPIKAIJ;
1364bb6fb833SRichard Tran Mills     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1365bb6fb833SRichard Tran Mills     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
13660567c835SRichard Tran Mills     A->ops->getrow              = MatGetRow_MPIKAIJ;
13670567c835SRichard Tran Mills     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
13680567c835SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
13690567c835SRichard Tran Mills   }
13700567c835SRichard Tran Mills   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
137149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
137249bd79ccSDebojyoti Ghosh }
1373