xref: /petsc/src/mat/impls/kaij/kaij.c (revision fac53fa1e88ef6d68ac105d10e1a0dd99b02c5b2)
149bd79ccSDebojyoti Ghosh 
249bd79ccSDebojyoti Ghosh /*
349bd79ccSDebojyoti Ghosh   Defines the basic matrix operations for the KAIJ  matrix storage format.
449bd79ccSDebojyoti Ghosh   This format is used to evaluate matrices of the form:
549bd79ccSDebojyoti Ghosh 
649bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
749bd79ccSDebojyoti Ghosh 
849bd79ccSDebojyoti Ghosh   where
949bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
1049bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
1149bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
1249bd79ccSDebojyoti Ghosh     I is the identity matrix
1349bd79ccSDebojyoti Ghosh 
1449bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
1549bd79ccSDebojyoti Ghosh 
1649bd79ccSDebojyoti Ghosh   We provide:
1749bd79ccSDebojyoti Ghosh      MatMult()
1849bd79ccSDebojyoti Ghosh      MatMultAdd()
1949bd79ccSDebojyoti Ghosh      MatInvertBlockDiagonal()
2049bd79ccSDebojyoti Ghosh   and
2149bd79ccSDebojyoti Ghosh      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
2249bd79ccSDebojyoti Ghosh 
2349bd79ccSDebojyoti Ghosh   This single directory handles both the sequential and parallel codes
2449bd79ccSDebojyoti Ghosh */
2549bd79ccSDebojyoti Ghosh 
2649bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
2749bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h>
2849bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h>
2949bd79ccSDebojyoti Ghosh 
3049bd79ccSDebojyoti Ghosh /*@C
3149bd79ccSDebojyoti Ghosh    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
3249bd79ccSDebojyoti Ghosh 
3349bd79ccSDebojyoti Ghosh    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
3449bd79ccSDebojyoti Ghosh 
3549bd79ccSDebojyoti Ghosh    Input Parameter:
3649bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
3749bd79ccSDebojyoti Ghosh 
3849bd79ccSDebojyoti Ghosh    Output Parameter:
3949bd79ccSDebojyoti Ghosh .  B - the AIJ matrix
4049bd79ccSDebojyoti Ghosh 
4149bd79ccSDebojyoti Ghosh    Level: advanced
4249bd79ccSDebojyoti Ghosh 
4349bd79ccSDebojyoti Ghosh    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
4449bd79ccSDebojyoti Ghosh 
4549bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ()
4649bd79ccSDebojyoti Ghosh @*/
4749bd79ccSDebojyoti Ghosh PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
4849bd79ccSDebojyoti Ghosh {
4949bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
5049bd79ccSDebojyoti Ghosh   PetscBool      ismpikaij,isseqkaij;
5149bd79ccSDebojyoti Ghosh 
5249bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
5349bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
5449bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
5549bd79ccSDebojyoti Ghosh   if (ismpikaij) {
5649bd79ccSDebojyoti Ghosh     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
5749bd79ccSDebojyoti Ghosh 
5849bd79ccSDebojyoti Ghosh     *B = b->A;
5949bd79ccSDebojyoti Ghosh   } else if (isseqkaij) {
6049bd79ccSDebojyoti Ghosh     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
6149bd79ccSDebojyoti Ghosh 
6249bd79ccSDebojyoti Ghosh     *B = b->AIJ;
63b04351cbSRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
6449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
6549bd79ccSDebojyoti Ghosh }
6649bd79ccSDebojyoti Ghosh 
6749bd79ccSDebojyoti Ghosh /*@C
6849bd79ccSDebojyoti Ghosh    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
6949bd79ccSDebojyoti Ghosh 
700567c835SRichard Tran Mills    Not Collective; the entire S is stored and returned independently on all processes.
7149bd79ccSDebojyoti Ghosh 
7249bd79ccSDebojyoti Ghosh    Input Parameter:
7349bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
7449bd79ccSDebojyoti Ghosh 
75a5b5c723SRichard Tran Mills    Output Parameters:
76a5b5c723SRichard Tran Mills +  m - the number of rows in S
77a5b5c723SRichard Tran Mills .  n - the number of columns in S
78a5b5c723SRichard Tran Mills -  S - the S matrix, in form of a scalar array in column-major format
7949bd79ccSDebojyoti Ghosh 
80a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
8131a97b9aSRichard Tran Mills 
8249bd79ccSDebojyoti Ghosh    Level: advanced
8349bd79ccSDebojyoti Ghosh 
8431a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
8549bd79ccSDebojyoti Ghosh @*/
86a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
8749bd79ccSDebojyoti Ghosh {
8849bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
8949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
90a5b5c723SRichard Tran Mills   if (m) *m = b->p;
91a5b5c723SRichard Tran Mills   if (n) *n = b->q;
92a5b5c723SRichard Tran Mills   if (S) *S = b->S;
93a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
94a5b5c723SRichard Tran Mills }
95a5b5c723SRichard Tran Mills 
96a5b5c723SRichard Tran Mills /*@C
97a5b5c723SRichard Tran Mills    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98a5b5c723SRichard Tran Mills 
99a5b5c723SRichard Tran Mills    Not Collective; the entire S is stored and returned independently on all processes.
100a5b5c723SRichard Tran Mills 
101a5b5c723SRichard Tran Mills    Input Parameter:
102a5b5c723SRichard Tran Mills .  A - the KAIJ matrix
103a5b5c723SRichard Tran Mills 
104a5b5c723SRichard Tran Mills    Output Parameters:
105a5b5c723SRichard Tran Mills +  m - the number of rows in S
106a5b5c723SRichard Tran Mills .  n - the number of columns in S
107a5b5c723SRichard Tran Mills -  S - the S matrix, in form of a scalar array in column-major format
108a5b5c723SRichard Tran Mills 
109a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110a5b5c723SRichard Tran Mills 
111a5b5c723SRichard Tran Mills    Level: advanced
112a5b5c723SRichard Tran Mills 
113a5b5c723SRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114a5b5c723SRichard Tran Mills @*/
115a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116a5b5c723SRichard Tran Mills {
117a5b5c723SRichard Tran Mills   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118a5b5c723SRichard Tran Mills   PetscFunctionBegin;
119a5b5c723SRichard Tran Mills   if (m) *m = b->p;
120a5b5c723SRichard Tran Mills   if (n) *n = b->q;
121a5b5c723SRichard Tran Mills   if (S) *S = b->S;
122a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
123a5b5c723SRichard Tran Mills }
124a5b5c723SRichard Tran Mills 
125a5b5c723SRichard Tran Mills /*@C
126a5b5c723SRichard Tran Mills   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127a5b5c723SRichard Tran Mills 
128a5b5c723SRichard Tran Mills   Not collective
129a5b5c723SRichard Tran Mills 
130a5b5c723SRichard Tran Mills   Input Parameter:
131a5b5c723SRichard Tran Mills . A - the KAIJ matrix
132a5b5c723SRichard Tran Mills 
133a5b5c723SRichard Tran Mills   Output Parameter:
134a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS()
135a5b5c723SRichard Tran Mills 
136a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
138a5b5c723SRichard Tran Mills 
139a5b5c723SRichard Tran Mills   Level: advanced
140a5b5c723SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141a5b5c723SRichard Tran Mills @*/
142a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143a5b5c723SRichard Tran Mills {
14466f58c76SRichard Tran Mills   PetscErrorCode ierr;
14566f58c76SRichard Tran Mills 
146a5b5c723SRichard Tran Mills   PetscFunctionBegin;
147a5b5c723SRichard Tran Mills   if (S) *S = NULL;
14866f58c76SRichard Tran Mills   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
149a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
150a5b5c723SRichard Tran Mills }
151a5b5c723SRichard Tran Mills 
152a5b5c723SRichard Tran Mills /*@C
153a5b5c723SRichard Tran Mills   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
154a5b5c723SRichard Tran Mills 
155a5b5c723SRichard Tran Mills   Not collective
156a5b5c723SRichard Tran Mills 
157a5b5c723SRichard Tran Mills   Input Parameter:
158a5b5c723SRichard Tran Mills . A - the KAIJ matrix
159a5b5c723SRichard Tran Mills 
160a5b5c723SRichard Tran Mills   Output Parameter:
161a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS()
162a5b5c723SRichard Tran Mills 
163a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
165a5b5c723SRichard Tran Mills 
166a5b5c723SRichard Tran Mills   Level: advanced
167a5b5c723SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168a5b5c723SRichard Tran Mills @*/
169a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170a5b5c723SRichard Tran Mills {
171a5b5c723SRichard Tran Mills   PetscFunctionBegin;
172a5b5c723SRichard Tran Mills   if (S) *S = NULL;
17349bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
17449bd79ccSDebojyoti Ghosh }
17549bd79ccSDebojyoti Ghosh 
17649bd79ccSDebojyoti Ghosh /*@C
17731a97b9aSRichard Tran Mills    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
17849bd79ccSDebojyoti Ghosh 
1790567c835SRichard Tran Mills    Not Collective; the entire T is stored and returned independently on all processes
18049bd79ccSDebojyoti Ghosh 
18149bd79ccSDebojyoti Ghosh    Input Parameter:
18249bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
18349bd79ccSDebojyoti Ghosh 
184d8d19677SJose E. Roman    Output Parameters:
185a5b5c723SRichard Tran Mills +  m - the number of rows in T
186a5b5c723SRichard Tran Mills .  n - the number of columns in T
187a5b5c723SRichard Tran Mills -  T - the T matrix, in form of a scalar array in column-major format
18849bd79ccSDebojyoti Ghosh 
189a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
19031a97b9aSRichard Tran Mills 
19149bd79ccSDebojyoti Ghosh    Level: advanced
19249bd79ccSDebojyoti Ghosh 
19331a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
19449bd79ccSDebojyoti Ghosh @*/
195a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
19649bd79ccSDebojyoti Ghosh {
19749bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
19849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
199a5b5c723SRichard Tran Mills   if (m) *m = b->p;
200a5b5c723SRichard Tran Mills   if (n) *n = b->q;
201a5b5c723SRichard Tran Mills   if (T) *T = b->T;
202a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
203a5b5c723SRichard Tran Mills }
204a5b5c723SRichard Tran Mills 
205a5b5c723SRichard Tran Mills /*@C
206a5b5c723SRichard Tran Mills    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
207a5b5c723SRichard Tran Mills 
208a5b5c723SRichard Tran Mills    Not Collective; the entire T is stored and returned independently on all processes
209a5b5c723SRichard Tran Mills 
210a5b5c723SRichard Tran Mills    Input Parameter:
211a5b5c723SRichard Tran Mills .  A - the KAIJ matrix
212a5b5c723SRichard Tran Mills 
213d8d19677SJose E. Roman    Output Parameters:
214a5b5c723SRichard Tran Mills +  m - the number of rows in T
215a5b5c723SRichard Tran Mills .  n - the number of columns in T
216a5b5c723SRichard Tran Mills -  T - the T matrix, in form of a scalar array in column-major format
217a5b5c723SRichard Tran Mills 
218a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
219a5b5c723SRichard Tran Mills 
220a5b5c723SRichard Tran Mills    Level: advanced
221a5b5c723SRichard Tran Mills 
222a5b5c723SRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223a5b5c723SRichard Tran Mills @*/
224a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225a5b5c723SRichard Tran Mills {
226a5b5c723SRichard Tran Mills   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
227a5b5c723SRichard Tran Mills   PetscFunctionBegin;
228a5b5c723SRichard Tran Mills   if (m) *m = b->p;
229a5b5c723SRichard Tran Mills   if (n) *n = b->q;
230a5b5c723SRichard Tran Mills   if (T) *T = b->T;
231a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
232a5b5c723SRichard Tran Mills }
233a5b5c723SRichard Tran Mills 
234a5b5c723SRichard Tran Mills /*@C
235a5b5c723SRichard Tran Mills   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
236a5b5c723SRichard Tran Mills 
237a5b5c723SRichard Tran Mills   Not collective
238a5b5c723SRichard Tran Mills 
239a5b5c723SRichard Tran Mills   Input Parameter:
240a5b5c723SRichard Tran Mills . A - the KAIJ matrix
241a5b5c723SRichard Tran Mills 
242a5b5c723SRichard Tran Mills   Output Parameter:
243a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS()
244a5b5c723SRichard Tran Mills 
245a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
247a5b5c723SRichard Tran Mills 
248a5b5c723SRichard Tran Mills   Level: advanced
249a5b5c723SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250a5b5c723SRichard Tran Mills @*/
251a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252a5b5c723SRichard Tran Mills {
25366f58c76SRichard Tran Mills   PetscErrorCode ierr;
25466f58c76SRichard Tran Mills 
255a5b5c723SRichard Tran Mills   PetscFunctionBegin;
256a5b5c723SRichard Tran Mills   if (T) *T = NULL;
25766f58c76SRichard Tran Mills   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
258a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
259a5b5c723SRichard Tran Mills }
260a5b5c723SRichard Tran Mills 
261a5b5c723SRichard Tran Mills /*@C
262a5b5c723SRichard Tran Mills   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
263a5b5c723SRichard Tran Mills 
264a5b5c723SRichard Tran Mills   Not collective
265a5b5c723SRichard Tran Mills 
266a5b5c723SRichard Tran Mills   Input Parameter:
267a5b5c723SRichard Tran Mills . A - the KAIJ matrix
268a5b5c723SRichard Tran Mills 
269a5b5c723SRichard Tran Mills   Output Parameter:
270a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS()
271a5b5c723SRichard Tran Mills 
272a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
274a5b5c723SRichard Tran Mills 
275a5b5c723SRichard Tran Mills   Level: advanced
276a5b5c723SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277a5b5c723SRichard Tran Mills @*/
278a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279a5b5c723SRichard Tran Mills {
280a5b5c723SRichard Tran Mills   PetscFunctionBegin;
281a5b5c723SRichard Tran Mills   if (T) *T = NULL;
28249bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
28349bd79ccSDebojyoti Ghosh }
28449bd79ccSDebojyoti Ghosh 
2850567c835SRichard Tran Mills /*@
2860567c835SRichard Tran Mills    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
2870567c835SRichard Tran Mills 
2880567c835SRichard Tran Mills    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
2890567c835SRichard Tran Mills 
2900567c835SRichard Tran Mills    Input Parameters:
2910567c835SRichard Tran Mills +  A - the KAIJ matrix
2920567c835SRichard Tran Mills -  B - the AIJ matrix
2930567c835SRichard Tran Mills 
29415b9d025SRichard Tran Mills    Notes:
29515b9d025SRichard Tran Mills    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
29615b9d025SRichard Tran Mills    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
29715b9d025SRichard Tran Mills 
2980567c835SRichard Tran Mills    Level: advanced
2990567c835SRichard Tran Mills 
3000567c835SRichard Tran Mills .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
3010567c835SRichard Tran Mills @*/
3020567c835SRichard Tran Mills PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
3030567c835SRichard Tran Mills {
3040567c835SRichard Tran Mills   PetscErrorCode ierr;
3050567c835SRichard Tran Mills   PetscMPIInt    size;
30606d61a37SPierre Jolivet   PetscBool      flg;
3070567c835SRichard Tran Mills 
3080567c835SRichard Tran Mills   PetscFunctionBegin;
309ffc4695bSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3100567c835SRichard Tran Mills   if (size == 1) {
31106d61a37SPierre Jolivet     ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQAIJ,&flg);CHKERRQ(ierr);
31206d61a37SPierre Jolivet     if (!flg) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat",((PetscObject)B)->type_name);
3130567c835SRichard Tran Mills     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
3140567c835SRichard Tran Mills     a->AIJ = B;
3150567c835SRichard Tran Mills   } else {
3160567c835SRichard Tran Mills     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
3170567c835SRichard Tran Mills     a->A = B;
3180567c835SRichard Tran Mills   }
31915b9d025SRichard Tran Mills   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
3200567c835SRichard Tran Mills   PetscFunctionReturn(0);
3210567c835SRichard Tran Mills }
3220567c835SRichard Tran Mills 
3230567c835SRichard Tran Mills /*@C
3240567c835SRichard Tran Mills    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
3250567c835SRichard Tran Mills 
3260567c835SRichard Tran Mills    Logically Collective; the entire S is stored independently on all processes.
3270567c835SRichard Tran Mills 
3280567c835SRichard Tran Mills    Input Parameters:
3290567c835SRichard Tran Mills +  A - the KAIJ matrix
3300567c835SRichard Tran Mills .  p - the number of rows in S
3310567c835SRichard Tran Mills .  q - the number of columns in S
3320567c835SRichard Tran Mills -  S - the S matrix, in form of a scalar array in column-major format
3330567c835SRichard Tran Mills 
3340567c835SRichard Tran Mills    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
33588f48298SRichard Tran Mills    The S matrix is copied, so the user can destroy this array.
3360567c835SRichard Tran Mills 
3370567c835SRichard Tran Mills    Level: Advanced
3380567c835SRichard Tran Mills 
3390567c835SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
3400567c835SRichard Tran Mills @*/
3410567c835SRichard Tran Mills PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
3420567c835SRichard Tran Mills {
3430567c835SRichard Tran Mills   PetscErrorCode ierr;
3440567c835SRichard Tran Mills   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
3450567c835SRichard Tran Mills 
3460567c835SRichard Tran Mills   PetscFunctionBegin;
3470567c835SRichard Tran Mills   ierr = PetscFree(a->S);CHKERRQ(ierr);
3480567c835SRichard Tran Mills   if (S) {
349a84f8069SRichard Tran Mills     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
3500567c835SRichard Tran Mills     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
3510567c835SRichard Tran Mills   } else  a->S = NULL;
3520567c835SRichard Tran Mills 
3530567c835SRichard Tran Mills   a->p = p;
3540567c835SRichard Tran Mills   a->q = q;
3550567c835SRichard Tran Mills   PetscFunctionReturn(0);
3560567c835SRichard Tran Mills }
3570567c835SRichard Tran Mills 
3580567c835SRichard Tran Mills /*@C
359910cf402Sprj-    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
360910cf402Sprj- 
361910cf402Sprj-    Logically Collective.
362910cf402Sprj- 
363910cf402Sprj-    Input Parameter:
364910cf402Sprj- .  A - the KAIJ matrix
365910cf402Sprj- 
366910cf402Sprj-   Output Parameter:
367910cf402Sprj- .  identity - the Boolean value
368910cf402Sprj- 
369910cf402Sprj-    Level: Advanced
370910cf402Sprj- 
371910cf402Sprj- .seealso: MatKAIJGetS(), MatKAIJGetT()
372910cf402Sprj- @*/
373910cf402Sprj- PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
374910cf402Sprj- {
375910cf402Sprj-   Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
376910cf402Sprj-   PetscInt    i,j;
377910cf402Sprj- 
378910cf402Sprj-   PetscFunctionBegin;
379910cf402Sprj-   if (a->p != a->q) {
380910cf402Sprj-     *identity = PETSC_FALSE;
381910cf402Sprj-     PetscFunctionReturn(0);
382910cf402Sprj-   } else *identity = PETSC_TRUE;
383910cf402Sprj-   if (!a->isTI || a->S) {
384910cf402Sprj-     for (i=0; i<a->p && *identity; i++) {
385910cf402Sprj-       for (j=0; j<a->p && *identity; j++) {
386910cf402Sprj-         if (i != j) {
387910cf402Sprj-           if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
388910cf402Sprj-           if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
389910cf402Sprj-         } else {
390910cf402Sprj-           if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
391910cf402Sprj-           if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
392910cf402Sprj-         }
393910cf402Sprj-       }
394910cf402Sprj-     }
395910cf402Sprj-   }
396910cf402Sprj-   PetscFunctionReturn(0);
397910cf402Sprj- }
398910cf402Sprj- 
399910cf402Sprj- /*@C
4000567c835SRichard Tran Mills    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
4010567c835SRichard Tran Mills 
4020567c835SRichard Tran Mills    Logically Collective; the entire T is stored independently on all processes.
4030567c835SRichard Tran Mills 
4040567c835SRichard Tran Mills    Input Parameters:
4050567c835SRichard Tran Mills +  A - the KAIJ matrix
4060567c835SRichard Tran Mills .  p - the number of rows in S
4070567c835SRichard Tran Mills .  q - the number of columns in S
4080567c835SRichard Tran Mills -  T - the T matrix, in form of a scalar array in column-major format
4090567c835SRichard Tran Mills 
4100567c835SRichard Tran Mills    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
41188f48298SRichard Tran Mills    The T matrix is copied, so the user can destroy this array.
4120567c835SRichard Tran Mills 
4130567c835SRichard Tran Mills    Level: Advanced
4140567c835SRichard Tran Mills 
4150567c835SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
4160567c835SRichard Tran Mills @*/
4170567c835SRichard Tran Mills PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
4180567c835SRichard Tran Mills {
4190567c835SRichard Tran Mills   PetscErrorCode ierr;
4200567c835SRichard Tran Mills   PetscInt       i,j;
4210567c835SRichard Tran Mills   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
4220567c835SRichard Tran Mills   PetscBool      isTI = PETSC_FALSE;
4230567c835SRichard Tran Mills 
4240567c835SRichard Tran Mills   PetscFunctionBegin;
4250567c835SRichard Tran Mills   /* check if T is an identity matrix */
4260567c835SRichard Tran Mills   if (T && (p == q)) {
4270567c835SRichard Tran Mills     isTI = PETSC_TRUE;
4280567c835SRichard Tran Mills     for (i=0; i<p; i++) {
4290567c835SRichard Tran Mills       for (j=0; j<q; j++) {
4300567c835SRichard Tran Mills         if (i == j) {
4310567c835SRichard Tran Mills           /* diagonal term must be 1 */
4320567c835SRichard Tran Mills           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
4330567c835SRichard Tran Mills         } else {
4340567c835SRichard Tran Mills           /* off-diagonal term must be 0 */
4350567c835SRichard Tran Mills           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
4360567c835SRichard Tran Mills         }
4370567c835SRichard Tran Mills       }
4380567c835SRichard Tran Mills     }
4390567c835SRichard Tran Mills   }
4400567c835SRichard Tran Mills   a->isTI = isTI;
4410567c835SRichard Tran Mills 
4420567c835SRichard Tran Mills   ierr = PetscFree(a->T);CHKERRQ(ierr);
4430567c835SRichard Tran Mills   if (T && (!isTI)) {
444a84f8069SRichard Tran Mills     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
4450567c835SRichard Tran Mills     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
44650d19d74SRichard Tran Mills   } else a->T = NULL;
4470567c835SRichard Tran Mills 
4480567c835SRichard Tran Mills   a->p = p;
4490567c835SRichard Tran Mills   a->q = q;
4500567c835SRichard Tran Mills   PetscFunctionReturn(0);
4510567c835SRichard Tran Mills }
4520567c835SRichard Tran Mills 
45349bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
45449bd79ccSDebojyoti Ghosh {
45549bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
45649bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
45749bd79ccSDebojyoti Ghosh 
45849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
45949bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
460a84f8069SRichard Tran Mills   ierr = PetscFree(b->S);CHKERRQ(ierr);
461a84f8069SRichard Tran Mills   ierr = PetscFree(b->T);CHKERRQ(ierr);
462a84f8069SRichard Tran Mills   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
46349bd79ccSDebojyoti Ghosh   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
46406d61a37SPierre Jolivet   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",NULL);CHKERRQ(ierr);
46549bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
46649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
46749bd79ccSDebojyoti Ghosh }
46849bd79ccSDebojyoti Ghosh 
469e0e5a793SRichard Tran Mills PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
470e0e5a793SRichard Tran Mills {
471e0e5a793SRichard Tran Mills   PetscErrorCode   ierr;
472e0e5a793SRichard Tran Mills   Mat_MPIKAIJ      *a;
473e0e5a793SRichard Tran Mills   Mat_MPIAIJ       *mpiaij;
474e0e5a793SRichard Tran Mills   PetscScalar      *T;
475e0e5a793SRichard Tran Mills   PetscInt         i,j;
476e0e5a793SRichard Tran Mills   PetscObjectState state;
477e0e5a793SRichard Tran Mills 
478e0e5a793SRichard Tran Mills   PetscFunctionBegin;
479e0e5a793SRichard Tran Mills   a = (Mat_MPIKAIJ*)A->data;
480e0e5a793SRichard Tran Mills   mpiaij = (Mat_MPIAIJ*)a->A->data;
481e0e5a793SRichard Tran Mills 
482e0e5a793SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)a->A,&state);CHKERRQ(ierr);
483e0e5a793SRichard Tran Mills   if (state == a->state) {
484e0e5a793SRichard Tran Mills     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
485e0e5a793SRichard Tran Mills     PetscFunctionReturn(0);
486e0e5a793SRichard Tran Mills   } else {
487e0e5a793SRichard Tran Mills     ierr = MatDestroy(&a->AIJ);CHKERRQ(ierr);
488e0e5a793SRichard Tran Mills     ierr = MatDestroy(&a->OAIJ);CHKERRQ(ierr);
489e0e5a793SRichard Tran Mills     if (a->isTI) {
490e0e5a793SRichard Tran Mills       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
491e0e5a793SRichard Tran Mills        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
492e0e5a793SRichard Tran Mills        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
493e0e5a793SRichard Tran Mills        * to pass in. */
494e0e5a793SRichard Tran Mills       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
495e0e5a793SRichard Tran Mills       for (i=0; i<a->p; i++) {
496e0e5a793SRichard Tran Mills         for (j=0; j<a->q; j++) {
497e0e5a793SRichard Tran Mills           if (i==j) T[i+j*a->p] = 1.0;
498e0e5a793SRichard Tran Mills           else      T[i+j*a->p] = 0.0;
499e0e5a793SRichard Tran Mills         }
500e0e5a793SRichard Tran Mills       }
501e0e5a793SRichard Tran Mills     } else T = a->T;
502e0e5a793SRichard Tran Mills     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
503e0e5a793SRichard Tran Mills     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
504e0e5a793SRichard Tran Mills     if (a->isTI) {
505e0e5a793SRichard Tran Mills       ierr = PetscFree(T);CHKERRQ(ierr);
506e0e5a793SRichard Tran Mills     }
507e0e5a793SRichard Tran Mills     a->state = state;
508e0e5a793SRichard Tran Mills   }
509e0e5a793SRichard Tran Mills 
510e0e5a793SRichard Tran Mills   PetscFunctionReturn(0);
511e0e5a793SRichard Tran Mills }
512e0e5a793SRichard Tran Mills 
51349bd79ccSDebojyoti Ghosh PetscErrorCode MatSetUp_KAIJ(Mat A)
51449bd79ccSDebojyoti Ghosh {
5150567c835SRichard Tran Mills   PetscErrorCode ierr;
5160567c835SRichard Tran Mills   PetscInt       n;
5170567c835SRichard Tran Mills   PetscMPIInt    size;
5180567c835SRichard Tran Mills   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
5190567c835SRichard Tran Mills 
52049bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
521ffc4695bSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5220567c835SRichard Tran Mills   if (size == 1) {
5230567c835SRichard Tran Mills     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
5240567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
5250567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
5260567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
5270567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
5280567c835SRichard Tran Mills   } else {
5290567c835SRichard Tran Mills     Mat_MPIKAIJ *a;
5300567c835SRichard Tran Mills     Mat_MPIAIJ  *mpiaij;
5310567c835SRichard Tran Mills     IS          from,to;
5320567c835SRichard Tran Mills     Vec         gvec;
5330567c835SRichard Tran Mills 
5340567c835SRichard Tran Mills     a = (Mat_MPIKAIJ*)A->data;
535d3f912faSRichard Tran Mills     mpiaij = (Mat_MPIAIJ*)a->A->data;
5360567c835SRichard Tran Mills     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
5370567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
5380567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
5390567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
5400567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
5410567c835SRichard Tran Mills 
542eacc1826SRichard Tran Mills     ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr);
5430567c835SRichard Tran Mills 
5440567c835SRichard Tran Mills     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
5450567c835SRichard Tran Mills     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
5460567c835SRichard Tran Mills     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
5470567c835SRichard Tran Mills     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
5480567c835SRichard Tran Mills     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
5490567c835SRichard Tran Mills 
5500567c835SRichard Tran Mills     /* create two temporary Index sets for build scatter gather */
5510567c835SRichard Tran Mills     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
5520567c835SRichard Tran Mills     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
5530567c835SRichard Tran Mills 
5540567c835SRichard Tran Mills     /* create temporary global vector to generate scatter context */
5550567c835SRichard Tran Mills     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
5560567c835SRichard Tran Mills 
5570567c835SRichard Tran Mills     /* generate the scatter context */
5584589b4e5SRichard Tran Mills     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
5590567c835SRichard Tran Mills 
5600567c835SRichard Tran Mills     ierr = ISDestroy(&from);CHKERRQ(ierr);
5610567c835SRichard Tran Mills     ierr = ISDestroy(&to);CHKERRQ(ierr);
5620567c835SRichard Tran Mills     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
5630567c835SRichard Tran Mills   }
5640567c835SRichard Tran Mills 
5650567c835SRichard Tran Mills   A->assembled = PETSC_TRUE;
56649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
56749bd79ccSDebojyoti Ghosh }
56849bd79ccSDebojyoti Ghosh 
569e6985dafSRichard Tran Mills PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
57049bd79ccSDebojyoti Ghosh {
571e6985dafSRichard Tran Mills   PetscViewerFormat format;
572e6985dafSRichard Tran Mills   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
57349bd79ccSDebojyoti Ghosh   Mat               B;
574e6985dafSRichard Tran Mills   PetscInt          i;
575e6985dafSRichard Tran Mills   PetscErrorCode    ierr;
576e6985dafSRichard Tran Mills   PetscBool         ismpikaij;
57749bd79ccSDebojyoti Ghosh 
57849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
579e6985dafSRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
580e6985dafSRichard Tran Mills   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
581e6985dafSRichard Tran Mills   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
582c0aa6a63SJacob Faibussowitsch     ierr = PetscViewerASCIIPrintf(viewer,"S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n",a->p,a->q);CHKERRQ(ierr);
583e6985dafSRichard Tran Mills 
584e6985dafSRichard Tran Mills     /* Print appropriate details for S. */
585e6985dafSRichard Tran Mills     if (!a->S) {
5862ae760e3SRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr);
587e6985dafSRichard Tran Mills     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
588e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr);
589e6985dafSRichard Tran Mills       for (i=0; i<(a->p * a->q); i++) {
590e6985dafSRichard Tran Mills #if defined(PETSC_USE_COMPLEX)
591e6985dafSRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr);
592e6985dafSRichard Tran Mills #else
593e6985dafSRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr);
594e6985dafSRichard Tran Mills #endif
595e6985dafSRichard Tran Mills       }
596e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
59749bd79ccSDebojyoti Ghosh     }
59849bd79ccSDebojyoti Ghosh 
599e6985dafSRichard Tran Mills     /* Print appropriate details for T. */
600e6985dafSRichard Tran Mills     if (a->isTI) {
6012ae760e3SRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr);
602e6985dafSRichard Tran Mills     } else if (!a->T) {
6032ae760e3SRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr);
604e6985dafSRichard Tran Mills     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
605e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr);
606e6985dafSRichard Tran Mills       for (i=0; i<(a->p * a->q); i++) {
607e6985dafSRichard Tran Mills #if defined(PETSC_USE_COMPLEX)
608e6985dafSRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr);
609e6985dafSRichard Tran Mills #else
610e6985dafSRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr);
611e6985dafSRichard Tran Mills #endif
612e6985dafSRichard Tran Mills       }
613e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
614e6985dafSRichard Tran Mills     }
61549bd79ccSDebojyoti Ghosh 
616e6985dafSRichard Tran Mills     /* Now print details for the AIJ matrix, using the AIJ viewer. */
617e6985dafSRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr);
618e6985dafSRichard Tran Mills     if (ismpikaij) {
619e6985dafSRichard Tran Mills       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
620e6985dafSRichard Tran Mills       ierr = MatView(b->A,viewer);CHKERRQ(ierr);
621e6985dafSRichard Tran Mills     } else {
622e6985dafSRichard Tran Mills       ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr);
623e6985dafSRichard Tran Mills     }
624e6985dafSRichard Tran Mills 
625e6985dafSRichard Tran Mills   } else {
626e6985dafSRichard Tran Mills     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
62706d61a37SPierre Jolivet     ierr = MatConvert(A,MATAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
62849bd79ccSDebojyoti Ghosh     ierr = MatView(B,viewer);CHKERRQ(ierr);
62949bd79ccSDebojyoti Ghosh     ierr = MatDestroy(&B);CHKERRQ(ierr);
630e6985dafSRichard Tran Mills   }
63149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
63249bd79ccSDebojyoti Ghosh }
63349bd79ccSDebojyoti Ghosh 
63449bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
63549bd79ccSDebojyoti Ghosh {
63649bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
63749bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
63849bd79ccSDebojyoti Ghosh 
63949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
64049bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
64149bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
64249bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
64349bd79ccSDebojyoti Ghosh   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
64449bd79ccSDebojyoti Ghosh   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
645a84f8069SRichard Tran Mills   ierr = PetscFree(b->S);CHKERRQ(ierr);
646a84f8069SRichard Tran Mills   ierr = PetscFree(b->T);CHKERRQ(ierr);
647a84f8069SRichard Tran Mills   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
64806d61a37SPierre Jolivet   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
64906d61a37SPierre Jolivet   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",NULL);CHKERRQ(ierr);
65049bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
65149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
65249bd79ccSDebojyoti Ghosh }
65349bd79ccSDebojyoti Ghosh 
65449bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/
65549bd79ccSDebojyoti Ghosh 
65649bd79ccSDebojyoti Ghosh /* zz = yy + Axx */
657836168d5SRichard Tran Mills PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
65849bd79ccSDebojyoti Ghosh {
65949bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
66049bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
66149bd79ccSDebojyoti Ghosh   const PetscScalar *s = b->S, *t = b->T;
66249bd79ccSDebojyoti Ghosh   const PetscScalar *x,*v,*bx;
66349bd79ccSDebojyoti Ghosh   PetscScalar       *y,*sums;
66449bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
66549bd79ccSDebojyoti Ghosh   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
66649bd79ccSDebojyoti Ghosh   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
66749bd79ccSDebojyoti Ghosh 
66849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
66949bd79ccSDebojyoti Ghosh   if (!yy) {
67049bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
67149bd79ccSDebojyoti Ghosh   } else {
67249bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
67349bd79ccSDebojyoti Ghosh   }
67449bd79ccSDebojyoti Ghosh   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
67549bd79ccSDebojyoti Ghosh 
67649bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
67749bd79ccSDebojyoti Ghosh   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
67849bd79ccSDebojyoti Ghosh   idx  = a->j;
67949bd79ccSDebojyoti Ghosh   v    = a->a;
68049bd79ccSDebojyoti Ghosh   ii   = a->i;
68149bd79ccSDebojyoti Ghosh 
68249bd79ccSDebojyoti Ghosh   if (b->isTI) {
68349bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
68449bd79ccSDebojyoti Ghosh       jrow = ii[i];
68549bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
68649bd79ccSDebojyoti Ghosh       sums = y + p*i;
68749bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
68849bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
68949bd79ccSDebojyoti Ghosh           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
69049bd79ccSDebojyoti Ghosh         }
69149bd79ccSDebojyoti Ghosh       }
69249bd79ccSDebojyoti Ghosh     }
693ca0c957dSBarry Smith     ierr = PetscLogFlops(3.0*(a->nz)*p);CHKERRQ(ierr);
69449bd79ccSDebojyoti Ghosh   } else if (t) {
69549bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
69649bd79ccSDebojyoti Ghosh       jrow = ii[i];
69749bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
69849bd79ccSDebojyoti Ghosh       sums = y + p*i;
69949bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
70049bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
70149bd79ccSDebojyoti Ghosh           for (l=0; l<q; l++) {
70249bd79ccSDebojyoti Ghosh             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
70349bd79ccSDebojyoti Ghosh           }
70449bd79ccSDebojyoti Ghosh         }
70549bd79ccSDebojyoti Ghosh       }
70649bd79ccSDebojyoti Ghosh     }
707234d9204SRichard Tran Mills     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
708234d9204SRichard Tran Mills      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
709234d9204SRichard Tran Mills      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
710234d9204SRichard Tran Mills      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
711ca0c957dSBarry Smith     ierr = PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);CHKERRQ(ierr);
71249bd79ccSDebojyoti Ghosh   }
71349bd79ccSDebojyoti Ghosh   if (s) {
71449bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
71549bd79ccSDebojyoti Ghosh       sums = y + p*i;
71649bd79ccSDebojyoti Ghosh       bx   = x + q*i;
71749bd79ccSDebojyoti Ghosh       if (i < b->AIJ->cmap->n) {
71849bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
71949bd79ccSDebojyoti Ghosh           for (k=0; k<p; k++) {
72049bd79ccSDebojyoti Ghosh             sums[k] += s[k+j*p]*bx[j];
72149bd79ccSDebojyoti Ghosh           }
72249bd79ccSDebojyoti Ghosh         }
72349bd79ccSDebojyoti Ghosh       }
72449bd79ccSDebojyoti Ghosh     }
725ca0c957dSBarry Smith     ierr = PetscLogFlops(2.0*m*p*q);CHKERRQ(ierr);
72649bd79ccSDebojyoti Ghosh   }
72749bd79ccSDebojyoti Ghosh 
72849bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
72949bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
73049bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
73149bd79ccSDebojyoti Ghosh }
73249bd79ccSDebojyoti Ghosh 
733bb6fb833SRichard Tran Mills PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
73449bd79ccSDebojyoti Ghosh {
73549bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
73649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
737836168d5SRichard Tran Mills   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
73849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
73949bd79ccSDebojyoti Ghosh }
74049bd79ccSDebojyoti Ghosh 
74149bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h>
74249bd79ccSDebojyoti Ghosh 
743bb6fb833SRichard Tran Mills PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
74449bd79ccSDebojyoti Ghosh {
74549bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
74649bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
74749bd79ccSDebojyoti Ghosh   const PetscScalar *S  = b->S;
74849bd79ccSDebojyoti Ghosh   const PetscScalar *T  = b->T;
74949bd79ccSDebojyoti Ghosh   const PetscScalar *v  = a->a;
75049bd79ccSDebojyoti Ghosh   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
75149bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
75249bd79ccSDebojyoti Ghosh   PetscInt          i,j,*v_pivots,dof,dof2;
75349bd79ccSDebojyoti Ghosh   PetscScalar       *diag,aval,*v_work;
75449bd79ccSDebojyoti Ghosh 
75549bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
75649bd79ccSDebojyoti Ghosh   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
75731a97b9aSRichard Tran Mills   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
75849bd79ccSDebojyoti Ghosh 
75949bd79ccSDebojyoti Ghosh   dof  = p;
76049bd79ccSDebojyoti Ghosh   dof2 = dof*dof;
76149bd79ccSDebojyoti Ghosh 
76249bd79ccSDebojyoti Ghosh   if (b->ibdiagvalid) {
76349bd79ccSDebojyoti Ghosh     if (values) *values = b->ibdiag;
76449bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
76549bd79ccSDebojyoti Ghosh   }
76649bd79ccSDebojyoti Ghosh   if (!b->ibdiag) {
767a84f8069SRichard Tran Mills     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
76849bd79ccSDebojyoti Ghosh     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
76949bd79ccSDebojyoti Ghosh   }
77049bd79ccSDebojyoti Ghosh   if (values) *values = b->ibdiag;
77149bd79ccSDebojyoti Ghosh   diag = b->ibdiag;
77249bd79ccSDebojyoti Ghosh 
77349bd79ccSDebojyoti Ghosh   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
77449bd79ccSDebojyoti Ghosh   for (i=0; i<m; i++) {
77549bd79ccSDebojyoti Ghosh     if (S) {
77649bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
77749bd79ccSDebojyoti Ghosh     } else {
77849bd79ccSDebojyoti Ghosh       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
77949bd79ccSDebojyoti Ghosh     }
78049bd79ccSDebojyoti Ghosh     if (b->isTI) {
78149bd79ccSDebojyoti Ghosh       aval = 0;
78249bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
78349bd79ccSDebojyoti Ghosh       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
78449bd79ccSDebojyoti Ghosh     } else if (T) {
78549bd79ccSDebojyoti Ghosh       aval = 0;
78649bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
78749bd79ccSDebojyoti Ghosh       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
78849bd79ccSDebojyoti Ghosh     }
78949bd79ccSDebojyoti Ghosh     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
79049bd79ccSDebojyoti Ghosh     diag += dof2;
79149bd79ccSDebojyoti Ghosh   }
79249bd79ccSDebojyoti Ghosh   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
79349bd79ccSDebojyoti Ghosh 
79449bd79ccSDebojyoti Ghosh   b->ibdiagvalid = PETSC_TRUE;
79549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
79649bd79ccSDebojyoti Ghosh }
79749bd79ccSDebojyoti Ghosh 
79849bd79ccSDebojyoti Ghosh static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
79949bd79ccSDebojyoti Ghosh {
80049bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
80149bd79ccSDebojyoti Ghosh 
80249bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
80349bd79ccSDebojyoti Ghosh   *B = kaij->AIJ;
80449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
80549bd79ccSDebojyoti Ghosh }
80649bd79ccSDebojyoti Ghosh 
807*fac53fa1SPierre Jolivet static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
808*fac53fa1SPierre Jolivet {
809*fac53fa1SPierre Jolivet   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
810*fac53fa1SPierre Jolivet   Mat            AIJ,OAIJ,B;
811*fac53fa1SPierre Jolivet   PetscInt       *d_nnz,*o_nnz = NULL,nz,i,j,m,d;
812*fac53fa1SPierre Jolivet   const PetscInt p = a->p,q = a->q;
813*fac53fa1SPierre Jolivet   PetscBool      ismpikaij,missing;
814*fac53fa1SPierre Jolivet   PetscErrorCode ierr;
815*fac53fa1SPierre Jolivet 
816*fac53fa1SPierre Jolivet   PetscFunctionBegin;
817*fac53fa1SPierre Jolivet   if (reuse != MAT_REUSE_MATRIX) {
818*fac53fa1SPierre Jolivet     ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
819*fac53fa1SPierre Jolivet     if (ismpikaij) {
820*fac53fa1SPierre Jolivet       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
821*fac53fa1SPierre Jolivet       AIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
822*fac53fa1SPierre Jolivet       OAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
823*fac53fa1SPierre Jolivet     } else {
824*fac53fa1SPierre Jolivet       AIJ = a->AIJ;
825*fac53fa1SPierre Jolivet       OAIJ = NULL;
826*fac53fa1SPierre Jolivet     }
827*fac53fa1SPierre Jolivet     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
828*fac53fa1SPierre Jolivet     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
829*fac53fa1SPierre Jolivet     ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
830*fac53fa1SPierre Jolivet     ierr = MatGetSize(AIJ,&m,NULL);CHKERRQ(ierr);
831*fac53fa1SPierre Jolivet     ierr = MatMissingDiagonal(AIJ,&missing,&d);CHKERRQ(ierr); /* assumption that all successive rows will have a missing diagonal */
832*fac53fa1SPierre Jolivet     if (!missing || !a->S) d = m;
833*fac53fa1SPierre Jolivet     ierr = PetscMalloc1(m*p,&d_nnz);CHKERRQ(ierr);
834*fac53fa1SPierre Jolivet     for (i = 0; i < m; ++i) {
835*fac53fa1SPierre Jolivet       ierr = MatGetRow_SeqAIJ(AIJ,i,&nz,NULL,NULL);CHKERRQ(ierr);
836*fac53fa1SPierre Jolivet       for (j = 0; j < p; ++j) d_nnz[i*p + j] = nz*q + (i >= d)*q;
837*fac53fa1SPierre Jolivet       ierr = MatRestoreRow_SeqAIJ(AIJ,i,&nz,NULL,NULL);CHKERRQ(ierr);
838*fac53fa1SPierre Jolivet     }
839*fac53fa1SPierre Jolivet     if (OAIJ) {
840*fac53fa1SPierre Jolivet       ierr = PetscMalloc1(m*p,&o_nnz);CHKERRQ(ierr);
841*fac53fa1SPierre Jolivet       for (i = 0; i < m; ++i) {
842*fac53fa1SPierre Jolivet         ierr = MatGetRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL);CHKERRQ(ierr);
843*fac53fa1SPierre Jolivet         for (j = 0; j < p; ++j) o_nnz[i*p + j] = nz*q;
844*fac53fa1SPierre Jolivet         ierr = MatRestoreRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL);CHKERRQ(ierr);
845*fac53fa1SPierre Jolivet       }
846*fac53fa1SPierre Jolivet       ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
847*fac53fa1SPierre Jolivet     } else {
848*fac53fa1SPierre Jolivet       ierr = MatSeqAIJSetPreallocation(B,0,d_nnz);CHKERRQ(ierr);
849*fac53fa1SPierre Jolivet     }
850*fac53fa1SPierre Jolivet     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
851*fac53fa1SPierre Jolivet     ierr = PetscFree(o_nnz);CHKERRQ(ierr);
852*fac53fa1SPierre Jolivet   } else B = *newmat;
853*fac53fa1SPierre Jolivet   ierr = MatConvert_Basic(A,newtype,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);
854*fac53fa1SPierre Jolivet   if (reuse == MAT_INPLACE_MATRIX) {
855*fac53fa1SPierre Jolivet     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
856*fac53fa1SPierre Jolivet   } else *newmat = B;
857*fac53fa1SPierre Jolivet   PetscFunctionReturn(0);
858*fac53fa1SPierre Jolivet }
859*fac53fa1SPierre Jolivet 
86049bd79ccSDebojyoti Ghosh PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
86149bd79ccSDebojyoti Ghosh {
86249bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
86349bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
86449bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
86549bd79ccSDebojyoti Ghosh   const PetscScalar *aa = a->a, *T = kaij->T, *v;
86649bd79ccSDebojyoti Ghosh   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
86749bd79ccSDebojyoti Ghosh   const PetscScalar *b, *xb, *idiag;
86849bd79ccSDebojyoti Ghosh   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
86949bd79ccSDebojyoti Ghosh   PetscInt          i, j, k, i2, bs, bs2, nz;
87049bd79ccSDebojyoti Ghosh 
87149bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
87249bd79ccSDebojyoti Ghosh   its = its*lits;
87349bd79ccSDebojyoti Ghosh   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
874c0aa6a63SJacob Faibussowitsch   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
8756a375485SRichard Tran Mills   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
8766a375485SRichard Tran Mills   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
8776a375485SRichard Tran Mills   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
87849bd79ccSDebojyoti Ghosh   else        {bs = p; bs2 = bs*bs; }
87949bd79ccSDebojyoti Ghosh 
88049bd79ccSDebojyoti Ghosh   if (!m) PetscFunctionReturn(0);
88149bd79ccSDebojyoti Ghosh 
882bb6fb833SRichard Tran Mills   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
88349bd79ccSDebojyoti Ghosh   idiag = kaij->ibdiag;
88449bd79ccSDebojyoti Ghosh   diag  = a->diag;
88549bd79ccSDebojyoti Ghosh 
88649bd79ccSDebojyoti Ghosh   if (!kaij->sor.setup) {
88749bd79ccSDebojyoti Ghosh     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
88849bd79ccSDebojyoti Ghosh     kaij->sor.setup = PETSC_TRUE;
88949bd79ccSDebojyoti Ghosh   }
89049bd79ccSDebojyoti Ghosh   y     = kaij->sor.y;
89149bd79ccSDebojyoti Ghosh   w     = kaij->sor.w;
89249bd79ccSDebojyoti Ghosh   work  = kaij->sor.work;
89349bd79ccSDebojyoti Ghosh   t     = kaij->sor.t;
89449bd79ccSDebojyoti Ghosh   arr   = kaij->sor.arr;
89549bd79ccSDebojyoti Ghosh 
89649bd79ccSDebojyoti Ghosh   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
89749bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
89849bd79ccSDebojyoti Ghosh 
89949bd79ccSDebojyoti Ghosh   if (flag & SOR_ZERO_INITIAL_GUESS) {
90049bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
90149bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
90249bd79ccSDebojyoti Ghosh       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
90349bd79ccSDebojyoti Ghosh       i2     =  bs;
90449bd79ccSDebojyoti Ghosh       idiag  += bs2;
90549bd79ccSDebojyoti Ghosh       for (i=1; i<m; i++) {
90649bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
90749bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
90849bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
90949bd79ccSDebojyoti Ghosh 
91049bd79ccSDebojyoti Ghosh         if (T) {                /* b - T (Arow * x) */
9112ae760e3SRichard Tran Mills           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
91249bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
91349bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
91449bd79ccSDebojyoti Ghosh           }
91549bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
91649bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
91749bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
9189eb573c1SRichard Tran Mills           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
91949bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
92049bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
92149bd79ccSDebojyoti Ghosh           }
92249bd79ccSDebojyoti Ghosh         } else {
9239eb573c1SRichard Tran Mills           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
92449bd79ccSDebojyoti Ghosh         }
92549bd79ccSDebojyoti Ghosh 
92649bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
92749bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
92849bd79ccSDebojyoti Ghosh 
92949bd79ccSDebojyoti Ghosh         idiag += bs2;
93049bd79ccSDebojyoti Ghosh         i2    += bs;
93149bd79ccSDebojyoti Ghosh       }
93249bd79ccSDebojyoti Ghosh       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
93349bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
93449bd79ccSDebojyoti Ghosh       xb = t;
93549bd79ccSDebojyoti Ghosh     } else xb = b;
93649bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
93749bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
93849bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
93949bd79ccSDebojyoti Ghosh       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
94049bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
94149bd79ccSDebojyoti Ghosh       i2    -= bs;
94249bd79ccSDebojyoti Ghosh       idiag -= bs2;
94349bd79ccSDebojyoti Ghosh       for (i=m-2; i>=0; i--) {
94449bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1 ;
94549bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
94649bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
94749bd79ccSDebojyoti Ghosh 
94849bd79ccSDebojyoti Ghosh         if (T) {                /* FIXME: This branch untested */
94949bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
95049bd79ccSDebojyoti Ghosh           /* copy all rows of x that are needed into contiguous space */
95149bd79ccSDebojyoti Ghosh           workt = work;
95249bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
95349bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
95449bd79ccSDebojyoti Ghosh             workt += bs;
95549bd79ccSDebojyoti Ghosh           }
95649bd79ccSDebojyoti Ghosh           arrt = arr;
95749bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
95849bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
95949bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
96049bd79ccSDebojyoti Ghosh             arrt += bs2;
96149bd79ccSDebojyoti Ghosh           }
96249bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
96349bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
9649eb573c1SRichard Tran Mills           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
96549bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
96649bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
96749bd79ccSDebojyoti Ghosh           }
96849bd79ccSDebojyoti Ghosh         }
96949bd79ccSDebojyoti Ghosh 
97049bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
97149bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
97249bd79ccSDebojyoti Ghosh 
97349bd79ccSDebojyoti Ghosh         idiag -= bs2;
97449bd79ccSDebojyoti Ghosh         i2    -= bs;
97549bd79ccSDebojyoti Ghosh       }
97649bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
97749bd79ccSDebojyoti Ghosh     }
97849bd79ccSDebojyoti Ghosh     its--;
97949bd79ccSDebojyoti Ghosh   }
98049bd79ccSDebojyoti Ghosh   while (its--) {               /* FIXME: This branch not updated */
98149bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
98249bd79ccSDebojyoti Ghosh       i2     =  0;
98349bd79ccSDebojyoti Ghosh       idiag  = kaij->ibdiag;
98449bd79ccSDebojyoti Ghosh       for (i=0; i<m; i++) {
98549bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
98649bd79ccSDebojyoti Ghosh 
98749bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
98849bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
98949bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
99049bd79ccSDebojyoti Ghosh         workt = work;
99149bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
99249bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
99349bd79ccSDebojyoti Ghosh           workt += bs;
99449bd79ccSDebojyoti Ghosh         }
99549bd79ccSDebojyoti Ghosh         arrt = arr;
99649bd79ccSDebojyoti Ghosh         if (T) {
99749bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
99849bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
99949bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
100049bd79ccSDebojyoti Ghosh             arrt += bs2;
100149bd79ccSDebojyoti Ghosh           }
100249bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
100349bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
100449bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
100549bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
100649bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
100749bd79ccSDebojyoti Ghosh             arrt += bs2;
100849bd79ccSDebojyoti Ghosh           }
100949bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
101049bd79ccSDebojyoti Ghosh         }
101149bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
101249bd79ccSDebojyoti Ghosh 
101349bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1;
101449bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
101549bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
101649bd79ccSDebojyoti Ghosh         workt = work;
101749bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
101849bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
101949bd79ccSDebojyoti Ghosh           workt += bs;
102049bd79ccSDebojyoti Ghosh         }
102149bd79ccSDebojyoti Ghosh         arrt = arr;
102249bd79ccSDebojyoti Ghosh         if (T) {
102349bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
102449bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
102549bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
102649bd79ccSDebojyoti Ghosh             arrt += bs2;
102749bd79ccSDebojyoti Ghosh           }
102849bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
102949bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
103049bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
103149bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
103249bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
103349bd79ccSDebojyoti Ghosh             arrt += bs2;
103449bd79ccSDebojyoti Ghosh           }
103549bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
103649bd79ccSDebojyoti Ghosh         }
103749bd79ccSDebojyoti Ghosh 
103849bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
103949bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
104049bd79ccSDebojyoti Ghosh 
104149bd79ccSDebojyoti Ghosh         idiag += bs2;
104249bd79ccSDebojyoti Ghosh         i2    += bs;
104349bd79ccSDebojyoti Ghosh       }
104449bd79ccSDebojyoti Ghosh       xb = t;
104549bd79ccSDebojyoti Ghosh     }
104649bd79ccSDebojyoti Ghosh     else xb = b;
104749bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
104849bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
104949bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
105049bd79ccSDebojyoti Ghosh       if (xb == b) {
105149bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
105249bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
105349bd79ccSDebojyoti Ghosh 
105449bd79ccSDebojyoti Ghosh           v  = aa + ai[i];
105549bd79ccSDebojyoti Ghosh           vi = aj + ai[i];
105649bd79ccSDebojyoti Ghosh           nz = diag[i] - ai[i];
105749bd79ccSDebojyoti Ghosh           workt = work;
105849bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
105949bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
106049bd79ccSDebojyoti Ghosh             workt += bs;
106149bd79ccSDebojyoti Ghosh           }
106249bd79ccSDebojyoti Ghosh           arrt = arr;
106349bd79ccSDebojyoti Ghosh           if (T) {
106449bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
106549bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
106649bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
106749bd79ccSDebojyoti Ghosh               arrt += bs2;
106849bd79ccSDebojyoti Ghosh             }
106949bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
107049bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
107149bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
107249bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
107349bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
107449bd79ccSDebojyoti Ghosh               arrt += bs2;
107549bd79ccSDebojyoti Ghosh             }
107649bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
107749bd79ccSDebojyoti Ghosh           }
107849bd79ccSDebojyoti Ghosh 
107949bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
108049bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
108149bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
108249bd79ccSDebojyoti Ghosh           workt = work;
108349bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
108449bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
108549bd79ccSDebojyoti Ghosh             workt += bs;
108649bd79ccSDebojyoti Ghosh           }
108749bd79ccSDebojyoti Ghosh           arrt = arr;
108849bd79ccSDebojyoti Ghosh           if (T) {
108949bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
109049bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
109149bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
109249bd79ccSDebojyoti Ghosh               arrt += bs2;
109349bd79ccSDebojyoti Ghosh             }
109449bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
109549bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
109649bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
109749bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
109849bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
109949bd79ccSDebojyoti Ghosh               arrt += bs2;
110049bd79ccSDebojyoti Ghosh             }
110149bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
110249bd79ccSDebojyoti Ghosh           }
110349bd79ccSDebojyoti Ghosh 
110449bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
110549bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
110649bd79ccSDebojyoti Ghosh         }
110749bd79ccSDebojyoti Ghosh       } else {
110849bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
110949bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
111049bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
111149bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
111249bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
111349bd79ccSDebojyoti Ghosh           workt = work;
111449bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
111549bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
111649bd79ccSDebojyoti Ghosh             workt += bs;
111749bd79ccSDebojyoti Ghosh           }
111849bd79ccSDebojyoti Ghosh           arrt = arr;
111949bd79ccSDebojyoti Ghosh           if (T) {
112049bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
112149bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
112249bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
112349bd79ccSDebojyoti Ghosh               arrt += bs2;
112449bd79ccSDebojyoti Ghosh             }
112549bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
112649bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
112749bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
112849bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
112949bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
113049bd79ccSDebojyoti Ghosh               arrt += bs2;
113149bd79ccSDebojyoti Ghosh             }
113249bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
113349bd79ccSDebojyoti Ghosh           }
113449bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
113549bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
113649bd79ccSDebojyoti Ghosh         }
113749bd79ccSDebojyoti Ghosh       }
113849bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
113949bd79ccSDebojyoti Ghosh     }
114049bd79ccSDebojyoti Ghosh   }
114149bd79ccSDebojyoti Ghosh 
114249bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
114349bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
114449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
114549bd79ccSDebojyoti Ghosh }
114649bd79ccSDebojyoti Ghosh 
114749bd79ccSDebojyoti Ghosh /*===================================================================================*/
114849bd79ccSDebojyoti Ghosh 
1149836168d5SRichard Tran Mills PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
115049bd79ccSDebojyoti Ghosh {
115149bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
115249bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
115349bd79ccSDebojyoti Ghosh 
115449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
115549bd79ccSDebojyoti Ghosh   if (!yy) {
115649bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
115749bd79ccSDebojyoti Ghosh   } else {
115849bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
115949bd79ccSDebojyoti Ghosh   }
1160761d359dSRichard Tran Mills   ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ and b->OAIJ are up to date. */
116149bd79ccSDebojyoti Ghosh   /* start the scatter */
116249bd79ccSDebojyoti Ghosh   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
116349bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
116449bd79ccSDebojyoti Ghosh   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
116549bd79ccSDebojyoti Ghosh   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
116649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
116749bd79ccSDebojyoti Ghosh }
116849bd79ccSDebojyoti Ghosh 
1169bb6fb833SRichard Tran Mills PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
117049bd79ccSDebojyoti Ghosh {
117149bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
117249bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
1173836168d5SRichard Tran Mills   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
117449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
117549bd79ccSDebojyoti Ghosh }
117649bd79ccSDebojyoti Ghosh 
1177bb6fb833SRichard Tran Mills PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
117849bd79ccSDebojyoti Ghosh {
117949bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
118049bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
118149bd79ccSDebojyoti Ghosh 
118249bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
1183761d359dSRichard Tran Mills   ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ is up to date. */
118449bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
118549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
118649bd79ccSDebojyoti Ghosh }
118749bd79ccSDebojyoti Ghosh 
118849bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/
118949bd79ccSDebojyoti Ghosh 
119049bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
119149bd79ccSDebojyoti Ghosh {
119249bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
11931ca5ffdbSRichard Tran Mills   PetscErrorCode  diag = PETSC_FALSE;
11941ca5ffdbSRichard Tran Mills   PetscErrorCode  ierr;
119549bd79ccSDebojyoti Ghosh   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
119649bd79ccSDebojyoti Ghosh   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
119749bd79ccSDebojyoti Ghosh 
119849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
119949bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
120049bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
1201c0aa6a63SJacob Faibussowitsch   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row);
120249bd79ccSDebojyoti Ghosh 
120349bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
120449bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
120549bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
120649bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
120749bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
120849bd79ccSDebojyoti Ghosh   }
120949bd79ccSDebojyoti Ghosh 
121049bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
121149bd79ccSDebojyoti Ghosh     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
121249bd79ccSDebojyoti Ghosh     c     = nzaij;
121349bd79ccSDebojyoti Ghosh     for (i=0; i<nzaij; i++) {
121449bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
121549bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
121649bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
121749bd79ccSDebojyoti Ghosh         c = i;
121849bd79ccSDebojyoti Ghosh       }
121949bd79ccSDebojyoti Ghosh     }
122049bd79ccSDebojyoti Ghosh   } else nzaij = c = 0;
122149bd79ccSDebojyoti Ghosh 
122249bd79ccSDebojyoti Ghosh   /* calculate size of row */
122349bd79ccSDebojyoti Ghosh   nz = 0;
122449bd79ccSDebojyoti Ghosh   if (S)            nz += q;
122549bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
122649bd79ccSDebojyoti Ghosh 
122749bd79ccSDebojyoti Ghosh   if (cols || values) {
122849bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
122938822f9dSRichard Tran Mills     for (i=0; i<q; i++) {
123038822f9dSRichard Tran Mills       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
123138822f9dSRichard Tran Mills       v[i] = 0.0;
123238822f9dSRichard Tran Mills     }
123349bd79ccSDebojyoti Ghosh     if (b->isTI) {
123449bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
123549bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
123649bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
123749bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vaij[i] : 0);
123849bd79ccSDebojyoti Ghosh         }
123949bd79ccSDebojyoti Ghosh       }
124049bd79ccSDebojyoti Ghosh     } else if (T) {
124149bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
124249bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
124349bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
124449bd79ccSDebojyoti Ghosh           v[i*q+j]   = vaij[i]*T[s+j*p];
124549bd79ccSDebojyoti Ghosh         }
124649bd79ccSDebojyoti Ghosh       }
124749bd79ccSDebojyoti Ghosh     }
124849bd79ccSDebojyoti Ghosh     if (S) {
124949bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
125049bd79ccSDebojyoti Ghosh         idx[c*q+j] = r*q+j;
125149bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
125249bd79ccSDebojyoti Ghosh       }
125349bd79ccSDebojyoti Ghosh     }
125449bd79ccSDebojyoti Ghosh   }
125549bd79ccSDebojyoti Ghosh 
125649bd79ccSDebojyoti Ghosh   if (ncols)    *ncols  = nz;
125749bd79ccSDebojyoti Ghosh   if (cols)     *cols   = idx;
125849bd79ccSDebojyoti Ghosh   if (values)   *values = v;
125949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
126049bd79ccSDebojyoti Ghosh }
126149bd79ccSDebojyoti Ghosh 
126249bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
126349bd79ccSDebojyoti Ghosh {
126449bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
1265cb4a9cd9SHong Zhang 
126649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
1267cb4a9cd9SHong Zhang   if (nz) *nz = 0;
126849bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
126949bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
127049bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
127149bd79ccSDebojyoti Ghosh }
127249bd79ccSDebojyoti Ghosh 
127349bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
127449bd79ccSDebojyoti Ghosh {
127549bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
127649bd79ccSDebojyoti Ghosh   Mat             AIJ     = b->A;
1277fc64b2cfSRichard Tran Mills   PetscBool       diag    = PETSC_FALSE;
1278761d359dSRichard Tran Mills   Mat             MatAIJ,MatOAIJ;
127949bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
128049bd79ccSDebojyoti Ghosh   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1281389eba51SJed Brown   PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
128249bd79ccSDebojyoti Ghosh   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
128349bd79ccSDebojyoti Ghosh 
128449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
1285761d359dSRichard Tran Mills   ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ and b->OAIJ are up to date. */
1286761d359dSRichard Tran Mills   MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1287761d359dSRichard Tran Mills   MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
128849bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
128949bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
129049bd79ccSDebojyoti Ghosh   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
129149bd79ccSDebojyoti Ghosh   lrow = row - rstart;
129249bd79ccSDebojyoti Ghosh 
129349bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
129449bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
129549bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
129649bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
129749bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
129849bd79ccSDebojyoti Ghosh   }
129949bd79ccSDebojyoti Ghosh 
130049bd79ccSDebojyoti Ghosh   r = lrow/p;
130149bd79ccSDebojyoti Ghosh   s = lrow%p;
130249bd79ccSDebojyoti Ghosh 
130349bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
13042ae760e3SRichard Tran Mills     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr);
130549bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
130649bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
130749bd79ccSDebojyoti Ghosh 
130849bd79ccSDebojyoti Ghosh     c     = ncolsaij + ncolsoaij;
130949bd79ccSDebojyoti Ghosh     for (i=0; i<ncolsaij; i++) {
131049bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
131149bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
131249bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
131349bd79ccSDebojyoti Ghosh         c = i;
131449bd79ccSDebojyoti Ghosh       }
131549bd79ccSDebojyoti Ghosh     }
131649bd79ccSDebojyoti Ghosh   } else c = 0;
131749bd79ccSDebojyoti Ghosh 
131849bd79ccSDebojyoti Ghosh   /* calculate size of row */
131949bd79ccSDebojyoti Ghosh   nz = 0;
132049bd79ccSDebojyoti Ghosh   if (S)            nz += q;
132149bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
132249bd79ccSDebojyoti Ghosh 
132349bd79ccSDebojyoti Ghosh   if (cols || values) {
132449bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1325a437a796SRichard Tran Mills     for (i=0; i<q; i++) {
1326a437a796SRichard Tran Mills       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1327a437a796SRichard Tran Mills       v[i] = 0.0;
1328a437a796SRichard Tran Mills     }
132949bd79ccSDebojyoti Ghosh     if (b->isTI) {
133049bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
133149bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
133249bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
133349bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vals[i] : 0.0);
133449bd79ccSDebojyoti Ghosh         }
133549bd79ccSDebojyoti Ghosh       }
133649bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
133749bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
133849bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
133949bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
134049bd79ccSDebojyoti Ghosh         }
134149bd79ccSDebojyoti Ghosh       }
134249bd79ccSDebojyoti Ghosh     } else if (T) {
134349bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
134449bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
134549bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
134649bd79ccSDebojyoti Ghosh           v[i*q+j]   = vals[i]*T[s+j*p];
134749bd79ccSDebojyoti Ghosh         }
134849bd79ccSDebojyoti Ghosh       }
134949bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
135049bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
135149bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
135249bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
135349bd79ccSDebojyoti Ghosh         }
135449bd79ccSDebojyoti Ghosh       }
135549bd79ccSDebojyoti Ghosh     }
135649bd79ccSDebojyoti Ghosh     if (S) {
135749bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
135849bd79ccSDebojyoti Ghosh         idx[c*q+j] = (r+rstart/p)*q+j;
135949bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
136049bd79ccSDebojyoti Ghosh       }
136149bd79ccSDebojyoti Ghosh     }
136249bd79ccSDebojyoti Ghosh   }
136349bd79ccSDebojyoti Ghosh 
136449bd79ccSDebojyoti Ghosh   if (ncols)  *ncols  = nz;
136549bd79ccSDebojyoti Ghosh   if (cols)   *cols   = idx;
136649bd79ccSDebojyoti Ghosh   if (values) *values = v;
136749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
136849bd79ccSDebojyoti Ghosh }
136949bd79ccSDebojyoti Ghosh 
137049bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
137149bd79ccSDebojyoti Ghosh {
137249bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
137349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
137449bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
137549bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
137649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
137749bd79ccSDebojyoti Ghosh }
137849bd79ccSDebojyoti Ghosh 
137949bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
138049bd79ccSDebojyoti Ghosh {
138149bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
138249bd79ccSDebojyoti Ghosh   Mat            A;
138349bd79ccSDebojyoti Ghosh 
138449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
138549bd79ccSDebojyoti Ghosh   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
138649bd79ccSDebojyoti Ghosh   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
138749bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&A);CHKERRQ(ierr);
138849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
138949bd79ccSDebojyoti Ghosh }
139049bd79ccSDebojyoti Ghosh 
139149bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */
139249bd79ccSDebojyoti Ghosh /*@C
139349bd79ccSDebojyoti Ghosh   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
139449bd79ccSDebojyoti Ghosh 
139549bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
139649bd79ccSDebojyoti Ghosh 
139749bd79ccSDebojyoti Ghosh   where
139849bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
139949bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
140049bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
140149bd79ccSDebojyoti Ghosh     I is the identity matrix
140249bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
140349bd79ccSDebojyoti Ghosh 
1404d3b90ce1SRichard Tran Mills   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
140549bd79ccSDebojyoti Ghosh 
140649bd79ccSDebojyoti Ghosh   Collective
140749bd79ccSDebojyoti Ghosh 
140849bd79ccSDebojyoti Ghosh   Input Parameters:
140949bd79ccSDebojyoti Ghosh + A - the AIJ matrix
141049bd79ccSDebojyoti Ghosh . p - number of rows in S and T
1411d3b90ce1SRichard Tran Mills . q - number of columns in S and T
1412d3b90ce1SRichard Tran Mills . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1413d3b90ce1SRichard Tran Mills - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
141449bd79ccSDebojyoti Ghosh 
141549bd79ccSDebojyoti Ghosh   Output Parameter:
141649bd79ccSDebojyoti Ghosh . kaij - the new KAIJ matrix
141749bd79ccSDebojyoti Ghosh 
1418d3b90ce1SRichard Tran Mills   Notes:
1419d3b90ce1SRichard Tran Mills   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1420d3b90ce1SRichard Tran Mills   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
142149bd79ccSDebojyoti Ghosh 
1422761d359dSRichard Tran Mills   Developer Notes:
1423761d359dSRichard Tran Mills   In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1424761d359dSRichard Tran Mills   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1425761d359dSRichard Tran Mills   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1426761d359dSRichard Tran Mills   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1427761d359dSRichard Tran Mills 
142849bd79ccSDebojyoti Ghosh   Level: advanced
142949bd79ccSDebojyoti Ghosh 
14300567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
143149bd79ccSDebojyoti Ghosh @*/
143249bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
143349bd79ccSDebojyoti Ghosh {
143449bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
143549bd79ccSDebojyoti Ghosh 
143649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
14370567c835SRichard Tran Mills   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
143806d61a37SPierre Jolivet   ierr = MatSetType(*kaij,MATKAIJ);CHKERRQ(ierr);
14390567c835SRichard Tran Mills   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
14400567c835SRichard Tran Mills   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
14410567c835SRichard Tran Mills   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
14422ae760e3SRichard Tran Mills   ierr = MatSetUp(*kaij);CHKERRQ(ierr);
14430567c835SRichard Tran Mills   PetscFunctionReturn(0);
14440567c835SRichard Tran Mills }
144549bd79ccSDebojyoti Ghosh 
14460567c835SRichard Tran Mills /*MC
14475881e567SRichard Tran Mills   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
14485881e567SRichard Tran Mills     [I \otimes S + A \otimes T],
14490567c835SRichard Tran Mills   where
14505881e567SRichard Tran Mills     S is a dense (p \times q) matrix,
14515881e567SRichard Tran Mills     T is a dense (p \times q) matrix,
14525881e567SRichard Tran Mills     A is an AIJ  (n \times n) matrix,
14535881e567SRichard Tran Mills     and I is the identity matrix.
14545881e567SRichard Tran Mills   The resulting matrix is (np \times nq).
14550567c835SRichard Tran Mills 
1456d3b90ce1SRichard Tran Mills   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
14570567c835SRichard Tran Mills 
14585881e567SRichard Tran Mills   Notes:
14595881e567SRichard Tran Mills   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
14605881e567SRichard Tran Mills   where x and b are column vectors containing the row-major representations of X and B.
14615881e567SRichard Tran Mills 
14620567c835SRichard Tran Mills   Level: advanced
14630567c835SRichard Tran Mills 
14640567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
14650567c835SRichard Tran Mills M*/
14660567c835SRichard Tran Mills 
14670567c835SRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
14680567c835SRichard Tran Mills {
14690567c835SRichard Tran Mills   PetscErrorCode ierr;
14700567c835SRichard Tran Mills   Mat_MPIKAIJ    *b;
14710567c835SRichard Tran Mills   PetscMPIInt    size;
14720567c835SRichard Tran Mills 
14730567c835SRichard Tran Mills   PetscFunctionBegin;
14740567c835SRichard Tran Mills   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
14750567c835SRichard Tran Mills   A->data  = (void*)b;
14760567c835SRichard Tran Mills 
14770567c835SRichard Tran Mills   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
14780567c835SRichard Tran Mills 
1479f4259b30SLisandro Dalcin   b->w    = NULL;
148055b25c41SPierre Jolivet   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
14810567c835SRichard Tran Mills   if (size == 1) {
14820567c835SRichard Tran Mills     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
14830567c835SRichard Tran Mills     A->ops->destroy             = MatDestroy_SeqKAIJ;
1484bb6fb833SRichard Tran Mills     A->ops->mult                = MatMult_SeqKAIJ;
1485bb6fb833SRichard Tran Mills     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1486bb6fb833SRichard Tran Mills     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
14870567c835SRichard Tran Mills     A->ops->getrow              = MatGetRow_SeqKAIJ;
14880567c835SRichard Tran Mills     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
14890567c835SRichard Tran Mills     A->ops->sor                 = MatSOR_SeqKAIJ;
1490*fac53fa1SPierre Jolivet     ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",MatConvert_KAIJ_AIJ);CHKERRQ(ierr);
14910567c835SRichard Tran Mills   } else {
14920567c835SRichard Tran Mills     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
14930567c835SRichard Tran Mills     A->ops->destroy             = MatDestroy_MPIKAIJ;
1494bb6fb833SRichard Tran Mills     A->ops->mult                = MatMult_MPIKAIJ;
1495bb6fb833SRichard Tran Mills     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1496bb6fb833SRichard Tran Mills     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
14970567c835SRichard Tran Mills     A->ops->getrow              = MatGetRow_MPIKAIJ;
14980567c835SRichard Tran Mills     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
14990567c835SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1500*fac53fa1SPierre Jolivet     ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",MatConvert_KAIJ_AIJ);CHKERRQ(ierr);
15010567c835SRichard Tran Mills   }
150206d61a37SPierre Jolivet   A->ops->setup           = MatSetUp_KAIJ;
150306d61a37SPierre Jolivet   A->ops->view            = MatView_KAIJ;
15040567c835SRichard Tran Mills   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
150549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
150649bd79ccSDebojyoti Ghosh }
1507