xref: /petsc/src/mat/impls/kaij/kaij.c (revision 66f58c76d198ec62676c74ae71f5633216e58e96)
149bd79ccSDebojyoti Ghosh 
249bd79ccSDebojyoti Ghosh /*
349bd79ccSDebojyoti Ghosh   Defines the basic matrix operations for the KAIJ  matrix storage format.
449bd79ccSDebojyoti Ghosh   This format is used to evaluate matrices of the form:
549bd79ccSDebojyoti Ghosh 
649bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
749bd79ccSDebojyoti Ghosh 
849bd79ccSDebojyoti Ghosh   where
949bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
1049bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
1149bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
1249bd79ccSDebojyoti Ghosh     I is the identity matrix
1349bd79ccSDebojyoti Ghosh 
1449bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
1549bd79ccSDebojyoti Ghosh 
1649bd79ccSDebojyoti Ghosh   We provide:
1749bd79ccSDebojyoti Ghosh      MatMult()
1849bd79ccSDebojyoti Ghosh      MatMultAdd()
1949bd79ccSDebojyoti Ghosh      MatInvertBlockDiagonal()
2049bd79ccSDebojyoti Ghosh   and
2149bd79ccSDebojyoti Ghosh      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
2249bd79ccSDebojyoti Ghosh 
2349bd79ccSDebojyoti Ghosh   This single directory handles both the sequential and parallel codes
2449bd79ccSDebojyoti Ghosh */
2549bd79ccSDebojyoti Ghosh 
2649bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
2749bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h>
2849bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h>
2949bd79ccSDebojyoti Ghosh 
3049bd79ccSDebojyoti Ghosh /*@C
3149bd79ccSDebojyoti Ghosh    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
3249bd79ccSDebojyoti Ghosh 
3349bd79ccSDebojyoti Ghosh    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
3449bd79ccSDebojyoti Ghosh 
3549bd79ccSDebojyoti Ghosh    Input Parameter:
3649bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
3749bd79ccSDebojyoti Ghosh 
3849bd79ccSDebojyoti Ghosh    Output Parameter:
3949bd79ccSDebojyoti Ghosh .  B - the AIJ matrix
4049bd79ccSDebojyoti Ghosh 
4149bd79ccSDebojyoti Ghosh    Level: advanced
4249bd79ccSDebojyoti Ghosh 
4349bd79ccSDebojyoti Ghosh    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
4449bd79ccSDebojyoti Ghosh 
4549bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ()
4649bd79ccSDebojyoti Ghosh @*/
4749bd79ccSDebojyoti Ghosh PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
4849bd79ccSDebojyoti Ghosh {
4949bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
5049bd79ccSDebojyoti Ghosh   PetscBool      ismpikaij,isseqkaij;
5149bd79ccSDebojyoti Ghosh 
5249bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
5349bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
5449bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
5549bd79ccSDebojyoti Ghosh   if (ismpikaij) {
5649bd79ccSDebojyoti Ghosh     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
5749bd79ccSDebojyoti Ghosh 
5849bd79ccSDebojyoti Ghosh     *B = b->A;
5949bd79ccSDebojyoti Ghosh   } else if (isseqkaij) {
6049bd79ccSDebojyoti Ghosh     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
6149bd79ccSDebojyoti Ghosh 
6249bd79ccSDebojyoti Ghosh     *B = b->AIJ;
63b04351cbSRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
6449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
6549bd79ccSDebojyoti Ghosh }
6649bd79ccSDebojyoti Ghosh 
6749bd79ccSDebojyoti Ghosh /*@C
6849bd79ccSDebojyoti Ghosh    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
6949bd79ccSDebojyoti Ghosh 
700567c835SRichard Tran Mills    Not Collective; the entire S is stored and returned independently on all processes.
7149bd79ccSDebojyoti Ghosh 
7249bd79ccSDebojyoti Ghosh    Input Parameter:
7349bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
7449bd79ccSDebojyoti Ghosh 
75a5b5c723SRichard Tran Mills    Output Parameters:
76a5b5c723SRichard Tran Mills +  m - the number of rows in S
77a5b5c723SRichard Tran Mills .  n - the number of columns in S
78a5b5c723SRichard Tran Mills -  S - the S matrix, in form of a scalar array in column-major format
7949bd79ccSDebojyoti Ghosh 
80a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
8131a97b9aSRichard Tran Mills 
8249bd79ccSDebojyoti Ghosh    Level: advanced
8349bd79ccSDebojyoti Ghosh 
8431a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
8549bd79ccSDebojyoti Ghosh @*/
86a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
8749bd79ccSDebojyoti Ghosh {
8849bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
8949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
90a5b5c723SRichard Tran Mills   if (m) *m = b->p;
91a5b5c723SRichard Tran Mills   if (n) *n = b->q;
92a5b5c723SRichard Tran Mills   if (S) *S = b->S;
93a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
94a5b5c723SRichard Tran Mills }
95a5b5c723SRichard Tran Mills 
96a5b5c723SRichard Tran Mills /*@C
97a5b5c723SRichard Tran Mills    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98a5b5c723SRichard Tran Mills 
99a5b5c723SRichard Tran Mills    Not Collective; the entire S is stored and returned independently on all processes.
100a5b5c723SRichard Tran Mills 
101a5b5c723SRichard Tran Mills    Input Parameter:
102a5b5c723SRichard Tran Mills .  A - the KAIJ matrix
103a5b5c723SRichard Tran Mills 
104a5b5c723SRichard Tran Mills    Output Parameters:
105a5b5c723SRichard Tran Mills +  m - the number of rows in S
106a5b5c723SRichard Tran Mills .  n - the number of columns in S
107a5b5c723SRichard Tran Mills -  S - the S matrix, in form of a scalar array in column-major format
108a5b5c723SRichard Tran Mills 
109a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110a5b5c723SRichard Tran Mills 
111a5b5c723SRichard Tran Mills    Level: advanced
112a5b5c723SRichard Tran Mills 
113a5b5c723SRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114a5b5c723SRichard Tran Mills @*/
115a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116a5b5c723SRichard Tran Mills {
117a5b5c723SRichard Tran Mills   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118a5b5c723SRichard Tran Mills   PetscFunctionBegin;
119a5b5c723SRichard Tran Mills   if (m) *m = b->p;
120a5b5c723SRichard Tran Mills   if (n) *n = b->q;
121a5b5c723SRichard Tran Mills   if (S) *S = b->S;
122a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
123a5b5c723SRichard Tran Mills }
124a5b5c723SRichard Tran Mills 
125a5b5c723SRichard Tran Mills /*@C
126a5b5c723SRichard Tran Mills   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127a5b5c723SRichard Tran Mills 
128a5b5c723SRichard Tran Mills   Not collective
129a5b5c723SRichard Tran Mills 
130a5b5c723SRichard Tran Mills   Input Parameter:
131a5b5c723SRichard Tran Mills . A - the KAIJ matrix
132a5b5c723SRichard Tran Mills 
133a5b5c723SRichard Tran Mills   Output Parameter:
134a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS()
135a5b5c723SRichard Tran Mills 
136a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
138a5b5c723SRichard Tran Mills 
139a5b5c723SRichard Tran Mills   Level: advanced
140a5b5c723SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141a5b5c723SRichard Tran Mills @*/
142a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143a5b5c723SRichard Tran Mills {
144*66f58c76SRichard Tran Mills   PetscErrorCode ierr;
145*66f58c76SRichard Tran Mills 
146a5b5c723SRichard Tran Mills   PetscFunctionBegin;
147a5b5c723SRichard Tran Mills   if (S) *S = NULL;
148*66f58c76SRichard Tran Mills   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
149a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
150a5b5c723SRichard Tran Mills }
151a5b5c723SRichard Tran Mills 
152a5b5c723SRichard Tran Mills /*@C
153a5b5c723SRichard Tran Mills   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
154a5b5c723SRichard Tran Mills 
155a5b5c723SRichard Tran Mills   Not collective
156a5b5c723SRichard Tran Mills 
157a5b5c723SRichard Tran Mills   Input Parameter:
158a5b5c723SRichard Tran Mills . A - the KAIJ matrix
159a5b5c723SRichard Tran Mills 
160a5b5c723SRichard Tran Mills   Output Parameter:
161a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS()
162a5b5c723SRichard Tran Mills 
163a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
165a5b5c723SRichard Tran Mills 
166a5b5c723SRichard Tran Mills   Level: advanced
167a5b5c723SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168a5b5c723SRichard Tran Mills @*/
169a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170a5b5c723SRichard Tran Mills {
171a5b5c723SRichard Tran Mills   PetscFunctionBegin;
172a5b5c723SRichard Tran Mills   if (S) *S = NULL;
17349bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
17449bd79ccSDebojyoti Ghosh }
17549bd79ccSDebojyoti Ghosh 
17649bd79ccSDebojyoti Ghosh /*@C
17731a97b9aSRichard Tran Mills    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
17849bd79ccSDebojyoti Ghosh 
1790567c835SRichard Tran Mills    Not Collective; the entire T is stored and returned independently on all processes
18049bd79ccSDebojyoti Ghosh 
18149bd79ccSDebojyoti Ghosh    Input Parameter:
18249bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
18349bd79ccSDebojyoti Ghosh 
18449bd79ccSDebojyoti Ghosh    Output Parameter:
185a5b5c723SRichard Tran Mills +  m - the number of rows in T
186a5b5c723SRichard Tran Mills .  n - the number of columns in T
187a5b5c723SRichard Tran Mills -  T - the T matrix, in form of a scalar array in column-major format
18849bd79ccSDebojyoti Ghosh 
189a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
19031a97b9aSRichard Tran Mills 
19149bd79ccSDebojyoti Ghosh    Level: advanced
19249bd79ccSDebojyoti Ghosh 
19331a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
19449bd79ccSDebojyoti Ghosh @*/
195a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
19649bd79ccSDebojyoti Ghosh {
19749bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
19849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
199a5b5c723SRichard Tran Mills   if (m) *m = b->p;
200a5b5c723SRichard Tran Mills   if (n) *n = b->q;
201a5b5c723SRichard Tran Mills   if (T) *T = b->T;
202a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
203a5b5c723SRichard Tran Mills }
204a5b5c723SRichard Tran Mills 
205a5b5c723SRichard Tran Mills /*@C
206a5b5c723SRichard Tran Mills    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
207a5b5c723SRichard Tran Mills 
208a5b5c723SRichard Tran Mills    Not Collective; the entire T is stored and returned independently on all processes
209a5b5c723SRichard Tran Mills 
210a5b5c723SRichard Tran Mills    Input Parameter:
211a5b5c723SRichard Tran Mills .  A - the KAIJ matrix
212a5b5c723SRichard Tran Mills 
213a5b5c723SRichard Tran Mills    Output Parameter:
214a5b5c723SRichard Tran Mills +  m - the number of rows in T
215a5b5c723SRichard Tran Mills .  n - the number of columns in T
216a5b5c723SRichard Tran Mills -  T - the T matrix, in form of a scalar array in column-major format
217a5b5c723SRichard Tran Mills 
218a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
219a5b5c723SRichard Tran Mills 
220a5b5c723SRichard Tran Mills    Level: advanced
221a5b5c723SRichard Tran Mills 
222a5b5c723SRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223a5b5c723SRichard Tran Mills @*/
224a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225a5b5c723SRichard Tran Mills {
226a5b5c723SRichard Tran Mills   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
227a5b5c723SRichard Tran Mills   PetscFunctionBegin;
228a5b5c723SRichard Tran Mills   if (m) *m = b->p;
229a5b5c723SRichard Tran Mills   if (n) *n = b->q;
230a5b5c723SRichard Tran Mills   if (T) *T = b->T;
231a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
232a5b5c723SRichard Tran Mills }
233a5b5c723SRichard Tran Mills 
234a5b5c723SRichard Tran Mills /*@C
235a5b5c723SRichard Tran Mills   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
236a5b5c723SRichard Tran Mills 
237a5b5c723SRichard Tran Mills   Not collective
238a5b5c723SRichard Tran Mills 
239a5b5c723SRichard Tran Mills   Input Parameter:
240a5b5c723SRichard Tran Mills . A - the KAIJ matrix
241a5b5c723SRichard Tran Mills 
242a5b5c723SRichard Tran Mills   Output Parameter:
243a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS()
244a5b5c723SRichard Tran Mills 
245a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
247a5b5c723SRichard Tran Mills 
248a5b5c723SRichard Tran Mills   Level: advanced
249a5b5c723SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250a5b5c723SRichard Tran Mills @*/
251a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252a5b5c723SRichard Tran Mills {
253*66f58c76SRichard Tran Mills   PetscErrorCode ierr;
254*66f58c76SRichard Tran Mills 
255a5b5c723SRichard Tran Mills   PetscFunctionBegin;
256a5b5c723SRichard Tran Mills   if (T) *T = NULL;
257*66f58c76SRichard Tran Mills   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
258a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
259a5b5c723SRichard Tran Mills }
260a5b5c723SRichard Tran Mills 
261a5b5c723SRichard Tran Mills /*@C
262a5b5c723SRichard Tran Mills   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
263a5b5c723SRichard Tran Mills 
264a5b5c723SRichard Tran Mills   Not collective
265a5b5c723SRichard Tran Mills 
266a5b5c723SRichard Tran Mills   Input Parameter:
267a5b5c723SRichard Tran Mills . A - the KAIJ matrix
268a5b5c723SRichard Tran Mills 
269a5b5c723SRichard Tran Mills   Output Parameter:
270a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS()
271a5b5c723SRichard Tran Mills 
272a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
274a5b5c723SRichard Tran Mills 
275a5b5c723SRichard Tran Mills   Level: advanced
276a5b5c723SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277a5b5c723SRichard Tran Mills @*/
278a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279a5b5c723SRichard Tran Mills {
280a5b5c723SRichard Tran Mills   PetscFunctionBegin;
281a5b5c723SRichard Tran Mills   if (T) *T = NULL;
28249bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
28349bd79ccSDebojyoti Ghosh }
28449bd79ccSDebojyoti Ghosh 
2850567c835SRichard Tran Mills /*@
2860567c835SRichard Tran Mills    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
2870567c835SRichard Tran Mills 
2880567c835SRichard Tran Mills    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
2890567c835SRichard Tran Mills 
2900567c835SRichard Tran Mills    Input Parameters:
2910567c835SRichard Tran Mills +  A - the KAIJ matrix
2920567c835SRichard Tran Mills -  B - the AIJ matrix
2930567c835SRichard Tran Mills 
29415b9d025SRichard Tran Mills    Notes:
29515b9d025SRichard Tran Mills    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
29615b9d025SRichard Tran Mills    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
29715b9d025SRichard Tran Mills 
2980567c835SRichard Tran Mills    Level: advanced
2990567c835SRichard Tran Mills 
3000567c835SRichard Tran Mills .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
3010567c835SRichard Tran Mills @*/
3020567c835SRichard Tran Mills PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
3030567c835SRichard Tran Mills {
3040567c835SRichard Tran Mills   PetscErrorCode ierr;
3050567c835SRichard Tran Mills   PetscMPIInt    size;
3060567c835SRichard Tran Mills 
3070567c835SRichard Tran Mills   PetscFunctionBegin;
3080567c835SRichard Tran Mills   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3090567c835SRichard Tran Mills   if (size == 1) {
3100567c835SRichard Tran Mills     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
3110567c835SRichard Tran Mills     a->AIJ = B;
3120567c835SRichard Tran Mills   } else {
3130567c835SRichard Tran Mills     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
3140567c835SRichard Tran Mills     a->A = B;
3150567c835SRichard Tran Mills   }
31615b9d025SRichard Tran Mills   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
3170567c835SRichard Tran Mills   PetscFunctionReturn(0);
3180567c835SRichard Tran Mills }
3190567c835SRichard Tran Mills 
3200567c835SRichard Tran Mills /*@C
3210567c835SRichard Tran Mills    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
3220567c835SRichard Tran Mills 
3230567c835SRichard Tran Mills    Logically Collective; the entire S is stored independently on all processes.
3240567c835SRichard Tran Mills 
3250567c835SRichard Tran Mills    Input Parameters:
3260567c835SRichard Tran Mills +  A - the KAIJ matrix
3270567c835SRichard Tran Mills .  p - the number of rows in S
3280567c835SRichard Tran Mills .  q - the number of columns in S
3290567c835SRichard Tran Mills -  S - the S matrix, in form of a scalar array in column-major format
3300567c835SRichard Tran Mills 
3310567c835SRichard Tran Mills    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
33288f48298SRichard Tran Mills    The S matrix is copied, so the user can destroy this array.
3330567c835SRichard Tran Mills 
3340567c835SRichard Tran Mills    Level: Advanced
3350567c835SRichard Tran Mills 
3360567c835SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
3370567c835SRichard Tran Mills @*/
3380567c835SRichard Tran Mills PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
3390567c835SRichard Tran Mills {
3400567c835SRichard Tran Mills   PetscErrorCode ierr;
3410567c835SRichard Tran Mills   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
3420567c835SRichard Tran Mills 
3430567c835SRichard Tran Mills   PetscFunctionBegin;
3440567c835SRichard Tran Mills   ierr = PetscFree(a->S);CHKERRQ(ierr);
3450567c835SRichard Tran Mills   if (S) {
346a84f8069SRichard Tran Mills     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
3470567c835SRichard Tran Mills     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
3480567c835SRichard Tran Mills   } else  a->S = NULL;
3490567c835SRichard Tran Mills 
3500567c835SRichard Tran Mills   a->p = p;
3510567c835SRichard Tran Mills   a->q = q;
3520567c835SRichard Tran Mills   PetscFunctionReturn(0);
3530567c835SRichard Tran Mills }
3540567c835SRichard Tran Mills 
3550567c835SRichard Tran Mills /*@C
3560567c835SRichard Tran Mills    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
3570567c835SRichard Tran Mills 
3580567c835SRichard Tran Mills    Logically Collective; the entire T is stored independently on all processes.
3590567c835SRichard Tran Mills 
3600567c835SRichard Tran Mills    Input Parameters:
3610567c835SRichard Tran Mills +  A - the KAIJ matrix
3620567c835SRichard Tran Mills .  p - the number of rows in S
3630567c835SRichard Tran Mills .  q - the number of columns in S
3640567c835SRichard Tran Mills -  T - the T matrix, in form of a scalar array in column-major format
3650567c835SRichard Tran Mills 
3660567c835SRichard Tran Mills    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
36788f48298SRichard Tran Mills    The T matrix is copied, so the user can destroy this array.
3680567c835SRichard Tran Mills 
3690567c835SRichard Tran Mills    Level: Advanced
3700567c835SRichard Tran Mills 
3710567c835SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
3720567c835SRichard Tran Mills @*/
3730567c835SRichard Tran Mills PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
3740567c835SRichard Tran Mills {
3750567c835SRichard Tran Mills   PetscErrorCode ierr;
3760567c835SRichard Tran Mills   PetscInt       i,j;
3770567c835SRichard Tran Mills   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
3780567c835SRichard Tran Mills   PetscBool      isTI = PETSC_FALSE;
3790567c835SRichard Tran Mills 
3800567c835SRichard Tran Mills   PetscFunctionBegin;
3810567c835SRichard Tran Mills   /* check if T is an identity matrix */
3820567c835SRichard Tran Mills   if (T && (p == q)) {
3830567c835SRichard Tran Mills     isTI = PETSC_TRUE;
3840567c835SRichard Tran Mills     for (i=0; i<p; i++) {
3850567c835SRichard Tran Mills       for (j=0; j<q; j++) {
3860567c835SRichard Tran Mills         if (i == j) {
3870567c835SRichard Tran Mills           /* diagonal term must be 1 */
3880567c835SRichard Tran Mills           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
3890567c835SRichard Tran Mills         } else {
3900567c835SRichard Tran Mills           /* off-diagonal term must be 0 */
3910567c835SRichard Tran Mills           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
3920567c835SRichard Tran Mills         }
3930567c835SRichard Tran Mills       }
3940567c835SRichard Tran Mills     }
3950567c835SRichard Tran Mills   }
3960567c835SRichard Tran Mills   a->isTI = isTI;
3970567c835SRichard Tran Mills 
3980567c835SRichard Tran Mills   ierr = PetscFree(a->T);CHKERRQ(ierr);
3990567c835SRichard Tran Mills   if (T && (!isTI)) {
400a84f8069SRichard Tran Mills     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
4010567c835SRichard Tran Mills     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
40250d19d74SRichard Tran Mills   } else a->T = NULL;
4030567c835SRichard Tran Mills 
4040567c835SRichard Tran Mills   a->p = p;
4050567c835SRichard Tran Mills   a->q = q;
4060567c835SRichard Tran Mills   PetscFunctionReturn(0);
4070567c835SRichard Tran Mills }
4080567c835SRichard Tran Mills 
40949bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
41049bd79ccSDebojyoti Ghosh {
41149bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
41249bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
41349bd79ccSDebojyoti Ghosh 
41449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
41549bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
416a84f8069SRichard Tran Mills   ierr = PetscFree(b->S);CHKERRQ(ierr);
417a84f8069SRichard Tran Mills   ierr = PetscFree(b->T);CHKERRQ(ierr);
418a84f8069SRichard Tran Mills   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
41949bd79ccSDebojyoti Ghosh   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
42049bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
42149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
42249bd79ccSDebojyoti Ghosh }
42349bd79ccSDebojyoti Ghosh 
42449bd79ccSDebojyoti Ghosh PetscErrorCode MatSetUp_KAIJ(Mat A)
42549bd79ccSDebojyoti Ghosh {
4260567c835SRichard Tran Mills   PetscErrorCode ierr;
4270567c835SRichard Tran Mills   PetscInt       n;
4280567c835SRichard Tran Mills   PetscMPIInt    size;
4290567c835SRichard Tran Mills   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
4300567c835SRichard Tran Mills 
43149bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
4320567c835SRichard Tran Mills   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
4330567c835SRichard Tran Mills   if (size == 1) {
4340567c835SRichard Tran Mills     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
4350567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
4360567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
4370567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
4380567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
4390567c835SRichard Tran Mills   } else {
4400567c835SRichard Tran Mills     Mat_MPIKAIJ *a;
4410567c835SRichard Tran Mills     Mat_MPIAIJ  *mpiaij;
4420567c835SRichard Tran Mills     IS          from,to;
4430567c835SRichard Tran Mills     Vec         gvec;
4440567c835SRichard Tran Mills     PetscScalar *T;
4450567c835SRichard Tran Mills     PetscInt    i,j;
4460567c835SRichard Tran Mills 
4470567c835SRichard Tran Mills     a = (Mat_MPIKAIJ*)A->data;
448d3f912faSRichard Tran Mills     mpiaij = (Mat_MPIAIJ*)a->A->data;
4490567c835SRichard Tran Mills     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
4500567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
4510567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
4520567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
4530567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
4540567c835SRichard Tran Mills 
4550567c835SRichard Tran Mills     if (a->isTI) {
4560567c835SRichard Tran Mills       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
4570567c835SRichard Tran Mills        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
4580567c835SRichard Tran Mills        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
4590567c835SRichard Tran Mills        * to pass in. */
460a84f8069SRichard Tran Mills       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
4610567c835SRichard Tran Mills       for (i=0; i<a->p; i++) {
4620567c835SRichard Tran Mills         for (j=0; j<a->q; j++) {
4630567c835SRichard Tran Mills           if (i==j) T[i+j*a->p] = 1.0;
4640567c835SRichard Tran Mills           else      T[i+j*a->p] = 0.0;
4650567c835SRichard Tran Mills         }
4660567c835SRichard Tran Mills       }
4670567c835SRichard Tran Mills     } else T = a->T;
4680567c835SRichard Tran Mills     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
4690567c835SRichard Tran Mills     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
470c138d2acSRichard Tran Mills     if (a->isTI) {
4710567c835SRichard Tran Mills       ierr = PetscFree(T);CHKERRQ(ierr);
472c138d2acSRichard Tran Mills     }
4730567c835SRichard Tran Mills 
4740567c835SRichard Tran Mills     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
4750567c835SRichard Tran Mills     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
4760567c835SRichard Tran Mills     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
4770567c835SRichard Tran Mills     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
4780567c835SRichard Tran Mills     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
4790567c835SRichard Tran Mills 
4800567c835SRichard Tran Mills     /* create two temporary Index sets for build scatter gather */
4810567c835SRichard Tran Mills     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
4820567c835SRichard Tran Mills     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
4830567c835SRichard Tran Mills 
4840567c835SRichard Tran Mills     /* create temporary global vector to generate scatter context */
4850567c835SRichard Tran Mills     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
4860567c835SRichard Tran Mills 
4870567c835SRichard Tran Mills     /* generate the scatter context */
4884589b4e5SRichard Tran Mills     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
4890567c835SRichard Tran Mills 
4900567c835SRichard Tran Mills     ierr = ISDestroy(&from);CHKERRQ(ierr);
4910567c835SRichard Tran Mills     ierr = ISDestroy(&to);CHKERRQ(ierr);
4920567c835SRichard Tran Mills     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
4930567c835SRichard Tran Mills   }
4940567c835SRichard Tran Mills 
4950567c835SRichard Tran Mills   A->assembled = PETSC_TRUE;
49649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
49749bd79ccSDebojyoti Ghosh }
49849bd79ccSDebojyoti Ghosh 
499e6985dafSRichard Tran Mills PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
50049bd79ccSDebojyoti Ghosh {
501e6985dafSRichard Tran Mills   PetscViewerFormat format;
502e6985dafSRichard Tran Mills   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
50349bd79ccSDebojyoti Ghosh   Mat               B;
504e6985dafSRichard Tran Mills   PetscInt          i;
505e6985dafSRichard Tran Mills   PetscErrorCode    ierr;
506e6985dafSRichard Tran Mills   PetscBool         ismpikaij;
50749bd79ccSDebojyoti Ghosh 
50849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
509e6985dafSRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
510e6985dafSRichard Tran Mills   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
511e6985dafSRichard Tran Mills   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
512e6985dafSRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr);
513e6985dafSRichard Tran Mills 
514e6985dafSRichard Tran Mills     /* Print appropriate details for S. */
515e6985dafSRichard Tran Mills     if (!a->S) {
516e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");
517e6985dafSRichard Tran Mills     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
518e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr);
519e6985dafSRichard Tran Mills       for (i=0; i<(a->p * a->q); i++) {
520e6985dafSRichard Tran Mills #if defined(PETSC_USE_COMPLEX)
521e6985dafSRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr);
522e6985dafSRichard Tran Mills #else
523e6985dafSRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr);
524e6985dafSRichard Tran Mills #endif
525e6985dafSRichard Tran Mills       }
526e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
52749bd79ccSDebojyoti Ghosh     }
52849bd79ccSDebojyoti Ghosh 
529e6985dafSRichard Tran Mills     /* Print appropriate details for T. */
530e6985dafSRichard Tran Mills     if (a->isTI) {
531e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");
532e6985dafSRichard Tran Mills     } else if (!a->T) {
533e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");
534e6985dafSRichard Tran Mills     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
535e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr);
536e6985dafSRichard Tran Mills       for (i=0; i<(a->p * a->q); i++) {
537e6985dafSRichard Tran Mills #if defined(PETSC_USE_COMPLEX)
538e6985dafSRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr);
539e6985dafSRichard Tran Mills #else
540e6985dafSRichard Tran Mills         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr);
541e6985dafSRichard Tran Mills #endif
542e6985dafSRichard Tran Mills       }
543e6985dafSRichard Tran Mills       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
544e6985dafSRichard Tran Mills     }
54549bd79ccSDebojyoti Ghosh 
546e6985dafSRichard Tran Mills     /* Now print details for the AIJ matrix, using the AIJ viewer. */
547e6985dafSRichard Tran Mills     ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr);
548e6985dafSRichard Tran Mills     if (ismpikaij) {
549e6985dafSRichard Tran Mills       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
550e6985dafSRichard Tran Mills       ierr = MatView(b->A,viewer);CHKERRQ(ierr);
551e6985dafSRichard Tran Mills     } else {
552e6985dafSRichard Tran Mills       ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr);
553e6985dafSRichard Tran Mills     }
554e6985dafSRichard Tran Mills 
555e6985dafSRichard Tran Mills   } else {
556e6985dafSRichard Tran Mills     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
557e6985dafSRichard Tran Mills     if (ismpikaij) {
55849bd79ccSDebojyoti Ghosh       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
559e6985dafSRichard Tran Mills     } else {
560e6985dafSRichard Tran Mills       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
561e6985dafSRichard Tran Mills     }
56249bd79ccSDebojyoti Ghosh     ierr = MatView(B,viewer);CHKERRQ(ierr);
56349bd79ccSDebojyoti Ghosh     ierr = MatDestroy(&B);CHKERRQ(ierr);
564e6985dafSRichard Tran Mills   }
56549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
56649bd79ccSDebojyoti Ghosh }
56749bd79ccSDebojyoti Ghosh 
56849bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
56949bd79ccSDebojyoti Ghosh {
57049bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
57149bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
57249bd79ccSDebojyoti Ghosh 
57349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
57449bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
57549bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
57649bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
57749bd79ccSDebojyoti Ghosh   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
57849bd79ccSDebojyoti Ghosh   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
579a84f8069SRichard Tran Mills   ierr = PetscFree(b->S);CHKERRQ(ierr);
580a84f8069SRichard Tran Mills   ierr = PetscFree(b->T);CHKERRQ(ierr);
581a84f8069SRichard Tran Mills   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
58249bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
58349bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
58449bd79ccSDebojyoti Ghosh }
58549bd79ccSDebojyoti Ghosh 
58649bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/
58749bd79ccSDebojyoti Ghosh 
58849bd79ccSDebojyoti Ghosh /* zz = yy + Axx */
589836168d5SRichard Tran Mills PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
59049bd79ccSDebojyoti Ghosh {
59149bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
59249bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
59349bd79ccSDebojyoti Ghosh   const PetscScalar *s = b->S, *t = b->T;
59449bd79ccSDebojyoti Ghosh   const PetscScalar *x,*v,*bx;
59549bd79ccSDebojyoti Ghosh   PetscScalar       *y,*sums;
59649bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
59749bd79ccSDebojyoti Ghosh   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
59849bd79ccSDebojyoti Ghosh   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
59949bd79ccSDebojyoti Ghosh 
60049bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
60149bd79ccSDebojyoti Ghosh   if (!yy) {
60249bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
60349bd79ccSDebojyoti Ghosh   } else {
60449bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
60549bd79ccSDebojyoti Ghosh   }
60649bd79ccSDebojyoti Ghosh   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
60749bd79ccSDebojyoti Ghosh 
60849bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
60949bd79ccSDebojyoti Ghosh   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
61049bd79ccSDebojyoti Ghosh   idx  = a->j;
61149bd79ccSDebojyoti Ghosh   v    = a->a;
61249bd79ccSDebojyoti Ghosh   ii   = a->i;
61349bd79ccSDebojyoti Ghosh 
61449bd79ccSDebojyoti Ghosh   if (b->isTI) {
61549bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
61649bd79ccSDebojyoti Ghosh       jrow = ii[i];
61749bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
61849bd79ccSDebojyoti Ghosh       sums = y + p*i;
61949bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
62049bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
62149bd79ccSDebojyoti Ghosh           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
62249bd79ccSDebojyoti Ghosh         }
62349bd79ccSDebojyoti Ghosh       }
62449bd79ccSDebojyoti Ghosh     }
625234d9204SRichard Tran Mills     ierr = PetscLogFlops((a->nz)*3*p);CHKERRQ(ierr);
62649bd79ccSDebojyoti Ghosh   } else if (t) {
62749bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
62849bd79ccSDebojyoti Ghosh       jrow = ii[i];
62949bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
63049bd79ccSDebojyoti Ghosh       sums = y + p*i;
63149bd79ccSDebojyoti Ghosh       bx   = x + q*i;
63249bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
63349bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
63449bd79ccSDebojyoti Ghosh           for (l=0; l<q; l++) {
63549bd79ccSDebojyoti Ghosh             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
63649bd79ccSDebojyoti Ghosh           }
63749bd79ccSDebojyoti Ghosh         }
63849bd79ccSDebojyoti Ghosh       }
63949bd79ccSDebojyoti Ghosh     }
640234d9204SRichard Tran Mills     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
641234d9204SRichard Tran Mills      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
642234d9204SRichard Tran Mills      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
643234d9204SRichard Tran Mills      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
644234d9204SRichard Tran Mills     ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
64549bd79ccSDebojyoti Ghosh   }
64649bd79ccSDebojyoti Ghosh   if (s) {
64749bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
64849bd79ccSDebojyoti Ghosh       sums = y + p*i;
64949bd79ccSDebojyoti Ghosh       bx   = x + q*i;
65049bd79ccSDebojyoti Ghosh       if (i < b->AIJ->cmap->n) {
65149bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
65249bd79ccSDebojyoti Ghosh           for (k=0; k<p; k++) {
65349bd79ccSDebojyoti Ghosh             sums[k] += s[k+j*p]*bx[j];
65449bd79ccSDebojyoti Ghosh           }
65549bd79ccSDebojyoti Ghosh         }
65649bd79ccSDebojyoti Ghosh       }
65749bd79ccSDebojyoti Ghosh     }
658234d9204SRichard Tran Mills     ierr = PetscLogFlops(m*2*p*q);CHKERRQ(ierr);
65949bd79ccSDebojyoti Ghosh   }
66049bd79ccSDebojyoti Ghosh 
66149bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
66249bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
66349bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
66449bd79ccSDebojyoti Ghosh }
66549bd79ccSDebojyoti Ghosh 
666bb6fb833SRichard Tran Mills PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
66749bd79ccSDebojyoti Ghosh {
66849bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
66949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
670836168d5SRichard Tran Mills   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
67149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
67249bd79ccSDebojyoti Ghosh }
67349bd79ccSDebojyoti Ghosh 
67449bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h>
67549bd79ccSDebojyoti Ghosh 
676bb6fb833SRichard Tran Mills PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
67749bd79ccSDebojyoti Ghosh {
67849bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
67949bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
68049bd79ccSDebojyoti Ghosh   const PetscScalar *S  = b->S;
68149bd79ccSDebojyoti Ghosh   const PetscScalar *T  = b->T;
68249bd79ccSDebojyoti Ghosh   const PetscScalar *v  = a->a;
68349bd79ccSDebojyoti Ghosh   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
68449bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
68549bd79ccSDebojyoti Ghosh   PetscInt          i,j,*v_pivots,dof,dof2;
68649bd79ccSDebojyoti Ghosh   PetscScalar       *diag,aval,*v_work;
68749bd79ccSDebojyoti Ghosh 
68849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
68949bd79ccSDebojyoti Ghosh   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
69031a97b9aSRichard Tran Mills   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
69149bd79ccSDebojyoti Ghosh 
69249bd79ccSDebojyoti Ghosh   dof  = p;
69349bd79ccSDebojyoti Ghosh   dof2 = dof*dof;
69449bd79ccSDebojyoti Ghosh 
69549bd79ccSDebojyoti Ghosh   if (b->ibdiagvalid) {
69649bd79ccSDebojyoti Ghosh     if (values) *values = b->ibdiag;
69749bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
69849bd79ccSDebojyoti Ghosh   }
69949bd79ccSDebojyoti Ghosh   if (!b->ibdiag) {
700a84f8069SRichard Tran Mills     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
70149bd79ccSDebojyoti Ghosh     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
70249bd79ccSDebojyoti Ghosh   }
70349bd79ccSDebojyoti Ghosh   if (values) *values = b->ibdiag;
70449bd79ccSDebojyoti Ghosh   diag = b->ibdiag;
70549bd79ccSDebojyoti Ghosh 
70649bd79ccSDebojyoti Ghosh   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
70749bd79ccSDebojyoti Ghosh   for (i=0; i<m; i++) {
70849bd79ccSDebojyoti Ghosh     if (S) {
70949bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
71049bd79ccSDebojyoti Ghosh     } else {
71149bd79ccSDebojyoti Ghosh       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
71249bd79ccSDebojyoti Ghosh     }
71349bd79ccSDebojyoti Ghosh     if (b->isTI) {
71449bd79ccSDebojyoti Ghosh       aval = 0;
71549bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
71649bd79ccSDebojyoti Ghosh       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
71749bd79ccSDebojyoti Ghosh     } else if (T) {
71849bd79ccSDebojyoti Ghosh       aval = 0;
71949bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
72049bd79ccSDebojyoti Ghosh       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
72149bd79ccSDebojyoti Ghosh     }
72249bd79ccSDebojyoti Ghosh     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
72349bd79ccSDebojyoti Ghosh     diag += dof2;
72449bd79ccSDebojyoti Ghosh   }
72549bd79ccSDebojyoti Ghosh   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
72649bd79ccSDebojyoti Ghosh 
72749bd79ccSDebojyoti Ghosh   b->ibdiagvalid = PETSC_TRUE;
72849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
72949bd79ccSDebojyoti Ghosh }
73049bd79ccSDebojyoti Ghosh 
73149bd79ccSDebojyoti Ghosh static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
73249bd79ccSDebojyoti Ghosh {
73349bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
73449bd79ccSDebojyoti Ghosh 
73549bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
73649bd79ccSDebojyoti Ghosh   *B = kaij->AIJ;
73749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
73849bd79ccSDebojyoti Ghosh }
73949bd79ccSDebojyoti Ghosh 
74049bd79ccSDebojyoti Ghosh PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
74149bd79ccSDebojyoti Ghosh {
74249bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
74349bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
74449bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
74549bd79ccSDebojyoti Ghosh   const PetscScalar *aa = a->a, *T = kaij->T, *v;
74649bd79ccSDebojyoti Ghosh   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
74749bd79ccSDebojyoti Ghosh   const PetscScalar *b, *xb, *idiag;
74849bd79ccSDebojyoti Ghosh   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
74949bd79ccSDebojyoti Ghosh   PetscInt          i, j, k, i2, bs, bs2, nz;
75049bd79ccSDebojyoti Ghosh 
75149bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
75249bd79ccSDebojyoti Ghosh   its = its*lits;
75349bd79ccSDebojyoti Ghosh   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
75449bd79ccSDebojyoti Ghosh   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
7556a375485SRichard Tran Mills   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
7566a375485SRichard Tran Mills   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
7576a375485SRichard Tran Mills   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
75849bd79ccSDebojyoti Ghosh   else        {bs = p; bs2 = bs*bs; }
75949bd79ccSDebojyoti Ghosh 
76049bd79ccSDebojyoti Ghosh   if (!m) PetscFunctionReturn(0);
76149bd79ccSDebojyoti Ghosh 
762bb6fb833SRichard Tran Mills   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
76349bd79ccSDebojyoti Ghosh   idiag = kaij->ibdiag;
76449bd79ccSDebojyoti Ghosh   diag  = a->diag;
76549bd79ccSDebojyoti Ghosh 
76649bd79ccSDebojyoti Ghosh   if (!kaij->sor.setup) {
76749bd79ccSDebojyoti Ghosh     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
76849bd79ccSDebojyoti Ghosh     kaij->sor.setup = PETSC_TRUE;
76949bd79ccSDebojyoti Ghosh   }
77049bd79ccSDebojyoti Ghosh   y     = kaij->sor.y;
77149bd79ccSDebojyoti Ghosh   w     = kaij->sor.w;
77249bd79ccSDebojyoti Ghosh   work  = kaij->sor.work;
77349bd79ccSDebojyoti Ghosh   t     = kaij->sor.t;
77449bd79ccSDebojyoti Ghosh   arr   = kaij->sor.arr;
77549bd79ccSDebojyoti Ghosh 
77649bd79ccSDebojyoti Ghosh   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
77749bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
77849bd79ccSDebojyoti Ghosh 
77949bd79ccSDebojyoti Ghosh   if (flag & SOR_ZERO_INITIAL_GUESS) {
78049bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
78149bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
78249bd79ccSDebojyoti Ghosh       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
78349bd79ccSDebojyoti Ghosh       i2     =  bs;
78449bd79ccSDebojyoti Ghosh       idiag  += bs2;
78549bd79ccSDebojyoti Ghosh       for (i=1; i<m; i++) {
78649bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
78749bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
78849bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
78949bd79ccSDebojyoti Ghosh 
79049bd79ccSDebojyoti Ghosh         if (T) {                /* b - T (Arow * x) */
7919eb573c1SRichard Tran Mills           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));
79249bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
79349bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
79449bd79ccSDebojyoti Ghosh           }
79549bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
79649bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
79749bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
7989eb573c1SRichard Tran Mills           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
79949bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
80049bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
80149bd79ccSDebojyoti Ghosh           }
80249bd79ccSDebojyoti Ghosh         } else {
8039eb573c1SRichard Tran Mills           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
80449bd79ccSDebojyoti Ghosh         }
80549bd79ccSDebojyoti Ghosh 
80649bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
80749bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
80849bd79ccSDebojyoti Ghosh 
80949bd79ccSDebojyoti Ghosh         idiag += bs2;
81049bd79ccSDebojyoti Ghosh         i2    += bs;
81149bd79ccSDebojyoti Ghosh       }
81249bd79ccSDebojyoti Ghosh       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
81349bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
81449bd79ccSDebojyoti Ghosh       xb = t;
81549bd79ccSDebojyoti Ghosh     } else xb = b;
81649bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
81749bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
81849bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
81949bd79ccSDebojyoti Ghosh       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
82049bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
82149bd79ccSDebojyoti Ghosh       i2    -= bs;
82249bd79ccSDebojyoti Ghosh       idiag -= bs2;
82349bd79ccSDebojyoti Ghosh       for (i=m-2; i>=0; i--) {
82449bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1 ;
82549bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
82649bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
82749bd79ccSDebojyoti Ghosh 
82849bd79ccSDebojyoti Ghosh         if (T) {                /* FIXME: This branch untested */
82949bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
83049bd79ccSDebojyoti Ghosh           /* copy all rows of x that are needed into contiguous space */
83149bd79ccSDebojyoti Ghosh           workt = work;
83249bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
83349bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
83449bd79ccSDebojyoti Ghosh             workt += bs;
83549bd79ccSDebojyoti Ghosh           }
83649bd79ccSDebojyoti Ghosh           arrt = arr;
83749bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
83849bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
83949bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
84049bd79ccSDebojyoti Ghosh             arrt += bs2;
84149bd79ccSDebojyoti Ghosh           }
84249bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
84349bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
8449eb573c1SRichard Tran Mills           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
84549bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
84649bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
84749bd79ccSDebojyoti Ghosh           }
84849bd79ccSDebojyoti Ghosh         }
84949bd79ccSDebojyoti Ghosh 
85049bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
85149bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
85249bd79ccSDebojyoti Ghosh 
85349bd79ccSDebojyoti Ghosh         idiag -= bs2;
85449bd79ccSDebojyoti Ghosh         i2    -= bs;
85549bd79ccSDebojyoti Ghosh       }
85649bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
85749bd79ccSDebojyoti Ghosh     }
85849bd79ccSDebojyoti Ghosh     its--;
85949bd79ccSDebojyoti Ghosh   }
86049bd79ccSDebojyoti Ghosh   while (its--) {               /* FIXME: This branch not updated */
86149bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
86249bd79ccSDebojyoti Ghosh       i2     =  0;
86349bd79ccSDebojyoti Ghosh       idiag  = kaij->ibdiag;
86449bd79ccSDebojyoti Ghosh       for (i=0; i<m; i++) {
86549bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
86649bd79ccSDebojyoti Ghosh 
86749bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
86849bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
86949bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
87049bd79ccSDebojyoti Ghosh         workt = work;
87149bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
87249bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
87349bd79ccSDebojyoti Ghosh           workt += bs;
87449bd79ccSDebojyoti Ghosh         }
87549bd79ccSDebojyoti Ghosh         arrt = arr;
87649bd79ccSDebojyoti Ghosh         if (T) {
87749bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
87849bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
87949bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
88049bd79ccSDebojyoti Ghosh             arrt += bs2;
88149bd79ccSDebojyoti Ghosh           }
88249bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
88349bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
88449bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
88549bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
88649bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
88749bd79ccSDebojyoti Ghosh             arrt += bs2;
88849bd79ccSDebojyoti Ghosh           }
88949bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
89049bd79ccSDebojyoti Ghosh         }
89149bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
89249bd79ccSDebojyoti Ghosh 
89349bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1;
89449bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
89549bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
89649bd79ccSDebojyoti Ghosh         workt = work;
89749bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
89849bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
89949bd79ccSDebojyoti Ghosh           workt += bs;
90049bd79ccSDebojyoti Ghosh         }
90149bd79ccSDebojyoti Ghosh         arrt = arr;
90249bd79ccSDebojyoti Ghosh         if (T) {
90349bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
90449bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
90549bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
90649bd79ccSDebojyoti Ghosh             arrt += bs2;
90749bd79ccSDebojyoti Ghosh           }
90849bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
90949bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
91049bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
91149bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
91249bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
91349bd79ccSDebojyoti Ghosh             arrt += bs2;
91449bd79ccSDebojyoti Ghosh           }
91549bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
91649bd79ccSDebojyoti Ghosh         }
91749bd79ccSDebojyoti Ghosh 
91849bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
91949bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
92049bd79ccSDebojyoti Ghosh 
92149bd79ccSDebojyoti Ghosh         idiag += bs2;
92249bd79ccSDebojyoti Ghosh         i2    += bs;
92349bd79ccSDebojyoti Ghosh       }
92449bd79ccSDebojyoti Ghosh       xb = t;
92549bd79ccSDebojyoti Ghosh     }
92649bd79ccSDebojyoti Ghosh     else xb = b;
92749bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
92849bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
92949bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
93049bd79ccSDebojyoti Ghosh       if (xb == b) {
93149bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
93249bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
93349bd79ccSDebojyoti Ghosh 
93449bd79ccSDebojyoti Ghosh           v  = aa + ai[i];
93549bd79ccSDebojyoti Ghosh           vi = aj + ai[i];
93649bd79ccSDebojyoti Ghosh           nz = diag[i] - ai[i];
93749bd79ccSDebojyoti Ghosh           workt = work;
93849bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
93949bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
94049bd79ccSDebojyoti Ghosh             workt += bs;
94149bd79ccSDebojyoti Ghosh           }
94249bd79ccSDebojyoti Ghosh           arrt = arr;
94349bd79ccSDebojyoti Ghosh           if (T) {
94449bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
94549bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
94649bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
94749bd79ccSDebojyoti Ghosh               arrt += bs2;
94849bd79ccSDebojyoti Ghosh             }
94949bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
95049bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
95149bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
95249bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
95349bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
95449bd79ccSDebojyoti Ghosh               arrt += bs2;
95549bd79ccSDebojyoti Ghosh             }
95649bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
95749bd79ccSDebojyoti Ghosh           }
95849bd79ccSDebojyoti Ghosh 
95949bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
96049bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
96149bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
96249bd79ccSDebojyoti Ghosh           workt = work;
96349bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
96449bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
96549bd79ccSDebojyoti Ghosh             workt += bs;
96649bd79ccSDebojyoti Ghosh           }
96749bd79ccSDebojyoti Ghosh           arrt = arr;
96849bd79ccSDebojyoti Ghosh           if (T) {
96949bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
97049bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
97149bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
97249bd79ccSDebojyoti Ghosh               arrt += bs2;
97349bd79ccSDebojyoti Ghosh             }
97449bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
97549bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
97649bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
97749bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
97849bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
97949bd79ccSDebojyoti Ghosh               arrt += bs2;
98049bd79ccSDebojyoti Ghosh             }
98149bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
98249bd79ccSDebojyoti Ghosh           }
98349bd79ccSDebojyoti Ghosh 
98449bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
98549bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
98649bd79ccSDebojyoti Ghosh         }
98749bd79ccSDebojyoti Ghosh       } else {
98849bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
98949bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
99049bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
99149bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
99249bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
99349bd79ccSDebojyoti Ghosh           workt = work;
99449bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
99549bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
99649bd79ccSDebojyoti Ghosh             workt += bs;
99749bd79ccSDebojyoti Ghosh           }
99849bd79ccSDebojyoti Ghosh           arrt = arr;
99949bd79ccSDebojyoti Ghosh           if (T) {
100049bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
100149bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
100249bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
100349bd79ccSDebojyoti Ghosh               arrt += bs2;
100449bd79ccSDebojyoti Ghosh             }
100549bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
100649bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
100749bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
100849bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
100949bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
101049bd79ccSDebojyoti Ghosh               arrt += bs2;
101149bd79ccSDebojyoti Ghosh             }
101249bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
101349bd79ccSDebojyoti Ghosh           }
101449bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
101549bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
101649bd79ccSDebojyoti Ghosh         }
101749bd79ccSDebojyoti Ghosh         idiag -= bs2;
101849bd79ccSDebojyoti Ghosh         i2    -= bs;
101949bd79ccSDebojyoti Ghosh       }
102049bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
102149bd79ccSDebojyoti Ghosh     }
102249bd79ccSDebojyoti Ghosh   }
102349bd79ccSDebojyoti Ghosh 
102449bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
102549bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
102649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
102749bd79ccSDebojyoti Ghosh }
102849bd79ccSDebojyoti Ghosh 
102949bd79ccSDebojyoti Ghosh /*===================================================================================*/
103049bd79ccSDebojyoti Ghosh 
1031836168d5SRichard Tran Mills PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
103249bd79ccSDebojyoti Ghosh {
103349bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
103449bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
103549bd79ccSDebojyoti Ghosh 
103649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
103749bd79ccSDebojyoti Ghosh   if (!yy) {
103849bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
103949bd79ccSDebojyoti Ghosh   } else {
104049bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
104149bd79ccSDebojyoti Ghosh   }
104249bd79ccSDebojyoti Ghosh   /* start the scatter */
104349bd79ccSDebojyoti Ghosh   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
104449bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
104549bd79ccSDebojyoti Ghosh   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
104649bd79ccSDebojyoti Ghosh   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
104749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
104849bd79ccSDebojyoti Ghosh }
104949bd79ccSDebojyoti Ghosh 
1050bb6fb833SRichard Tran Mills PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
105149bd79ccSDebojyoti Ghosh {
105249bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
105349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
1054836168d5SRichard Tran Mills   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
105549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
105649bd79ccSDebojyoti Ghosh }
105749bd79ccSDebojyoti Ghosh 
1058bb6fb833SRichard Tran Mills PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
105949bd79ccSDebojyoti Ghosh {
106049bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
106149bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
106249bd79ccSDebojyoti Ghosh 
106349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
106449bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
106549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
106649bd79ccSDebojyoti Ghosh }
106749bd79ccSDebojyoti Ghosh 
106849bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/
106949bd79ccSDebojyoti Ghosh 
107049bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
107149bd79ccSDebojyoti Ghosh {
107249bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
10731ca5ffdbSRichard Tran Mills   PetscErrorCode  diag = PETSC_FALSE;
10741ca5ffdbSRichard Tran Mills   PetscErrorCode  ierr;
107549bd79ccSDebojyoti Ghosh   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
107649bd79ccSDebojyoti Ghosh   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
107749bd79ccSDebojyoti Ghosh 
107849bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
107949bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
108049bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
108149bd79ccSDebojyoti Ghosh   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
108249bd79ccSDebojyoti Ghosh 
108349bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
108449bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
108549bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
108649bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
108749bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
108849bd79ccSDebojyoti Ghosh   }
108949bd79ccSDebojyoti Ghosh 
109049bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
109149bd79ccSDebojyoti Ghosh     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
109249bd79ccSDebojyoti Ghosh     c     = nzaij;
109349bd79ccSDebojyoti Ghosh     for (i=0; i<nzaij; i++) {
109449bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
109549bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
109649bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
109749bd79ccSDebojyoti Ghosh         c = i;
109849bd79ccSDebojyoti Ghosh       }
109949bd79ccSDebojyoti Ghosh     }
110049bd79ccSDebojyoti Ghosh   } else nzaij = c = 0;
110149bd79ccSDebojyoti Ghosh 
110249bd79ccSDebojyoti Ghosh   /* calculate size of row */
110349bd79ccSDebojyoti Ghosh   nz = 0;
110449bd79ccSDebojyoti Ghosh   if (S)            nz += q;
110549bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
110649bd79ccSDebojyoti Ghosh 
110749bd79ccSDebojyoti Ghosh   if (cols || values) {
110849bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
110938822f9dSRichard Tran Mills     for (i=0; i<q; i++) {
111038822f9dSRichard Tran Mills       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
111138822f9dSRichard Tran Mills       v[i] = 0.0;
111238822f9dSRichard Tran Mills     }
111349bd79ccSDebojyoti Ghosh     if (b->isTI) {
111449bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
111549bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
111649bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
111749bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vaij[i] : 0);
111849bd79ccSDebojyoti Ghosh         }
111949bd79ccSDebojyoti Ghosh       }
112049bd79ccSDebojyoti Ghosh     } else if (T) {
112149bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
112249bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
112349bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
112449bd79ccSDebojyoti Ghosh           v[i*q+j]   = vaij[i]*T[s+j*p];
112549bd79ccSDebojyoti Ghosh         }
112649bd79ccSDebojyoti Ghosh       }
112749bd79ccSDebojyoti Ghosh     }
112849bd79ccSDebojyoti Ghosh     if (S) {
112949bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
113049bd79ccSDebojyoti Ghosh         idx[c*q+j] = r*q+j;
113149bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
113249bd79ccSDebojyoti Ghosh       }
113349bd79ccSDebojyoti Ghosh     }
113449bd79ccSDebojyoti Ghosh   }
113549bd79ccSDebojyoti Ghosh 
113649bd79ccSDebojyoti Ghosh   if (ncols)    *ncols  = nz;
113749bd79ccSDebojyoti Ghosh   if (cols)     *cols   = idx;
113849bd79ccSDebojyoti Ghosh   if (values)   *values = v;
113949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
114049bd79ccSDebojyoti Ghosh }
114149bd79ccSDebojyoti Ghosh 
114249bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
114349bd79ccSDebojyoti Ghosh {
114449bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
114549bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
114649bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
114749bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
114849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
114949bd79ccSDebojyoti Ghosh }
115049bd79ccSDebojyoti Ghosh 
115149bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
115249bd79ccSDebojyoti Ghosh {
115349bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
115449bd79ccSDebojyoti Ghosh   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
115549bd79ccSDebojyoti Ghosh   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
115649bd79ccSDebojyoti Ghosh   Mat             AIJ     = b->A;
1157fc64b2cfSRichard Tran Mills   PetscBool       diag    = PETSC_FALSE;
115849bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
115949bd79ccSDebojyoti Ghosh   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
116049bd79ccSDebojyoti Ghosh   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
116149bd79ccSDebojyoti Ghosh   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
116249bd79ccSDebojyoti Ghosh 
116349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
116449bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
116549bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
116649bd79ccSDebojyoti Ghosh   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
116749bd79ccSDebojyoti Ghosh   lrow = row - rstart;
116849bd79ccSDebojyoti Ghosh 
116949bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
117049bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
117149bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
117249bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
117349bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
117449bd79ccSDebojyoti Ghosh   }
117549bd79ccSDebojyoti Ghosh 
117649bd79ccSDebojyoti Ghosh   r = lrow/p;
117749bd79ccSDebojyoti Ghosh   s = lrow%p;
117849bd79ccSDebojyoti Ghosh 
117949bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
118049bd79ccSDebojyoti Ghosh     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
118149bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
118249bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
118349bd79ccSDebojyoti Ghosh 
118449bd79ccSDebojyoti Ghosh     c     = ncolsaij + ncolsoaij;
118549bd79ccSDebojyoti Ghosh     for (i=0; i<ncolsaij; i++) {
118649bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
118749bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
118849bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
118949bd79ccSDebojyoti Ghosh         c = i;
119049bd79ccSDebojyoti Ghosh       }
119149bd79ccSDebojyoti Ghosh     }
119249bd79ccSDebojyoti Ghosh   } else c = 0;
119349bd79ccSDebojyoti Ghosh 
119449bd79ccSDebojyoti Ghosh   /* calculate size of row */
119549bd79ccSDebojyoti Ghosh   nz = 0;
119649bd79ccSDebojyoti Ghosh   if (S)            nz += q;
119749bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
119849bd79ccSDebojyoti Ghosh 
119949bd79ccSDebojyoti Ghosh   if (cols || values) {
120049bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1201a437a796SRichard Tran Mills     for (i=0; i<q; i++) {
1202a437a796SRichard Tran Mills       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1203a437a796SRichard Tran Mills       v[i] = 0.0;
1204a437a796SRichard Tran Mills     }
120549bd79ccSDebojyoti Ghosh     if (b->isTI) {
120649bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
120749bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
120849bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
120949bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vals[i] : 0.0);
121049bd79ccSDebojyoti Ghosh         }
121149bd79ccSDebojyoti Ghosh       }
121249bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
121349bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
121449bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
121549bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
121649bd79ccSDebojyoti Ghosh         }
121749bd79ccSDebojyoti Ghosh       }
121849bd79ccSDebojyoti Ghosh     } else if (T) {
121949bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
122049bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
122149bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
122249bd79ccSDebojyoti Ghosh           v[i*q+j]   = vals[i]*T[s+j*p];
122349bd79ccSDebojyoti Ghosh         }
122449bd79ccSDebojyoti Ghosh       }
122549bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
122649bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
122749bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
122849bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
122949bd79ccSDebojyoti Ghosh         }
123049bd79ccSDebojyoti Ghosh       }
123149bd79ccSDebojyoti Ghosh     }
123249bd79ccSDebojyoti Ghosh     if (S) {
123349bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
123449bd79ccSDebojyoti Ghosh         idx[c*q+j] = (r+rstart/p)*q+j;
123549bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
123649bd79ccSDebojyoti Ghosh       }
123749bd79ccSDebojyoti Ghosh     }
123849bd79ccSDebojyoti Ghosh   }
123949bd79ccSDebojyoti Ghosh 
124049bd79ccSDebojyoti Ghosh   if (ncols)  *ncols  = nz;
124149bd79ccSDebojyoti Ghosh   if (cols)   *cols   = idx;
124249bd79ccSDebojyoti Ghosh   if (values) *values = v;
124349bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
124449bd79ccSDebojyoti Ghosh }
124549bd79ccSDebojyoti Ghosh 
124649bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
124749bd79ccSDebojyoti Ghosh {
124849bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
124949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
125049bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
125149bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
125249bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
125349bd79ccSDebojyoti Ghosh }
125449bd79ccSDebojyoti Ghosh 
125549bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
125649bd79ccSDebojyoti Ghosh {
125749bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
125849bd79ccSDebojyoti Ghosh   Mat            A;
125949bd79ccSDebojyoti Ghosh 
126049bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
126149bd79ccSDebojyoti Ghosh   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
126249bd79ccSDebojyoti Ghosh   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
126349bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&A);CHKERRQ(ierr);
126449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
126549bd79ccSDebojyoti Ghosh }
126649bd79ccSDebojyoti Ghosh 
126749bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */
126849bd79ccSDebojyoti Ghosh /*@C
126949bd79ccSDebojyoti Ghosh   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
127049bd79ccSDebojyoti Ghosh 
127149bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
127249bd79ccSDebojyoti Ghosh 
127349bd79ccSDebojyoti Ghosh   where
127449bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
127549bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
127649bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
127749bd79ccSDebojyoti Ghosh     I is the identity matrix
127849bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
127949bd79ccSDebojyoti Ghosh 
1280d3b90ce1SRichard Tran Mills   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
128149bd79ccSDebojyoti Ghosh 
128249bd79ccSDebojyoti Ghosh   Collective
128349bd79ccSDebojyoti Ghosh 
128449bd79ccSDebojyoti Ghosh   Input Parameters:
128549bd79ccSDebojyoti Ghosh + A - the AIJ matrix
128649bd79ccSDebojyoti Ghosh . p - number of rows in S and T
1287d3b90ce1SRichard Tran Mills . q - number of columns in S and T
1288d3b90ce1SRichard Tran Mills . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1289d3b90ce1SRichard Tran Mills - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
129049bd79ccSDebojyoti Ghosh 
129149bd79ccSDebojyoti Ghosh   Output Parameter:
129249bd79ccSDebojyoti Ghosh . kaij - the new KAIJ matrix
129349bd79ccSDebojyoti Ghosh 
1294d3b90ce1SRichard Tran Mills   Notes:
1295d3b90ce1SRichard Tran Mills   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1296d3b90ce1SRichard Tran Mills   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
129749bd79ccSDebojyoti Ghosh 
129849bd79ccSDebojyoti Ghosh   Level: advanced
129949bd79ccSDebojyoti Ghosh 
13000567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
130149bd79ccSDebojyoti Ghosh @*/
130249bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
130349bd79ccSDebojyoti Ghosh {
130449bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
130549bd79ccSDebojyoti Ghosh   PetscMPIInt    size;
130649bd79ccSDebojyoti Ghosh 
130749bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
13080567c835SRichard Tran Mills   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
130949bd79ccSDebojyoti Ghosh   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
131049bd79ccSDebojyoti Ghosh   if (size == 1) {
13110567c835SRichard Tran Mills     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
131249bd79ccSDebojyoti Ghosh   } else {
13130567c835SRichard Tran Mills     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
131449bd79ccSDebojyoti Ghosh   }
13150567c835SRichard Tran Mills   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
13160567c835SRichard Tran Mills   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
13170567c835SRichard Tran Mills   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
13180567c835SRichard Tran Mills   ierr = MatSetUp(*kaij);
13190567c835SRichard Tran Mills   PetscFunctionReturn(0);
13200567c835SRichard Tran Mills }
132149bd79ccSDebojyoti Ghosh 
13220567c835SRichard Tran Mills /*MC
13230567c835SRichard Tran Mills   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
13240567c835SRichard Tran Mills 
13250567c835SRichard Tran Mills     [I \otimes S + A \otimes T]
13260567c835SRichard Tran Mills 
13270567c835SRichard Tran Mills   where
13280567c835SRichard Tran Mills     S is a dense (p \times q) matrix
13290567c835SRichard Tran Mills     T is a dense (p \times q) matrix
13300567c835SRichard Tran Mills     A is an AIJ  (n \times n) matrix
13310567c835SRichard Tran Mills     I is the identity matrix
13320567c835SRichard Tran Mills   The resulting matrix is (np \times nq)
13330567c835SRichard Tran Mills 
1334d3b90ce1SRichard Tran Mills   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
13350567c835SRichard Tran Mills 
13360567c835SRichard Tran Mills   Level: advanced
13370567c835SRichard Tran Mills 
13380567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
13390567c835SRichard Tran Mills M*/
13400567c835SRichard Tran Mills 
13410567c835SRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
13420567c835SRichard Tran Mills {
13430567c835SRichard Tran Mills   PetscErrorCode ierr;
13440567c835SRichard Tran Mills   Mat_MPIKAIJ    *b;
13450567c835SRichard Tran Mills   PetscMPIInt    size;
13460567c835SRichard Tran Mills 
13470567c835SRichard Tran Mills   PetscFunctionBegin;
13480567c835SRichard Tran Mills   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
13490567c835SRichard Tran Mills   A->data  = (void*)b;
13500567c835SRichard Tran Mills 
13510567c835SRichard Tran Mills   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
13520567c835SRichard Tran Mills 
13530567c835SRichard Tran Mills   A->ops->setup = MatSetUp_KAIJ;
13540567c835SRichard Tran Mills 
13550567c835SRichard Tran Mills   b->w    = 0;
13560567c835SRichard Tran Mills   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
13570567c835SRichard Tran Mills   if (size == 1) {
13580567c835SRichard Tran Mills     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
13590567c835SRichard Tran Mills     A->ops->setup               = MatSetUp_KAIJ;
13600567c835SRichard Tran Mills     A->ops->destroy             = MatDestroy_SeqKAIJ;
1361e6985dafSRichard Tran Mills     A->ops->view                = MatView_KAIJ;
1362bb6fb833SRichard Tran Mills     A->ops->mult                = MatMult_SeqKAIJ;
1363bb6fb833SRichard Tran Mills     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1364bb6fb833SRichard Tran Mills     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
13650567c835SRichard Tran Mills     A->ops->getrow              = MatGetRow_SeqKAIJ;
13660567c835SRichard Tran Mills     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
13670567c835SRichard Tran Mills     A->ops->sor                 = MatSOR_SeqKAIJ;
13680567c835SRichard Tran Mills   } else {
13690567c835SRichard Tran Mills     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
13700567c835SRichard Tran Mills     A->ops->setup               = MatSetUp_KAIJ;
13710567c835SRichard Tran Mills     A->ops->destroy             = MatDestroy_MPIKAIJ;
1372e6985dafSRichard Tran Mills     A->ops->view                = MatView_KAIJ;
1373bb6fb833SRichard Tran Mills     A->ops->mult                = MatMult_MPIKAIJ;
1374bb6fb833SRichard Tran Mills     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1375bb6fb833SRichard Tran Mills     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
13760567c835SRichard Tran Mills     A->ops->getrow              = MatGetRow_MPIKAIJ;
13770567c835SRichard Tran Mills     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
13780567c835SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
13790567c835SRichard Tran Mills   }
13800567c835SRichard Tran Mills   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
138149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
138249bd79ccSDebojyoti Ghosh }
1383