xref: /petsc/src/mat/impls/kaij/kaij.c (revision a5b5c723ae2a227082d33696a25cd0d59883d64a)
149bd79ccSDebojyoti Ghosh 
249bd79ccSDebojyoti Ghosh /*
349bd79ccSDebojyoti Ghosh   Defines the basic matrix operations for the KAIJ  matrix storage format.
449bd79ccSDebojyoti Ghosh   This format is used to evaluate matrices of the form:
549bd79ccSDebojyoti Ghosh 
649bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
749bd79ccSDebojyoti Ghosh 
849bd79ccSDebojyoti Ghosh   where
949bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
1049bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
1149bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
1249bd79ccSDebojyoti Ghosh     I is the identity matrix
1349bd79ccSDebojyoti Ghosh 
1449bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
1549bd79ccSDebojyoti Ghosh 
1649bd79ccSDebojyoti Ghosh   We provide:
1749bd79ccSDebojyoti Ghosh      MatMult()
1849bd79ccSDebojyoti Ghosh      MatMultAdd()
1949bd79ccSDebojyoti Ghosh      MatInvertBlockDiagonal()
2049bd79ccSDebojyoti Ghosh   and
2149bd79ccSDebojyoti Ghosh      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
2249bd79ccSDebojyoti Ghosh 
2349bd79ccSDebojyoti Ghosh   This single directory handles both the sequential and parallel codes
2449bd79ccSDebojyoti Ghosh */
2549bd79ccSDebojyoti Ghosh 
2649bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
2749bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h>
2849bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h>
2949bd79ccSDebojyoti Ghosh 
3049bd79ccSDebojyoti Ghosh /*@C
3149bd79ccSDebojyoti Ghosh    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
3249bd79ccSDebojyoti Ghosh 
3349bd79ccSDebojyoti Ghosh    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
3449bd79ccSDebojyoti Ghosh 
3549bd79ccSDebojyoti Ghosh    Input Parameter:
3649bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
3749bd79ccSDebojyoti Ghosh 
3849bd79ccSDebojyoti Ghosh    Output Parameter:
3949bd79ccSDebojyoti Ghosh .  B - the AIJ matrix
4049bd79ccSDebojyoti Ghosh 
4149bd79ccSDebojyoti Ghosh    Level: advanced
4249bd79ccSDebojyoti Ghosh 
4349bd79ccSDebojyoti Ghosh    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
4449bd79ccSDebojyoti Ghosh 
4549bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ()
4649bd79ccSDebojyoti Ghosh @*/
4749bd79ccSDebojyoti Ghosh PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
4849bd79ccSDebojyoti Ghosh {
4949bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
5049bd79ccSDebojyoti Ghosh   PetscBool      ismpikaij,isseqkaij;
5149bd79ccSDebojyoti Ghosh 
5249bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
5349bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
5449bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
5549bd79ccSDebojyoti Ghosh   if (ismpikaij) {
5649bd79ccSDebojyoti Ghosh     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
5749bd79ccSDebojyoti Ghosh 
5849bd79ccSDebojyoti Ghosh     *B = b->A;
5949bd79ccSDebojyoti Ghosh   } else if (isseqkaij) {
6049bd79ccSDebojyoti Ghosh     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
6149bd79ccSDebojyoti Ghosh 
6249bd79ccSDebojyoti Ghosh     *B = b->AIJ;
63b04351cbSRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
6449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
6549bd79ccSDebojyoti Ghosh }
6649bd79ccSDebojyoti Ghosh 
6749bd79ccSDebojyoti Ghosh /*@C
6849bd79ccSDebojyoti Ghosh    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
6949bd79ccSDebojyoti Ghosh 
700567c835SRichard Tran Mills    Not Collective; the entire S is stored and returned independently on all processes.
7149bd79ccSDebojyoti Ghosh 
7249bd79ccSDebojyoti Ghosh    Input Parameter:
7349bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
7449bd79ccSDebojyoti Ghosh 
75*a5b5c723SRichard Tran Mills    Output Parameters:
76*a5b5c723SRichard Tran Mills +  m - the number of rows in S
77*a5b5c723SRichard Tran Mills .  n - the number of columns in S
78*a5b5c723SRichard Tran Mills -  S - the S matrix, in form of a scalar array in column-major format
7949bd79ccSDebojyoti Ghosh 
80*a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
8131a97b9aSRichard Tran Mills 
8249bd79ccSDebojyoti Ghosh    Level: advanced
8349bd79ccSDebojyoti Ghosh 
8431a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
8549bd79ccSDebojyoti Ghosh @*/
86*a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
8749bd79ccSDebojyoti Ghosh {
8849bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
8949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
90*a5b5c723SRichard Tran Mills   if (m) *m = b->p;
91*a5b5c723SRichard Tran Mills   if (n) *n = b->q;
92*a5b5c723SRichard Tran Mills   if (S) *S = b->S;
93*a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
94*a5b5c723SRichard Tran Mills }
95*a5b5c723SRichard Tran Mills 
96*a5b5c723SRichard Tran Mills /*@C
97*a5b5c723SRichard Tran Mills    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98*a5b5c723SRichard Tran Mills 
99*a5b5c723SRichard Tran Mills    Not Collective; the entire S is stored and returned independently on all processes.
100*a5b5c723SRichard Tran Mills 
101*a5b5c723SRichard Tran Mills    Input Parameter:
102*a5b5c723SRichard Tran Mills .  A - the KAIJ matrix
103*a5b5c723SRichard Tran Mills 
104*a5b5c723SRichard Tran Mills    Output Parameters:
105*a5b5c723SRichard Tran Mills +  m - the number of rows in S
106*a5b5c723SRichard Tran Mills .  n - the number of columns in S
107*a5b5c723SRichard Tran Mills -  S - the S matrix, in form of a scalar array in column-major format
108*a5b5c723SRichard Tran Mills 
109*a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110*a5b5c723SRichard Tran Mills 
111*a5b5c723SRichard Tran Mills    Level: advanced
112*a5b5c723SRichard Tran Mills 
113*a5b5c723SRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114*a5b5c723SRichard Tran Mills @*/
115*a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116*a5b5c723SRichard Tran Mills {
117*a5b5c723SRichard Tran Mills   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118*a5b5c723SRichard Tran Mills   PetscFunctionBegin;
119*a5b5c723SRichard Tran Mills   if (m) *m = b->p;
120*a5b5c723SRichard Tran Mills   if (n) *n = b->q;
121*a5b5c723SRichard Tran Mills   if (S) *S = b->S;
122*a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
123*a5b5c723SRichard Tran Mills }
124*a5b5c723SRichard Tran Mills 
125*a5b5c723SRichard Tran Mills /*@C
126*a5b5c723SRichard Tran Mills   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127*a5b5c723SRichard Tran Mills 
128*a5b5c723SRichard Tran Mills   Not collective
129*a5b5c723SRichard Tran Mills 
130*a5b5c723SRichard Tran Mills   Input Parameter:
131*a5b5c723SRichard Tran Mills . A - the KAIJ matrix
132*a5b5c723SRichard Tran Mills 
133*a5b5c723SRichard Tran Mills   Output Parameter:
134*a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS()
135*a5b5c723SRichard Tran Mills 
136*a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137*a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
138*a5b5c723SRichard Tran Mills 
139*a5b5c723SRichard Tran Mills   Level: advanced
140*a5b5c723SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141*a5b5c723SRichard Tran Mills @*/
142*a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143*a5b5c723SRichard Tran Mills {
144*a5b5c723SRichard Tran Mills   PetscFunctionBegin;
145*a5b5c723SRichard Tran Mills   if (S) *S = NULL;
146*a5b5c723SRichard Tran Mills   PetscObjectStateIncrease((PetscObject)A);
147*a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
148*a5b5c723SRichard Tran Mills }
149*a5b5c723SRichard Tran Mills 
150*a5b5c723SRichard Tran Mills /*@C
151*a5b5c723SRichard Tran Mills   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
152*a5b5c723SRichard Tran Mills 
153*a5b5c723SRichard Tran Mills   Not collective
154*a5b5c723SRichard Tran Mills 
155*a5b5c723SRichard Tran Mills   Input Parameter:
156*a5b5c723SRichard Tran Mills . A - the KAIJ matrix
157*a5b5c723SRichard Tran Mills 
158*a5b5c723SRichard Tran Mills   Output Parameter:
159*a5b5c723SRichard Tran Mills . S - location of pointer to array obtained with MatKAIJGetS()
160*a5b5c723SRichard Tran Mills 
161*a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
162*a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
163*a5b5c723SRichard Tran Mills 
164*a5b5c723SRichard Tran Mills   Level: advanced
165*a5b5c723SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
166*a5b5c723SRichard Tran Mills @*/
167*a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
168*a5b5c723SRichard Tran Mills {
169*a5b5c723SRichard Tran Mills   PetscFunctionBegin;
170*a5b5c723SRichard Tran Mills   if (S) *S = NULL;
17149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
17249bd79ccSDebojyoti Ghosh }
17349bd79ccSDebojyoti Ghosh 
17449bd79ccSDebojyoti Ghosh /*@C
17531a97b9aSRichard Tran Mills    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
17649bd79ccSDebojyoti Ghosh 
1770567c835SRichard Tran Mills    Not Collective; the entire T is stored and returned independently on all processes
17849bd79ccSDebojyoti Ghosh 
17949bd79ccSDebojyoti Ghosh    Input Parameter:
18049bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
18149bd79ccSDebojyoti Ghosh 
18249bd79ccSDebojyoti Ghosh    Output Parameter:
183*a5b5c723SRichard Tran Mills +  m - the number of rows in T
184*a5b5c723SRichard Tran Mills .  n - the number of columns in T
185*a5b5c723SRichard Tran Mills -  T - the T matrix, in form of a scalar array in column-major format
18649bd79ccSDebojyoti Ghosh 
187*a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
18831a97b9aSRichard Tran Mills 
18949bd79ccSDebojyoti Ghosh    Level: advanced
19049bd79ccSDebojyoti Ghosh 
19131a97b9aSRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
19249bd79ccSDebojyoti Ghosh @*/
193*a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
19449bd79ccSDebojyoti Ghosh {
19549bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
19649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
197*a5b5c723SRichard Tran Mills   if (m) *m = b->p;
198*a5b5c723SRichard Tran Mills   if (n) *n = b->q;
199*a5b5c723SRichard Tran Mills   if (T) *T = b->T;
200*a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
201*a5b5c723SRichard Tran Mills }
202*a5b5c723SRichard Tran Mills 
203*a5b5c723SRichard Tran Mills /*@C
204*a5b5c723SRichard Tran Mills    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
205*a5b5c723SRichard Tran Mills 
206*a5b5c723SRichard Tran Mills    Not Collective; the entire T is stored and returned independently on all processes
207*a5b5c723SRichard Tran Mills 
208*a5b5c723SRichard Tran Mills    Input Parameter:
209*a5b5c723SRichard Tran Mills .  A - the KAIJ matrix
210*a5b5c723SRichard Tran Mills 
211*a5b5c723SRichard Tran Mills    Output Parameter:
212*a5b5c723SRichard Tran Mills +  m - the number of rows in T
213*a5b5c723SRichard Tran Mills .  n - the number of columns in T
214*a5b5c723SRichard Tran Mills -  T - the T matrix, in form of a scalar array in column-major format
215*a5b5c723SRichard Tran Mills 
216*a5b5c723SRichard Tran Mills    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
217*a5b5c723SRichard Tran Mills 
218*a5b5c723SRichard Tran Mills    Level: advanced
219*a5b5c723SRichard Tran Mills 
220*a5b5c723SRichard Tran Mills .seealso: MatCreateKAIJ(), MatGetBlockSizes()
221*a5b5c723SRichard Tran Mills @*/
222*a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
223*a5b5c723SRichard Tran Mills {
224*a5b5c723SRichard Tran Mills   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
225*a5b5c723SRichard Tran Mills   PetscFunctionBegin;
226*a5b5c723SRichard Tran Mills   if (m) *m = b->p;
227*a5b5c723SRichard Tran Mills   if (n) *n = b->q;
228*a5b5c723SRichard Tran Mills   if (T) *T = b->T;
229*a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
230*a5b5c723SRichard Tran Mills }
231*a5b5c723SRichard Tran Mills 
232*a5b5c723SRichard Tran Mills /*@C
233*a5b5c723SRichard Tran Mills   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
234*a5b5c723SRichard Tran Mills 
235*a5b5c723SRichard Tran Mills   Not collective
236*a5b5c723SRichard Tran Mills 
237*a5b5c723SRichard Tran Mills   Input Parameter:
238*a5b5c723SRichard Tran Mills . A - the KAIJ matrix
239*a5b5c723SRichard Tran Mills 
240*a5b5c723SRichard Tran Mills   Output Parameter:
241*a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS()
242*a5b5c723SRichard Tran Mills 
243*a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
244*a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
245*a5b5c723SRichard Tran Mills 
246*a5b5c723SRichard Tran Mills   Level: advanced
247*a5b5c723SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
248*a5b5c723SRichard Tran Mills @*/
249*a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
250*a5b5c723SRichard Tran Mills {
251*a5b5c723SRichard Tran Mills   PetscFunctionBegin;
252*a5b5c723SRichard Tran Mills   if (T) *T = NULL;
253*a5b5c723SRichard Tran Mills   PetscObjectStateIncrease((PetscObject)A);
254*a5b5c723SRichard Tran Mills   PetscFunctionReturn(0);
255*a5b5c723SRichard Tran Mills }
256*a5b5c723SRichard Tran Mills 
257*a5b5c723SRichard Tran Mills /*@C
258*a5b5c723SRichard Tran Mills   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
259*a5b5c723SRichard Tran Mills 
260*a5b5c723SRichard Tran Mills   Not collective
261*a5b5c723SRichard Tran Mills 
262*a5b5c723SRichard Tran Mills   Input Parameter:
263*a5b5c723SRichard Tran Mills . A - the KAIJ matrix
264*a5b5c723SRichard Tran Mills 
265*a5b5c723SRichard Tran Mills   Output Parameter:
266*a5b5c723SRichard Tran Mills . T - location of pointer to array obtained with MatKAIJGetS()
267*a5b5c723SRichard Tran Mills 
268*a5b5c723SRichard Tran Mills   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
269*a5b5c723SRichard Tran Mills   If NULL is passed, it will not attempt to zero the array pointer.
270*a5b5c723SRichard Tran Mills 
271*a5b5c723SRichard Tran Mills   Level: advanced
272*a5b5c723SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
273*a5b5c723SRichard Tran Mills @*/
274*a5b5c723SRichard Tran Mills PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
275*a5b5c723SRichard Tran Mills {
276*a5b5c723SRichard Tran Mills   PetscFunctionBegin;
277*a5b5c723SRichard Tran Mills   if (T) *T = NULL;
27849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
27949bd79ccSDebojyoti Ghosh }
28049bd79ccSDebojyoti Ghosh 
2810567c835SRichard Tran Mills /*@
2820567c835SRichard Tran Mills    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
2830567c835SRichard Tran Mills 
2840567c835SRichard Tran Mills    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
2850567c835SRichard Tran Mills 
2860567c835SRichard Tran Mills    Input Parameters:
2870567c835SRichard Tran Mills +  A - the KAIJ matrix
2880567c835SRichard Tran Mills -  B - the AIJ matrix
2890567c835SRichard Tran Mills 
2900567c835SRichard Tran Mills    Level: advanced
2910567c835SRichard Tran Mills 
2920567c835SRichard Tran Mills .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
2930567c835SRichard Tran Mills @*/
2940567c835SRichard Tran Mills PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
2950567c835SRichard Tran Mills {
2960567c835SRichard Tran Mills   PetscErrorCode ierr;
2970567c835SRichard Tran Mills   PetscMPIInt    size;
2980567c835SRichard Tran Mills 
2990567c835SRichard Tran Mills   PetscFunctionBegin;
3000567c835SRichard Tran Mills   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3010567c835SRichard Tran Mills   if (size == 1) {
3020567c835SRichard Tran Mills     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
3030567c835SRichard Tran Mills     a->AIJ = B;
3040567c835SRichard Tran Mills   } else {
3050567c835SRichard Tran Mills     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
3060567c835SRichard Tran Mills     a->A = B;
3070567c835SRichard Tran Mills   }
3080567c835SRichard Tran Mills   PetscFunctionReturn(0);
3090567c835SRichard Tran Mills }
3100567c835SRichard Tran Mills 
3110567c835SRichard Tran Mills /*@C
3120567c835SRichard Tran Mills    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
3130567c835SRichard Tran Mills 
3140567c835SRichard Tran Mills    Logically Collective; the entire S is stored independently on all processes.
3150567c835SRichard Tran Mills 
3160567c835SRichard Tran Mills    Input Parameters:
3170567c835SRichard Tran Mills +  A - the KAIJ matrix
3180567c835SRichard Tran Mills .  p - the number of rows in S
3190567c835SRichard Tran Mills .  q - the number of columns in S
3200567c835SRichard Tran Mills -  S - the S matrix, in form of a scalar array in column-major format
3210567c835SRichard Tran Mills 
3220567c835SRichard Tran Mills    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
3230567c835SRichard Tran Mills 
3240567c835SRichard Tran Mills    Level: Advanced
3250567c835SRichard Tran Mills 
3260567c835SRichard Tran Mills .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
3270567c835SRichard Tran Mills @*/
3280567c835SRichard Tran Mills PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
3290567c835SRichard Tran Mills {
3300567c835SRichard Tran Mills   PetscErrorCode ierr;
3310567c835SRichard Tran Mills   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
3320567c835SRichard Tran Mills 
3330567c835SRichard Tran Mills   PetscFunctionBegin;
3340567c835SRichard Tran Mills   ierr = PetscFree(a->S);CHKERRQ(ierr);
3350567c835SRichard Tran Mills   if (S) {
336a84f8069SRichard Tran Mills     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
3370567c835SRichard Tran Mills     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
3380567c835SRichard Tran Mills   } else  a->S = NULL;
3390567c835SRichard Tran Mills 
3400567c835SRichard Tran Mills   a->p = p;
3410567c835SRichard Tran Mills   a->q = q;
3420567c835SRichard Tran Mills   PetscFunctionReturn(0);
3430567c835SRichard Tran Mills }
3440567c835SRichard Tran Mills 
3450567c835SRichard Tran Mills /*@C
3460567c835SRichard Tran Mills    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
3470567c835SRichard Tran Mills 
3480567c835SRichard Tran Mills    Logically Collective; the entire T is stored independently on all processes.
3490567c835SRichard Tran Mills 
3500567c835SRichard Tran Mills    Input Parameters:
3510567c835SRichard Tran Mills +  A - the KAIJ matrix
3520567c835SRichard Tran Mills .  p - the number of rows in S
3530567c835SRichard Tran Mills .  q - the number of columns in S
3540567c835SRichard Tran Mills -  T - the T matrix, in form of a scalar array in column-major format
3550567c835SRichard Tran Mills 
3560567c835SRichard Tran Mills    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
3570567c835SRichard Tran Mills 
3580567c835SRichard Tran Mills    Level: Advanced
3590567c835SRichard Tran Mills 
3600567c835SRichard Tran Mills .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
3610567c835SRichard Tran Mills @*/
3620567c835SRichard Tran Mills PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
3630567c835SRichard Tran Mills {
3640567c835SRichard Tran Mills   PetscErrorCode ierr;
3650567c835SRichard Tran Mills   PetscInt       i,j;
3660567c835SRichard Tran Mills   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
3670567c835SRichard Tran Mills   PetscBool      isTI = PETSC_FALSE;
3680567c835SRichard Tran Mills 
3690567c835SRichard Tran Mills   PetscFunctionBegin;
3700567c835SRichard Tran Mills   /* check if T is an identity matrix */
3710567c835SRichard Tran Mills   if (T && (p == q)) {
3720567c835SRichard Tran Mills     isTI = PETSC_TRUE;
3730567c835SRichard Tran Mills     for (i=0; i<p; i++) {
3740567c835SRichard Tran Mills       for (j=0; j<q; j++) {
3750567c835SRichard Tran Mills         if (i == j) {
3760567c835SRichard Tran Mills           /* diagonal term must be 1 */
3770567c835SRichard Tran Mills           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
3780567c835SRichard Tran Mills         } else {
3790567c835SRichard Tran Mills           /* off-diagonal term must be 0 */
3800567c835SRichard Tran Mills           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
3810567c835SRichard Tran Mills         }
3820567c835SRichard Tran Mills       }
3830567c835SRichard Tran Mills     }
3840567c835SRichard Tran Mills   }
3850567c835SRichard Tran Mills   a->isTI = isTI;
3860567c835SRichard Tran Mills 
3870567c835SRichard Tran Mills   ierr = PetscFree(a->T);CHKERRQ(ierr);
3880567c835SRichard Tran Mills   if (T && (!isTI)) {
389a84f8069SRichard Tran Mills     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
3900567c835SRichard Tran Mills     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
39150d19d74SRichard Tran Mills   } else a->T = NULL;
3920567c835SRichard Tran Mills 
3930567c835SRichard Tran Mills   a->p = p;
3940567c835SRichard Tran Mills   a->q = q;
3950567c835SRichard Tran Mills   PetscFunctionReturn(0);
3960567c835SRichard Tran Mills }
3970567c835SRichard Tran Mills 
39849bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
39949bd79ccSDebojyoti Ghosh {
40049bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
40149bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
40249bd79ccSDebojyoti Ghosh 
40349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
40449bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
405a84f8069SRichard Tran Mills   ierr = PetscFree(b->S);CHKERRQ(ierr);
406a84f8069SRichard Tran Mills   ierr = PetscFree(b->T);CHKERRQ(ierr);
407a84f8069SRichard Tran Mills   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
40849bd79ccSDebojyoti Ghosh   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
40949bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
41049bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
41149bd79ccSDebojyoti Ghosh }
41249bd79ccSDebojyoti Ghosh 
41349bd79ccSDebojyoti Ghosh PetscErrorCode MatSetUp_KAIJ(Mat A)
41449bd79ccSDebojyoti Ghosh {
4150567c835SRichard Tran Mills   PetscErrorCode ierr;
4160567c835SRichard Tran Mills   PetscInt       n;
4170567c835SRichard Tran Mills   PetscMPIInt    size;
4180567c835SRichard Tran Mills   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
4190567c835SRichard Tran Mills 
42049bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
4210567c835SRichard Tran Mills   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
4220567c835SRichard Tran Mills   if (size == 1) {
4230567c835SRichard Tran Mills     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
4240567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
4250567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
4260567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
4270567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
4280567c835SRichard Tran Mills     ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr);
4290567c835SRichard Tran Mills   } else {
4300567c835SRichard Tran Mills     Mat_MPIKAIJ *a;
4310567c835SRichard Tran Mills     Mat_MPIAIJ  *mpiaij;
4320567c835SRichard Tran Mills     IS          from,to;
4330567c835SRichard Tran Mills     Vec         gvec;
4340567c835SRichard Tran Mills     PetscScalar *T;
4350567c835SRichard Tran Mills     PetscInt    i,j;
4360567c835SRichard Tran Mills 
4370567c835SRichard Tran Mills     a = (Mat_MPIKAIJ*)A->data;
438d3f912faSRichard Tran Mills     mpiaij = (Mat_MPIAIJ*)a->A->data;
4390567c835SRichard Tran Mills     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
4400567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
4410567c835SRichard Tran Mills     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
4420567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
4430567c835SRichard Tran Mills     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
4440567c835SRichard Tran Mills     ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr);
4450567c835SRichard Tran Mills 
4460567c835SRichard Tran Mills     if (a->isTI) {
4470567c835SRichard Tran Mills       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
4480567c835SRichard Tran Mills        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
4490567c835SRichard Tran Mills        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
4500567c835SRichard Tran Mills        * to pass in. */
451a84f8069SRichard Tran Mills       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
4520567c835SRichard Tran Mills       for (i=0; i<a->p; i++) {
4530567c835SRichard Tran Mills         for (j=0; j<a->q; j++) {
4540567c835SRichard Tran Mills           if (i==j) T[i+j*a->p] = 1.0;
4550567c835SRichard Tran Mills           else      T[i+j*a->p] = 0.0;
4560567c835SRichard Tran Mills         }
4570567c835SRichard Tran Mills       }
4580567c835SRichard Tran Mills     } else T = a->T;
4590567c835SRichard Tran Mills     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
4600567c835SRichard Tran Mills     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
4610567c835SRichard Tran Mills     ierr = PetscFree(T);CHKERRQ(ierr);
4620567c835SRichard Tran Mills 
4630567c835SRichard Tran Mills     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
4640567c835SRichard Tran Mills     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
4650567c835SRichard Tran Mills     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
4660567c835SRichard Tran Mills     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
4670567c835SRichard Tran Mills     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
4680567c835SRichard Tran Mills 
4690567c835SRichard Tran Mills     /* create two temporary Index sets for build scatter gather */
4700567c835SRichard Tran Mills     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
4710567c835SRichard Tran Mills     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
4720567c835SRichard Tran Mills 
4730567c835SRichard Tran Mills     /* create temporary global vector to generate scatter context */
4740567c835SRichard Tran Mills     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
4750567c835SRichard Tran Mills 
4760567c835SRichard Tran Mills     /* generate the scatter context */
4774589b4e5SRichard Tran Mills     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
4780567c835SRichard Tran Mills 
4790567c835SRichard Tran Mills     ierr = ISDestroy(&from);CHKERRQ(ierr);
4800567c835SRichard Tran Mills     ierr = ISDestroy(&to);CHKERRQ(ierr);
4810567c835SRichard Tran Mills     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
4820567c835SRichard Tran Mills   }
4830567c835SRichard Tran Mills 
4840567c835SRichard Tran Mills   A->assembled = PETSC_TRUE;
48549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
48649bd79ccSDebojyoti Ghosh }
48749bd79ccSDebojyoti Ghosh 
48849bd79ccSDebojyoti Ghosh PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
48949bd79ccSDebojyoti Ghosh {
49049bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
49149bd79ccSDebojyoti Ghosh   Mat            B;
49249bd79ccSDebojyoti Ghosh 
49349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
49449bd79ccSDebojyoti Ghosh   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
49549bd79ccSDebojyoti Ghosh   ierr = MatView(B,viewer);CHKERRQ(ierr);
49649bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&B);CHKERRQ(ierr);
49749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
49849bd79ccSDebojyoti Ghosh }
49949bd79ccSDebojyoti Ghosh 
50049bd79ccSDebojyoti Ghosh PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
50149bd79ccSDebojyoti Ghosh {
50249bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
50349bd79ccSDebojyoti Ghosh   Mat            B;
50449bd79ccSDebojyoti Ghosh 
50549bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
50649bd79ccSDebojyoti Ghosh   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
50749bd79ccSDebojyoti Ghosh   ierr = MatView(B,viewer);CHKERRQ(ierr);
50849bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&B);CHKERRQ(ierr);
50949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
51049bd79ccSDebojyoti Ghosh }
51149bd79ccSDebojyoti Ghosh 
51249bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
51349bd79ccSDebojyoti Ghosh {
51449bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
51549bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
51649bd79ccSDebojyoti Ghosh 
51749bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
51849bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
51949bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
52049bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
52149bd79ccSDebojyoti Ghosh   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
52249bd79ccSDebojyoti Ghosh   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
523a84f8069SRichard Tran Mills   ierr = PetscFree(b->S);CHKERRQ(ierr);
524a84f8069SRichard Tran Mills   ierr = PetscFree(b->T);CHKERRQ(ierr);
525a84f8069SRichard Tran Mills   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
52649bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
52749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
52849bd79ccSDebojyoti Ghosh }
52949bd79ccSDebojyoti Ghosh 
53049bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/
53149bd79ccSDebojyoti Ghosh 
53249bd79ccSDebojyoti Ghosh /* zz = yy + Axx */
53349bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
53449bd79ccSDebojyoti Ghosh {
53549bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
53649bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
53749bd79ccSDebojyoti Ghosh   const PetscScalar *s = b->S, *t = b->T;
53849bd79ccSDebojyoti Ghosh   const PetscScalar *x,*v,*bx;
53949bd79ccSDebojyoti Ghosh   PetscScalar       *y,*sums;
54049bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
54149bd79ccSDebojyoti Ghosh   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
54249bd79ccSDebojyoti Ghosh   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
54349bd79ccSDebojyoti Ghosh 
54449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
54549bd79ccSDebojyoti Ghosh   if (!yy) {
54649bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
54749bd79ccSDebojyoti Ghosh   } else {
54849bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
54949bd79ccSDebojyoti Ghosh   }
55049bd79ccSDebojyoti Ghosh   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
55149bd79ccSDebojyoti Ghosh 
55249bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
55349bd79ccSDebojyoti Ghosh   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
55449bd79ccSDebojyoti Ghosh   idx  = a->j;
55549bd79ccSDebojyoti Ghosh   v    = a->a;
55649bd79ccSDebojyoti Ghosh   ii   = a->i;
55749bd79ccSDebojyoti Ghosh 
55849bd79ccSDebojyoti Ghosh   if (b->isTI) {
55949bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
56049bd79ccSDebojyoti Ghosh       jrow = ii[i];
56149bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
56249bd79ccSDebojyoti Ghosh       sums = y + p*i;
56349bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
56449bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
56549bd79ccSDebojyoti Ghosh           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
56649bd79ccSDebojyoti Ghosh         }
56749bd79ccSDebojyoti Ghosh       }
56849bd79ccSDebojyoti Ghosh     }
56949bd79ccSDebojyoti Ghosh   } else if (t) {
57049bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
57149bd79ccSDebojyoti Ghosh       jrow = ii[i];
57249bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
57349bd79ccSDebojyoti Ghosh       sums = y + p*i;
57449bd79ccSDebojyoti Ghosh       bx   = x + q*i;
57549bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
57649bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
57749bd79ccSDebojyoti Ghosh           for (l=0; l<q; l++) {
57849bd79ccSDebojyoti Ghosh             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
57949bd79ccSDebojyoti Ghosh           }
58049bd79ccSDebojyoti Ghosh         }
58149bd79ccSDebojyoti Ghosh       }
58249bd79ccSDebojyoti Ghosh     }
58349bd79ccSDebojyoti Ghosh   }
58449bd79ccSDebojyoti Ghosh   if (s) {
58549bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
58649bd79ccSDebojyoti Ghosh       jrow = ii[i];
58749bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
58849bd79ccSDebojyoti Ghosh       sums = y + p*i;
58949bd79ccSDebojyoti Ghosh       bx   = x + q*i;
59049bd79ccSDebojyoti Ghosh       if (i < b->AIJ->cmap->n) {
59149bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
59249bd79ccSDebojyoti Ghosh           for (k=0; k<p; k++) {
59349bd79ccSDebojyoti Ghosh             sums[k] += s[k+j*p]*bx[j];
59449bd79ccSDebojyoti Ghosh           }
59549bd79ccSDebojyoti Ghosh         }
59649bd79ccSDebojyoti Ghosh       }
59749bd79ccSDebojyoti Ghosh     }
59849bd79ccSDebojyoti Ghosh   }
59949bd79ccSDebojyoti Ghosh 
60049bd79ccSDebojyoti Ghosh   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
60149bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
60249bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
60349bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
60449bd79ccSDebojyoti Ghosh }
60549bd79ccSDebojyoti Ghosh 
60649bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
60749bd79ccSDebojyoti Ghosh {
60849bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
60949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
61049bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
61149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
61249bd79ccSDebojyoti Ghosh }
61349bd79ccSDebojyoti Ghosh 
61449bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
61549bd79ccSDebojyoti Ghosh {
61649bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
61749bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
61849bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
61949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
62049bd79ccSDebojyoti Ghosh }
62149bd79ccSDebojyoti Ghosh 
62249bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h>
62349bd79ccSDebojyoti Ghosh 
62449bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
62549bd79ccSDebojyoti Ghosh {
62649bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
62749bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
62849bd79ccSDebojyoti Ghosh   const PetscScalar *S  = b->S;
62949bd79ccSDebojyoti Ghosh   const PetscScalar *T  = b->T;
63049bd79ccSDebojyoti Ghosh   const PetscScalar *v  = a->a;
63149bd79ccSDebojyoti Ghosh   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
63249bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
63349bd79ccSDebojyoti Ghosh   PetscInt          i,j,*v_pivots,dof,dof2;
63449bd79ccSDebojyoti Ghosh   PetscScalar       *diag,aval,*v_work;
63549bd79ccSDebojyoti Ghosh 
63649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
63749bd79ccSDebojyoti Ghosh   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
63831a97b9aSRichard Tran Mills   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
63949bd79ccSDebojyoti Ghosh 
64049bd79ccSDebojyoti Ghosh   dof  = p;
64149bd79ccSDebojyoti Ghosh   dof2 = dof*dof;
64249bd79ccSDebojyoti Ghosh 
64349bd79ccSDebojyoti Ghosh   if (b->ibdiagvalid) {
64449bd79ccSDebojyoti Ghosh     if (values) *values = b->ibdiag;
64549bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
64649bd79ccSDebojyoti Ghosh   }
64749bd79ccSDebojyoti Ghosh   if (!b->ibdiag) {
648a84f8069SRichard Tran Mills     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
64949bd79ccSDebojyoti Ghosh     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
65049bd79ccSDebojyoti Ghosh   }
65149bd79ccSDebojyoti Ghosh   if (values) *values = b->ibdiag;
65249bd79ccSDebojyoti Ghosh   diag = b->ibdiag;
65349bd79ccSDebojyoti Ghosh 
65449bd79ccSDebojyoti Ghosh   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
65549bd79ccSDebojyoti Ghosh   for (i=0; i<m; i++) {
65649bd79ccSDebojyoti Ghosh     if (S) {
65749bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
65849bd79ccSDebojyoti Ghosh     } else {
65949bd79ccSDebojyoti Ghosh       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
66049bd79ccSDebojyoti Ghosh     }
66149bd79ccSDebojyoti Ghosh     if (b->isTI) {
66249bd79ccSDebojyoti Ghosh       aval = 0;
66349bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
66449bd79ccSDebojyoti Ghosh       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
66549bd79ccSDebojyoti Ghosh     } else if (T) {
66649bd79ccSDebojyoti Ghosh       aval = 0;
66749bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
66849bd79ccSDebojyoti Ghosh       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
66949bd79ccSDebojyoti Ghosh     }
67049bd79ccSDebojyoti Ghosh     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
67149bd79ccSDebojyoti Ghosh     diag += dof2;
67249bd79ccSDebojyoti Ghosh   }
67349bd79ccSDebojyoti Ghosh   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
67449bd79ccSDebojyoti Ghosh 
67549bd79ccSDebojyoti Ghosh   b->ibdiagvalid = PETSC_TRUE;
67649bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
67749bd79ccSDebojyoti Ghosh }
67849bd79ccSDebojyoti Ghosh 
67949bd79ccSDebojyoti Ghosh static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
68049bd79ccSDebojyoti Ghosh {
68149bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
68249bd79ccSDebojyoti Ghosh 
68349bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
68449bd79ccSDebojyoti Ghosh   *B = kaij->AIJ;
68549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
68649bd79ccSDebojyoti Ghosh }
68749bd79ccSDebojyoti Ghosh 
68849bd79ccSDebojyoti Ghosh PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
68949bd79ccSDebojyoti Ghosh {
69049bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
69149bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
69249bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
69349bd79ccSDebojyoti Ghosh   const PetscScalar *aa = a->a, *T = kaij->T, *v;
69449bd79ccSDebojyoti Ghosh   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
69549bd79ccSDebojyoti Ghosh   const PetscScalar *b, *xb, *idiag;
69649bd79ccSDebojyoti Ghosh   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
69749bd79ccSDebojyoti Ghosh   PetscInt          i, j, k, i2, bs, bs2, nz;
69849bd79ccSDebojyoti Ghosh 
69949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
70049bd79ccSDebojyoti Ghosh   its = its*lits;
70149bd79ccSDebojyoti Ghosh   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
70249bd79ccSDebojyoti Ghosh   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
70349bd79ccSDebojyoti Ghosh   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
70431a97b9aSRichard Tran Mills   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
70549bd79ccSDebojyoti Ghosh   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
70649bd79ccSDebojyoti Ghosh   else        {bs = p; bs2 = bs*bs; }
70749bd79ccSDebojyoti Ghosh 
70849bd79ccSDebojyoti Ghosh   if (!m) PetscFunctionReturn(0);
70949bd79ccSDebojyoti Ghosh 
71049bd79ccSDebojyoti Ghosh   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
71149bd79ccSDebojyoti Ghosh   idiag = kaij->ibdiag;
71249bd79ccSDebojyoti Ghosh   diag  = a->diag;
71349bd79ccSDebojyoti Ghosh 
71449bd79ccSDebojyoti Ghosh   if (!kaij->sor.setup) {
71549bd79ccSDebojyoti Ghosh     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
71649bd79ccSDebojyoti Ghosh     kaij->sor.setup = PETSC_TRUE;
71749bd79ccSDebojyoti Ghosh   }
71849bd79ccSDebojyoti Ghosh   y     = kaij->sor.y;
71949bd79ccSDebojyoti Ghosh   w     = kaij->sor.w;
72049bd79ccSDebojyoti Ghosh   work  = kaij->sor.work;
72149bd79ccSDebojyoti Ghosh   t     = kaij->sor.t;
72249bd79ccSDebojyoti Ghosh   arr   = kaij->sor.arr;
72349bd79ccSDebojyoti Ghosh 
72449bd79ccSDebojyoti Ghosh   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
72549bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
72649bd79ccSDebojyoti Ghosh 
72749bd79ccSDebojyoti Ghosh   if (flag & SOR_ZERO_INITIAL_GUESS) {
72849bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
72949bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
73049bd79ccSDebojyoti Ghosh       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
73149bd79ccSDebojyoti Ghosh       i2     =  bs;
73249bd79ccSDebojyoti Ghosh       idiag  += bs2;
73349bd79ccSDebojyoti Ghosh       for (i=1; i<m; i++) {
73449bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
73549bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
73649bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
73749bd79ccSDebojyoti Ghosh 
73849bd79ccSDebojyoti Ghosh         if (T) {                /* b - T (Arow * x) */
73949bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) w[k] = 0;
74049bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
74149bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
74249bd79ccSDebojyoti Ghosh           }
74349bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
74449bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
74549bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
74649bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
74749bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
74849bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
74949bd79ccSDebojyoti Ghosh           }
75049bd79ccSDebojyoti Ghosh         } else {
75149bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
75249bd79ccSDebojyoti Ghosh         }
75349bd79ccSDebojyoti Ghosh 
75449bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
75549bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
75649bd79ccSDebojyoti Ghosh 
75749bd79ccSDebojyoti Ghosh         idiag += bs2;
75849bd79ccSDebojyoti Ghosh         i2    += bs;
75949bd79ccSDebojyoti Ghosh       }
76049bd79ccSDebojyoti Ghosh       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
76149bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
76249bd79ccSDebojyoti Ghosh       xb = t;
76349bd79ccSDebojyoti Ghosh     } else xb = b;
76449bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
76549bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
76649bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
76749bd79ccSDebojyoti Ghosh       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
76849bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
76949bd79ccSDebojyoti Ghosh       i2    -= bs;
77049bd79ccSDebojyoti Ghosh       idiag -= bs2;
77149bd79ccSDebojyoti Ghosh       for (i=m-2; i>=0; i--) {
77249bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1 ;
77349bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
77449bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
77549bd79ccSDebojyoti Ghosh 
77649bd79ccSDebojyoti Ghosh         if (T) {                /* FIXME: This branch untested */
77749bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
77849bd79ccSDebojyoti Ghosh           /* copy all rows of x that are needed into contiguous space */
77949bd79ccSDebojyoti Ghosh           workt = work;
78049bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
78149bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
78249bd79ccSDebojyoti Ghosh             workt += bs;
78349bd79ccSDebojyoti Ghosh           }
78449bd79ccSDebojyoti Ghosh           arrt = arr;
78549bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
78649bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
78749bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
78849bd79ccSDebojyoti Ghosh             arrt += bs2;
78949bd79ccSDebojyoti Ghosh           }
79049bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
79149bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
79249bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) w[k] = t[i2+k];
79349bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
79449bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
79549bd79ccSDebojyoti Ghosh           }
79649bd79ccSDebojyoti Ghosh         }
79749bd79ccSDebojyoti Ghosh 
79849bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
79949bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
80049bd79ccSDebojyoti Ghosh 
80149bd79ccSDebojyoti Ghosh         idiag -= bs2;
80249bd79ccSDebojyoti Ghosh         i2    -= bs;
80349bd79ccSDebojyoti Ghosh       }
80449bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
80549bd79ccSDebojyoti Ghosh     }
80649bd79ccSDebojyoti Ghosh     its--;
80749bd79ccSDebojyoti Ghosh   }
80849bd79ccSDebojyoti Ghosh   while (its--) {               /* FIXME: This branch not updated */
80949bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
81049bd79ccSDebojyoti Ghosh       i2     =  0;
81149bd79ccSDebojyoti Ghosh       idiag  = kaij->ibdiag;
81249bd79ccSDebojyoti Ghosh       for (i=0; i<m; i++) {
81349bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
81449bd79ccSDebojyoti Ghosh 
81549bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
81649bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
81749bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
81849bd79ccSDebojyoti Ghosh         workt = work;
81949bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
82049bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
82149bd79ccSDebojyoti Ghosh           workt += bs;
82249bd79ccSDebojyoti Ghosh         }
82349bd79ccSDebojyoti Ghosh         arrt = arr;
82449bd79ccSDebojyoti Ghosh         if (T) {
82549bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
82649bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
82749bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
82849bd79ccSDebojyoti Ghosh             arrt += bs2;
82949bd79ccSDebojyoti Ghosh           }
83049bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
83149bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
83249bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
83349bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
83449bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
83549bd79ccSDebojyoti Ghosh             arrt += bs2;
83649bd79ccSDebojyoti Ghosh           }
83749bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
83849bd79ccSDebojyoti Ghosh         }
83949bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
84049bd79ccSDebojyoti Ghosh 
84149bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1;
84249bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
84349bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
84449bd79ccSDebojyoti Ghosh         workt = work;
84549bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
84649bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
84749bd79ccSDebojyoti Ghosh           workt += bs;
84849bd79ccSDebojyoti Ghosh         }
84949bd79ccSDebojyoti Ghosh         arrt = arr;
85049bd79ccSDebojyoti Ghosh         if (T) {
85149bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
85249bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
85349bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
85449bd79ccSDebojyoti Ghosh             arrt += bs2;
85549bd79ccSDebojyoti Ghosh           }
85649bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
85749bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
85849bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
85949bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
86049bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
86149bd79ccSDebojyoti Ghosh             arrt += bs2;
86249bd79ccSDebojyoti Ghosh           }
86349bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
86449bd79ccSDebojyoti Ghosh         }
86549bd79ccSDebojyoti Ghosh 
86649bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
86749bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
86849bd79ccSDebojyoti Ghosh 
86949bd79ccSDebojyoti Ghosh         idiag += bs2;
87049bd79ccSDebojyoti Ghosh         i2    += bs;
87149bd79ccSDebojyoti Ghosh       }
87249bd79ccSDebojyoti Ghosh       xb = t;
87349bd79ccSDebojyoti Ghosh     }
87449bd79ccSDebojyoti Ghosh     else xb = b;
87549bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
87649bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
87749bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
87849bd79ccSDebojyoti Ghosh       if (xb == b) {
87949bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
88049bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
88149bd79ccSDebojyoti Ghosh 
88249bd79ccSDebojyoti Ghosh           v  = aa + ai[i];
88349bd79ccSDebojyoti Ghosh           vi = aj + ai[i];
88449bd79ccSDebojyoti Ghosh           nz = diag[i] - ai[i];
88549bd79ccSDebojyoti Ghosh           workt = work;
88649bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
88749bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
88849bd79ccSDebojyoti Ghosh             workt += bs;
88949bd79ccSDebojyoti Ghosh           }
89049bd79ccSDebojyoti Ghosh           arrt = arr;
89149bd79ccSDebojyoti Ghosh           if (T) {
89249bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
89349bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
89449bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
89549bd79ccSDebojyoti Ghosh               arrt += bs2;
89649bd79ccSDebojyoti Ghosh             }
89749bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
89849bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
89949bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
90049bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
90149bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
90249bd79ccSDebojyoti Ghosh               arrt += bs2;
90349bd79ccSDebojyoti Ghosh             }
90449bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
90549bd79ccSDebojyoti Ghosh           }
90649bd79ccSDebojyoti Ghosh 
90749bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
90849bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
90949bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
91049bd79ccSDebojyoti Ghosh           workt = work;
91149bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
91249bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
91349bd79ccSDebojyoti Ghosh             workt += bs;
91449bd79ccSDebojyoti Ghosh           }
91549bd79ccSDebojyoti Ghosh           arrt = arr;
91649bd79ccSDebojyoti Ghosh           if (T) {
91749bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
91849bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
91949bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
92049bd79ccSDebojyoti Ghosh               arrt += bs2;
92149bd79ccSDebojyoti Ghosh             }
92249bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
92349bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
92449bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
92549bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
92649bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
92749bd79ccSDebojyoti Ghosh               arrt += bs2;
92849bd79ccSDebojyoti Ghosh             }
92949bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
93049bd79ccSDebojyoti Ghosh           }
93149bd79ccSDebojyoti Ghosh 
93249bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
93349bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
93449bd79ccSDebojyoti Ghosh         }
93549bd79ccSDebojyoti Ghosh       } else {
93649bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
93749bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
93849bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
93949bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
94049bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
94149bd79ccSDebojyoti Ghosh           workt = work;
94249bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
94349bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
94449bd79ccSDebojyoti Ghosh             workt += bs;
94549bd79ccSDebojyoti Ghosh           }
94649bd79ccSDebojyoti Ghosh           arrt = arr;
94749bd79ccSDebojyoti Ghosh           if (T) {
94849bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
94949bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
95049bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
95149bd79ccSDebojyoti Ghosh               arrt += bs2;
95249bd79ccSDebojyoti Ghosh             }
95349bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
95449bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
95549bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
95649bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
95749bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
95849bd79ccSDebojyoti Ghosh               arrt += bs2;
95949bd79ccSDebojyoti Ghosh             }
96049bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
96149bd79ccSDebojyoti Ghosh           }
96249bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
96349bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
96449bd79ccSDebojyoti Ghosh         }
96549bd79ccSDebojyoti Ghosh         idiag -= bs2;
96649bd79ccSDebojyoti Ghosh         i2    -= bs;
96749bd79ccSDebojyoti Ghosh       }
96849bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
96949bd79ccSDebojyoti Ghosh     }
97049bd79ccSDebojyoti Ghosh   }
97149bd79ccSDebojyoti Ghosh 
97249bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
97349bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
97449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
97549bd79ccSDebojyoti Ghosh }
97649bd79ccSDebojyoti Ghosh 
97749bd79ccSDebojyoti Ghosh /*===================================================================================*/
97849bd79ccSDebojyoti Ghosh 
97949bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
98049bd79ccSDebojyoti Ghosh {
98149bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
98249bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
98349bd79ccSDebojyoti Ghosh 
98449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
98549bd79ccSDebojyoti Ghosh   if (!yy) {
98649bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
98749bd79ccSDebojyoti Ghosh   } else {
98849bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
98949bd79ccSDebojyoti Ghosh   }
99049bd79ccSDebojyoti Ghosh   /* start the scatter */
99149bd79ccSDebojyoti Ghosh   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
99249bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
99349bd79ccSDebojyoti Ghosh   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
99449bd79ccSDebojyoti Ghosh   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
99549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
99649bd79ccSDebojyoti Ghosh }
99749bd79ccSDebojyoti Ghosh 
99849bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
99949bd79ccSDebojyoti Ghosh {
100049bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
100149bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
100249bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
100349bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
100449bd79ccSDebojyoti Ghosh }
100549bd79ccSDebojyoti Ghosh 
100649bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
100749bd79ccSDebojyoti Ghosh {
100849bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
100949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
101049bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
101149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
101249bd79ccSDebojyoti Ghosh }
101349bd79ccSDebojyoti Ghosh 
101449bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
101549bd79ccSDebojyoti Ghosh {
101649bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
101749bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
101849bd79ccSDebojyoti Ghosh 
101949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
102049bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
102149bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
102249bd79ccSDebojyoti Ghosh }
102349bd79ccSDebojyoti Ghosh 
102449bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/
102549bd79ccSDebojyoti Ghosh 
102649bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
102749bd79ccSDebojyoti Ghosh {
102849bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
10291ca5ffdbSRichard Tran Mills   PetscErrorCode  diag = PETSC_FALSE;
10301ca5ffdbSRichard Tran Mills   PetscErrorCode  ierr;
103149bd79ccSDebojyoti Ghosh   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
103249bd79ccSDebojyoti Ghosh   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
103349bd79ccSDebojyoti Ghosh 
103449bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
103549bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
103649bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
103749bd79ccSDebojyoti Ghosh   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
103849bd79ccSDebojyoti Ghosh 
103949bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
104049bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
104149bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
104249bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
104349bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
104449bd79ccSDebojyoti Ghosh   }
104549bd79ccSDebojyoti Ghosh 
104649bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
104749bd79ccSDebojyoti Ghosh     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
104849bd79ccSDebojyoti Ghosh     c     = nzaij;
104949bd79ccSDebojyoti Ghosh     for (i=0; i<nzaij; i++) {
105049bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
105149bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
105249bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
105349bd79ccSDebojyoti Ghosh         c = i;
105449bd79ccSDebojyoti Ghosh       }
105549bd79ccSDebojyoti Ghosh     }
105649bd79ccSDebojyoti Ghosh   } else nzaij = c = 0;
105749bd79ccSDebojyoti Ghosh 
105849bd79ccSDebojyoti Ghosh   /* calculate size of row */
105949bd79ccSDebojyoti Ghosh   nz = 0;
106049bd79ccSDebojyoti Ghosh   if (S)            nz += q;
106149bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
106249bd79ccSDebojyoti Ghosh 
106349bd79ccSDebojyoti Ghosh   if (cols || values) {
106449bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
106538822f9dSRichard Tran Mills     for (i=0; i<q; i++) {
106638822f9dSRichard Tran Mills       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
106738822f9dSRichard Tran Mills       v[i] = 0.0;
106838822f9dSRichard Tran Mills     }
106949bd79ccSDebojyoti Ghosh     if (b->isTI) {
107049bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
107149bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
107249bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
107349bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vaij[i] : 0);
107449bd79ccSDebojyoti Ghosh         }
107549bd79ccSDebojyoti Ghosh       }
107649bd79ccSDebojyoti Ghosh     } else if (T) {
107749bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
107849bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
107949bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
108049bd79ccSDebojyoti Ghosh           v[i*q+j]   = vaij[i]*T[s+j*p];
108149bd79ccSDebojyoti Ghosh         }
108249bd79ccSDebojyoti Ghosh       }
108349bd79ccSDebojyoti Ghosh     }
108449bd79ccSDebojyoti Ghosh     if (S) {
108549bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
108649bd79ccSDebojyoti Ghosh         idx[c*q+j] = r*q+j;
108749bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
108849bd79ccSDebojyoti Ghosh       }
108949bd79ccSDebojyoti Ghosh     }
109049bd79ccSDebojyoti Ghosh   }
109149bd79ccSDebojyoti Ghosh 
109249bd79ccSDebojyoti Ghosh   if (ncols)    *ncols  = nz;
109349bd79ccSDebojyoti Ghosh   if (cols)     *cols   = idx;
109449bd79ccSDebojyoti Ghosh   if (values)   *values = v;
109549bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
109649bd79ccSDebojyoti Ghosh }
109749bd79ccSDebojyoti Ghosh 
109849bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
109949bd79ccSDebojyoti Ghosh {
110049bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
110149bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
110249bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
110349bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
110449bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
110549bd79ccSDebojyoti Ghosh }
110649bd79ccSDebojyoti Ghosh 
110749bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
110849bd79ccSDebojyoti Ghosh {
110949bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
111049bd79ccSDebojyoti Ghosh   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
111149bd79ccSDebojyoti Ghosh   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
111249bd79ccSDebojyoti Ghosh   Mat             AIJ     = b->A;
1113fc64b2cfSRichard Tran Mills   PetscBool       diag    = PETSC_FALSE;
111449bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
111549bd79ccSDebojyoti Ghosh   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
111649bd79ccSDebojyoti Ghosh   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
111749bd79ccSDebojyoti Ghosh   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
111849bd79ccSDebojyoti Ghosh 
111949bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
112049bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
112149bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
112249bd79ccSDebojyoti Ghosh   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
112349bd79ccSDebojyoti Ghosh   lrow = row - rstart;
112449bd79ccSDebojyoti Ghosh 
112549bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
112649bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
112749bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
112849bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
112949bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
113049bd79ccSDebojyoti Ghosh   }
113149bd79ccSDebojyoti Ghosh 
113249bd79ccSDebojyoti Ghosh   r = lrow/p;
113349bd79ccSDebojyoti Ghosh   s = lrow%p;
113449bd79ccSDebojyoti Ghosh 
113549bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
113649bd79ccSDebojyoti Ghosh     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
113749bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
113849bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
113949bd79ccSDebojyoti Ghosh 
114049bd79ccSDebojyoti Ghosh     c     = ncolsaij + ncolsoaij;
114149bd79ccSDebojyoti Ghosh     for (i=0; i<ncolsaij; i++) {
114249bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
114349bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
114449bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
114549bd79ccSDebojyoti Ghosh         c = i;
114649bd79ccSDebojyoti Ghosh       }
114749bd79ccSDebojyoti Ghosh     }
114849bd79ccSDebojyoti Ghosh   } else c = 0;
114949bd79ccSDebojyoti Ghosh 
115049bd79ccSDebojyoti Ghosh   /* calculate size of row */
115149bd79ccSDebojyoti Ghosh   nz = 0;
115249bd79ccSDebojyoti Ghosh   if (S)            nz += q;
115349bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
115449bd79ccSDebojyoti Ghosh 
115549bd79ccSDebojyoti Ghosh   if (cols || values) {
115649bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1157a437a796SRichard Tran Mills     for (i=0; i<q; i++) {
1158a437a796SRichard Tran Mills       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1159a437a796SRichard Tran Mills       v[i] = 0.0;
1160a437a796SRichard Tran Mills     }
116149bd79ccSDebojyoti Ghosh     if (b->isTI) {
116249bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
116349bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
116449bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
116549bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vals[i] : 0.0);
116649bd79ccSDebojyoti Ghosh         }
116749bd79ccSDebojyoti Ghosh       }
116849bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
116949bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
117049bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
117149bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
117249bd79ccSDebojyoti Ghosh         }
117349bd79ccSDebojyoti Ghosh       }
117449bd79ccSDebojyoti Ghosh     } else if (T) {
117549bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
117649bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
117749bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
117849bd79ccSDebojyoti Ghosh           v[i*q+j]   = vals[i]*T[s+j*p];
117949bd79ccSDebojyoti Ghosh         }
118049bd79ccSDebojyoti Ghosh       }
118149bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
118249bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
118349bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
118449bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
118549bd79ccSDebojyoti Ghosh         }
118649bd79ccSDebojyoti Ghosh       }
118749bd79ccSDebojyoti Ghosh     }
118849bd79ccSDebojyoti Ghosh     if (S) {
118949bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
119049bd79ccSDebojyoti Ghosh         idx[c*q+j] = (r+rstart/p)*q+j;
119149bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
119249bd79ccSDebojyoti Ghosh       }
119349bd79ccSDebojyoti Ghosh     }
119449bd79ccSDebojyoti Ghosh   }
119549bd79ccSDebojyoti Ghosh 
119649bd79ccSDebojyoti Ghosh   if (ncols)  *ncols  = nz;
119749bd79ccSDebojyoti Ghosh   if (cols)   *cols   = idx;
119849bd79ccSDebojyoti Ghosh   if (values) *values = v;
119949bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
120049bd79ccSDebojyoti Ghosh }
120149bd79ccSDebojyoti Ghosh 
120249bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
120349bd79ccSDebojyoti Ghosh {
120449bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
120549bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
120649bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
120749bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
120849bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
120949bd79ccSDebojyoti Ghosh }
121049bd79ccSDebojyoti Ghosh 
121149bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
121249bd79ccSDebojyoti Ghosh {
121349bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
121449bd79ccSDebojyoti Ghosh   Mat            A;
121549bd79ccSDebojyoti Ghosh 
121649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
121749bd79ccSDebojyoti Ghosh   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
121849bd79ccSDebojyoti Ghosh   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
121949bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&A);CHKERRQ(ierr);
122049bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
122149bd79ccSDebojyoti Ghosh }
122249bd79ccSDebojyoti Ghosh 
122349bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */
122449bd79ccSDebojyoti Ghosh /*@C
122549bd79ccSDebojyoti Ghosh   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
122649bd79ccSDebojyoti Ghosh 
122749bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
122849bd79ccSDebojyoti Ghosh 
122949bd79ccSDebojyoti Ghosh   where
123049bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
123149bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
123249bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
123349bd79ccSDebojyoti Ghosh     I is the identity matrix
123449bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
123549bd79ccSDebojyoti Ghosh 
123649bd79ccSDebojyoti Ghosh   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
123749bd79ccSDebojyoti Ghosh   S is always stored independently on all processes as a PetscScalar array in column-major format.
123849bd79ccSDebojyoti Ghosh 
123949bd79ccSDebojyoti Ghosh   Collective
124049bd79ccSDebojyoti Ghosh 
124149bd79ccSDebojyoti Ghosh   Input Parameters:
124249bd79ccSDebojyoti Ghosh + A - the AIJ matrix
124349bd79ccSDebojyoti Ghosh . S - the S matrix, stored as a PetscScalar array (column-major)
124449bd79ccSDebojyoti Ghosh . T - the T matrix, stored as a PetscScalar array (column-major)
124549bd79ccSDebojyoti Ghosh . p - number of rows in S and T
124649bd79ccSDebojyoti Ghosh - q - number of columns in S and T
124749bd79ccSDebojyoti Ghosh 
124849bd79ccSDebojyoti Ghosh   Output Parameter:
124949bd79ccSDebojyoti Ghosh . kaij - the new KAIJ matrix
125049bd79ccSDebojyoti Ghosh 
125149bd79ccSDebojyoti Ghosh   Operations provided:
125249bd79ccSDebojyoti Ghosh + MatMult
125349bd79ccSDebojyoti Ghosh . MatMultAdd
125449bd79ccSDebojyoti Ghosh . MatInvertBlockDiagonal
125549bd79ccSDebojyoti Ghosh - MatView
125649bd79ccSDebojyoti Ghosh 
125749bd79ccSDebojyoti Ghosh   Level: advanced
125849bd79ccSDebojyoti Ghosh 
12590567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
126049bd79ccSDebojyoti Ghosh @*/
126149bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
126249bd79ccSDebojyoti Ghosh {
126349bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
126449bd79ccSDebojyoti Ghosh   PetscMPIInt    size;
126549bd79ccSDebojyoti Ghosh 
126649bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
12670567c835SRichard Tran Mills   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
126849bd79ccSDebojyoti Ghosh   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
126949bd79ccSDebojyoti Ghosh   if (size == 1) {
12700567c835SRichard Tran Mills     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
127149bd79ccSDebojyoti Ghosh   } else {
12720567c835SRichard Tran Mills     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
127349bd79ccSDebojyoti Ghosh   }
12740567c835SRichard Tran Mills   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
12750567c835SRichard Tran Mills   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
12760567c835SRichard Tran Mills   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
12770567c835SRichard Tran Mills   ierr = MatSetUp(*kaij);
12780567c835SRichard Tran Mills   PetscFunctionReturn(0);
12790567c835SRichard Tran Mills }
128049bd79ccSDebojyoti Ghosh 
12810567c835SRichard Tran Mills /*MC
12820567c835SRichard Tran Mills   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
12830567c835SRichard Tran Mills 
12840567c835SRichard Tran Mills     [I \otimes S + A \otimes T]
12850567c835SRichard Tran Mills 
12860567c835SRichard Tran Mills   where
12870567c835SRichard Tran Mills     S is a dense (p \times q) matrix
12880567c835SRichard Tran Mills     T is a dense (p \times q) matrix
12890567c835SRichard Tran Mills     A is an AIJ  (n \times n) matrix
12900567c835SRichard Tran Mills     I is the identity matrix
12910567c835SRichard Tran Mills   The resulting matrix is (np \times nq)
12920567c835SRichard Tran Mills 
12930567c835SRichard Tran Mills   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
12940567c835SRichard Tran Mills   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
12950567c835SRichard Tran Mills 
12960567c835SRichard Tran Mills   Operations provided:
12970567c835SRichard Tran Mills + MatMult
12980567c835SRichard Tran Mills . MatMultAdd
12990567c835SRichard Tran Mills . MatInvertBlockDiagonal
13000567c835SRichard Tran Mills - MatView
13010567c835SRichard Tran Mills 
13020567c835SRichard Tran Mills   Level: advanced
13030567c835SRichard Tran Mills 
13040567c835SRichard Tran Mills .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
13050567c835SRichard Tran Mills M*/
13060567c835SRichard Tran Mills 
13070567c835SRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
13080567c835SRichard Tran Mills {
13090567c835SRichard Tran Mills   PetscErrorCode ierr;
13100567c835SRichard Tran Mills   Mat_MPIKAIJ    *b;
13110567c835SRichard Tran Mills   PetscMPIInt    size;
13120567c835SRichard Tran Mills 
13130567c835SRichard Tran Mills   PetscFunctionBegin;
13140567c835SRichard Tran Mills   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
13150567c835SRichard Tran Mills   A->data  = (void*)b;
13160567c835SRichard Tran Mills 
13170567c835SRichard Tran Mills   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
13180567c835SRichard Tran Mills 
13190567c835SRichard Tran Mills   A->ops->setup = MatSetUp_KAIJ;
13200567c835SRichard Tran Mills 
13210567c835SRichard Tran Mills   b->w    = 0;
13220567c835SRichard Tran Mills   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
13230567c835SRichard Tran Mills   if (size == 1) {
13240567c835SRichard Tran Mills     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
13250567c835SRichard Tran Mills     A->ops->setup               = MatSetUp_KAIJ;
13260567c835SRichard Tran Mills     A->ops->destroy             = MatDestroy_SeqKAIJ;
13270567c835SRichard Tran Mills     A->ops->view                = MatView_SeqKAIJ;
13280567c835SRichard Tran Mills     A->ops->mult                = MatMult_SeqKAIJ_N;
13290567c835SRichard Tran Mills     A->ops->multadd             = MatMultAdd_SeqKAIJ_N;
13300567c835SRichard Tran Mills     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
13310567c835SRichard Tran Mills     A->ops->getrow              = MatGetRow_SeqKAIJ;
13320567c835SRichard Tran Mills     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
13330567c835SRichard Tran Mills     A->ops->sor                 = MatSOR_SeqKAIJ;
13340567c835SRichard Tran Mills   } else {
13350567c835SRichard Tran Mills     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
13360567c835SRichard Tran Mills     A->ops->setup               = MatSetUp_KAIJ;
13370567c835SRichard Tran Mills     A->ops->destroy             = MatDestroy_MPIKAIJ;
13380567c835SRichard Tran Mills     A->ops->view                = MatView_MPIKAIJ;
13390567c835SRichard Tran Mills     A->ops->mult                = MatMult_MPIKAIJ_dof;
13400567c835SRichard Tran Mills     A->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
13410567c835SRichard Tran Mills     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
13420567c835SRichard Tran Mills     A->ops->getrow              = MatGetRow_MPIKAIJ;
13430567c835SRichard Tran Mills     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
13440567c835SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
13450567c835SRichard Tran Mills   }
13460567c835SRichard Tran Mills   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
134749bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
134849bd79ccSDebojyoti Ghosh }
1349