xref: /petsc/src/mat/impls/kaij/kaij.c (revision 49bd79ccbacc9a2669ec09d6a41f2f533e7405b6)
1*49bd79ccSDebojyoti Ghosh 
2*49bd79ccSDebojyoti Ghosh /*
3*49bd79ccSDebojyoti Ghosh   Defines the basic matrix operations for the KAIJ  matrix storage format.
4*49bd79ccSDebojyoti Ghosh   This format is used to evaluate matrices of the form:
5*49bd79ccSDebojyoti Ghosh 
6*49bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
7*49bd79ccSDebojyoti Ghosh 
8*49bd79ccSDebojyoti Ghosh   where
9*49bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
10*49bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
11*49bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
12*49bd79ccSDebojyoti Ghosh     I is the identity matrix
13*49bd79ccSDebojyoti Ghosh 
14*49bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
15*49bd79ccSDebojyoti Ghosh 
16*49bd79ccSDebojyoti Ghosh   We provide:
17*49bd79ccSDebojyoti Ghosh      MatMult()
18*49bd79ccSDebojyoti Ghosh      MatMultAdd()
19*49bd79ccSDebojyoti Ghosh      MatInvertBlockDiagonal()
20*49bd79ccSDebojyoti Ghosh   and
21*49bd79ccSDebojyoti Ghosh      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22*49bd79ccSDebojyoti Ghosh 
23*49bd79ccSDebojyoti Ghosh   This single directory handles both the sequential and parallel codes
24*49bd79ccSDebojyoti Ghosh */
25*49bd79ccSDebojyoti Ghosh 
26*49bd79ccSDebojyoti Ghosh #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27*49bd79ccSDebojyoti Ghosh #include <../src/mat/utils/freespace.h>
28*49bd79ccSDebojyoti Ghosh #include <petsc/private/vecimpl.h>
29*49bd79ccSDebojyoti Ghosh 
30*49bd79ccSDebojyoti Ghosh /*@C
31*49bd79ccSDebojyoti Ghosh    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32*49bd79ccSDebojyoti Ghosh 
33*49bd79ccSDebojyoti Ghosh    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34*49bd79ccSDebojyoti Ghosh 
35*49bd79ccSDebojyoti Ghosh    Input Parameter:
36*49bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
37*49bd79ccSDebojyoti Ghosh 
38*49bd79ccSDebojyoti Ghosh    Output Parameter:
39*49bd79ccSDebojyoti Ghosh .  B - the AIJ matrix
40*49bd79ccSDebojyoti Ghosh 
41*49bd79ccSDebojyoti Ghosh    Level: advanced
42*49bd79ccSDebojyoti Ghosh 
43*49bd79ccSDebojyoti Ghosh    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44*49bd79ccSDebojyoti Ghosh 
45*49bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ()
46*49bd79ccSDebojyoti Ghosh @*/
47*49bd79ccSDebojyoti Ghosh PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48*49bd79ccSDebojyoti Ghosh {
49*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
50*49bd79ccSDebojyoti Ghosh   PetscBool      ismpikaij,isseqkaij;
51*49bd79ccSDebojyoti Ghosh 
52*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
53*49bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54*49bd79ccSDebojyoti Ghosh   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55*49bd79ccSDebojyoti Ghosh   if (ismpikaij) {
56*49bd79ccSDebojyoti Ghosh     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57*49bd79ccSDebojyoti Ghosh 
58*49bd79ccSDebojyoti Ghosh     *B = b->A;
59*49bd79ccSDebojyoti Ghosh   } else if (isseqkaij) {
60*49bd79ccSDebojyoti Ghosh     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61*49bd79ccSDebojyoti Ghosh 
62*49bd79ccSDebojyoti Ghosh     *B = b->AIJ;
63*49bd79ccSDebojyoti Ghosh   } else {
64*49bd79ccSDebojyoti Ghosh     *B = A;
65*49bd79ccSDebojyoti Ghosh   }
66*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
67*49bd79ccSDebojyoti Ghosh }
68*49bd79ccSDebojyoti Ghosh 
69*49bd79ccSDebojyoti Ghosh /*@C
70*49bd79ccSDebojyoti Ghosh    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
71*49bd79ccSDebojyoti Ghosh 
72*49bd79ccSDebojyoti Ghosh    Not Collective, the entire S is stored and returned independently on all processes
73*49bd79ccSDebojyoti Ghosh 
74*49bd79ccSDebojyoti Ghosh    Input Parameter:
75*49bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
76*49bd79ccSDebojyoti Ghosh 
77*49bd79ccSDebojyoti Ghosh    Output Parameter:
78*49bd79ccSDebojyoti Ghosh .  S - the S matrix, in form of a scalar array in column-major format
79*49bd79ccSDebojyoti Ghosh 
80*49bd79ccSDebojyoti Ghosh    Level: advanced
81*49bd79ccSDebojyoti Ghosh 
82*49bd79ccSDebojyoti Ghosh    Notes: The reference count on the S matrix is not increased so you should not destroy it.
83*49bd79ccSDebojyoti Ghosh 
84*49bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ()
85*49bd79ccSDebojyoti Ghosh @*/
86*49bd79ccSDebojyoti Ghosh PetscErrorCode  MatKAIJGetS(Mat A,const PetscScalar **S)
87*49bd79ccSDebojyoti Ghosh {
88*49bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
90*49bd79ccSDebojyoti Ghosh   *S = b->S;
91*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
92*49bd79ccSDebojyoti Ghosh }
93*49bd79ccSDebojyoti Ghosh 
94*49bd79ccSDebojyoti Ghosh /*@C
95*49bd79ccSDebojyoti Ghosh    MatKAIJGetT - Get the T matrix describing the shift action of the KAIJ matrix
96*49bd79ccSDebojyoti Ghosh 
97*49bd79ccSDebojyoti Ghosh    Not Collective, the entire T is stored and returned independently on all processes
98*49bd79ccSDebojyoti Ghosh 
99*49bd79ccSDebojyoti Ghosh    Input Parameter:
100*49bd79ccSDebojyoti Ghosh .  A - the KAIJ matrix
101*49bd79ccSDebojyoti Ghosh 
102*49bd79ccSDebojyoti Ghosh    Output Parameter:
103*49bd79ccSDebojyoti Ghosh .  T - the T matrix, in form of a scalar array in column-major format
104*49bd79ccSDebojyoti Ghosh 
105*49bd79ccSDebojyoti Ghosh    Level: advanced
106*49bd79ccSDebojyoti Ghosh 
107*49bd79ccSDebojyoti Ghosh    Notes: The reference count on the T matrix is not increased so you should not destroy it.
108*49bd79ccSDebojyoti Ghosh 
109*49bd79ccSDebojyoti Ghosh .seealso: MatCreateKAIJ()
110*49bd79ccSDebojyoti Ghosh @*/
111*49bd79ccSDebojyoti Ghosh PetscErrorCode  MatKAIJGetT(Mat A,const PetscScalar **T)
112*49bd79ccSDebojyoti Ghosh {
113*49bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
114*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
115*49bd79ccSDebojyoti Ghosh   *T = b->T;
116*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
117*49bd79ccSDebojyoti Ghosh }
118*49bd79ccSDebojyoti Ghosh 
119*49bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
120*49bd79ccSDebojyoti Ghosh {
121*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
122*49bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
123*49bd79ccSDebojyoti Ghosh 
124*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
125*49bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
126*49bd79ccSDebojyoti Ghosh   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
127*49bd79ccSDebojyoti Ghosh   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
128*49bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
129*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
130*49bd79ccSDebojyoti Ghosh }
131*49bd79ccSDebojyoti Ghosh 
132*49bd79ccSDebojyoti Ghosh PetscErrorCode MatSetUp_KAIJ(Mat A)
133*49bd79ccSDebojyoti Ghosh {
134*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
135*49bd79ccSDebojyoti Ghosh   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateKAIJ() to create KAIJ matrices");
136*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
137*49bd79ccSDebojyoti Ghosh }
138*49bd79ccSDebojyoti Ghosh 
139*49bd79ccSDebojyoti Ghosh PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
140*49bd79ccSDebojyoti Ghosh {
141*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
142*49bd79ccSDebojyoti Ghosh   Mat            B;
143*49bd79ccSDebojyoti Ghosh 
144*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
145*49bd79ccSDebojyoti Ghosh   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
146*49bd79ccSDebojyoti Ghosh   ierr = MatView(B,viewer);CHKERRQ(ierr);
147*49bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&B);CHKERRQ(ierr);
148*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
149*49bd79ccSDebojyoti Ghosh }
150*49bd79ccSDebojyoti Ghosh 
151*49bd79ccSDebojyoti Ghosh PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
152*49bd79ccSDebojyoti Ghosh {
153*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
154*49bd79ccSDebojyoti Ghosh   Mat            B;
155*49bd79ccSDebojyoti Ghosh 
156*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
157*49bd79ccSDebojyoti Ghosh   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
158*49bd79ccSDebojyoti Ghosh   ierr = MatView(B,viewer);CHKERRQ(ierr);
159*49bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&B);CHKERRQ(ierr);
160*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
161*49bd79ccSDebojyoti Ghosh }
162*49bd79ccSDebojyoti Ghosh 
163*49bd79ccSDebojyoti Ghosh PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
164*49bd79ccSDebojyoti Ghosh {
165*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
166*49bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
167*49bd79ccSDebojyoti Ghosh 
168*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
169*49bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
170*49bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
171*49bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
172*49bd79ccSDebojyoti Ghosh   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
173*49bd79ccSDebojyoti Ghosh   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
174*49bd79ccSDebojyoti Ghosh   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
175*49bd79ccSDebojyoti Ghosh   ierr = PetscFree(A->data);CHKERRQ(ierr);
176*49bd79ccSDebojyoti Ghosh 
177*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
178*49bd79ccSDebojyoti Ghosh }
179*49bd79ccSDebojyoti Ghosh 
180*49bd79ccSDebojyoti Ghosh /*MC
181*49bd79ccSDebojyoti Ghosh   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
182*49bd79ccSDebojyoti Ghosh 
183*49bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
184*49bd79ccSDebojyoti Ghosh 
185*49bd79ccSDebojyoti Ghosh   where
186*49bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
187*49bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
188*49bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
189*49bd79ccSDebojyoti Ghosh     I is the identity matrix
190*49bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
191*49bd79ccSDebojyoti Ghosh 
192*49bd79ccSDebojyoti Ghosh   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
193*49bd79ccSDebojyoti Ghosh   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
194*49bd79ccSDebojyoti Ghosh 
195*49bd79ccSDebojyoti Ghosh   Operations provided:
196*49bd79ccSDebojyoti Ghosh . MatMult
197*49bd79ccSDebojyoti Ghosh . MatMultAdd
198*49bd79ccSDebojyoti Ghosh . MatInvertBlockDiagonal
199*49bd79ccSDebojyoti Ghosh 
200*49bd79ccSDebojyoti Ghosh   Level: advanced
201*49bd79ccSDebojyoti Ghosh 
202*49bd79ccSDebojyoti Ghosh .seealso: MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
203*49bd79ccSDebojyoti Ghosh M*/
204*49bd79ccSDebojyoti Ghosh 
205*49bd79ccSDebojyoti Ghosh PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
206*49bd79ccSDebojyoti Ghosh {
207*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
208*49bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b;
209*49bd79ccSDebojyoti Ghosh   PetscMPIInt    size;
210*49bd79ccSDebojyoti Ghosh 
211*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
212*49bd79ccSDebojyoti Ghosh   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
213*49bd79ccSDebojyoti Ghosh   A->data  = (void*)b;
214*49bd79ccSDebojyoti Ghosh 
215*49bd79ccSDebojyoti Ghosh   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
216*49bd79ccSDebojyoti Ghosh 
217*49bd79ccSDebojyoti Ghosh   A->ops->setup = MatSetUp_KAIJ;
218*49bd79ccSDebojyoti Ghosh 
219*49bd79ccSDebojyoti Ghosh   b->w    = 0;
220*49bd79ccSDebojyoti Ghosh   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
221*49bd79ccSDebojyoti Ghosh   if (size == 1) {
222*49bd79ccSDebojyoti Ghosh     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
223*49bd79ccSDebojyoti Ghosh   } else {
224*49bd79ccSDebojyoti Ghosh     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
225*49bd79ccSDebojyoti Ghosh   }
226*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
227*49bd79ccSDebojyoti Ghosh }
228*49bd79ccSDebojyoti Ghosh 
229*49bd79ccSDebojyoti Ghosh /* --------------------------------------------------------------------------------------*/
230*49bd79ccSDebojyoti Ghosh 
231*49bd79ccSDebojyoti Ghosh /* zz = yy + Axx */
232*49bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
233*49bd79ccSDebojyoti Ghosh {
234*49bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
235*49bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
236*49bd79ccSDebojyoti Ghosh   const PetscScalar *s = b->S, *t = b->T;
237*49bd79ccSDebojyoti Ghosh   const PetscScalar *x,*v,*bx;
238*49bd79ccSDebojyoti Ghosh   PetscScalar       *y,*sums;
239*49bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
240*49bd79ccSDebojyoti Ghosh   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
241*49bd79ccSDebojyoti Ghosh   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
242*49bd79ccSDebojyoti Ghosh 
243*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
244*49bd79ccSDebojyoti Ghosh 
245*49bd79ccSDebojyoti Ghosh   if (!yy) {
246*49bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
247*49bd79ccSDebojyoti Ghosh   } else {
248*49bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
249*49bd79ccSDebojyoti Ghosh   }
250*49bd79ccSDebojyoti Ghosh   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
251*49bd79ccSDebojyoti Ghosh 
252*49bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
253*49bd79ccSDebojyoti Ghosh   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
254*49bd79ccSDebojyoti Ghosh   idx  = a->j;
255*49bd79ccSDebojyoti Ghosh   v    = a->a;
256*49bd79ccSDebojyoti Ghosh   ii   = a->i;
257*49bd79ccSDebojyoti Ghosh 
258*49bd79ccSDebojyoti Ghosh   if (b->isTI) {
259*49bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
260*49bd79ccSDebojyoti Ghosh       jrow = ii[i];
261*49bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
262*49bd79ccSDebojyoti Ghosh       sums = y + p*i;
263*49bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
264*49bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
265*49bd79ccSDebojyoti Ghosh           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
266*49bd79ccSDebojyoti Ghosh         }
267*49bd79ccSDebojyoti Ghosh       }
268*49bd79ccSDebojyoti Ghosh     }
269*49bd79ccSDebojyoti Ghosh   } else if (t) {
270*49bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
271*49bd79ccSDebojyoti Ghosh       jrow = ii[i];
272*49bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
273*49bd79ccSDebojyoti Ghosh       sums = y + p*i;
274*49bd79ccSDebojyoti Ghosh       bx   = x + q*i;
275*49bd79ccSDebojyoti Ghosh       for (j=0; j<n; j++) {
276*49bd79ccSDebojyoti Ghosh         for (k=0; k<p; k++) {
277*49bd79ccSDebojyoti Ghosh           for (l=0; l<q; l++) {
278*49bd79ccSDebojyoti Ghosh             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
279*49bd79ccSDebojyoti Ghosh           }
280*49bd79ccSDebojyoti Ghosh         }
281*49bd79ccSDebojyoti Ghosh       }
282*49bd79ccSDebojyoti Ghosh     }
283*49bd79ccSDebojyoti Ghosh   }
284*49bd79ccSDebojyoti Ghosh   if (s) {
285*49bd79ccSDebojyoti Ghosh     for (i=0; i<m; i++) {
286*49bd79ccSDebojyoti Ghosh       jrow = ii[i];
287*49bd79ccSDebojyoti Ghosh       n    = ii[i+1] - jrow;
288*49bd79ccSDebojyoti Ghosh       sums = y + p*i;
289*49bd79ccSDebojyoti Ghosh       bx   = x + q*i;
290*49bd79ccSDebojyoti Ghosh       if (i < b->AIJ->cmap->n) {
291*49bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
292*49bd79ccSDebojyoti Ghosh           for (k=0; k<p; k++) {
293*49bd79ccSDebojyoti Ghosh             sums[k] += s[k+j*p]*bx[j];
294*49bd79ccSDebojyoti Ghosh           }
295*49bd79ccSDebojyoti Ghosh         }
296*49bd79ccSDebojyoti Ghosh       }
297*49bd79ccSDebojyoti Ghosh     }
298*49bd79ccSDebojyoti Ghosh   }
299*49bd79ccSDebojyoti Ghosh 
300*49bd79ccSDebojyoti Ghosh   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
301*49bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
302*49bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
303*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
304*49bd79ccSDebojyoti Ghosh }
305*49bd79ccSDebojyoti Ghosh 
306*49bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
307*49bd79ccSDebojyoti Ghosh {
308*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
309*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
310*49bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
311*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
312*49bd79ccSDebojyoti Ghosh }
313*49bd79ccSDebojyoti Ghosh 
314*49bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
315*49bd79ccSDebojyoti Ghosh {
316*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
317*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
318*49bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
319*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
320*49bd79ccSDebojyoti Ghosh }
321*49bd79ccSDebojyoti Ghosh 
322*49bd79ccSDebojyoti Ghosh #include <petsc/private/kernels/blockinvert.h>
323*49bd79ccSDebojyoti Ghosh 
324*49bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
325*49bd79ccSDebojyoti Ghosh {
326*49bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
327*49bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
328*49bd79ccSDebojyoti Ghosh   const PetscScalar *S  = b->S;
329*49bd79ccSDebojyoti Ghosh   const PetscScalar *T  = b->T;
330*49bd79ccSDebojyoti Ghosh   const PetscScalar *v  = a->a;
331*49bd79ccSDebojyoti Ghosh   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
332*49bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
333*49bd79ccSDebojyoti Ghosh   PetscInt          i,j,*v_pivots,dof,dof2;
334*49bd79ccSDebojyoti Ghosh   PetscScalar       *diag,aval,*v_work;
335*49bd79ccSDebojyoti Ghosh 
336*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
337*49bd79ccSDebojyoti Ghosh   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
338*49bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
339*49bd79ccSDebojyoti Ghosh     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
340*49bd79ccSDebojyoti Ghosh   }
341*49bd79ccSDebojyoti Ghosh 
342*49bd79ccSDebojyoti Ghosh   dof  = p;
343*49bd79ccSDebojyoti Ghosh   dof2 = dof*dof;
344*49bd79ccSDebojyoti Ghosh 
345*49bd79ccSDebojyoti Ghosh   if (b->ibdiagvalid) {
346*49bd79ccSDebojyoti Ghosh     if (values) *values = b->ibdiag;
347*49bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
348*49bd79ccSDebojyoti Ghosh   }
349*49bd79ccSDebojyoti Ghosh   if (!b->ibdiag) {
350*49bd79ccSDebojyoti Ghosh     ierr = PetscMalloc(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
351*49bd79ccSDebojyoti Ghosh     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
352*49bd79ccSDebojyoti Ghosh   }
353*49bd79ccSDebojyoti Ghosh   if (values) *values = b->ibdiag;
354*49bd79ccSDebojyoti Ghosh   diag = b->ibdiag;
355*49bd79ccSDebojyoti Ghosh 
356*49bd79ccSDebojyoti Ghosh   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
357*49bd79ccSDebojyoti Ghosh   for (i=0; i<m; i++) {
358*49bd79ccSDebojyoti Ghosh     if (S) {
359*49bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
360*49bd79ccSDebojyoti Ghosh     } else {
361*49bd79ccSDebojyoti Ghosh       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
362*49bd79ccSDebojyoti Ghosh     }
363*49bd79ccSDebojyoti Ghosh     if (b->isTI) {
364*49bd79ccSDebojyoti Ghosh       aval = 0;
365*49bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
366*49bd79ccSDebojyoti Ghosh       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
367*49bd79ccSDebojyoti Ghosh     } else if (T) {
368*49bd79ccSDebojyoti Ghosh       aval = 0;
369*49bd79ccSDebojyoti Ghosh       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
370*49bd79ccSDebojyoti Ghosh       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
371*49bd79ccSDebojyoti Ghosh     }
372*49bd79ccSDebojyoti Ghosh     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
373*49bd79ccSDebojyoti Ghosh     diag += dof2;
374*49bd79ccSDebojyoti Ghosh   }
375*49bd79ccSDebojyoti Ghosh   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
376*49bd79ccSDebojyoti Ghosh 
377*49bd79ccSDebojyoti Ghosh   b->ibdiagvalid = PETSC_TRUE;
378*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
379*49bd79ccSDebojyoti Ghosh }
380*49bd79ccSDebojyoti Ghosh 
381*49bd79ccSDebojyoti Ghosh static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
382*49bd79ccSDebojyoti Ghosh {
383*49bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
384*49bd79ccSDebojyoti Ghosh 
385*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
386*49bd79ccSDebojyoti Ghosh   *B = kaij->AIJ;
387*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
388*49bd79ccSDebojyoti Ghosh }
389*49bd79ccSDebojyoti Ghosh 
390*49bd79ccSDebojyoti Ghosh PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
391*49bd79ccSDebojyoti Ghosh {
392*49bd79ccSDebojyoti Ghosh   PetscErrorCode    ierr;
393*49bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
394*49bd79ccSDebojyoti Ghosh   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
395*49bd79ccSDebojyoti Ghosh   const PetscScalar *aa = a->a, *T = kaij->T, *v;
396*49bd79ccSDebojyoti Ghosh   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
397*49bd79ccSDebojyoti Ghosh   const PetscScalar *b, *xb, *idiag;
398*49bd79ccSDebojyoti Ghosh   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
399*49bd79ccSDebojyoti Ghosh   PetscInt          i, j, k, i2, bs, bs2, nz;
400*49bd79ccSDebojyoti Ghosh 
401*49bd79ccSDebojyoti Ghosh 
402*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
403*49bd79ccSDebojyoti Ghosh   its = its*lits;
404*49bd79ccSDebojyoti Ghosh   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
405*49bd79ccSDebojyoti Ghosh   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
406*49bd79ccSDebojyoti Ghosh   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
407*49bd79ccSDebojyoti Ghosh   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER))
408*49bd79ccSDebojyoti Ghosh     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
409*49bd79ccSDebojyoti Ghosh   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
410*49bd79ccSDebojyoti Ghosh   else        {bs = p; bs2 = bs*bs; }
411*49bd79ccSDebojyoti Ghosh 
412*49bd79ccSDebojyoti Ghosh   if (!m) PetscFunctionReturn(0);
413*49bd79ccSDebojyoti Ghosh 
414*49bd79ccSDebojyoti Ghosh   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
415*49bd79ccSDebojyoti Ghosh   idiag = kaij->ibdiag;
416*49bd79ccSDebojyoti Ghosh   diag  = a->diag;
417*49bd79ccSDebojyoti Ghosh 
418*49bd79ccSDebojyoti Ghosh   if (!kaij->sor.setup) {
419*49bd79ccSDebojyoti Ghosh     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
420*49bd79ccSDebojyoti Ghosh     kaij->sor.setup = PETSC_TRUE;
421*49bd79ccSDebojyoti Ghosh   }
422*49bd79ccSDebojyoti Ghosh   y     = kaij->sor.y;
423*49bd79ccSDebojyoti Ghosh   w     = kaij->sor.w;
424*49bd79ccSDebojyoti Ghosh   work  = kaij->sor.work;
425*49bd79ccSDebojyoti Ghosh   t     = kaij->sor.t;
426*49bd79ccSDebojyoti Ghosh   arr   = kaij->sor.arr;
427*49bd79ccSDebojyoti Ghosh 
428*49bd79ccSDebojyoti Ghosh   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
429*49bd79ccSDebojyoti Ghosh   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
430*49bd79ccSDebojyoti Ghosh 
431*49bd79ccSDebojyoti Ghosh   if (flag & SOR_ZERO_INITIAL_GUESS) {
432*49bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
433*49bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
434*49bd79ccSDebojyoti Ghosh       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
435*49bd79ccSDebojyoti Ghosh       i2     =  bs;
436*49bd79ccSDebojyoti Ghosh       idiag  += bs2;
437*49bd79ccSDebojyoti Ghosh       for (i=1; i<m; i++) {
438*49bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
439*49bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
440*49bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
441*49bd79ccSDebojyoti Ghosh 
442*49bd79ccSDebojyoti Ghosh         if (T) {                /* b - T (Arow * x) */
443*49bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) w[k] = 0;
444*49bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
445*49bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
446*49bd79ccSDebojyoti Ghosh           }
447*49bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
448*49bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
449*49bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
450*49bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
451*49bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
452*49bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
453*49bd79ccSDebojyoti Ghosh           }
454*49bd79ccSDebojyoti Ghosh         } else {
455*49bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
456*49bd79ccSDebojyoti Ghosh         }
457*49bd79ccSDebojyoti Ghosh 
458*49bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
459*49bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
460*49bd79ccSDebojyoti Ghosh 
461*49bd79ccSDebojyoti Ghosh         idiag += bs2;
462*49bd79ccSDebojyoti Ghosh         i2    += bs;
463*49bd79ccSDebojyoti Ghosh       }
464*49bd79ccSDebojyoti Ghosh       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
465*49bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
466*49bd79ccSDebojyoti Ghosh       xb = t;
467*49bd79ccSDebojyoti Ghosh     } else xb = b;
468*49bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
469*49bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
470*49bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
471*49bd79ccSDebojyoti Ghosh       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
472*49bd79ccSDebojyoti Ghosh       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
473*49bd79ccSDebojyoti Ghosh       i2    -= bs;
474*49bd79ccSDebojyoti Ghosh       idiag -= bs2;
475*49bd79ccSDebojyoti Ghosh       for (i=m-2; i>=0; i--) {
476*49bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1 ;
477*49bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
478*49bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
479*49bd79ccSDebojyoti Ghosh 
480*49bd79ccSDebojyoti Ghosh         if (T) {                /* FIXME: This branch untested */
481*49bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
482*49bd79ccSDebojyoti Ghosh           /* copy all rows of x that are needed into contiguous space */
483*49bd79ccSDebojyoti Ghosh           workt = work;
484*49bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
485*49bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
486*49bd79ccSDebojyoti Ghosh             workt += bs;
487*49bd79ccSDebojyoti Ghosh           }
488*49bd79ccSDebojyoti Ghosh           arrt = arr;
489*49bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
490*49bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
491*49bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
492*49bd79ccSDebojyoti Ghosh             arrt += bs2;
493*49bd79ccSDebojyoti Ghosh           }
494*49bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
495*49bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
496*49bd79ccSDebojyoti Ghosh           for (k=0; k<bs; k++) w[k] = t[i2+k];
497*49bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
498*49bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
499*49bd79ccSDebojyoti Ghosh           }
500*49bd79ccSDebojyoti Ghosh         }
501*49bd79ccSDebojyoti Ghosh 
502*49bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
503*49bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
504*49bd79ccSDebojyoti Ghosh 
505*49bd79ccSDebojyoti Ghosh         idiag -= bs2;
506*49bd79ccSDebojyoti Ghosh         i2    -= bs;
507*49bd79ccSDebojyoti Ghosh       }
508*49bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
509*49bd79ccSDebojyoti Ghosh     }
510*49bd79ccSDebojyoti Ghosh     its--;
511*49bd79ccSDebojyoti Ghosh   }
512*49bd79ccSDebojyoti Ghosh   while (its--) {               /* FIXME: This branch not updated */
513*49bd79ccSDebojyoti Ghosh     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
514*49bd79ccSDebojyoti Ghosh       i2     =  0;
515*49bd79ccSDebojyoti Ghosh       idiag  = kaij->ibdiag;
516*49bd79ccSDebojyoti Ghosh       for (i=0; i<m; i++) {
517*49bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
518*49bd79ccSDebojyoti Ghosh 
519*49bd79ccSDebojyoti Ghosh         v  = aa + ai[i];
520*49bd79ccSDebojyoti Ghosh         vi = aj + ai[i];
521*49bd79ccSDebojyoti Ghosh         nz = diag[i] - ai[i];
522*49bd79ccSDebojyoti Ghosh         workt = work;
523*49bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
524*49bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
525*49bd79ccSDebojyoti Ghosh           workt += bs;
526*49bd79ccSDebojyoti Ghosh         }
527*49bd79ccSDebojyoti Ghosh         arrt = arr;
528*49bd79ccSDebojyoti Ghosh         if (T) {
529*49bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
530*49bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
531*49bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
532*49bd79ccSDebojyoti Ghosh             arrt += bs2;
533*49bd79ccSDebojyoti Ghosh           }
534*49bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
535*49bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
536*49bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
537*49bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
538*49bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
539*49bd79ccSDebojyoti Ghosh             arrt += bs2;
540*49bd79ccSDebojyoti Ghosh           }
541*49bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
542*49bd79ccSDebojyoti Ghosh         }
543*49bd79ccSDebojyoti Ghosh         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
544*49bd79ccSDebojyoti Ghosh 
545*49bd79ccSDebojyoti Ghosh         v  = aa + diag[i] + 1;
546*49bd79ccSDebojyoti Ghosh         vi = aj + diag[i] + 1;
547*49bd79ccSDebojyoti Ghosh         nz = ai[i+1] - diag[i] - 1;
548*49bd79ccSDebojyoti Ghosh         workt = work;
549*49bd79ccSDebojyoti Ghosh         for (j=0; j<nz; j++) {
550*49bd79ccSDebojyoti Ghosh           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
551*49bd79ccSDebojyoti Ghosh           workt += bs;
552*49bd79ccSDebojyoti Ghosh         }
553*49bd79ccSDebojyoti Ghosh         arrt = arr;
554*49bd79ccSDebojyoti Ghosh         if (T) {
555*49bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
556*49bd79ccSDebojyoti Ghosh             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
557*49bd79ccSDebojyoti Ghosh             for (k=0; k<bs2; k++) arrt[k] *= v[j];
558*49bd79ccSDebojyoti Ghosh             arrt += bs2;
559*49bd79ccSDebojyoti Ghosh           }
560*49bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
561*49bd79ccSDebojyoti Ghosh         } else if (kaij->isTI) {
562*49bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
563*49bd79ccSDebojyoti Ghosh             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
564*49bd79ccSDebojyoti Ghosh             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
565*49bd79ccSDebojyoti Ghosh             arrt += bs2;
566*49bd79ccSDebojyoti Ghosh           }
567*49bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
568*49bd79ccSDebojyoti Ghosh         }
569*49bd79ccSDebojyoti Ghosh 
570*49bd79ccSDebojyoti Ghosh         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
571*49bd79ccSDebojyoti Ghosh         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
572*49bd79ccSDebojyoti Ghosh 
573*49bd79ccSDebojyoti Ghosh         idiag += bs2;
574*49bd79ccSDebojyoti Ghosh         i2    += bs;
575*49bd79ccSDebojyoti Ghosh       }
576*49bd79ccSDebojyoti Ghosh       xb = t;
577*49bd79ccSDebojyoti Ghosh     }
578*49bd79ccSDebojyoti Ghosh     else xb = b;
579*49bd79ccSDebojyoti Ghosh     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
580*49bd79ccSDebojyoti Ghosh       idiag = kaij->ibdiag+bs2*(m-1);
581*49bd79ccSDebojyoti Ghosh       i2    = bs * (m-1);
582*49bd79ccSDebojyoti Ghosh       if (xb == b) {
583*49bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
584*49bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
585*49bd79ccSDebojyoti Ghosh 
586*49bd79ccSDebojyoti Ghosh           v  = aa + ai[i];
587*49bd79ccSDebojyoti Ghosh           vi = aj + ai[i];
588*49bd79ccSDebojyoti Ghosh           nz = diag[i] - ai[i];
589*49bd79ccSDebojyoti Ghosh           workt = work;
590*49bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
591*49bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
592*49bd79ccSDebojyoti Ghosh             workt += bs;
593*49bd79ccSDebojyoti Ghosh           }
594*49bd79ccSDebojyoti Ghosh           arrt = arr;
595*49bd79ccSDebojyoti Ghosh           if (T) {
596*49bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
597*49bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
598*49bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
599*49bd79ccSDebojyoti Ghosh               arrt += bs2;
600*49bd79ccSDebojyoti Ghosh             }
601*49bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
602*49bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
603*49bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
604*49bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
605*49bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
606*49bd79ccSDebojyoti Ghosh               arrt += bs2;
607*49bd79ccSDebojyoti Ghosh             }
608*49bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
609*49bd79ccSDebojyoti Ghosh           }
610*49bd79ccSDebojyoti Ghosh 
611*49bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
612*49bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
613*49bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
614*49bd79ccSDebojyoti Ghosh           workt = work;
615*49bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
616*49bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
617*49bd79ccSDebojyoti Ghosh             workt += bs;
618*49bd79ccSDebojyoti Ghosh           }
619*49bd79ccSDebojyoti Ghosh           arrt = arr;
620*49bd79ccSDebojyoti Ghosh           if (T) {
621*49bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
622*49bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
623*49bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
624*49bd79ccSDebojyoti Ghosh               arrt += bs2;
625*49bd79ccSDebojyoti Ghosh             }
626*49bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
627*49bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
628*49bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
629*49bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
630*49bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
631*49bd79ccSDebojyoti Ghosh               arrt += bs2;
632*49bd79ccSDebojyoti Ghosh             }
633*49bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
634*49bd79ccSDebojyoti Ghosh           }
635*49bd79ccSDebojyoti Ghosh 
636*49bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
637*49bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
638*49bd79ccSDebojyoti Ghosh         }
639*49bd79ccSDebojyoti Ghosh       } else {
640*49bd79ccSDebojyoti Ghosh         for (i=m-1; i>=0; i--) {
641*49bd79ccSDebojyoti Ghosh           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
642*49bd79ccSDebojyoti Ghosh           v  = aa + diag[i] + 1;
643*49bd79ccSDebojyoti Ghosh           vi = aj + diag[i] + 1;
644*49bd79ccSDebojyoti Ghosh           nz = ai[i+1] - diag[i] - 1;
645*49bd79ccSDebojyoti Ghosh           workt = work;
646*49bd79ccSDebojyoti Ghosh           for (j=0; j<nz; j++) {
647*49bd79ccSDebojyoti Ghosh             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
648*49bd79ccSDebojyoti Ghosh             workt += bs;
649*49bd79ccSDebojyoti Ghosh           }
650*49bd79ccSDebojyoti Ghosh           arrt = arr;
651*49bd79ccSDebojyoti Ghosh           if (T) {
652*49bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
653*49bd79ccSDebojyoti Ghosh               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
654*49bd79ccSDebojyoti Ghosh               for (k=0; k<bs2; k++) arrt[k] *= v[j];
655*49bd79ccSDebojyoti Ghosh               arrt += bs2;
656*49bd79ccSDebojyoti Ghosh             }
657*49bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
658*49bd79ccSDebojyoti Ghosh           } else if (kaij->isTI) {
659*49bd79ccSDebojyoti Ghosh             for (j=0; j<nz; j++) {
660*49bd79ccSDebojyoti Ghosh               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
661*49bd79ccSDebojyoti Ghosh               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
662*49bd79ccSDebojyoti Ghosh               arrt += bs2;
663*49bd79ccSDebojyoti Ghosh             }
664*49bd79ccSDebojyoti Ghosh             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
665*49bd79ccSDebojyoti Ghosh           }
666*49bd79ccSDebojyoti Ghosh           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
667*49bd79ccSDebojyoti Ghosh           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
668*49bd79ccSDebojyoti Ghosh         }
669*49bd79ccSDebojyoti Ghosh         idiag -= bs2;
670*49bd79ccSDebojyoti Ghosh         i2    -= bs;
671*49bd79ccSDebojyoti Ghosh       }
672*49bd79ccSDebojyoti Ghosh       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
673*49bd79ccSDebojyoti Ghosh     }
674*49bd79ccSDebojyoti Ghosh   }
675*49bd79ccSDebojyoti Ghosh 
676*49bd79ccSDebojyoti Ghosh   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
677*49bd79ccSDebojyoti Ghosh   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
678*49bd79ccSDebojyoti Ghosh 
679*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
680*49bd79ccSDebojyoti Ghosh }
681*49bd79ccSDebojyoti Ghosh 
682*49bd79ccSDebojyoti Ghosh /*===================================================================================*/
683*49bd79ccSDebojyoti Ghosh 
684*49bd79ccSDebojyoti Ghosh PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
685*49bd79ccSDebojyoti Ghosh {
686*49bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
687*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
688*49bd79ccSDebojyoti Ghosh 
689*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
690*49bd79ccSDebojyoti Ghosh   if (!yy) {
691*49bd79ccSDebojyoti Ghosh     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
692*49bd79ccSDebojyoti Ghosh   } else {
693*49bd79ccSDebojyoti Ghosh     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
694*49bd79ccSDebojyoti Ghosh   }
695*49bd79ccSDebojyoti Ghosh   /* start the scatter */
696*49bd79ccSDebojyoti Ghosh   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
697*49bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
698*49bd79ccSDebojyoti Ghosh   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
699*49bd79ccSDebojyoti Ghosh   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
700*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
701*49bd79ccSDebojyoti Ghosh }
702*49bd79ccSDebojyoti Ghosh 
703*49bd79ccSDebojyoti Ghosh PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
704*49bd79ccSDebojyoti Ghosh {
705*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
706*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
707*49bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
708*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
709*49bd79ccSDebojyoti Ghosh }
710*49bd79ccSDebojyoti Ghosh 
711*49bd79ccSDebojyoti Ghosh PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
712*49bd79ccSDebojyoti Ghosh {
713*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
714*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
715*49bd79ccSDebojyoti Ghosh   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
716*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
717*49bd79ccSDebojyoti Ghosh }
718*49bd79ccSDebojyoti Ghosh 
719*49bd79ccSDebojyoti Ghosh PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
720*49bd79ccSDebojyoti Ghosh {
721*49bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
722*49bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
723*49bd79ccSDebojyoti Ghosh 
724*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
725*49bd79ccSDebojyoti Ghosh   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
726*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
727*49bd79ccSDebojyoti Ghosh }
728*49bd79ccSDebojyoti Ghosh 
729*49bd79ccSDebojyoti Ghosh /* ----------------------------------------------------------------*/
730*49bd79ccSDebojyoti Ghosh 
731*49bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
732*49bd79ccSDebojyoti Ghosh {
733*49bd79ccSDebojyoti Ghosh   Mat_SeqKAIJ     *b    = (Mat_SeqKAIJ*) A->data;
734*49bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr,diag;
735*49bd79ccSDebojyoti Ghosh   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
736*49bd79ccSDebojyoti Ghosh   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
737*49bd79ccSDebojyoti Ghosh 
738*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
739*49bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
740*49bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
741*49bd79ccSDebojyoti Ghosh   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
742*49bd79ccSDebojyoti Ghosh 
743*49bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
744*49bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
745*49bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
746*49bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
747*49bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
748*49bd79ccSDebojyoti Ghosh   }
749*49bd79ccSDebojyoti Ghosh 
750*49bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
751*49bd79ccSDebojyoti Ghosh     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
752*49bd79ccSDebojyoti Ghosh     diag  = PETSC_FALSE;
753*49bd79ccSDebojyoti Ghosh     c     = nzaij;
754*49bd79ccSDebojyoti Ghosh     for (i=0; i<nzaij; i++) {
755*49bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
756*49bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
757*49bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
758*49bd79ccSDebojyoti Ghosh         c = i;
759*49bd79ccSDebojyoti Ghosh       }
760*49bd79ccSDebojyoti Ghosh     }
761*49bd79ccSDebojyoti Ghosh   } else nzaij = c = 0;
762*49bd79ccSDebojyoti Ghosh 
763*49bd79ccSDebojyoti Ghosh   /* calculate size of row */
764*49bd79ccSDebojyoti Ghosh   nz = 0;
765*49bd79ccSDebojyoti Ghosh   if (S)            nz += q;
766*49bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
767*49bd79ccSDebojyoti Ghosh 
768*49bd79ccSDebojyoti Ghosh   if (cols || values) {
769*49bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
770*49bd79ccSDebojyoti Ghosh     if (b->isTI) {
771*49bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
772*49bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
773*49bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
774*49bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vaij[i] : 0);
775*49bd79ccSDebojyoti Ghosh         }
776*49bd79ccSDebojyoti Ghosh       }
777*49bd79ccSDebojyoti Ghosh     } else if (T) {
778*49bd79ccSDebojyoti Ghosh       for (i=0; i<nzaij; i++) {
779*49bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
780*49bd79ccSDebojyoti Ghosh           idx[i*q+j] = colsaij[i]*q+j;
781*49bd79ccSDebojyoti Ghosh           v[i*q+j]   = vaij[i]*T[s+j*p];
782*49bd79ccSDebojyoti Ghosh         }
783*49bd79ccSDebojyoti Ghosh       }
784*49bd79ccSDebojyoti Ghosh     }
785*49bd79ccSDebojyoti Ghosh     if (S) {
786*49bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
787*49bd79ccSDebojyoti Ghosh         idx[c*q+j] = r*q+j;
788*49bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
789*49bd79ccSDebojyoti Ghosh       }
790*49bd79ccSDebojyoti Ghosh     }
791*49bd79ccSDebojyoti Ghosh   }
792*49bd79ccSDebojyoti Ghosh 
793*49bd79ccSDebojyoti Ghosh   if (ncols)    *ncols  = nz;
794*49bd79ccSDebojyoti Ghosh   if (cols)     *cols   = idx;
795*49bd79ccSDebojyoti Ghosh   if (values)   *values = v;
796*49bd79ccSDebojyoti Ghosh 
797*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
798*49bd79ccSDebojyoti Ghosh }
799*49bd79ccSDebojyoti Ghosh 
800*49bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
801*49bd79ccSDebojyoti Ghosh {
802*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
803*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
804*49bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
805*49bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
806*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
807*49bd79ccSDebojyoti Ghosh }
808*49bd79ccSDebojyoti Ghosh 
809*49bd79ccSDebojyoti Ghosh PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
810*49bd79ccSDebojyoti Ghosh {
811*49bd79ccSDebojyoti Ghosh   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
812*49bd79ccSDebojyoti Ghosh   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
813*49bd79ccSDebojyoti Ghosh   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
814*49bd79ccSDebojyoti Ghosh   Mat             AIJ     = b->A;
815*49bd79ccSDebojyoti Ghosh   PetscErrorCode  ierr;
816*49bd79ccSDebojyoti Ghosh   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
817*49bd79ccSDebojyoti Ghosh   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
818*49bd79ccSDebojyoti Ghosh   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
819*49bd79ccSDebojyoti Ghosh   PetscBool       diag;
820*49bd79ccSDebojyoti Ghosh 
821*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
822*49bd79ccSDebojyoti Ghosh   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
823*49bd79ccSDebojyoti Ghosh   b->getrowactive = PETSC_TRUE;
824*49bd79ccSDebojyoti Ghosh   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
825*49bd79ccSDebojyoti Ghosh   lrow = row - rstart;
826*49bd79ccSDebojyoti Ghosh 
827*49bd79ccSDebojyoti Ghosh   if ((!S) && (!T) && (!b->isTI)) {
828*49bd79ccSDebojyoti Ghosh     if (ncols)    *ncols  = 0;
829*49bd79ccSDebojyoti Ghosh     if (cols)     *cols   = NULL;
830*49bd79ccSDebojyoti Ghosh     if (values)   *values = NULL;
831*49bd79ccSDebojyoti Ghosh     PetscFunctionReturn(0);
832*49bd79ccSDebojyoti Ghosh   }
833*49bd79ccSDebojyoti Ghosh 
834*49bd79ccSDebojyoti Ghosh   r = lrow/p;
835*49bd79ccSDebojyoti Ghosh   s = lrow%p;
836*49bd79ccSDebojyoti Ghosh 
837*49bd79ccSDebojyoti Ghosh   if (T || b->isTI) {
838*49bd79ccSDebojyoti Ghosh     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
839*49bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
840*49bd79ccSDebojyoti Ghosh     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
841*49bd79ccSDebojyoti Ghosh 
842*49bd79ccSDebojyoti Ghosh     diag  = PETSC_FALSE;
843*49bd79ccSDebojyoti Ghosh     c     = ncolsaij + ncolsoaij;
844*49bd79ccSDebojyoti Ghosh     for (i=0; i<ncolsaij; i++) {
845*49bd79ccSDebojyoti Ghosh       /* check if this row contains a diagonal entry */
846*49bd79ccSDebojyoti Ghosh       if (colsaij[i] == r) {
847*49bd79ccSDebojyoti Ghosh         diag = PETSC_TRUE;
848*49bd79ccSDebojyoti Ghosh         c = i;
849*49bd79ccSDebojyoti Ghosh       }
850*49bd79ccSDebojyoti Ghosh     }
851*49bd79ccSDebojyoti Ghosh   } else c = 0;
852*49bd79ccSDebojyoti Ghosh 
853*49bd79ccSDebojyoti Ghosh   /* calculate size of row */
854*49bd79ccSDebojyoti Ghosh   nz = 0;
855*49bd79ccSDebojyoti Ghosh   if (S)            nz += q;
856*49bd79ccSDebojyoti Ghosh   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
857*49bd79ccSDebojyoti Ghosh 
858*49bd79ccSDebojyoti Ghosh   if (cols || values) {
859*49bd79ccSDebojyoti Ghosh     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
860*49bd79ccSDebojyoti Ghosh     if (b->isTI) {
861*49bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
862*49bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
863*49bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
864*49bd79ccSDebojyoti Ghosh           v[i*q+j]   = (j==s ? vals[i] : 0.0);
865*49bd79ccSDebojyoti Ghosh         }
866*49bd79ccSDebojyoti Ghosh       }
867*49bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
868*49bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
869*49bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
870*49bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
871*49bd79ccSDebojyoti Ghosh         }
872*49bd79ccSDebojyoti Ghosh       }
873*49bd79ccSDebojyoti Ghosh     } else if (T) {
874*49bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsaij; i++) {
875*49bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
876*49bd79ccSDebojyoti Ghosh           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
877*49bd79ccSDebojyoti Ghosh           v[i*q+j]   = vals[i]*T[s+j*p];
878*49bd79ccSDebojyoti Ghosh         }
879*49bd79ccSDebojyoti Ghosh       }
880*49bd79ccSDebojyoti Ghosh       for (i=0; i<ncolsoaij; i++) {
881*49bd79ccSDebojyoti Ghosh         for (j=0; j<q; j++) {
882*49bd79ccSDebojyoti Ghosh           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
883*49bd79ccSDebojyoti Ghosh           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
884*49bd79ccSDebojyoti Ghosh         }
885*49bd79ccSDebojyoti Ghosh       }
886*49bd79ccSDebojyoti Ghosh     }
887*49bd79ccSDebojyoti Ghosh     if (S) {
888*49bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
889*49bd79ccSDebojyoti Ghosh         idx[c*q+j] = (r+rstart/p)*q+j;
890*49bd79ccSDebojyoti Ghosh         v[c*q+j]  += S[s+j*p];
891*49bd79ccSDebojyoti Ghosh       }
892*49bd79ccSDebojyoti Ghosh     }
893*49bd79ccSDebojyoti Ghosh   }
894*49bd79ccSDebojyoti Ghosh 
895*49bd79ccSDebojyoti Ghosh   if (ncols)  *ncols  = nz;
896*49bd79ccSDebojyoti Ghosh   if (cols)   *cols   = idx;
897*49bd79ccSDebojyoti Ghosh   if (values) *values = v;
898*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
899*49bd79ccSDebojyoti Ghosh }
900*49bd79ccSDebojyoti Ghosh 
901*49bd79ccSDebojyoti Ghosh PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
902*49bd79ccSDebojyoti Ghosh {
903*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
904*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
905*49bd79ccSDebojyoti Ghosh   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
906*49bd79ccSDebojyoti Ghosh   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
907*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
908*49bd79ccSDebojyoti Ghosh }
909*49bd79ccSDebojyoti Ghosh 
910*49bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
911*49bd79ccSDebojyoti Ghosh {
912*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
913*49bd79ccSDebojyoti Ghosh   Mat            A;
914*49bd79ccSDebojyoti Ghosh 
915*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
916*49bd79ccSDebojyoti Ghosh   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
917*49bd79ccSDebojyoti Ghosh   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
918*49bd79ccSDebojyoti Ghosh   ierr = MatDestroy(&A);CHKERRQ(ierr);
919*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
920*49bd79ccSDebojyoti Ghosh }
921*49bd79ccSDebojyoti Ghosh 
922*49bd79ccSDebojyoti Ghosh /* ---------------------------------------------------------------------------------- */
923*49bd79ccSDebojyoti Ghosh /*@C
924*49bd79ccSDebojyoti Ghosh   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
925*49bd79ccSDebojyoti Ghosh 
926*49bd79ccSDebojyoti Ghosh     [I \otimes S + A \otimes T]
927*49bd79ccSDebojyoti Ghosh 
928*49bd79ccSDebojyoti Ghosh   where
929*49bd79ccSDebojyoti Ghosh     S is a dense (p \times q) matrix
930*49bd79ccSDebojyoti Ghosh     T is a dense (p \times q) matrix
931*49bd79ccSDebojyoti Ghosh     A is an AIJ  (n \times n) matrix
932*49bd79ccSDebojyoti Ghosh     I is the identity matrix
933*49bd79ccSDebojyoti Ghosh   The resulting matrix is (np \times nq)
934*49bd79ccSDebojyoti Ghosh 
935*49bd79ccSDebojyoti Ghosh   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
936*49bd79ccSDebojyoti Ghosh   S is always stored independently on all processes as a PetscScalar array in column-major format.
937*49bd79ccSDebojyoti Ghosh 
938*49bd79ccSDebojyoti Ghosh   Collective
939*49bd79ccSDebojyoti Ghosh 
940*49bd79ccSDebojyoti Ghosh   Input Parameters:
941*49bd79ccSDebojyoti Ghosh + A - the AIJ matrix
942*49bd79ccSDebojyoti Ghosh . S - the S matrix, stored as a PetscScalar array (column-major)
943*49bd79ccSDebojyoti Ghosh . T - the T matrix, stored as a PetscScalar array (column-major)
944*49bd79ccSDebojyoti Ghosh . p - number of rows in S and T
945*49bd79ccSDebojyoti Ghosh - q - number of columns in S and T
946*49bd79ccSDebojyoti Ghosh 
947*49bd79ccSDebojyoti Ghosh   Output Parameter:
948*49bd79ccSDebojyoti Ghosh . kaij - the new KAIJ matrix
949*49bd79ccSDebojyoti Ghosh 
950*49bd79ccSDebojyoti Ghosh   Operations provided:
951*49bd79ccSDebojyoti Ghosh + MatMult
952*49bd79ccSDebojyoti Ghosh . MatMultAdd
953*49bd79ccSDebojyoti Ghosh . MatInvertBlockDiagonal
954*49bd79ccSDebojyoti Ghosh - MatView
955*49bd79ccSDebojyoti Ghosh 
956*49bd79ccSDebojyoti Ghosh   Level: advanced
957*49bd79ccSDebojyoti Ghosh 
958*49bd79ccSDebojyoti Ghosh .seealso: MatKAIJGetAIJ(), MATKAIJ
959*49bd79ccSDebojyoti Ghosh @*/
960*49bd79ccSDebojyoti Ghosh PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
961*49bd79ccSDebojyoti Ghosh {
962*49bd79ccSDebojyoti Ghosh   PetscErrorCode ierr;
963*49bd79ccSDebojyoti Ghosh   PetscMPIInt    size;
964*49bd79ccSDebojyoti Ghosh   PetscInt       n,i,j;
965*49bd79ccSDebojyoti Ghosh   Mat            B;
966*49bd79ccSDebojyoti Ghosh   PetscBool      isTI = PETSC_FALSE;
967*49bd79ccSDebojyoti Ghosh 
968*49bd79ccSDebojyoti Ghosh   PetscFunctionBegin;
969*49bd79ccSDebojyoti Ghosh 
970*49bd79ccSDebojyoti Ghosh   /* check if T is an identity matrix */
971*49bd79ccSDebojyoti Ghosh   if (T && (p == q)) {
972*49bd79ccSDebojyoti Ghosh     isTI = PETSC_TRUE;
973*49bd79ccSDebojyoti Ghosh     for (i=0; i<p; i++) {
974*49bd79ccSDebojyoti Ghosh       for (j=0; j<q; j++) {
975*49bd79ccSDebojyoti Ghosh         if (i == j) {
976*49bd79ccSDebojyoti Ghosh           /* diagonal term must be 1 */
977*49bd79ccSDebojyoti Ghosh           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
978*49bd79ccSDebojyoti Ghosh         } else {
979*49bd79ccSDebojyoti Ghosh           /* off-diagonal term must be 0 */
980*49bd79ccSDebojyoti Ghosh           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
981*49bd79ccSDebojyoti Ghosh         }
982*49bd79ccSDebojyoti Ghosh       }
983*49bd79ccSDebojyoti Ghosh     }
984*49bd79ccSDebojyoti Ghosh   }
985*49bd79ccSDebojyoti Ghosh 
986*49bd79ccSDebojyoti Ghosh   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
987*49bd79ccSDebojyoti Ghosh 
988*49bd79ccSDebojyoti Ghosh   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
989*49bd79ccSDebojyoti Ghosh   ierr = MatSetSizes(B,p*A->rmap->n,q*A->cmap->n,p*A->rmap->N,q*A->cmap->N);CHKERRQ(ierr);
990*49bd79ccSDebojyoti Ghosh   ierr = PetscLayoutSetBlockSize(B->rmap,p);CHKERRQ(ierr);
991*49bd79ccSDebojyoti Ghosh   ierr = PetscLayoutSetBlockSize(B->cmap,q);CHKERRQ(ierr);
992*49bd79ccSDebojyoti Ghosh   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
993*49bd79ccSDebojyoti Ghosh   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
994*49bd79ccSDebojyoti Ghosh 
995*49bd79ccSDebojyoti Ghosh   B->assembled = PETSC_TRUE;
996*49bd79ccSDebojyoti Ghosh   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
997*49bd79ccSDebojyoti Ghosh 
998*49bd79ccSDebojyoti Ghosh   if (size == 1) {
999*49bd79ccSDebojyoti Ghosh     Mat_SeqKAIJ *b;
1000*49bd79ccSDebojyoti Ghosh 
1001*49bd79ccSDebojyoti Ghosh     ierr = MatSetType(B,MATSEQKAIJ);CHKERRQ(ierr);
1002*49bd79ccSDebojyoti Ghosh 
1003*49bd79ccSDebojyoti Ghosh     B->ops->setup   = NULL;
1004*49bd79ccSDebojyoti Ghosh     B->ops->destroy = MatDestroy_SeqKAIJ;
1005*49bd79ccSDebojyoti Ghosh     B->ops->view    = MatView_SeqKAIJ;
1006*49bd79ccSDebojyoti Ghosh     b               = (Mat_SeqKAIJ*)B->data;
1007*49bd79ccSDebojyoti Ghosh     b->p            = p;
1008*49bd79ccSDebojyoti Ghosh     b->q            = q;
1009*49bd79ccSDebojyoti Ghosh     b->AIJ          = A;
1010*49bd79ccSDebojyoti Ghosh     b->isTI         = isTI;
1011*49bd79ccSDebojyoti Ghosh     if (S) {
1012*49bd79ccSDebojyoti Ghosh       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr);
1013*49bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1014*49bd79ccSDebojyoti Ghosh     } else  b->S = NULL;
1015*49bd79ccSDebojyoti Ghosh     if (T && (!isTI)) {
1016*49bd79ccSDebojyoti Ghosh       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr);
1017*49bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1018*49bd79ccSDebojyoti Ghosh     } else b->T = NULL;
1019*49bd79ccSDebojyoti Ghosh 
1020*49bd79ccSDebojyoti Ghosh     B->ops->mult                = MatMult_SeqKAIJ_N;
1021*49bd79ccSDebojyoti Ghosh     B->ops->multadd             = MatMultAdd_SeqKAIJ_N;
1022*49bd79ccSDebojyoti Ghosh     B->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
1023*49bd79ccSDebojyoti Ghosh     B->ops->getrow              = MatGetRow_SeqKAIJ;
1024*49bd79ccSDebojyoti Ghosh     B->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1025*49bd79ccSDebojyoti Ghosh     B->ops->sor                 = MatSOR_SeqKAIJ;
1026*49bd79ccSDebojyoti Ghosh 
1027*49bd79ccSDebojyoti Ghosh     /*
1028*49bd79ccSDebojyoti Ghosh     This is commented out since MATKAIJ will use the basic MatConvert (which uses MatGetRow()).
1029*49bd79ccSDebojyoti Ghosh     ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqkaij_seqaij_C",MatConvert_SeqKAIJ_SeqAIJ);CHKERRQ(ierr);
1030*49bd79ccSDebojyoti Ghosh     */
1031*49bd79ccSDebojyoti Ghosh 
1032*49bd79ccSDebojyoti Ghosh   } else {
1033*49bd79ccSDebojyoti Ghosh     Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
1034*49bd79ccSDebojyoti Ghosh     Mat_MPIKAIJ *b;
1035*49bd79ccSDebojyoti Ghosh     IS          from,to;
1036*49bd79ccSDebojyoti Ghosh     Vec         gvec;
1037*49bd79ccSDebojyoti Ghosh 
1038*49bd79ccSDebojyoti Ghosh     ierr = MatSetType(B,MATMPIKAIJ);CHKERRQ(ierr);
1039*49bd79ccSDebojyoti Ghosh 
1040*49bd79ccSDebojyoti Ghosh     B->ops->setup   = NULL;
1041*49bd79ccSDebojyoti Ghosh     B->ops->destroy = MatDestroy_MPIKAIJ;
1042*49bd79ccSDebojyoti Ghosh     B->ops->view    = MatView_MPIKAIJ;
1043*49bd79ccSDebojyoti Ghosh 
1044*49bd79ccSDebojyoti Ghosh     b       = (Mat_MPIKAIJ*)B->data;
1045*49bd79ccSDebojyoti Ghosh     b->p    = p;
1046*49bd79ccSDebojyoti Ghosh     b->q    = q;
1047*49bd79ccSDebojyoti Ghosh     b->A    = A;
1048*49bd79ccSDebojyoti Ghosh     b->isTI = isTI;
1049*49bd79ccSDebojyoti Ghosh     if (S) {
1050*49bd79ccSDebojyoti Ghosh       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr);
1051*49bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1052*49bd79ccSDebojyoti Ghosh     } else  b->S = NULL;
1053*49bd79ccSDebojyoti Ghosh     if (T &&(!isTI)) {
1054*49bd79ccSDebojyoti Ghosh       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr);
1055*49bd79ccSDebojyoti Ghosh       ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1056*49bd79ccSDebojyoti Ghosh     } else b->T = NULL;
1057*49bd79ccSDebojyoti Ghosh 
1058*49bd79ccSDebojyoti Ghosh     ierr = MatCreateKAIJ(mpiaij->A,p,q,S   ,T,&b->AIJ);CHKERRQ(ierr);
1059*49bd79ccSDebojyoti Ghosh     ierr = MatCreateKAIJ(mpiaij->B,p,q,NULL,T,&b->OAIJ);CHKERRQ(ierr);
1060*49bd79ccSDebojyoti Ghosh 
1061*49bd79ccSDebojyoti Ghosh     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1062*49bd79ccSDebojyoti Ghosh     ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
1063*49bd79ccSDebojyoti Ghosh     ierr = VecSetSizes(b->w,n*q,n*q);CHKERRQ(ierr);
1064*49bd79ccSDebojyoti Ghosh     ierr = VecSetBlockSize(b->w,q);CHKERRQ(ierr);
1065*49bd79ccSDebojyoti Ghosh     ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
1066*49bd79ccSDebojyoti Ghosh 
1067*49bd79ccSDebojyoti Ghosh     /* create two temporary Index sets for build scatter gather */
1068*49bd79ccSDebojyoti Ghosh     ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
1069*49bd79ccSDebojyoti Ghosh     ierr = ISCreateStride(PETSC_COMM_SELF,n*q,0,1,&to);CHKERRQ(ierr);
1070*49bd79ccSDebojyoti Ghosh 
1071*49bd79ccSDebojyoti Ghosh     /* create temporary global vector to generate scatter context */
1072*49bd79ccSDebojyoti Ghosh     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),q,q*A->cmap->n,q*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
1073*49bd79ccSDebojyoti Ghosh 
1074*49bd79ccSDebojyoti Ghosh     /* generate the scatter context */
1075*49bd79ccSDebojyoti Ghosh     ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1076*49bd79ccSDebojyoti Ghosh 
1077*49bd79ccSDebojyoti Ghosh     ierr = ISDestroy(&from);CHKERRQ(ierr);
1078*49bd79ccSDebojyoti Ghosh     ierr = ISDestroy(&to);CHKERRQ(ierr);
1079*49bd79ccSDebojyoti Ghosh     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
1080*49bd79ccSDebojyoti Ghosh 
1081*49bd79ccSDebojyoti Ghosh     B->ops->mult                = MatMult_MPIKAIJ_dof;
1082*49bd79ccSDebojyoti Ghosh     B->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
1083*49bd79ccSDebojyoti Ghosh     B->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
1084*49bd79ccSDebojyoti Ghosh     B->ops->getrow              = MatGetRow_MPIKAIJ;
1085*49bd79ccSDebojyoti Ghosh     B->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1086*49bd79ccSDebojyoti Ghosh     ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1087*49bd79ccSDebojyoti Ghosh   }
1088*49bd79ccSDebojyoti Ghosh   B->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1089*49bd79ccSDebojyoti Ghosh   B->assembled = PETSC_TRUE;
1090*49bd79ccSDebojyoti Ghosh   ierr  = MatSetUp(B);CHKERRQ(ierr);
1091*49bd79ccSDebojyoti Ghosh   *kaij = B;
1092*49bd79ccSDebojyoti Ghosh   ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
1093*49bd79ccSDebojyoti Ghosh 
1094*49bd79ccSDebojyoti Ghosh   PetscFunctionReturn(0);
1095*49bd79ccSDebojyoti Ghosh }
1096