xref: /petsc/src/mat/impls/kaij/kaij.c (revision 8b415be07902a23ec52b305a3785e96a3bfce180)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else {
64     *B = A;
65   }
66   PetscFunctionReturn(0);
67 }
68 
69 /*@C
70    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
71 
72    Not Collective, the entire S is stored and returned independently on all processes
73 
74    Input Parameter:
75 .  A - the KAIJ matrix
76 
77    Output Parameter:
78 .  S - the S matrix, in form of a scalar array in column-major format
79 
80    Notes:
81    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
82    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S.
83 
84    Level: advanced
85 
86 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
87 @*/
88 PetscErrorCode  MatKAIJGetS(Mat A,const PetscScalar **S)
89 {
90   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
91   PetscFunctionBegin;
92   *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
98 
99    Not Collective, the entire T is stored and returned independently on all processes
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameter:
105 .  T - the T matrix, in form of a scalar array in column-major format
106 
107    Notes:
108    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
109    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T.
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode  MatKAIJGetT(Mat A,const PetscScalar **T)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   *T = b->T;
120   PetscFunctionReturn(0);
121 }
122 
123 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
124 {
125   PetscErrorCode ierr;
126   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
127 
128   PetscFunctionBegin;
129   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
130   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
131   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
132   ierr = PetscFree(A->data);CHKERRQ(ierr);
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatSetUp_KAIJ(Mat A)
137 {
138   PetscFunctionBegin;
139   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateKAIJ() to create KAIJ matrices");
140   PetscFunctionReturn(0);
141 }
142 
143 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
144 {
145   PetscErrorCode ierr;
146   Mat            B;
147 
148   PetscFunctionBegin;
149   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
150   ierr = MatView(B,viewer);CHKERRQ(ierr);
151   ierr = MatDestroy(&B);CHKERRQ(ierr);
152   PetscFunctionReturn(0);
153 }
154 
155 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
156 {
157   PetscErrorCode ierr;
158   Mat            B;
159 
160   PetscFunctionBegin;
161   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
162   ierr = MatView(B,viewer);CHKERRQ(ierr);
163   ierr = MatDestroy(&B);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
168 {
169   PetscErrorCode ierr;
170   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
171 
172   PetscFunctionBegin;
173   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
174   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
175   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
176   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
177   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
178   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
179   ierr = PetscFree(A->data);CHKERRQ(ierr);
180   PetscFunctionReturn(0);
181 }
182 
183 /*MC
184   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
185 
186     [I \otimes S + A \otimes T]
187 
188   where
189     S is a dense (p \times q) matrix
190     T is a dense (p \times q) matrix
191     A is an AIJ  (n \times n) matrix
192     I is the identity matrix
193   The resulting matrix is (np \times nq)
194 
195   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
196   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
197 
198   Operations provided:
199 . MatMult
200 . MatMultAdd
201 . MatInvertBlockDiagonal
202 
203   Level: advanced
204 
205 .seealso: MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
206 M*/
207 
208 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
209 {
210   PetscErrorCode ierr;
211   Mat_MPIKAIJ    *b;
212   PetscMPIInt    size;
213 
214   PetscFunctionBegin;
215   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
216   A->data  = (void*)b;
217 
218   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
219 
220   A->ops->setup = MatSetUp_KAIJ;
221 
222   b->w    = 0;
223   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
224   if (size == 1) {
225     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
226   } else {
227     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
228   }
229   PetscFunctionReturn(0);
230 }
231 
232 /* --------------------------------------------------------------------------------------*/
233 
234 /* zz = yy + Axx */
235 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
236 {
237   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
238   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
239   const PetscScalar *s = b->S, *t = b->T;
240   const PetscScalar *x,*v,*bx;
241   PetscScalar       *y,*sums;
242   PetscErrorCode    ierr;
243   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
244   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
245 
246   PetscFunctionBegin;
247   if (!yy) {
248     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
249   } else {
250     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
251   }
252   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
253 
254   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
255   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
256   idx  = a->j;
257   v    = a->a;
258   ii   = a->i;
259 
260   if (b->isTI) {
261     for (i=0; i<m; i++) {
262       jrow = ii[i];
263       n    = ii[i+1] - jrow;
264       sums = y + p*i;
265       for (j=0; j<n; j++) {
266         for (k=0; k<p; k++) {
267           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
268         }
269       }
270     }
271   } else if (t) {
272     for (i=0; i<m; i++) {
273       jrow = ii[i];
274       n    = ii[i+1] - jrow;
275       sums = y + p*i;
276       bx   = x + q*i;
277       for (j=0; j<n; j++) {
278         for (k=0; k<p; k++) {
279           for (l=0; l<q; l++) {
280             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
281           }
282         }
283       }
284     }
285   }
286   if (s) {
287     for (i=0; i<m; i++) {
288       jrow = ii[i];
289       n    = ii[i+1] - jrow;
290       sums = y + p*i;
291       bx   = x + q*i;
292       if (i < b->AIJ->cmap->n) {
293         for (j=0; j<q; j++) {
294           for (k=0; k<p; k++) {
295             sums[k] += s[k+j*p]*bx[j];
296           }
297         }
298       }
299     }
300   }
301 
302   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
303   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
304   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
305   PetscFunctionReturn(0);
306 }
307 
308 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
309 {
310   PetscErrorCode ierr;
311   PetscFunctionBegin;
312   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
317 {
318   PetscErrorCode ierr;
319   PetscFunctionBegin;
320   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 #include <petsc/private/kernels/blockinvert.h>
325 
326 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
327 {
328   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
329   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
330   const PetscScalar *S  = b->S;
331   const PetscScalar *T  = b->T;
332   const PetscScalar *v  = a->a;
333   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
334   PetscErrorCode    ierr;
335   PetscInt          i,j,*v_pivots,dof,dof2;
336   PetscScalar       *diag,aval,*v_work;
337 
338   PetscFunctionBegin;
339   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
340   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
341 
342   dof  = p;
343   dof2 = dof*dof;
344 
345   if (b->ibdiagvalid) {
346     if (values) *values = b->ibdiag;
347     PetscFunctionReturn(0);
348   }
349   if (!b->ibdiag) {
350     ierr = PetscMalloc(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
351     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
352   }
353   if (values) *values = b->ibdiag;
354   diag = b->ibdiag;
355 
356   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
357   for (i=0; i<m; i++) {
358     if (S) {
359       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
360     } else {
361       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
362     }
363     if (b->isTI) {
364       aval = 0;
365       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
366       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
367     } else if (T) {
368       aval = 0;
369       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
370       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
371     }
372     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
373     diag += dof2;
374   }
375   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
376 
377   b->ibdiagvalid = PETSC_TRUE;
378   PetscFunctionReturn(0);
379 }
380 
381 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
382 {
383   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
384 
385   PetscFunctionBegin;
386   *B = kaij->AIJ;
387   PetscFunctionReturn(0);
388 }
389 
390 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
391 {
392   PetscErrorCode    ierr;
393   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
394   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
395   const PetscScalar *aa = a->a, *T = kaij->T, *v;
396   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
397   const PetscScalar *b, *xb, *idiag;
398   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
399   PetscInt          i, j, k, i2, bs, bs2, nz;
400 
401   PetscFunctionBegin;
402   its = its*lits;
403   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
404   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
405   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
406   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
407   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
408   else        {bs = p; bs2 = bs*bs; }
409 
410   if (!m) PetscFunctionReturn(0);
411 
412   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
413   idiag = kaij->ibdiag;
414   diag  = a->diag;
415 
416   if (!kaij->sor.setup) {
417     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
418     kaij->sor.setup = PETSC_TRUE;
419   }
420   y     = kaij->sor.y;
421   w     = kaij->sor.w;
422   work  = kaij->sor.work;
423   t     = kaij->sor.t;
424   arr   = kaij->sor.arr;
425 
426   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
427   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
428 
429   if (flag & SOR_ZERO_INITIAL_GUESS) {
430     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
431       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
432       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
433       i2     =  bs;
434       idiag  += bs2;
435       for (i=1; i<m; i++) {
436         v  = aa + ai[i];
437         vi = aj + ai[i];
438         nz = diag[i] - ai[i];
439 
440         if (T) {                /* b - T (Arow * x) */
441           for (k=0; k<bs; k++) w[k] = 0;
442           for (j=0; j<nz; j++) {
443             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
444           }
445           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
446           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
447         } else if (kaij->isTI) {
448           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
449           for (j=0; j<nz; j++) {
450             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
451           }
452         } else {
453           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
454         }
455 
456         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
457         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
458 
459         idiag += bs2;
460         i2    += bs;
461       }
462       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
463       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
464       xb = t;
465     } else xb = b;
466     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
467       idiag = kaij->ibdiag+bs2*(m-1);
468       i2    = bs * (m-1);
469       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
470       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
471       i2    -= bs;
472       idiag -= bs2;
473       for (i=m-2; i>=0; i--) {
474         v  = aa + diag[i] + 1 ;
475         vi = aj + diag[i] + 1;
476         nz = ai[i+1] - diag[i] - 1;
477 
478         if (T) {                /* FIXME: This branch untested */
479           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
480           /* copy all rows of x that are needed into contiguous space */
481           workt = work;
482           for (j=0; j<nz; j++) {
483             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
484             workt += bs;
485           }
486           arrt = arr;
487           for (j=0; j<nz; j++) {
488             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
489             for (k=0; k<bs2; k++) arrt[k] *= v[j];
490             arrt += bs2;
491           }
492           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
493         } else if (kaij->isTI) {
494           for (k=0; k<bs; k++) w[k] = t[i2+k];
495           for (j=0; j<nz; j++) {
496             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
497           }
498         }
499 
500         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
501         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
502 
503         idiag -= bs2;
504         i2    -= bs;
505       }
506       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
507     }
508     its--;
509   }
510   while (its--) {               /* FIXME: This branch not updated */
511     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
512       i2     =  0;
513       idiag  = kaij->ibdiag;
514       for (i=0; i<m; i++) {
515         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517         v  = aa + ai[i];
518         vi = aj + ai[i];
519         nz = diag[i] - ai[i];
520         workt = work;
521         for (j=0; j<nz; j++) {
522           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
523           workt += bs;
524         }
525         arrt = arr;
526         if (T) {
527           for (j=0; j<nz; j++) {
528             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
529             for (k=0; k<bs2; k++) arrt[k] *= v[j];
530             arrt += bs2;
531           }
532           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
533         } else if (kaij->isTI) {
534           for (j=0; j<nz; j++) {
535             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
536             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
537             arrt += bs2;
538           }
539           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
540         }
541         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
542 
543         v  = aa + diag[i] + 1;
544         vi = aj + diag[i] + 1;
545         nz = ai[i+1] - diag[i] - 1;
546         workt = work;
547         for (j=0; j<nz; j++) {
548           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
549           workt += bs;
550         }
551         arrt = arr;
552         if (T) {
553           for (j=0; j<nz; j++) {
554             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
555             for (k=0; k<bs2; k++) arrt[k] *= v[j];
556             arrt += bs2;
557           }
558           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
559         } else if (kaij->isTI) {
560           for (j=0; j<nz; j++) {
561             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
562             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
563             arrt += bs2;
564           }
565           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
566         }
567 
568         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
569         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
570 
571         idiag += bs2;
572         i2    += bs;
573       }
574       xb = t;
575     }
576     else xb = b;
577     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
578       idiag = kaij->ibdiag+bs2*(m-1);
579       i2    = bs * (m-1);
580       if (xb == b) {
581         for (i=m-1; i>=0; i--) {
582           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
583 
584           v  = aa + ai[i];
585           vi = aj + ai[i];
586           nz = diag[i] - ai[i];
587           workt = work;
588           for (j=0; j<nz; j++) {
589             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
590             workt += bs;
591           }
592           arrt = arr;
593           if (T) {
594             for (j=0; j<nz; j++) {
595               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
596               for (k=0; k<bs2; k++) arrt[k] *= v[j];
597               arrt += bs2;
598             }
599             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
600           } else if (kaij->isTI) {
601             for (j=0; j<nz; j++) {
602               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
603               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
604               arrt += bs2;
605             }
606             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
607           }
608 
609           v  = aa + diag[i] + 1;
610           vi = aj + diag[i] + 1;
611           nz = ai[i+1] - diag[i] - 1;
612           workt = work;
613           for (j=0; j<nz; j++) {
614             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
615             workt += bs;
616           }
617           arrt = arr;
618           if (T) {
619             for (j=0; j<nz; j++) {
620               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
621               for (k=0; k<bs2; k++) arrt[k] *= v[j];
622               arrt += bs2;
623             }
624             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
625           } else if (kaij->isTI) {
626             for (j=0; j<nz; j++) {
627               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
628               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
629               arrt += bs2;
630             }
631             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
632           }
633 
634           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
635           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
636         }
637       } else {
638         for (i=m-1; i>=0; i--) {
639           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
640           v  = aa + diag[i] + 1;
641           vi = aj + diag[i] + 1;
642           nz = ai[i+1] - diag[i] - 1;
643           workt = work;
644           for (j=0; j<nz; j++) {
645             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
646             workt += bs;
647           }
648           arrt = arr;
649           if (T) {
650             for (j=0; j<nz; j++) {
651               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
652               for (k=0; k<bs2; k++) arrt[k] *= v[j];
653               arrt += bs2;
654             }
655             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
656           } else if (kaij->isTI) {
657             for (j=0; j<nz; j++) {
658               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
659               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
660               arrt += bs2;
661             }
662             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
663           }
664           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
665           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
666         }
667         idiag -= bs2;
668         i2    -= bs;
669       }
670       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
671     }
672   }
673 
674   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
675   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 
679 /*===================================================================================*/
680 
681 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
682 {
683   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
684   PetscErrorCode ierr;
685 
686   PetscFunctionBegin;
687   if (!yy) {
688     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
689   } else {
690     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
691   }
692   /* start the scatter */
693   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
694   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
695   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
696   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
697   PetscFunctionReturn(0);
698 }
699 
700 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
701 {
702   PetscErrorCode ierr;
703   PetscFunctionBegin;
704   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
709 {
710   PetscErrorCode ierr;
711   PetscFunctionBegin;
712   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
717 {
718   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
719   PetscErrorCode  ierr;
720 
721   PetscFunctionBegin;
722   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
723   PetscFunctionReturn(0);
724 }
725 
726 /* ----------------------------------------------------------------*/
727 
728 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
729 {
730   Mat_SeqKAIJ     *b    = (Mat_SeqKAIJ*) A->data;
731   PetscErrorCode  ierr,diag;
732   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
733   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
734 
735   PetscFunctionBegin;
736   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
737   b->getrowactive = PETSC_TRUE;
738   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
739 
740   if ((!S) && (!T) && (!b->isTI)) {
741     if (ncols)    *ncols  = 0;
742     if (cols)     *cols   = NULL;
743     if (values)   *values = NULL;
744     PetscFunctionReturn(0);
745   }
746 
747   if (T || b->isTI) {
748     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
749     diag  = PETSC_FALSE;
750     c     = nzaij;
751     for (i=0; i<nzaij; i++) {
752       /* check if this row contains a diagonal entry */
753       if (colsaij[i] == r) {
754         diag = PETSC_TRUE;
755         c = i;
756       }
757     }
758   } else nzaij = c = 0;
759 
760   /* calculate size of row */
761   nz = 0;
762   if (S)            nz += q;
763   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
764 
765   if (cols || values) {
766     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
767     if (b->isTI) {
768       for (i=0; i<nzaij; i++) {
769         for (j=0; j<q; j++) {
770           idx[i*q+j] = colsaij[i]*q+j;
771           v[i*q+j]   = (j==s ? vaij[i] : 0);
772         }
773       }
774     } else if (T) {
775       for (i=0; i<nzaij; i++) {
776         for (j=0; j<q; j++) {
777           idx[i*q+j] = colsaij[i]*q+j;
778           v[i*q+j]   = vaij[i]*T[s+j*p];
779         }
780       }
781     }
782     if (S) {
783       for (j=0; j<q; j++) {
784         idx[c*q+j] = r*q+j;
785         v[c*q+j]  += S[s+j*p];
786       }
787     }
788   }
789 
790   if (ncols)    *ncols  = nz;
791   if (cols)     *cols   = idx;
792   if (values)   *values = v;
793 
794   PetscFunctionReturn(0);
795 }
796 
797 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
798 {
799   PetscErrorCode ierr;
800   PetscFunctionBegin;
801   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
802   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
803   PetscFunctionReturn(0);
804 }
805 
806 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
807 {
808   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
809   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
810   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
811   Mat             AIJ     = b->A;
812   PetscErrorCode  ierr;
813   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
814   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
815   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
816   PetscBool       diag;
817 
818   PetscFunctionBegin;
819   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
820   b->getrowactive = PETSC_TRUE;
821   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
822   lrow = row - rstart;
823 
824   if ((!S) && (!T) && (!b->isTI)) {
825     if (ncols)    *ncols  = 0;
826     if (cols)     *cols   = NULL;
827     if (values)   *values = NULL;
828     PetscFunctionReturn(0);
829   }
830 
831   r = lrow/p;
832   s = lrow%p;
833 
834   if (T || b->isTI) {
835     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
836     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
837     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
838 
839     diag  = PETSC_FALSE;
840     c     = ncolsaij + ncolsoaij;
841     for (i=0; i<ncolsaij; i++) {
842       /* check if this row contains a diagonal entry */
843       if (colsaij[i] == r) {
844         diag = PETSC_TRUE;
845         c = i;
846       }
847     }
848   } else c = 0;
849 
850   /* calculate size of row */
851   nz = 0;
852   if (S)            nz += q;
853   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
854 
855   if (cols || values) {
856     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
857     if (b->isTI) {
858       for (i=0; i<ncolsaij; i++) {
859         for (j=0; j<q; j++) {
860           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
861           v[i*q+j]   = (j==s ? vals[i] : 0.0);
862         }
863       }
864       for (i=0; i<ncolsoaij; i++) {
865         for (j=0; j<q; j++) {
866           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
867           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
868         }
869       }
870     } else if (T) {
871       for (i=0; i<ncolsaij; i++) {
872         for (j=0; j<q; j++) {
873           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
874           v[i*q+j]   = vals[i]*T[s+j*p];
875         }
876       }
877       for (i=0; i<ncolsoaij; i++) {
878         for (j=0; j<q; j++) {
879           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
880           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
881         }
882       }
883     }
884     if (S) {
885       for (j=0; j<q; j++) {
886         idx[c*q+j] = (r+rstart/p)*q+j;
887         v[c*q+j]  += S[s+j*p];
888       }
889     }
890   }
891 
892   if (ncols)  *ncols  = nz;
893   if (cols)   *cols   = idx;
894   if (values) *values = v;
895   PetscFunctionReturn(0);
896 }
897 
898 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
899 {
900   PetscErrorCode ierr;
901   PetscFunctionBegin;
902   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
903   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
904   PetscFunctionReturn(0);
905 }
906 
907 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
908 {
909   PetscErrorCode ierr;
910   Mat            A;
911 
912   PetscFunctionBegin;
913   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
914   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
915   ierr = MatDestroy(&A);CHKERRQ(ierr);
916   PetscFunctionReturn(0);
917 }
918 
919 /* ---------------------------------------------------------------------------------- */
920 /*@C
921   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
922 
923     [I \otimes S + A \otimes T]
924 
925   where
926     S is a dense (p \times q) matrix
927     T is a dense (p \times q) matrix
928     A is an AIJ  (n \times n) matrix
929     I is the identity matrix
930   The resulting matrix is (np \times nq)
931 
932   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
933   S is always stored independently on all processes as a PetscScalar array in column-major format.
934 
935   Collective
936 
937   Input Parameters:
938 + A - the AIJ matrix
939 . S - the S matrix, stored as a PetscScalar array (column-major)
940 . T - the T matrix, stored as a PetscScalar array (column-major)
941 . p - number of rows in S and T
942 - q - number of columns in S and T
943 
944   Output Parameter:
945 . kaij - the new KAIJ matrix
946 
947   Operations provided:
948 + MatMult
949 . MatMultAdd
950 . MatInvertBlockDiagonal
951 - MatView
952 
953   Level: advanced
954 
955 .seealso: MatKAIJGetAIJ(), MATKAIJ
956 @*/
957 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
958 {
959   PetscErrorCode ierr;
960   PetscMPIInt    size;
961   PetscInt       n,i,j;
962   Mat            B;
963   PetscBool      isTI = PETSC_FALSE;
964 
965   PetscFunctionBegin;
966 
967   /* check if T is an identity matrix */
968   if (T && (p == q)) {
969     isTI = PETSC_TRUE;
970     for (i=0; i<p; i++) {
971       for (j=0; j<q; j++) {
972         if (i == j) {
973           /* diagonal term must be 1 */
974           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
975         } else {
976           /* off-diagonal term must be 0 */
977           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
978         }
979       }
980     }
981   }
982 
983   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
984 
985   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
986   ierr = MatSetSizes(B,p*A->rmap->n,q*A->cmap->n,p*A->rmap->N,q*A->cmap->N);CHKERRQ(ierr);
987   ierr = PetscLayoutSetBlockSize(B->rmap,p);CHKERRQ(ierr);
988   ierr = PetscLayoutSetBlockSize(B->cmap,q);CHKERRQ(ierr);
989   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
990   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
991 
992   B->assembled = PETSC_TRUE;
993   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
994 
995   if (size == 1) {
996     Mat_SeqKAIJ *b;
997 
998     ierr = MatSetType(B,MATSEQKAIJ);CHKERRQ(ierr);
999 
1000     B->ops->setup   = NULL;
1001     B->ops->destroy = MatDestroy_SeqKAIJ;
1002     B->ops->view    = MatView_SeqKAIJ;
1003     b               = (Mat_SeqKAIJ*)B->data;
1004     b->p            = p;
1005     b->q            = q;
1006     b->AIJ          = A;
1007     b->isTI         = isTI;
1008     if (S) {
1009       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr);
1010       ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1011     } else  b->S = NULL;
1012     if (T && (!isTI)) {
1013       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr);
1014       ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1015     } else b->T = NULL;
1016 
1017     B->ops->mult                = MatMult_SeqKAIJ_N;
1018     B->ops->multadd             = MatMultAdd_SeqKAIJ_N;
1019     B->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
1020     B->ops->getrow              = MatGetRow_SeqKAIJ;
1021     B->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1022     B->ops->sor                 = MatSOR_SeqKAIJ;
1023   } else {
1024     Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
1025     Mat_MPIKAIJ *b;
1026     IS          from,to;
1027     Vec         gvec;
1028 
1029     ierr = MatSetType(B,MATMPIKAIJ);CHKERRQ(ierr);
1030 
1031     B->ops->setup   = NULL;
1032     B->ops->destroy = MatDestroy_MPIKAIJ;
1033     B->ops->view    = MatView_MPIKAIJ;
1034 
1035     b       = (Mat_MPIKAIJ*)B->data;
1036     b->p    = p;
1037     b->q    = q;
1038     b->A    = A;
1039     b->isTI = isTI;
1040     if (S) {
1041       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr);
1042       ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1043     } else  b->S = NULL;
1044     if (T &&(!isTI)) {
1045       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr);
1046       ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1047     } else b->T = NULL;
1048 
1049     ierr = MatCreateKAIJ(mpiaij->A,p,q,S   ,T,&b->AIJ);CHKERRQ(ierr);
1050     ierr = MatCreateKAIJ(mpiaij->B,p,q,NULL,T,&b->OAIJ);CHKERRQ(ierr);
1051 
1052     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1053     ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
1054     ierr = VecSetSizes(b->w,n*q,n*q);CHKERRQ(ierr);
1055     ierr = VecSetBlockSize(b->w,q);CHKERRQ(ierr);
1056     ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
1057 
1058     /* create two temporary Index sets for build scatter gather */
1059     ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
1060     ierr = ISCreateStride(PETSC_COMM_SELF,n*q,0,1,&to);CHKERRQ(ierr);
1061 
1062     /* create temporary global vector to generate scatter context */
1063     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),q,q*A->cmap->n,q*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
1064 
1065     /* generate the scatter context */
1066     ierr = VecScatterCreateWithData(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1067 
1068     ierr = ISDestroy(&from);CHKERRQ(ierr);
1069     ierr = ISDestroy(&to);CHKERRQ(ierr);
1070     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
1071 
1072     B->ops->mult                = MatMult_MPIKAIJ_dof;
1073     B->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
1074     B->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
1075     B->ops->getrow              = MatGetRow_MPIKAIJ;
1076     B->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1077     ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1078   }
1079   B->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1080   B->assembled = PETSC_TRUE;
1081   ierr  = MatSetUp(B);CHKERRQ(ierr);
1082   *kaij = B;
1083   ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
1084 
1085   PetscFunctionReturn(0);
1086 }
1087