xref: /petsc/src/mat/impls/kaij/kaij.c (revision b5a089c7208fc0d2d664fc82f0d8188edb3b6a89)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else {
64     *B = A;
65   }
66   PetscFunctionReturn(0);
67 }
68 
69 /*@C
70    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
71 
72    Not Collective, the entire S is stored and returned independently on all processes
73 
74    Input Parameter:
75 .  A - the KAIJ matrix
76 
77    Output Parameter:
78 .  S - the S matrix, in form of a scalar array in column-major format
79 
80    Level: advanced
81 
82    Notes: The reference count on the S matrix is not increased so you should not destroy it.
83 
84 .seealso: MatCreateKAIJ()
85 @*/
86 PetscErrorCode  MatKAIJGetS(Mat A,const PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   *S = b->S;
91   PetscFunctionReturn(0);
92 }
93 
94 /*@C
95    MatKAIJGetT - Get the T matrix describing the shift action of the KAIJ matrix
96 
97    Not Collective, the entire T is stored and returned independently on all processes
98 
99    Input Parameter:
100 .  A - the KAIJ matrix
101 
102    Output Parameter:
103 .  T - the T matrix, in form of a scalar array in column-major format
104 
105    Level: advanced
106 
107    Notes: The reference count on the T matrix is not increased so you should not destroy it.
108 
109 .seealso: MatCreateKAIJ()
110 @*/
111 PetscErrorCode  MatKAIJGetT(Mat A,const PetscScalar **T)
112 {
113   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
114   PetscFunctionBegin;
115   *T = b->T;
116   PetscFunctionReturn(0);
117 }
118 
119 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
120 {
121   PetscErrorCode ierr;
122   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
123 
124   PetscFunctionBegin;
125   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
126   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
127   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
128   ierr = PetscFree(A->data);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 PetscErrorCode MatSetUp_KAIJ(Mat A)
133 {
134   PetscFunctionBegin;
135   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateKAIJ() to create KAIJ matrices");
136   PetscFunctionReturn(0);
137 }
138 
139 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
140 {
141   PetscErrorCode ierr;
142   Mat            B;
143 
144   PetscFunctionBegin;
145   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
146   ierr = MatView(B,viewer);CHKERRQ(ierr);
147   ierr = MatDestroy(&B);CHKERRQ(ierr);
148   PetscFunctionReturn(0);
149 }
150 
151 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
152 {
153   PetscErrorCode ierr;
154   Mat            B;
155 
156   PetscFunctionBegin;
157   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
158   ierr = MatView(B,viewer);CHKERRQ(ierr);
159   ierr = MatDestroy(&B);CHKERRQ(ierr);
160   PetscFunctionReturn(0);
161 }
162 
163 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
164 {
165   PetscErrorCode ierr;
166   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
167 
168   PetscFunctionBegin;
169   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
170   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
171   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
172   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
173   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
174   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
175   ierr = PetscFree(A->data);CHKERRQ(ierr);
176 
177   PetscFunctionReturn(0);
178 }
179 
180 /*MC
181   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
182 
183     [I \otimes S + A \otimes T]
184 
185   where
186     S is a dense (p \times q) matrix
187     T is a dense (p \times q) matrix
188     A is an AIJ  (n \times n) matrix
189     I is the identity matrix
190   The resulting matrix is (np \times nq)
191 
192   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
193   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
194 
195   Operations provided:
196 . MatMult
197 . MatMultAdd
198 . MatInvertBlockDiagonal
199 
200   Level: advanced
201 
202 .seealso: MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
203 M*/
204 
205 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
206 {
207   PetscErrorCode ierr;
208   Mat_MPIKAIJ    *b;
209   PetscMPIInt    size;
210 
211   PetscFunctionBegin;
212   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
213   A->data  = (void*)b;
214 
215   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
216 
217   A->ops->setup = MatSetUp_KAIJ;
218 
219   b->w    = 0;
220   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
221   if (size == 1) {
222     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
223   } else {
224     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
225   }
226   PetscFunctionReturn(0);
227 }
228 
229 /* --------------------------------------------------------------------------------------*/
230 
231 /* zz = yy + Axx */
232 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
233 {
234   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
235   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
236   const PetscScalar *s = b->S, *t = b->T;
237   const PetscScalar *x,*v,*bx;
238   PetscScalar       *y,*sums;
239   PetscErrorCode    ierr;
240   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
241   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
242 
243   PetscFunctionBegin;
244 
245   if (!yy) {
246     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
247   } else {
248     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
249   }
250   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
251 
252   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
253   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
254   idx  = a->j;
255   v    = a->a;
256   ii   = a->i;
257 
258   if (b->isTI) {
259     for (i=0; i<m; i++) {
260       jrow = ii[i];
261       n    = ii[i+1] - jrow;
262       sums = y + p*i;
263       for (j=0; j<n; j++) {
264         for (k=0; k<p; k++) {
265           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
266         }
267       }
268     }
269   } else if (t) {
270     for (i=0; i<m; i++) {
271       jrow = ii[i];
272       n    = ii[i+1] - jrow;
273       sums = y + p*i;
274       bx   = x + q*i;
275       for (j=0; j<n; j++) {
276         for (k=0; k<p; k++) {
277           for (l=0; l<q; l++) {
278             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
279           }
280         }
281       }
282     }
283   }
284   if (s) {
285     for (i=0; i<m; i++) {
286       jrow = ii[i];
287       n    = ii[i+1] - jrow;
288       sums = y + p*i;
289       bx   = x + q*i;
290       if (i < b->AIJ->cmap->n) {
291         for (j=0; j<q; j++) {
292           for (k=0; k<p; k++) {
293             sums[k] += s[k+j*p]*bx[j];
294           }
295         }
296       }
297     }
298   }
299 
300   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
301   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
302   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
307 {
308   PetscErrorCode ierr;
309   PetscFunctionBegin;
310   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
311   PetscFunctionReturn(0);
312 }
313 
314 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
315 {
316   PetscErrorCode ierr;
317   PetscFunctionBegin;
318   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #include <petsc/private/kernels/blockinvert.h>
323 
324 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
325 {
326   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
327   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
328   const PetscScalar *S  = b->S;
329   const PetscScalar *T  = b->T;
330   const PetscScalar *v  = a->a;
331   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
332   PetscErrorCode    ierr;
333   PetscInt          i,j,*v_pivots,dof,dof2;
334   PetscScalar       *diag,aval,*v_work;
335 
336   PetscFunctionBegin;
337   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
338   if ((!S) && (!T) && (!b->isTI)) {
339     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
340   }
341 
342   dof  = p;
343   dof2 = dof*dof;
344 
345   if (b->ibdiagvalid) {
346     if (values) *values = b->ibdiag;
347     PetscFunctionReturn(0);
348   }
349   if (!b->ibdiag) {
350     ierr = PetscMalloc(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
351     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
352   }
353   if (values) *values = b->ibdiag;
354   diag = b->ibdiag;
355 
356   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
357   for (i=0; i<m; i++) {
358     if (S) {
359       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
360     } else {
361       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
362     }
363     if (b->isTI) {
364       aval = 0;
365       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
366       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
367     } else if (T) {
368       aval = 0;
369       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
370       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
371     }
372     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
373     diag += dof2;
374   }
375   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
376 
377   b->ibdiagvalid = PETSC_TRUE;
378   PetscFunctionReturn(0);
379 }
380 
381 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
382 {
383   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
384 
385   PetscFunctionBegin;
386   *B = kaij->AIJ;
387   PetscFunctionReturn(0);
388 }
389 
390 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
391 {
392   PetscErrorCode    ierr;
393   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
394   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
395   const PetscScalar *aa = a->a, *T = kaij->T, *v;
396   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
397   const PetscScalar *b, *xb, *idiag;
398   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
399   PetscInt          i, j, k, i2, bs, bs2, nz;
400 
401 
402   PetscFunctionBegin;
403   its = its*lits;
404   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
405   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
406   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
407   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER))
408     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
409   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
410   else        {bs = p; bs2 = bs*bs; }
411 
412   if (!m) PetscFunctionReturn(0);
413 
414   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
415   idiag = kaij->ibdiag;
416   diag  = a->diag;
417 
418   if (!kaij->sor.setup) {
419     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
420     kaij->sor.setup = PETSC_TRUE;
421   }
422   y     = kaij->sor.y;
423   w     = kaij->sor.w;
424   work  = kaij->sor.work;
425   t     = kaij->sor.t;
426   arr   = kaij->sor.arr;
427 
428   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
429   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
430 
431   if (flag & SOR_ZERO_INITIAL_GUESS) {
432     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
433       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
434       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
435       i2     =  bs;
436       idiag  += bs2;
437       for (i=1; i<m; i++) {
438         v  = aa + ai[i];
439         vi = aj + ai[i];
440         nz = diag[i] - ai[i];
441 
442         if (T) {                /* b - T (Arow * x) */
443           for (k=0; k<bs; k++) w[k] = 0;
444           for (j=0; j<nz; j++) {
445             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
446           }
447           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
448           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
449         } else if (kaij->isTI) {
450           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
451           for (j=0; j<nz; j++) {
452             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
453           }
454         } else {
455           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
456         }
457 
458         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
459         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
460 
461         idiag += bs2;
462         i2    += bs;
463       }
464       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
465       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
466       xb = t;
467     } else xb = b;
468     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
469       idiag = kaij->ibdiag+bs2*(m-1);
470       i2    = bs * (m-1);
471       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
472       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
473       i2    -= bs;
474       idiag -= bs2;
475       for (i=m-2; i>=0; i--) {
476         v  = aa + diag[i] + 1 ;
477         vi = aj + diag[i] + 1;
478         nz = ai[i+1] - diag[i] - 1;
479 
480         if (T) {                /* FIXME: This branch untested */
481           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
482           /* copy all rows of x that are needed into contiguous space */
483           workt = work;
484           for (j=0; j<nz; j++) {
485             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
486             workt += bs;
487           }
488           arrt = arr;
489           for (j=0; j<nz; j++) {
490             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
491             for (k=0; k<bs2; k++) arrt[k] *= v[j];
492             arrt += bs2;
493           }
494           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
495         } else if (kaij->isTI) {
496           for (k=0; k<bs; k++) w[k] = t[i2+k];
497           for (j=0; j<nz; j++) {
498             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
499           }
500         }
501 
502         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
503         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
504 
505         idiag -= bs2;
506         i2    -= bs;
507       }
508       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
509     }
510     its--;
511   }
512   while (its--) {               /* FIXME: This branch not updated */
513     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
514       i2     =  0;
515       idiag  = kaij->ibdiag;
516       for (i=0; i<m; i++) {
517         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
518 
519         v  = aa + ai[i];
520         vi = aj + ai[i];
521         nz = diag[i] - ai[i];
522         workt = work;
523         for (j=0; j<nz; j++) {
524           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
525           workt += bs;
526         }
527         arrt = arr;
528         if (T) {
529           for (j=0; j<nz; j++) {
530             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
531             for (k=0; k<bs2; k++) arrt[k] *= v[j];
532             arrt += bs2;
533           }
534           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
535         } else if (kaij->isTI) {
536           for (j=0; j<nz; j++) {
537             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
538             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
539             arrt += bs2;
540           }
541           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
542         }
543         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
544 
545         v  = aa + diag[i] + 1;
546         vi = aj + diag[i] + 1;
547         nz = ai[i+1] - diag[i] - 1;
548         workt = work;
549         for (j=0; j<nz; j++) {
550           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
551           workt += bs;
552         }
553         arrt = arr;
554         if (T) {
555           for (j=0; j<nz; j++) {
556             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
557             for (k=0; k<bs2; k++) arrt[k] *= v[j];
558             arrt += bs2;
559           }
560           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
561         } else if (kaij->isTI) {
562           for (j=0; j<nz; j++) {
563             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
564             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
565             arrt += bs2;
566           }
567           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
568         }
569 
570         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
571         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
572 
573         idiag += bs2;
574         i2    += bs;
575       }
576       xb = t;
577     }
578     else xb = b;
579     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
580       idiag = kaij->ibdiag+bs2*(m-1);
581       i2    = bs * (m-1);
582       if (xb == b) {
583         for (i=m-1; i>=0; i--) {
584           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
585 
586           v  = aa + ai[i];
587           vi = aj + ai[i];
588           nz = diag[i] - ai[i];
589           workt = work;
590           for (j=0; j<nz; j++) {
591             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
592             workt += bs;
593           }
594           arrt = arr;
595           if (T) {
596             for (j=0; j<nz; j++) {
597               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
598               for (k=0; k<bs2; k++) arrt[k] *= v[j];
599               arrt += bs2;
600             }
601             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
602           } else if (kaij->isTI) {
603             for (j=0; j<nz; j++) {
604               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
605               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
606               arrt += bs2;
607             }
608             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
609           }
610 
611           v  = aa + diag[i] + 1;
612           vi = aj + diag[i] + 1;
613           nz = ai[i+1] - diag[i] - 1;
614           workt = work;
615           for (j=0; j<nz; j++) {
616             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
617             workt += bs;
618           }
619           arrt = arr;
620           if (T) {
621             for (j=0; j<nz; j++) {
622               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
623               for (k=0; k<bs2; k++) arrt[k] *= v[j];
624               arrt += bs2;
625             }
626             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
627           } else if (kaij->isTI) {
628             for (j=0; j<nz; j++) {
629               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
630               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
631               arrt += bs2;
632             }
633             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
634           }
635 
636           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
637           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
638         }
639       } else {
640         for (i=m-1; i>=0; i--) {
641           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
642           v  = aa + diag[i] + 1;
643           vi = aj + diag[i] + 1;
644           nz = ai[i+1] - diag[i] - 1;
645           workt = work;
646           for (j=0; j<nz; j++) {
647             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
648             workt += bs;
649           }
650           arrt = arr;
651           if (T) {
652             for (j=0; j<nz; j++) {
653               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
654               for (k=0; k<bs2; k++) arrt[k] *= v[j];
655               arrt += bs2;
656             }
657             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
658           } else if (kaij->isTI) {
659             for (j=0; j<nz; j++) {
660               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
661               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
662               arrt += bs2;
663             }
664             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
665           }
666           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
667           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
668         }
669         idiag -= bs2;
670         i2    -= bs;
671       }
672       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
673     }
674   }
675 
676   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
677   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
678 
679   PetscFunctionReturn(0);
680 }
681 
682 /*===================================================================================*/
683 
684 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
685 {
686   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
687   PetscErrorCode ierr;
688 
689   PetscFunctionBegin;
690   if (!yy) {
691     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
692   } else {
693     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
694   }
695   /* start the scatter */
696   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
697   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
698   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
699   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
704 {
705   PetscErrorCode ierr;
706   PetscFunctionBegin;
707   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 
711 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
712 {
713   PetscErrorCode ierr;
714   PetscFunctionBegin;
715   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 
719 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
720 {
721   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
722   PetscErrorCode  ierr;
723 
724   PetscFunctionBegin;
725   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
726   PetscFunctionReturn(0);
727 }
728 
729 /* ----------------------------------------------------------------*/
730 
731 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
732 {
733   Mat_SeqKAIJ     *b    = (Mat_SeqKAIJ*) A->data;
734   PetscErrorCode  ierr,diag;
735   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
736   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
737 
738   PetscFunctionBegin;
739   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
740   b->getrowactive = PETSC_TRUE;
741   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
742 
743   if ((!S) && (!T) && (!b->isTI)) {
744     if (ncols)    *ncols  = 0;
745     if (cols)     *cols   = NULL;
746     if (values)   *values = NULL;
747     PetscFunctionReturn(0);
748   }
749 
750   if (T || b->isTI) {
751     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
752     diag  = PETSC_FALSE;
753     c     = nzaij;
754     for (i=0; i<nzaij; i++) {
755       /* check if this row contains a diagonal entry */
756       if (colsaij[i] == r) {
757         diag = PETSC_TRUE;
758         c = i;
759       }
760     }
761   } else nzaij = c = 0;
762 
763   /* calculate size of row */
764   nz = 0;
765   if (S)            nz += q;
766   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
767 
768   if (cols || values) {
769     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
770     if (b->isTI) {
771       for (i=0; i<nzaij; i++) {
772         for (j=0; j<q; j++) {
773           idx[i*q+j] = colsaij[i]*q+j;
774           v[i*q+j]   = (j==s ? vaij[i] : 0);
775         }
776       }
777     } else if (T) {
778       for (i=0; i<nzaij; i++) {
779         for (j=0; j<q; j++) {
780           idx[i*q+j] = colsaij[i]*q+j;
781           v[i*q+j]   = vaij[i]*T[s+j*p];
782         }
783       }
784     }
785     if (S) {
786       for (j=0; j<q; j++) {
787         idx[c*q+j] = r*q+j;
788         v[c*q+j]  += S[s+j*p];
789       }
790     }
791   }
792 
793   if (ncols)    *ncols  = nz;
794   if (cols)     *cols   = idx;
795   if (values)   *values = v;
796 
797   PetscFunctionReturn(0);
798 }
799 
800 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
801 {
802   PetscErrorCode ierr;
803   PetscFunctionBegin;
804   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
805   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
806   PetscFunctionReturn(0);
807 }
808 
809 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
810 {
811   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
812   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
813   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
814   Mat             AIJ     = b->A;
815   PetscErrorCode  ierr;
816   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
817   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
818   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
819   PetscBool       diag;
820 
821   PetscFunctionBegin;
822   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
823   b->getrowactive = PETSC_TRUE;
824   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
825   lrow = row - rstart;
826 
827   if ((!S) && (!T) && (!b->isTI)) {
828     if (ncols)    *ncols  = 0;
829     if (cols)     *cols   = NULL;
830     if (values)   *values = NULL;
831     PetscFunctionReturn(0);
832   }
833 
834   r = lrow/p;
835   s = lrow%p;
836 
837   if (T || b->isTI) {
838     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
839     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
840     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
841 
842     diag  = PETSC_FALSE;
843     c     = ncolsaij + ncolsoaij;
844     for (i=0; i<ncolsaij; i++) {
845       /* check if this row contains a diagonal entry */
846       if (colsaij[i] == r) {
847         diag = PETSC_TRUE;
848         c = i;
849       }
850     }
851   } else c = 0;
852 
853   /* calculate size of row */
854   nz = 0;
855   if (S)            nz += q;
856   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
857 
858   if (cols || values) {
859     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
860     if (b->isTI) {
861       for (i=0; i<ncolsaij; i++) {
862         for (j=0; j<q; j++) {
863           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
864           v[i*q+j]   = (j==s ? vals[i] : 0.0);
865         }
866       }
867       for (i=0; i<ncolsoaij; i++) {
868         for (j=0; j<q; j++) {
869           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
870           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
871         }
872       }
873     } else if (T) {
874       for (i=0; i<ncolsaij; i++) {
875         for (j=0; j<q; j++) {
876           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
877           v[i*q+j]   = vals[i]*T[s+j*p];
878         }
879       }
880       for (i=0; i<ncolsoaij; i++) {
881         for (j=0; j<q; j++) {
882           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
883           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
884         }
885       }
886     }
887     if (S) {
888       for (j=0; j<q; j++) {
889         idx[c*q+j] = (r+rstart/p)*q+j;
890         v[c*q+j]  += S[s+j*p];
891       }
892     }
893   }
894 
895   if (ncols)  *ncols  = nz;
896   if (cols)   *cols   = idx;
897   if (values) *values = v;
898   PetscFunctionReturn(0);
899 }
900 
901 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
902 {
903   PetscErrorCode ierr;
904   PetscFunctionBegin;
905   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
906   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
911 {
912   PetscErrorCode ierr;
913   Mat            A;
914 
915   PetscFunctionBegin;
916   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
917   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
918   ierr = MatDestroy(&A);CHKERRQ(ierr);
919   PetscFunctionReturn(0);
920 }
921 
922 /* ---------------------------------------------------------------------------------- */
923 /*@C
924   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
925 
926     [I \otimes S + A \otimes T]
927 
928   where
929     S is a dense (p \times q) matrix
930     T is a dense (p \times q) matrix
931     A is an AIJ  (n \times n) matrix
932     I is the identity matrix
933   The resulting matrix is (np \times nq)
934 
935   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
936   S is always stored independently on all processes as a PetscScalar array in column-major format.
937 
938   Collective
939 
940   Input Parameters:
941 + A - the AIJ matrix
942 . S - the S matrix, stored as a PetscScalar array (column-major)
943 . T - the T matrix, stored as a PetscScalar array (column-major)
944 . p - number of rows in S and T
945 - q - number of columns in S and T
946 
947   Output Parameter:
948 . kaij - the new KAIJ matrix
949 
950   Operations provided:
951 + MatMult
952 . MatMultAdd
953 . MatInvertBlockDiagonal
954 - MatView
955 
956   Level: advanced
957 
958 .seealso: MatKAIJGetAIJ(), MATKAIJ
959 @*/
960 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
961 {
962   PetscErrorCode ierr;
963   PetscMPIInt    size;
964   PetscInt       n,i,j;
965   Mat            B;
966   PetscBool      isTI = PETSC_FALSE;
967 
968   PetscFunctionBegin;
969 
970   /* check if T is an identity matrix */
971   if (T && (p == q)) {
972     isTI = PETSC_TRUE;
973     for (i=0; i<p; i++) {
974       for (j=0; j<q; j++) {
975         if (i == j) {
976           /* diagonal term must be 1 */
977           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
978         } else {
979           /* off-diagonal term must be 0 */
980           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
981         }
982       }
983     }
984   }
985 
986   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
987 
988   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
989   ierr = MatSetSizes(B,p*A->rmap->n,q*A->cmap->n,p*A->rmap->N,q*A->cmap->N);CHKERRQ(ierr);
990   ierr = PetscLayoutSetBlockSize(B->rmap,p);CHKERRQ(ierr);
991   ierr = PetscLayoutSetBlockSize(B->cmap,q);CHKERRQ(ierr);
992   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
993   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
994 
995   B->assembled = PETSC_TRUE;
996   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
997 
998   if (size == 1) {
999     Mat_SeqKAIJ *b;
1000 
1001     ierr = MatSetType(B,MATSEQKAIJ);CHKERRQ(ierr);
1002 
1003     B->ops->setup   = NULL;
1004     B->ops->destroy = MatDestroy_SeqKAIJ;
1005     B->ops->view    = MatView_SeqKAIJ;
1006     b               = (Mat_SeqKAIJ*)B->data;
1007     b->p            = p;
1008     b->q            = q;
1009     b->AIJ          = A;
1010     b->isTI         = isTI;
1011     if (S) {
1012       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr);
1013       ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1014     } else  b->S = NULL;
1015     if (T && (!isTI)) {
1016       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr);
1017       ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1018     } else b->T = NULL;
1019 
1020     B->ops->mult                = MatMult_SeqKAIJ_N;
1021     B->ops->multadd             = MatMultAdd_SeqKAIJ_N;
1022     B->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
1023     B->ops->getrow              = MatGetRow_SeqKAIJ;
1024     B->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1025     B->ops->sor                 = MatSOR_SeqKAIJ;
1026 
1027     /*
1028     This is commented out since MATKAIJ will use the basic MatConvert (which uses MatGetRow()).
1029     ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqkaij_seqaij_C",MatConvert_SeqKAIJ_SeqAIJ);CHKERRQ(ierr);
1030     */
1031 
1032   } else {
1033     Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
1034     Mat_MPIKAIJ *b;
1035     IS          from,to;
1036     Vec         gvec;
1037 
1038     ierr = MatSetType(B,MATMPIKAIJ);CHKERRQ(ierr);
1039 
1040     B->ops->setup   = NULL;
1041     B->ops->destroy = MatDestroy_MPIKAIJ;
1042     B->ops->view    = MatView_MPIKAIJ;
1043 
1044     b       = (Mat_MPIKAIJ*)B->data;
1045     b->p    = p;
1046     b->q    = q;
1047     b->A    = A;
1048     b->isTI = isTI;
1049     if (S) {
1050       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr);
1051       ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1052     } else  b->S = NULL;
1053     if (T &&(!isTI)) {
1054       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr);
1055       ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1056     } else b->T = NULL;
1057 
1058     ierr = MatCreateKAIJ(mpiaij->A,p,q,S   ,T,&b->AIJ);CHKERRQ(ierr);
1059     ierr = MatCreateKAIJ(mpiaij->B,p,q,NULL,T,&b->OAIJ);CHKERRQ(ierr);
1060 
1061     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1062     ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
1063     ierr = VecSetSizes(b->w,n*q,n*q);CHKERRQ(ierr);
1064     ierr = VecSetBlockSize(b->w,q);CHKERRQ(ierr);
1065     ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
1066 
1067     /* create two temporary Index sets for build scatter gather */
1068     ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
1069     ierr = ISCreateStride(PETSC_COMM_SELF,n*q,0,1,&to);CHKERRQ(ierr);
1070 
1071     /* create temporary global vector to generate scatter context */
1072     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),q,q*A->cmap->n,q*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
1073 
1074     /* generate the scatter context */
1075     ierr = VecScatterCreateWithData(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1076 
1077     ierr = ISDestroy(&from);CHKERRQ(ierr);
1078     ierr = ISDestroy(&to);CHKERRQ(ierr);
1079     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
1080 
1081     B->ops->mult                = MatMult_MPIKAIJ_dof;
1082     B->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
1083     B->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
1084     B->ops->getrow              = MatGetRow_MPIKAIJ;
1085     B->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1086     ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1087   }
1088   B->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1089   B->assembled = PETSC_TRUE;
1090   ierr  = MatSetUp(B);CHKERRQ(ierr);
1091   *kaij = B;
1092   ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
1093 
1094   PetscFunctionReturn(0);
1095 }
1096