xref: /petsc/src/mat/impls/kaij/kaij.c (revision 31a97b9a3c358588ac4428bcc7cbff2ea31f18a2)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else {
64     *B = A;
65   }
66   PetscFunctionReturn(0);
67 }
68 
69 /*@C
70    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
71 
72    Not Collective, the entire S is stored and returned independently on all processes
73 
74    Input Parameter:
75 .  A - the KAIJ matrix
76 
77    Output Parameter:
78 .  S - the S matrix, in form of a scalar array in column-major format
79 
80    Notes:
81    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
82    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S.
83 
84    Level: advanced
85 
86 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
87 @*/
88 PetscErrorCode  MatKAIJGetS(Mat A,const PetscScalar **S)
89 {
90   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
91   PetscFunctionBegin;
92   *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
98 
99    Not Collective, the entire T is stored and returned independently on all processes
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameter:
105 .  T - the T matrix, in form of a scalar array in column-major format
106 
107    Notes:
108    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
109    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T.
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode  MatKAIJGetT(Mat A,const PetscScalar **T)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   *T = b->T;
120   PetscFunctionReturn(0);
121 }
122 
123 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
124 {
125   PetscErrorCode ierr;
126   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
127 
128   PetscFunctionBegin;
129   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
130   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
131   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
132   ierr = PetscFree(A->data);CHKERRQ(ierr);
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatSetUp_KAIJ(Mat A)
137 {
138   PetscFunctionBegin;
139   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateKAIJ() to create KAIJ matrices");
140   PetscFunctionReturn(0);
141 }
142 
143 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
144 {
145   PetscErrorCode ierr;
146   Mat            B;
147 
148   PetscFunctionBegin;
149   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
150   ierr = MatView(B,viewer);CHKERRQ(ierr);
151   ierr = MatDestroy(&B);CHKERRQ(ierr);
152   PetscFunctionReturn(0);
153 }
154 
155 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
156 {
157   PetscErrorCode ierr;
158   Mat            B;
159 
160   PetscFunctionBegin;
161   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
162   ierr = MatView(B,viewer);CHKERRQ(ierr);
163   ierr = MatDestroy(&B);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
168 {
169   PetscErrorCode ierr;
170   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
171 
172   PetscFunctionBegin;
173   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
174   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
175   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
176   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
177   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
178   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
179   ierr = PetscFree(A->data);CHKERRQ(ierr);
180   PetscFunctionReturn(0);
181 }
182 
183 /*MC
184   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
185 
186     [I \otimes S + A \otimes T]
187 
188   where
189     S is a dense (p \times q) matrix
190     T is a dense (p \times q) matrix
191     A is an AIJ  (n \times n) matrix
192     I is the identity matrix
193   The resulting matrix is (np \times nq)
194 
195   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
196   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
197 
198   Operations provided:
199 . MatMult
200 . MatMultAdd
201 . MatInvertBlockDiagonal
202 
203   Level: advanced
204 
205 .seealso: MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
206 M*/
207 
208 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
209 {
210   PetscErrorCode ierr;
211   Mat_MPIKAIJ    *b;
212   PetscMPIInt    size;
213 
214   PetscFunctionBegin;
215   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
216   A->data  = (void*)b;
217 
218   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
219 
220   A->ops->setup = MatSetUp_KAIJ;
221 
222   b->w    = 0;
223   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
224   if (size == 1) {
225     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
226   } else {
227     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
228   }
229   PetscFunctionReturn(0);
230 }
231 
232 /* --------------------------------------------------------------------------------------*/
233 
234 /* zz = yy + Axx */
235 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
236 {
237   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
238   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
239   const PetscScalar *s = b->S, *t = b->T;
240   const PetscScalar *x,*v,*bx;
241   PetscScalar       *y,*sums;
242   PetscErrorCode    ierr;
243   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
244   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
245 
246   PetscFunctionBegin;
247   if (!yy) {
248     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
249   } else {
250     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
251   }
252   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
253 
254   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
255   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
256   idx  = a->j;
257   v    = a->a;
258   ii   = a->i;
259 
260   if (b->isTI) {
261     for (i=0; i<m; i++) {
262       jrow = ii[i];
263       n    = ii[i+1] - jrow;
264       sums = y + p*i;
265       for (j=0; j<n; j++) {
266         for (k=0; k<p; k++) {
267           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
268         }
269       }
270     }
271   } else if (t) {
272     for (i=0; i<m; i++) {
273       jrow = ii[i];
274       n    = ii[i+1] - jrow;
275       sums = y + p*i;
276       bx   = x + q*i;
277       for (j=0; j<n; j++) {
278         for (k=0; k<p; k++) {
279           for (l=0; l<q; l++) {
280             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
281           }
282         }
283       }
284     }
285   }
286   if (s) {
287     for (i=0; i<m; i++) {
288       jrow = ii[i];
289       n    = ii[i+1] - jrow;
290       sums = y + p*i;
291       bx   = x + q*i;
292       if (i < b->AIJ->cmap->n) {
293         for (j=0; j<q; j++) {
294           for (k=0; k<p; k++) {
295             sums[k] += s[k+j*p]*bx[j];
296           }
297         }
298       }
299     }
300   }
301 
302   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
303   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
304   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
305   PetscFunctionReturn(0);
306 }
307 
308 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
309 {
310   PetscErrorCode ierr;
311   PetscFunctionBegin;
312   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
317 {
318   PetscErrorCode ierr;
319   PetscFunctionBegin;
320   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 #include <petsc/private/kernels/blockinvert.h>
325 
326 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
327 {
328   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
329   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
330   const PetscScalar *S  = b->S;
331   const PetscScalar *T  = b->T;
332   const PetscScalar *v  = a->a;
333   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
334   PetscErrorCode    ierr;
335   PetscInt          i,j,*v_pivots,dof,dof2;
336   PetscScalar       *diag,aval,*v_work;
337 
338   PetscFunctionBegin;
339   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
340   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
341 
342   dof  = p;
343   dof2 = dof*dof;
344 
345   if (b->ibdiagvalid) {
346     if (values) *values = b->ibdiag;
347     PetscFunctionReturn(0);
348   }
349   if (!b->ibdiag) {
350     ierr = PetscMalloc(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
351     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
352   }
353   if (values) *values = b->ibdiag;
354   diag = b->ibdiag;
355 
356   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
357   for (i=0; i<m; i++) {
358     if (S) {
359       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
360     } else {
361       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
362     }
363     if (b->isTI) {
364       aval = 0;
365       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
366       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
367     } else if (T) {
368       aval = 0;
369       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
370       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
371     }
372     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
373     diag += dof2;
374   }
375   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
376 
377   b->ibdiagvalid = PETSC_TRUE;
378   PetscFunctionReturn(0);
379 }
380 
381 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
382 {
383   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
384 
385   PetscFunctionBegin;
386   *B = kaij->AIJ;
387   PetscFunctionReturn(0);
388 }
389 
390 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
391 {
392   PetscErrorCode    ierr;
393   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
394   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
395   const PetscScalar *aa = a->a, *T = kaij->T, *v;
396   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
397   const PetscScalar *b, *xb, *idiag;
398   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
399   PetscInt          i, j, k, i2, bs, bs2, nz;
400 
401 
402   PetscFunctionBegin;
403   its = its*lits;
404   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
405   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
406   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
407   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
408   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
409   else        {bs = p; bs2 = bs*bs; }
410 
411   if (!m) PetscFunctionReturn(0);
412 
413   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
414   idiag = kaij->ibdiag;
415   diag  = a->diag;
416 
417   if (!kaij->sor.setup) {
418     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
419     kaij->sor.setup = PETSC_TRUE;
420   }
421   y     = kaij->sor.y;
422   w     = kaij->sor.w;
423   work  = kaij->sor.work;
424   t     = kaij->sor.t;
425   arr   = kaij->sor.arr;
426 
427   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
428   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
429 
430   if (flag & SOR_ZERO_INITIAL_GUESS) {
431     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
432       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
433       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
434       i2     =  bs;
435       idiag  += bs2;
436       for (i=1; i<m; i++) {
437         v  = aa + ai[i];
438         vi = aj + ai[i];
439         nz = diag[i] - ai[i];
440 
441         if (T) {                /* b - T (Arow * x) */
442           for (k=0; k<bs; k++) w[k] = 0;
443           for (j=0; j<nz; j++) {
444             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
445           }
446           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
447           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
448         } else if (kaij->isTI) {
449           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
450           for (j=0; j<nz; j++) {
451             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
452           }
453         } else {
454           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
455         }
456 
457         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
458         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
459 
460         idiag += bs2;
461         i2    += bs;
462       }
463       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
464       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
465       xb = t;
466     } else xb = b;
467     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
468       idiag = kaij->ibdiag+bs2*(m-1);
469       i2    = bs * (m-1);
470       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
471       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
472       i2    -= bs;
473       idiag -= bs2;
474       for (i=m-2; i>=0; i--) {
475         v  = aa + diag[i] + 1 ;
476         vi = aj + diag[i] + 1;
477         nz = ai[i+1] - diag[i] - 1;
478 
479         if (T) {                /* FIXME: This branch untested */
480           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
481           /* copy all rows of x that are needed into contiguous space */
482           workt = work;
483           for (j=0; j<nz; j++) {
484             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
485             workt += bs;
486           }
487           arrt = arr;
488           for (j=0; j<nz; j++) {
489             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
490             for (k=0; k<bs2; k++) arrt[k] *= v[j];
491             arrt += bs2;
492           }
493           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
494         } else if (kaij->isTI) {
495           for (k=0; k<bs; k++) w[k] = t[i2+k];
496           for (j=0; j<nz; j++) {
497             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
498           }
499         }
500 
501         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
502         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
503 
504         idiag -= bs2;
505         i2    -= bs;
506       }
507       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
508     }
509     its--;
510   }
511   while (its--) {               /* FIXME: This branch not updated */
512     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
513       i2     =  0;
514       idiag  = kaij->ibdiag;
515       for (i=0; i<m; i++) {
516         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518         v  = aa + ai[i];
519         vi = aj + ai[i];
520         nz = diag[i] - ai[i];
521         workt = work;
522         for (j=0; j<nz; j++) {
523           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
524           workt += bs;
525         }
526         arrt = arr;
527         if (T) {
528           for (j=0; j<nz; j++) {
529             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
530             for (k=0; k<bs2; k++) arrt[k] *= v[j];
531             arrt += bs2;
532           }
533           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
534         } else if (kaij->isTI) {
535           for (j=0; j<nz; j++) {
536             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
537             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
538             arrt += bs2;
539           }
540           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
541         }
542         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
543 
544         v  = aa + diag[i] + 1;
545         vi = aj + diag[i] + 1;
546         nz = ai[i+1] - diag[i] - 1;
547         workt = work;
548         for (j=0; j<nz; j++) {
549           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
550           workt += bs;
551         }
552         arrt = arr;
553         if (T) {
554           for (j=0; j<nz; j++) {
555             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
556             for (k=0; k<bs2; k++) arrt[k] *= v[j];
557             arrt += bs2;
558           }
559           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
560         } else if (kaij->isTI) {
561           for (j=0; j<nz; j++) {
562             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
563             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
564             arrt += bs2;
565           }
566           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
567         }
568 
569         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
570         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
571 
572         idiag += bs2;
573         i2    += bs;
574       }
575       xb = t;
576     }
577     else xb = b;
578     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
579       idiag = kaij->ibdiag+bs2*(m-1);
580       i2    = bs * (m-1);
581       if (xb == b) {
582         for (i=m-1; i>=0; i--) {
583           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
584 
585           v  = aa + ai[i];
586           vi = aj + ai[i];
587           nz = diag[i] - ai[i];
588           workt = work;
589           for (j=0; j<nz; j++) {
590             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
591             workt += bs;
592           }
593           arrt = arr;
594           if (T) {
595             for (j=0; j<nz; j++) {
596               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
597               for (k=0; k<bs2; k++) arrt[k] *= v[j];
598               arrt += bs2;
599             }
600             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
601           } else if (kaij->isTI) {
602             for (j=0; j<nz; j++) {
603               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
604               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
605               arrt += bs2;
606             }
607             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
608           }
609 
610           v  = aa + diag[i] + 1;
611           vi = aj + diag[i] + 1;
612           nz = ai[i+1] - diag[i] - 1;
613           workt = work;
614           for (j=0; j<nz; j++) {
615             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
616             workt += bs;
617           }
618           arrt = arr;
619           if (T) {
620             for (j=0; j<nz; j++) {
621               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
622               for (k=0; k<bs2; k++) arrt[k] *= v[j];
623               arrt += bs2;
624             }
625             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
626           } else if (kaij->isTI) {
627             for (j=0; j<nz; j++) {
628               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
629               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
630               arrt += bs2;
631             }
632             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
633           }
634 
635           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
636           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
637         }
638       } else {
639         for (i=m-1; i>=0; i--) {
640           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
641           v  = aa + diag[i] + 1;
642           vi = aj + diag[i] + 1;
643           nz = ai[i+1] - diag[i] - 1;
644           workt = work;
645           for (j=0; j<nz; j++) {
646             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
647             workt += bs;
648           }
649           arrt = arr;
650           if (T) {
651             for (j=0; j<nz; j++) {
652               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
653               for (k=0; k<bs2; k++) arrt[k] *= v[j];
654               arrt += bs2;
655             }
656             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
657           } else if (kaij->isTI) {
658             for (j=0; j<nz; j++) {
659               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
660               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
661               arrt += bs2;
662             }
663             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
664           }
665           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
666           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
667         }
668         idiag -= bs2;
669         i2    -= bs;
670       }
671       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
672     }
673   }
674 
675   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
676   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 /*===================================================================================*/
681 
682 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
683 {
684   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
685   PetscErrorCode ierr;
686 
687   PetscFunctionBegin;
688   if (!yy) {
689     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
690   } else {
691     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
692   }
693   /* start the scatter */
694   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
695   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
696   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
697   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
702 {
703   PetscErrorCode ierr;
704   PetscFunctionBegin;
705   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
706   PetscFunctionReturn(0);
707 }
708 
709 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
710 {
711   PetscErrorCode ierr;
712   PetscFunctionBegin;
713   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
714   PetscFunctionReturn(0);
715 }
716 
717 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
718 {
719   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
720   PetscErrorCode  ierr;
721 
722   PetscFunctionBegin;
723   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
724   PetscFunctionReturn(0);
725 }
726 
727 /* ----------------------------------------------------------------*/
728 
729 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
730 {
731   Mat_SeqKAIJ     *b    = (Mat_SeqKAIJ*) A->data;
732   PetscErrorCode  ierr,diag;
733   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
734   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
735 
736   PetscFunctionBegin;
737   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
738   b->getrowactive = PETSC_TRUE;
739   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
740 
741   if ((!S) && (!T) && (!b->isTI)) {
742     if (ncols)    *ncols  = 0;
743     if (cols)     *cols   = NULL;
744     if (values)   *values = NULL;
745     PetscFunctionReturn(0);
746   }
747 
748   if (T || b->isTI) {
749     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
750     diag  = PETSC_FALSE;
751     c     = nzaij;
752     for (i=0; i<nzaij; i++) {
753       /* check if this row contains a diagonal entry */
754       if (colsaij[i] == r) {
755         diag = PETSC_TRUE;
756         c = i;
757       }
758     }
759   } else nzaij = c = 0;
760 
761   /* calculate size of row */
762   nz = 0;
763   if (S)            nz += q;
764   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
765 
766   if (cols || values) {
767     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
768     if (b->isTI) {
769       for (i=0; i<nzaij; i++) {
770         for (j=0; j<q; j++) {
771           idx[i*q+j] = colsaij[i]*q+j;
772           v[i*q+j]   = (j==s ? vaij[i] : 0);
773         }
774       }
775     } else if (T) {
776       for (i=0; i<nzaij; i++) {
777         for (j=0; j<q; j++) {
778           idx[i*q+j] = colsaij[i]*q+j;
779           v[i*q+j]   = vaij[i]*T[s+j*p];
780         }
781       }
782     }
783     if (S) {
784       for (j=0; j<q; j++) {
785         idx[c*q+j] = r*q+j;
786         v[c*q+j]  += S[s+j*p];
787       }
788     }
789   }
790 
791   if (ncols)    *ncols  = nz;
792   if (cols)     *cols   = idx;
793   if (values)   *values = v;
794 
795   PetscFunctionReturn(0);
796 }
797 
798 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
799 {
800   PetscErrorCode ierr;
801   PetscFunctionBegin;
802   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
803   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
804   PetscFunctionReturn(0);
805 }
806 
807 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
808 {
809   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
810   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
811   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
812   Mat             AIJ     = b->A;
813   PetscErrorCode  ierr;
814   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
815   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
816   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
817   PetscBool       diag;
818 
819   PetscFunctionBegin;
820   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
821   b->getrowactive = PETSC_TRUE;
822   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
823   lrow = row - rstart;
824 
825   if ((!S) && (!T) && (!b->isTI)) {
826     if (ncols)    *ncols  = 0;
827     if (cols)     *cols   = NULL;
828     if (values)   *values = NULL;
829     PetscFunctionReturn(0);
830   }
831 
832   r = lrow/p;
833   s = lrow%p;
834 
835   if (T || b->isTI) {
836     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
837     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
838     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
839 
840     diag  = PETSC_FALSE;
841     c     = ncolsaij + ncolsoaij;
842     for (i=0; i<ncolsaij; i++) {
843       /* check if this row contains a diagonal entry */
844       if (colsaij[i] == r) {
845         diag = PETSC_TRUE;
846         c = i;
847       }
848     }
849   } else c = 0;
850 
851   /* calculate size of row */
852   nz = 0;
853   if (S)            nz += q;
854   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
855 
856   if (cols || values) {
857     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
858     if (b->isTI) {
859       for (i=0; i<ncolsaij; i++) {
860         for (j=0; j<q; j++) {
861           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
862           v[i*q+j]   = (j==s ? vals[i] : 0.0);
863         }
864       }
865       for (i=0; i<ncolsoaij; i++) {
866         for (j=0; j<q; j++) {
867           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
868           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
869         }
870       }
871     } else if (T) {
872       for (i=0; i<ncolsaij; i++) {
873         for (j=0; j<q; j++) {
874           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
875           v[i*q+j]   = vals[i]*T[s+j*p];
876         }
877       }
878       for (i=0; i<ncolsoaij; i++) {
879         for (j=0; j<q; j++) {
880           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
881           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
882         }
883       }
884     }
885     if (S) {
886       for (j=0; j<q; j++) {
887         idx[c*q+j] = (r+rstart/p)*q+j;
888         v[c*q+j]  += S[s+j*p];
889       }
890     }
891   }
892 
893   if (ncols)  *ncols  = nz;
894   if (cols)   *cols   = idx;
895   if (values) *values = v;
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
900 {
901   PetscErrorCode ierr;
902   PetscFunctionBegin;
903   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
904   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
905   PetscFunctionReturn(0);
906 }
907 
908 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
909 {
910   PetscErrorCode ierr;
911   Mat            A;
912 
913   PetscFunctionBegin;
914   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
915   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
916   ierr = MatDestroy(&A);CHKERRQ(ierr);
917   PetscFunctionReturn(0);
918 }
919 
920 /* ---------------------------------------------------------------------------------- */
921 /*@C
922   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
923 
924     [I \otimes S + A \otimes T]
925 
926   where
927     S is a dense (p \times q) matrix
928     T is a dense (p \times q) matrix
929     A is an AIJ  (n \times n) matrix
930     I is the identity matrix
931   The resulting matrix is (np \times nq)
932 
933   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
934   S is always stored independently on all processes as a PetscScalar array in column-major format.
935 
936   Collective
937 
938   Input Parameters:
939 + A - the AIJ matrix
940 . S - the S matrix, stored as a PetscScalar array (column-major)
941 . T - the T matrix, stored as a PetscScalar array (column-major)
942 . p - number of rows in S and T
943 - q - number of columns in S and T
944 
945   Output Parameter:
946 . kaij - the new KAIJ matrix
947 
948   Operations provided:
949 + MatMult
950 . MatMultAdd
951 . MatInvertBlockDiagonal
952 - MatView
953 
954   Level: advanced
955 
956 .seealso: MatKAIJGetAIJ(), MATKAIJ
957 @*/
958 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
959 {
960   PetscErrorCode ierr;
961   PetscMPIInt    size;
962   PetscInt       n,i,j;
963   Mat            B;
964   PetscBool      isTI = PETSC_FALSE;
965 
966   PetscFunctionBegin;
967 
968   /* check if T is an identity matrix */
969   if (T && (p == q)) {
970     isTI = PETSC_TRUE;
971     for (i=0; i<p; i++) {
972       for (j=0; j<q; j++) {
973         if (i == j) {
974           /* diagonal term must be 1 */
975           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
976         } else {
977           /* off-diagonal term must be 0 */
978           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
979         }
980       }
981     }
982   }
983 
984   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
985 
986   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
987   ierr = MatSetSizes(B,p*A->rmap->n,q*A->cmap->n,p*A->rmap->N,q*A->cmap->N);CHKERRQ(ierr);
988   ierr = PetscLayoutSetBlockSize(B->rmap,p);CHKERRQ(ierr);
989   ierr = PetscLayoutSetBlockSize(B->cmap,q);CHKERRQ(ierr);
990   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
991   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
992 
993   B->assembled = PETSC_TRUE;
994   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
995 
996   if (size == 1) {
997     Mat_SeqKAIJ *b;
998 
999     ierr = MatSetType(B,MATSEQKAIJ);CHKERRQ(ierr);
1000 
1001     B->ops->setup   = NULL;
1002     B->ops->destroy = MatDestroy_SeqKAIJ;
1003     B->ops->view    = MatView_SeqKAIJ;
1004     b               = (Mat_SeqKAIJ*)B->data;
1005     b->p            = p;
1006     b->q            = q;
1007     b->AIJ          = A;
1008     b->isTI         = isTI;
1009     if (S) {
1010       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr);
1011       ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1012     } else  b->S = NULL;
1013     if (T && (!isTI)) {
1014       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr);
1015       ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1016     } else b->T = NULL;
1017 
1018     B->ops->mult                = MatMult_SeqKAIJ_N;
1019     B->ops->multadd             = MatMultAdd_SeqKAIJ_N;
1020     B->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
1021     B->ops->getrow              = MatGetRow_SeqKAIJ;
1022     B->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1023     B->ops->sor                 = MatSOR_SeqKAIJ;
1024   } else {
1025     Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
1026     Mat_MPIKAIJ *b;
1027     IS          from,to;
1028     Vec         gvec;
1029 
1030     ierr = MatSetType(B,MATMPIKAIJ);CHKERRQ(ierr);
1031 
1032     B->ops->setup   = NULL;
1033     B->ops->destroy = MatDestroy_MPIKAIJ;
1034     B->ops->view    = MatView_MPIKAIJ;
1035 
1036     b       = (Mat_MPIKAIJ*)B->data;
1037     b->p    = p;
1038     b->q    = q;
1039     b->A    = A;
1040     b->isTI = isTI;
1041     if (S) {
1042       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->S);CHKERRQ(ierr);
1043       ierr = PetscMemcpy(b->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1044     } else  b->S = NULL;
1045     if (T &&(!isTI)) {
1046       ierr = PetscMalloc(p*q*sizeof(PetscScalar),&b->T);CHKERRQ(ierr);
1047       ierr = PetscMemcpy(b->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
1048     } else b->T = NULL;
1049 
1050     ierr = MatCreateKAIJ(mpiaij->A,p,q,S   ,T,&b->AIJ);CHKERRQ(ierr);
1051     ierr = MatCreateKAIJ(mpiaij->B,p,q,NULL,T,&b->OAIJ);CHKERRQ(ierr);
1052 
1053     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1054     ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
1055     ierr = VecSetSizes(b->w,n*q,n*q);CHKERRQ(ierr);
1056     ierr = VecSetBlockSize(b->w,q);CHKERRQ(ierr);
1057     ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
1058 
1059     /* create two temporary Index sets for build scatter gather */
1060     ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
1061     ierr = ISCreateStride(PETSC_COMM_SELF,n*q,0,1,&to);CHKERRQ(ierr);
1062 
1063     /* create temporary global vector to generate scatter context */
1064     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),q,q*A->cmap->n,q*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
1065 
1066     /* generate the scatter context */
1067     ierr = VecScatterCreateWithData(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1068 
1069     ierr = ISDestroy(&from);CHKERRQ(ierr);
1070     ierr = ISDestroy(&to);CHKERRQ(ierr);
1071     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
1072 
1073     B->ops->mult                = MatMult_MPIKAIJ_dof;
1074     B->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
1075     B->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
1076     B->ops->getrow              = MatGetRow_MPIKAIJ;
1077     B->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1078     ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1079   }
1080   B->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1081   B->assembled = PETSC_TRUE;
1082   ierr  = MatSetUp(B);CHKERRQ(ierr);
1083   *kaij = B;
1084   ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
1085 
1086   PetscFunctionReturn(0);
1087 }
1088