xref: /petsc/src/mat/impls/kaij/kaij.c (revision 836168d5ade6e5b801d70b525699ac477d4a6b50)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscFunctionBegin;
145   if (S) *S = NULL;
146   PetscObjectStateIncrease((PetscObject)A);
147   PetscFunctionReturn(0);
148 }
149 
150 /*@C
151   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
152 
153   Not collective
154 
155   Input Parameter:
156 . A - the KAIJ matrix
157 
158   Output Parameter:
159 . S - location of pointer to array obtained with MatKAIJGetS()
160 
161   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
162   If NULL is passed, it will not attempt to zero the array pointer.
163 
164   Level: advanced
165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
166 @*/
167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
168 {
169   PetscFunctionBegin;
170   if (S) *S = NULL;
171   PetscFunctionReturn(0);
172 }
173 
174 /*@C
175    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
176 
177    Not Collective; the entire T is stored and returned independently on all processes
178 
179    Input Parameter:
180 .  A - the KAIJ matrix
181 
182    Output Parameter:
183 +  m - the number of rows in T
184 .  n - the number of columns in T
185 -  T - the T matrix, in form of a scalar array in column-major format
186 
187    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
188 
189    Level: advanced
190 
191 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
192 @*/
193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
194 {
195   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
196   PetscFunctionBegin;
197   if (m) *m = b->p;
198   if (n) *n = b->q;
199   if (T) *T = b->T;
200   PetscFunctionReturn(0);
201 }
202 
203 /*@C
204    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
205 
206    Not Collective; the entire T is stored and returned independently on all processes
207 
208    Input Parameter:
209 .  A - the KAIJ matrix
210 
211    Output Parameter:
212 +  m - the number of rows in T
213 .  n - the number of columns in T
214 -  T - the T matrix, in form of a scalar array in column-major format
215 
216    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
217 
218    Level: advanced
219 
220 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
221 @*/
222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
223 {
224   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
225   PetscFunctionBegin;
226   if (m) *m = b->p;
227   if (n) *n = b->q;
228   if (T) *T = b->T;
229   PetscFunctionReturn(0);
230 }
231 
232 /*@C
233   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
234 
235   Not collective
236 
237   Input Parameter:
238 . A - the KAIJ matrix
239 
240   Output Parameter:
241 . T - location of pointer to array obtained with MatKAIJGetS()
242 
243   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
244   If NULL is passed, it will not attempt to zero the array pointer.
245 
246   Level: advanced
247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
248 @*/
249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
250 {
251   PetscFunctionBegin;
252   if (T) *T = NULL;
253   PetscObjectStateIncrease((PetscObject)A);
254   PetscFunctionReturn(0);
255 }
256 
257 /*@C
258   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
259 
260   Not collective
261 
262   Input Parameter:
263 . A - the KAIJ matrix
264 
265   Output Parameter:
266 . T - location of pointer to array obtained with MatKAIJGetS()
267 
268   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
269   If NULL is passed, it will not attempt to zero the array pointer.
270 
271   Level: advanced
272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
273 @*/
274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
275 {
276   PetscFunctionBegin;
277   if (T) *T = NULL;
278   PetscFunctionReturn(0);
279 }
280 
281 /*@
282    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
283 
284    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
285 
286    Input Parameters:
287 +  A - the KAIJ matrix
288 -  B - the AIJ matrix
289 
290    Notes:
291    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
292    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
293 
294    Level: advanced
295 
296 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
297 @*/
298 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
299 {
300   PetscErrorCode ierr;
301   PetscMPIInt    size;
302 
303   PetscFunctionBegin;
304   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
305   if (size == 1) {
306     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
307     a->AIJ = B;
308   } else {
309     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
310     a->A = B;
311   }
312   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 /*@C
317    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
318 
319    Logically Collective; the entire S is stored independently on all processes.
320 
321    Input Parameters:
322 +  A - the KAIJ matrix
323 .  p - the number of rows in S
324 .  q - the number of columns in S
325 -  S - the S matrix, in form of a scalar array in column-major format
326 
327    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
328    The S matrix is copied, so the user can destroy this array.
329 
330    Level: Advanced
331 
332 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
333 @*/
334 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
335 {
336   PetscErrorCode ierr;
337   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
338 
339   PetscFunctionBegin;
340   ierr = PetscFree(a->S);CHKERRQ(ierr);
341   if (S) {
342     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
343     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
344   } else  a->S = NULL;
345 
346   a->p = p;
347   a->q = q;
348   PetscFunctionReturn(0);
349 }
350 
351 /*@C
352    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
353 
354    Logically Collective; the entire T is stored independently on all processes.
355 
356    Input Parameters:
357 +  A - the KAIJ matrix
358 .  p - the number of rows in S
359 .  q - the number of columns in S
360 -  T - the T matrix, in form of a scalar array in column-major format
361 
362    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
363    The T matrix is copied, so the user can destroy this array.
364 
365    Level: Advanced
366 
367 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
368 @*/
369 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
370 {
371   PetscErrorCode ierr;
372   PetscInt       i,j;
373   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
374   PetscBool      isTI = PETSC_FALSE;
375 
376   PetscFunctionBegin;
377   /* check if T is an identity matrix */
378   if (T && (p == q)) {
379     isTI = PETSC_TRUE;
380     for (i=0; i<p; i++) {
381       for (j=0; j<q; j++) {
382         if (i == j) {
383           /* diagonal term must be 1 */
384           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
385         } else {
386           /* off-diagonal term must be 0 */
387           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
388         }
389       }
390     }
391   }
392   a->isTI = isTI;
393 
394   ierr = PetscFree(a->T);CHKERRQ(ierr);
395   if (T && (!isTI)) {
396     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
397     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
398   } else a->T = NULL;
399 
400   a->p = p;
401   a->q = q;
402   PetscFunctionReturn(0);
403 }
404 
405 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
406 {
407   PetscErrorCode ierr;
408   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
409 
410   PetscFunctionBegin;
411   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
412   ierr = PetscFree(b->S);CHKERRQ(ierr);
413   ierr = PetscFree(b->T);CHKERRQ(ierr);
414   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
415   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
416   ierr = PetscFree(A->data);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 PetscErrorCode MatSetUp_KAIJ(Mat A)
421 {
422   PetscErrorCode ierr;
423   PetscInt       n;
424   PetscMPIInt    size;
425   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
426 
427   PetscFunctionBegin;
428   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
429   if (size == 1) {
430     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
431     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
432     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
433     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
434     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
435   } else {
436     Mat_MPIKAIJ *a;
437     Mat_MPIAIJ  *mpiaij;
438     IS          from,to;
439     Vec         gvec;
440     PetscScalar *T;
441     PetscInt    i,j;
442 
443     a = (Mat_MPIKAIJ*)A->data;
444     mpiaij = (Mat_MPIAIJ*)a->A->data;
445     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
446     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
447     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
448     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
449     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
450 
451     if (a->isTI) {
452       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
453        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
454        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
455        * to pass in. */
456       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
457       for (i=0; i<a->p; i++) {
458         for (j=0; j<a->q; j++) {
459           if (i==j) T[i+j*a->p] = 1.0;
460           else      T[i+j*a->p] = 0.0;
461         }
462       }
463     } else T = a->T;
464     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
465     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
466     ierr = PetscFree(T);CHKERRQ(ierr);
467 
468     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
469     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
470     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
471     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
472     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
473 
474     /* create two temporary Index sets for build scatter gather */
475     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
476     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
477 
478     /* create temporary global vector to generate scatter context */
479     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
480 
481     /* generate the scatter context */
482     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
483 
484     ierr = ISDestroy(&from);CHKERRQ(ierr);
485     ierr = ISDestroy(&to);CHKERRQ(ierr);
486     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
487   }
488 
489   A->assembled = PETSC_TRUE;
490   PetscFunctionReturn(0);
491 }
492 
493 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
494 {
495   PetscErrorCode ierr;
496   Mat            B;
497 
498   PetscFunctionBegin;
499   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
500   ierr = MatView(B,viewer);CHKERRQ(ierr);
501   ierr = MatDestroy(&B);CHKERRQ(ierr);
502   PetscFunctionReturn(0);
503 }
504 
505 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
506 {
507   PetscErrorCode ierr;
508   Mat            B;
509 
510   PetscFunctionBegin;
511   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
512   ierr = MatView(B,viewer);CHKERRQ(ierr);
513   ierr = MatDestroy(&B);CHKERRQ(ierr);
514   PetscFunctionReturn(0);
515 }
516 
517 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
518 {
519   PetscErrorCode ierr;
520   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
521 
522   PetscFunctionBegin;
523   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
524   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
525   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
526   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
527   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
528   ierr = PetscFree(b->S);CHKERRQ(ierr);
529   ierr = PetscFree(b->T);CHKERRQ(ierr);
530   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
531   ierr = PetscFree(A->data);CHKERRQ(ierr);
532   PetscFunctionReturn(0);
533 }
534 
535 /* --------------------------------------------------------------------------------------*/
536 
537 /* zz = yy + Axx */
538 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
539 {
540   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
541   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
542   const PetscScalar *s = b->S, *t = b->T;
543   const PetscScalar *x,*v,*bx;
544   PetscScalar       *y,*sums;
545   PetscErrorCode    ierr;
546   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
547   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
548 
549   PetscFunctionBegin;
550   if (!yy) {
551     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
552   } else {
553     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
554   }
555   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
556 
557   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
558   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
559   idx  = a->j;
560   v    = a->a;
561   ii   = a->i;
562 
563   if (b->isTI) {
564     for (i=0; i<m; i++) {
565       jrow = ii[i];
566       n    = ii[i+1] - jrow;
567       sums = y + p*i;
568       for (j=0; j<n; j++) {
569         for (k=0; k<p; k++) {
570           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
571         }
572       }
573     }
574   } else if (t) {
575     for (i=0; i<m; i++) {
576       jrow = ii[i];
577       n    = ii[i+1] - jrow;
578       sums = y + p*i;
579       bx   = x + q*i;
580       for (j=0; j<n; j++) {
581         for (k=0; k<p; k++) {
582           for (l=0; l<q; l++) {
583             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
584           }
585         }
586       }
587     }
588   }
589   if (s) {
590     for (i=0; i<m; i++) {
591       jrow = ii[i];
592       n    = ii[i+1] - jrow;
593       sums = y + p*i;
594       bx   = x + q*i;
595       if (i < b->AIJ->cmap->n) {
596         for (j=0; j<q; j++) {
597           for (k=0; k<p; k++) {
598             sums[k] += s[k+j*p]*bx[j];
599           }
600         }
601       }
602     }
603   }
604 
605   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
606   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
607   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
608   PetscFunctionReturn(0);
609 }
610 
611 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
612 {
613   PetscErrorCode ierr;
614   PetscFunctionBegin;
615   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
616   PetscFunctionReturn(0);
617 }
618 
619 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
620 {
621   PetscErrorCode ierr;
622   PetscFunctionBegin;
623   ierr = MatMultAdd_SeqKAIJ(A,xx,yy,zz);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 #include <petsc/private/kernels/blockinvert.h>
628 
629 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
630 {
631   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
632   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
633   const PetscScalar *S  = b->S;
634   const PetscScalar *T  = b->T;
635   const PetscScalar *v  = a->a;
636   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
637   PetscErrorCode    ierr;
638   PetscInt          i,j,*v_pivots,dof,dof2;
639   PetscScalar       *diag,aval,*v_work;
640 
641   PetscFunctionBegin;
642   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
643   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
644 
645   dof  = p;
646   dof2 = dof*dof;
647 
648   if (b->ibdiagvalid) {
649     if (values) *values = b->ibdiag;
650     PetscFunctionReturn(0);
651   }
652   if (!b->ibdiag) {
653     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
654     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
655   }
656   if (values) *values = b->ibdiag;
657   diag = b->ibdiag;
658 
659   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
660   for (i=0; i<m; i++) {
661     if (S) {
662       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
663     } else {
664       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
665     }
666     if (b->isTI) {
667       aval = 0;
668       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
669       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
670     } else if (T) {
671       aval = 0;
672       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
673       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
674     }
675     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
676     diag += dof2;
677   }
678   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
679 
680   b->ibdiagvalid = PETSC_TRUE;
681   PetscFunctionReturn(0);
682 }
683 
684 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
685 {
686   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
687 
688   PetscFunctionBegin;
689   *B = kaij->AIJ;
690   PetscFunctionReturn(0);
691 }
692 
693 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
694 {
695   PetscErrorCode    ierr;
696   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
697   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
698   const PetscScalar *aa = a->a, *T = kaij->T, *v;
699   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
700   const PetscScalar *b, *xb, *idiag;
701   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
702   PetscInt          i, j, k, i2, bs, bs2, nz;
703 
704   PetscFunctionBegin;
705   its = its*lits;
706   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
707   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
708   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
709   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
710   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
711   else        {bs = p; bs2 = bs*bs; }
712 
713   if (!m) PetscFunctionReturn(0);
714 
715   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
716   idiag = kaij->ibdiag;
717   diag  = a->diag;
718 
719   if (!kaij->sor.setup) {
720     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
721     kaij->sor.setup = PETSC_TRUE;
722   }
723   y     = kaij->sor.y;
724   w     = kaij->sor.w;
725   work  = kaij->sor.work;
726   t     = kaij->sor.t;
727   arr   = kaij->sor.arr;
728 
729   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
730   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
731 
732   if (flag & SOR_ZERO_INITIAL_GUESS) {
733     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
734       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
735       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
736       i2     =  bs;
737       idiag  += bs2;
738       for (i=1; i<m; i++) {
739         v  = aa + ai[i];
740         vi = aj + ai[i];
741         nz = diag[i] - ai[i];
742 
743         if (T) {                /* b - T (Arow * x) */
744           for (k=0; k<bs; k++) w[k] = 0;
745           for (j=0; j<nz; j++) {
746             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
747           }
748           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
749           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
750         } else if (kaij->isTI) {
751           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
752           for (j=0; j<nz; j++) {
753             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
754           }
755         } else {
756           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
757         }
758 
759         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
760         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
761 
762         idiag += bs2;
763         i2    += bs;
764       }
765       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
766       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
767       xb = t;
768     } else xb = b;
769     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
770       idiag = kaij->ibdiag+bs2*(m-1);
771       i2    = bs * (m-1);
772       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
773       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
774       i2    -= bs;
775       idiag -= bs2;
776       for (i=m-2; i>=0; i--) {
777         v  = aa + diag[i] + 1 ;
778         vi = aj + diag[i] + 1;
779         nz = ai[i+1] - diag[i] - 1;
780 
781         if (T) {                /* FIXME: This branch untested */
782           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
783           /* copy all rows of x that are needed into contiguous space */
784           workt = work;
785           for (j=0; j<nz; j++) {
786             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
787             workt += bs;
788           }
789           arrt = arr;
790           for (j=0; j<nz; j++) {
791             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
792             for (k=0; k<bs2; k++) arrt[k] *= v[j];
793             arrt += bs2;
794           }
795           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
796         } else if (kaij->isTI) {
797           for (k=0; k<bs; k++) w[k] = t[i2+k];
798           for (j=0; j<nz; j++) {
799             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
800           }
801         }
802 
803         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
804         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
805 
806         idiag -= bs2;
807         i2    -= bs;
808       }
809       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
810     }
811     its--;
812   }
813   while (its--) {               /* FIXME: This branch not updated */
814     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
815       i2     =  0;
816       idiag  = kaij->ibdiag;
817       for (i=0; i<m; i++) {
818         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
819 
820         v  = aa + ai[i];
821         vi = aj + ai[i];
822         nz = diag[i] - ai[i];
823         workt = work;
824         for (j=0; j<nz; j++) {
825           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
826           workt += bs;
827         }
828         arrt = arr;
829         if (T) {
830           for (j=0; j<nz; j++) {
831             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
832             for (k=0; k<bs2; k++) arrt[k] *= v[j];
833             arrt += bs2;
834           }
835           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
836         } else if (kaij->isTI) {
837           for (j=0; j<nz; j++) {
838             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
839             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
840             arrt += bs2;
841           }
842           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
843         }
844         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
845 
846         v  = aa + diag[i] + 1;
847         vi = aj + diag[i] + 1;
848         nz = ai[i+1] - diag[i] - 1;
849         workt = work;
850         for (j=0; j<nz; j++) {
851           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
852           workt += bs;
853         }
854         arrt = arr;
855         if (T) {
856           for (j=0; j<nz; j++) {
857             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
858             for (k=0; k<bs2; k++) arrt[k] *= v[j];
859             arrt += bs2;
860           }
861           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
862         } else if (kaij->isTI) {
863           for (j=0; j<nz; j++) {
864             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
865             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
866             arrt += bs2;
867           }
868           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
869         }
870 
871         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
872         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
873 
874         idiag += bs2;
875         i2    += bs;
876       }
877       xb = t;
878     }
879     else xb = b;
880     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
881       idiag = kaij->ibdiag+bs2*(m-1);
882       i2    = bs * (m-1);
883       if (xb == b) {
884         for (i=m-1; i>=0; i--) {
885           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
886 
887           v  = aa + ai[i];
888           vi = aj + ai[i];
889           nz = diag[i] - ai[i];
890           workt = work;
891           for (j=0; j<nz; j++) {
892             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
893             workt += bs;
894           }
895           arrt = arr;
896           if (T) {
897             for (j=0; j<nz; j++) {
898               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
899               for (k=0; k<bs2; k++) arrt[k] *= v[j];
900               arrt += bs2;
901             }
902             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
903           } else if (kaij->isTI) {
904             for (j=0; j<nz; j++) {
905               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
906               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
907               arrt += bs2;
908             }
909             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
910           }
911 
912           v  = aa + diag[i] + 1;
913           vi = aj + diag[i] + 1;
914           nz = ai[i+1] - diag[i] - 1;
915           workt = work;
916           for (j=0; j<nz; j++) {
917             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
918             workt += bs;
919           }
920           arrt = arr;
921           if (T) {
922             for (j=0; j<nz; j++) {
923               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
924               for (k=0; k<bs2; k++) arrt[k] *= v[j];
925               arrt += bs2;
926             }
927             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
928           } else if (kaij->isTI) {
929             for (j=0; j<nz; j++) {
930               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
931               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
932               arrt += bs2;
933             }
934             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
935           }
936 
937           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
938           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
939         }
940       } else {
941         for (i=m-1; i>=0; i--) {
942           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
943           v  = aa + diag[i] + 1;
944           vi = aj + diag[i] + 1;
945           nz = ai[i+1] - diag[i] - 1;
946           workt = work;
947           for (j=0; j<nz; j++) {
948             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
949             workt += bs;
950           }
951           arrt = arr;
952           if (T) {
953             for (j=0; j<nz; j++) {
954               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
955               for (k=0; k<bs2; k++) arrt[k] *= v[j];
956               arrt += bs2;
957             }
958             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
959           } else if (kaij->isTI) {
960             for (j=0; j<nz; j++) {
961               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
962               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
963               arrt += bs2;
964             }
965             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
966           }
967           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
968           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
969         }
970         idiag -= bs2;
971         i2    -= bs;
972       }
973       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
974     }
975   }
976 
977   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
978   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
979   PetscFunctionReturn(0);
980 }
981 
982 /*===================================================================================*/
983 
984 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
985 {
986   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   if (!yy) {
991     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
992   } else {
993     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
994   }
995   /* start the scatter */
996   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
997   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
998   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
999   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
1004 {
1005   PetscErrorCode ierr;
1006   PetscFunctionBegin;
1007   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
1012 {
1013   PetscErrorCode ierr;
1014   PetscFunctionBegin;
1015   ierr = MatMultAdd_MPIKAIJ(A,xx,yy,zz);CHKERRQ(ierr);
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
1020 {
1021   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1022   PetscErrorCode  ierr;
1023 
1024   PetscFunctionBegin;
1025   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 /* ----------------------------------------------------------------*/
1030 
1031 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1032 {
1033   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1034   PetscErrorCode  diag = PETSC_FALSE;
1035   PetscErrorCode  ierr;
1036   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1037   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1038 
1039   PetscFunctionBegin;
1040   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1041   b->getrowactive = PETSC_TRUE;
1042   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1043 
1044   if ((!S) && (!T) && (!b->isTI)) {
1045     if (ncols)    *ncols  = 0;
1046     if (cols)     *cols   = NULL;
1047     if (values)   *values = NULL;
1048     PetscFunctionReturn(0);
1049   }
1050 
1051   if (T || b->isTI) {
1052     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1053     c     = nzaij;
1054     for (i=0; i<nzaij; i++) {
1055       /* check if this row contains a diagonal entry */
1056       if (colsaij[i] == r) {
1057         diag = PETSC_TRUE;
1058         c = i;
1059       }
1060     }
1061   } else nzaij = c = 0;
1062 
1063   /* calculate size of row */
1064   nz = 0;
1065   if (S)            nz += q;
1066   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1067 
1068   if (cols || values) {
1069     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1070     for (i=0; i<q; i++) {
1071       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1072       v[i] = 0.0;
1073     }
1074     if (b->isTI) {
1075       for (i=0; i<nzaij; i++) {
1076         for (j=0; j<q; j++) {
1077           idx[i*q+j] = colsaij[i]*q+j;
1078           v[i*q+j]   = (j==s ? vaij[i] : 0);
1079         }
1080       }
1081     } else if (T) {
1082       for (i=0; i<nzaij; i++) {
1083         for (j=0; j<q; j++) {
1084           idx[i*q+j] = colsaij[i]*q+j;
1085           v[i*q+j]   = vaij[i]*T[s+j*p];
1086         }
1087       }
1088     }
1089     if (S) {
1090       for (j=0; j<q; j++) {
1091         idx[c*q+j] = r*q+j;
1092         v[c*q+j]  += S[s+j*p];
1093       }
1094     }
1095   }
1096 
1097   if (ncols)    *ncols  = nz;
1098   if (cols)     *cols   = idx;
1099   if (values)   *values = v;
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1104 {
1105   PetscErrorCode ierr;
1106   PetscFunctionBegin;
1107   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1108   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1113 {
1114   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1115   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1116   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1117   Mat             AIJ     = b->A;
1118   PetscBool       diag    = PETSC_FALSE;
1119   PetscErrorCode  ierr;
1120   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1121   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1122   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1123 
1124   PetscFunctionBegin;
1125   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1126   b->getrowactive = PETSC_TRUE;
1127   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1128   lrow = row - rstart;
1129 
1130   if ((!S) && (!T) && (!b->isTI)) {
1131     if (ncols)    *ncols  = 0;
1132     if (cols)     *cols   = NULL;
1133     if (values)   *values = NULL;
1134     PetscFunctionReturn(0);
1135   }
1136 
1137   r = lrow/p;
1138   s = lrow%p;
1139 
1140   if (T || b->isTI) {
1141     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1142     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1143     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1144 
1145     c     = ncolsaij + ncolsoaij;
1146     for (i=0; i<ncolsaij; i++) {
1147       /* check if this row contains a diagonal entry */
1148       if (colsaij[i] == r) {
1149         diag = PETSC_TRUE;
1150         c = i;
1151       }
1152     }
1153   } else c = 0;
1154 
1155   /* calculate size of row */
1156   nz = 0;
1157   if (S)            nz += q;
1158   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1159 
1160   if (cols || values) {
1161     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1162     for (i=0; i<q; i++) {
1163       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1164       v[i] = 0.0;
1165     }
1166     if (b->isTI) {
1167       for (i=0; i<ncolsaij; i++) {
1168         for (j=0; j<q; j++) {
1169           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1170           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1171         }
1172       }
1173       for (i=0; i<ncolsoaij; i++) {
1174         for (j=0; j<q; j++) {
1175           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1176           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1177         }
1178       }
1179     } else if (T) {
1180       for (i=0; i<ncolsaij; i++) {
1181         for (j=0; j<q; j++) {
1182           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1183           v[i*q+j]   = vals[i]*T[s+j*p];
1184         }
1185       }
1186       for (i=0; i<ncolsoaij; i++) {
1187         for (j=0; j<q; j++) {
1188           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1189           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1190         }
1191       }
1192     }
1193     if (S) {
1194       for (j=0; j<q; j++) {
1195         idx[c*q+j] = (r+rstart/p)*q+j;
1196         v[c*q+j]  += S[s+j*p];
1197       }
1198     }
1199   }
1200 
1201   if (ncols)  *ncols  = nz;
1202   if (cols)   *cols   = idx;
1203   if (values) *values = v;
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1208 {
1209   PetscErrorCode ierr;
1210   PetscFunctionBegin;
1211   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1212   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1217 {
1218   PetscErrorCode ierr;
1219   Mat            A;
1220 
1221   PetscFunctionBegin;
1222   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1223   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1224   ierr = MatDestroy(&A);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /* ---------------------------------------------------------------------------------- */
1229 /*@C
1230   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1231 
1232     [I \otimes S + A \otimes T]
1233 
1234   where
1235     S is a dense (p \times q) matrix
1236     T is a dense (p \times q) matrix
1237     A is an AIJ  (n \times n) matrix
1238     I is the identity matrix
1239   The resulting matrix is (np \times nq)
1240 
1241   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1242   S is always stored independently on all processes as a PetscScalar array in column-major format.
1243 
1244   Collective
1245 
1246   Input Parameters:
1247 + A - the AIJ matrix
1248 . S - the S matrix, stored as a PetscScalar array (column-major)
1249 . T - the T matrix, stored as a PetscScalar array (column-major)
1250 . p - number of rows in S and T
1251 - q - number of columns in S and T
1252 
1253   Output Parameter:
1254 . kaij - the new KAIJ matrix
1255 
1256   Operations provided:
1257 + MatMult
1258 . MatMultAdd
1259 . MatInvertBlockDiagonal
1260 - MatView
1261 
1262   Level: advanced
1263 
1264 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1265 @*/
1266 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1267 {
1268   PetscErrorCode ierr;
1269   PetscMPIInt    size;
1270 
1271   PetscFunctionBegin;
1272   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1273   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1274   if (size == 1) {
1275     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1276   } else {
1277     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1278   }
1279   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1280   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1281   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1282   ierr = MatSetUp(*kaij);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 /*MC
1287   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1288 
1289     [I \otimes S + A \otimes T]
1290 
1291   where
1292     S is a dense (p \times q) matrix
1293     T is a dense (p \times q) matrix
1294     A is an AIJ  (n \times n) matrix
1295     I is the identity matrix
1296   The resulting matrix is (np \times nq)
1297 
1298   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1299   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
1300 
1301   Operations provided:
1302 + MatMult
1303 . MatMultAdd
1304 . MatInvertBlockDiagonal
1305 - MatView
1306 
1307   Level: advanced
1308 
1309 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1310 M*/
1311 
1312 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1313 {
1314   PetscErrorCode ierr;
1315   Mat_MPIKAIJ    *b;
1316   PetscMPIInt    size;
1317 
1318   PetscFunctionBegin;
1319   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1320   A->data  = (void*)b;
1321 
1322   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1323 
1324   A->ops->setup = MatSetUp_KAIJ;
1325 
1326   b->w    = 0;
1327   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1328   if (size == 1) {
1329     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1330     A->ops->setup               = MatSetUp_KAIJ;
1331     A->ops->destroy             = MatDestroy_SeqKAIJ;
1332     A->ops->view                = MatView_SeqKAIJ;
1333     A->ops->mult                = MatMult_SeqKAIJ_N;
1334     A->ops->multadd             = MatMultAdd_SeqKAIJ_N;
1335     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
1336     A->ops->getrow              = MatGetRow_SeqKAIJ;
1337     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1338     A->ops->sor                 = MatSOR_SeqKAIJ;
1339   } else {
1340     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1341     A->ops->setup               = MatSetUp_KAIJ;
1342     A->ops->destroy             = MatDestroy_MPIKAIJ;
1343     A->ops->view                = MatView_MPIKAIJ;
1344     A->ops->mult                = MatMult_MPIKAIJ_dof;
1345     A->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
1346     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
1347     A->ops->getrow              = MatGetRow_MPIKAIJ;
1348     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1349     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1350   }
1351   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1352   PetscFunctionReturn(0);
1353 }
1354