xref: /petsc/src/mat/impls/kaij/kaij.c (revision bb6fb833bca96b0697dcafed18d86e36728252eb)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscFunctionBegin;
145   if (S) *S = NULL;
146   PetscObjectStateIncrease((PetscObject)A);
147   PetscFunctionReturn(0);
148 }
149 
150 /*@C
151   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
152 
153   Not collective
154 
155   Input Parameter:
156 . A - the KAIJ matrix
157 
158   Output Parameter:
159 . S - location of pointer to array obtained with MatKAIJGetS()
160 
161   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
162   If NULL is passed, it will not attempt to zero the array pointer.
163 
164   Level: advanced
165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
166 @*/
167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
168 {
169   PetscFunctionBegin;
170   if (S) *S = NULL;
171   PetscFunctionReturn(0);
172 }
173 
174 /*@C
175    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
176 
177    Not Collective; the entire T is stored and returned independently on all processes
178 
179    Input Parameter:
180 .  A - the KAIJ matrix
181 
182    Output Parameter:
183 +  m - the number of rows in T
184 .  n - the number of columns in T
185 -  T - the T matrix, in form of a scalar array in column-major format
186 
187    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
188 
189    Level: advanced
190 
191 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
192 @*/
193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
194 {
195   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
196   PetscFunctionBegin;
197   if (m) *m = b->p;
198   if (n) *n = b->q;
199   if (T) *T = b->T;
200   PetscFunctionReturn(0);
201 }
202 
203 /*@C
204    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
205 
206    Not Collective; the entire T is stored and returned independently on all processes
207 
208    Input Parameter:
209 .  A - the KAIJ matrix
210 
211    Output Parameter:
212 +  m - the number of rows in T
213 .  n - the number of columns in T
214 -  T - the T matrix, in form of a scalar array in column-major format
215 
216    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
217 
218    Level: advanced
219 
220 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
221 @*/
222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
223 {
224   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
225   PetscFunctionBegin;
226   if (m) *m = b->p;
227   if (n) *n = b->q;
228   if (T) *T = b->T;
229   PetscFunctionReturn(0);
230 }
231 
232 /*@C
233   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
234 
235   Not collective
236 
237   Input Parameter:
238 . A - the KAIJ matrix
239 
240   Output Parameter:
241 . T - location of pointer to array obtained with MatKAIJGetS()
242 
243   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
244   If NULL is passed, it will not attempt to zero the array pointer.
245 
246   Level: advanced
247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
248 @*/
249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
250 {
251   PetscFunctionBegin;
252   if (T) *T = NULL;
253   PetscObjectStateIncrease((PetscObject)A);
254   PetscFunctionReturn(0);
255 }
256 
257 /*@C
258   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
259 
260   Not collective
261 
262   Input Parameter:
263 . A - the KAIJ matrix
264 
265   Output Parameter:
266 . T - location of pointer to array obtained with MatKAIJGetS()
267 
268   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
269   If NULL is passed, it will not attempt to zero the array pointer.
270 
271   Level: advanced
272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
273 @*/
274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
275 {
276   PetscFunctionBegin;
277   if (T) *T = NULL;
278   PetscFunctionReturn(0);
279 }
280 
281 /*@
282    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
283 
284    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
285 
286    Input Parameters:
287 +  A - the KAIJ matrix
288 -  B - the AIJ matrix
289 
290    Notes:
291    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
292    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
293 
294    Level: advanced
295 
296 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
297 @*/
298 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
299 {
300   PetscErrorCode ierr;
301   PetscMPIInt    size;
302 
303   PetscFunctionBegin;
304   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
305   if (size == 1) {
306     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
307     a->AIJ = B;
308   } else {
309     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
310     a->A = B;
311   }
312   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 /*@C
317    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
318 
319    Logically Collective; the entire S is stored independently on all processes.
320 
321    Input Parameters:
322 +  A - the KAIJ matrix
323 .  p - the number of rows in S
324 .  q - the number of columns in S
325 -  S - the S matrix, in form of a scalar array in column-major format
326 
327    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
328    The S matrix is copied, so the user can destroy this array.
329 
330    Level: Advanced
331 
332 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
333 @*/
334 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
335 {
336   PetscErrorCode ierr;
337   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
338 
339   PetscFunctionBegin;
340   ierr = PetscFree(a->S);CHKERRQ(ierr);
341   if (S) {
342     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
343     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
344   } else  a->S = NULL;
345 
346   a->p = p;
347   a->q = q;
348   PetscFunctionReturn(0);
349 }
350 
351 /*@C
352    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
353 
354    Logically Collective; the entire T is stored independently on all processes.
355 
356    Input Parameters:
357 +  A - the KAIJ matrix
358 .  p - the number of rows in S
359 .  q - the number of columns in S
360 -  T - the T matrix, in form of a scalar array in column-major format
361 
362    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
363    The T matrix is copied, so the user can destroy this array.
364 
365    Level: Advanced
366 
367 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
368 @*/
369 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
370 {
371   PetscErrorCode ierr;
372   PetscInt       i,j;
373   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
374   PetscBool      isTI = PETSC_FALSE;
375 
376   PetscFunctionBegin;
377   /* check if T is an identity matrix */
378   if (T && (p == q)) {
379     isTI = PETSC_TRUE;
380     for (i=0; i<p; i++) {
381       for (j=0; j<q; j++) {
382         if (i == j) {
383           /* diagonal term must be 1 */
384           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
385         } else {
386           /* off-diagonal term must be 0 */
387           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
388         }
389       }
390     }
391   }
392   a->isTI = isTI;
393 
394   ierr = PetscFree(a->T);CHKERRQ(ierr);
395   if (T && (!isTI)) {
396     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
397     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
398   } else a->T = NULL;
399 
400   a->p = p;
401   a->q = q;
402   PetscFunctionReturn(0);
403 }
404 
405 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
406 {
407   PetscErrorCode ierr;
408   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
409 
410   PetscFunctionBegin;
411   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
412   ierr = PetscFree(b->S);CHKERRQ(ierr);
413   ierr = PetscFree(b->T);CHKERRQ(ierr);
414   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
415   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
416   ierr = PetscFree(A->data);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 PetscErrorCode MatSetUp_KAIJ(Mat A)
421 {
422   PetscErrorCode ierr;
423   PetscInt       n;
424   PetscMPIInt    size;
425   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
426 
427   PetscFunctionBegin;
428   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
429   if (size == 1) {
430     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
431     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
432     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
433     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
434     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
435   } else {
436     Mat_MPIKAIJ *a;
437     Mat_MPIAIJ  *mpiaij;
438     IS          from,to;
439     Vec         gvec;
440     PetscScalar *T;
441     PetscInt    i,j;
442 
443     a = (Mat_MPIKAIJ*)A->data;
444     mpiaij = (Mat_MPIAIJ*)a->A->data;
445     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
446     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
447     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
448     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
449     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
450 
451     if (a->isTI) {
452       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
453        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
454        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
455        * to pass in. */
456       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
457       for (i=0; i<a->p; i++) {
458         for (j=0; j<a->q; j++) {
459           if (i==j) T[i+j*a->p] = 1.0;
460           else      T[i+j*a->p] = 0.0;
461         }
462       }
463     } else T = a->T;
464     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
465     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
466     ierr = PetscFree(T);CHKERRQ(ierr);
467 
468     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
469     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
470     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
471     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
472     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
473 
474     /* create two temporary Index sets for build scatter gather */
475     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
476     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
477 
478     /* create temporary global vector to generate scatter context */
479     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
480 
481     /* generate the scatter context */
482     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
483 
484     ierr = ISDestroy(&from);CHKERRQ(ierr);
485     ierr = ISDestroy(&to);CHKERRQ(ierr);
486     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
487   }
488 
489   A->assembled = PETSC_TRUE;
490   PetscFunctionReturn(0);
491 }
492 
493 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
494 {
495   PetscErrorCode ierr;
496   Mat            B;
497 
498   PetscFunctionBegin;
499   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
500   ierr = MatView(B,viewer);CHKERRQ(ierr);
501   ierr = MatDestroy(&B);CHKERRQ(ierr);
502   PetscFunctionReturn(0);
503 }
504 
505 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
506 {
507   PetscErrorCode ierr;
508   Mat            B;
509 
510   PetscFunctionBegin;
511   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
512   ierr = MatView(B,viewer);CHKERRQ(ierr);
513   ierr = MatDestroy(&B);CHKERRQ(ierr);
514   PetscFunctionReturn(0);
515 }
516 
517 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
518 {
519   PetscErrorCode ierr;
520   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
521 
522   PetscFunctionBegin;
523   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
524   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
525   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
526   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
527   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
528   ierr = PetscFree(b->S);CHKERRQ(ierr);
529   ierr = PetscFree(b->T);CHKERRQ(ierr);
530   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
531   ierr = PetscFree(A->data);CHKERRQ(ierr);
532   PetscFunctionReturn(0);
533 }
534 
535 /* --------------------------------------------------------------------------------------*/
536 
537 /* zz = yy + Axx */
538 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
539 {
540   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
541   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
542   const PetscScalar *s = b->S, *t = b->T;
543   const PetscScalar *x,*v,*bx;
544   PetscScalar       *y,*sums;
545   PetscErrorCode    ierr;
546   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
547   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
548 
549   PetscFunctionBegin;
550   if (!yy) {
551     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
552   } else {
553     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
554   }
555   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
556 
557   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
558   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
559   idx  = a->j;
560   v    = a->a;
561   ii   = a->i;
562 
563   if (b->isTI) {
564     for (i=0; i<m; i++) {
565       jrow = ii[i];
566       n    = ii[i+1] - jrow;
567       sums = y + p*i;
568       for (j=0; j<n; j++) {
569         for (k=0; k<p; k++) {
570           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
571         }
572       }
573     }
574   } else if (t) {
575     for (i=0; i<m; i++) {
576       jrow = ii[i];
577       n    = ii[i+1] - jrow;
578       sums = y + p*i;
579       bx   = x + q*i;
580       for (j=0; j<n; j++) {
581         for (k=0; k<p; k++) {
582           for (l=0; l<q; l++) {
583             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
584           }
585         }
586       }
587     }
588   }
589   if (s) {
590     for (i=0; i<m; i++) {
591       sums = y + p*i;
592       bx   = x + q*i;
593       if (i < b->AIJ->cmap->n) {
594         for (j=0; j<q; j++) {
595           for (k=0; k<p; k++) {
596             sums[k] += s[k+j*p]*bx[j];
597           }
598         }
599       }
600     }
601   }
602 
603   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
604   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
605   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
606   PetscFunctionReturn(0);
607 }
608 
609 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
610 {
611   PetscErrorCode ierr;
612   PetscFunctionBegin;
613   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
614   PetscFunctionReturn(0);
615 }
616 
617 #include <petsc/private/kernels/blockinvert.h>
618 
619 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
620 {
621   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
622   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
623   const PetscScalar *S  = b->S;
624   const PetscScalar *T  = b->T;
625   const PetscScalar *v  = a->a;
626   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
627   PetscErrorCode    ierr;
628   PetscInt          i,j,*v_pivots,dof,dof2;
629   PetscScalar       *diag,aval,*v_work;
630 
631   PetscFunctionBegin;
632   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
633   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
634 
635   dof  = p;
636   dof2 = dof*dof;
637 
638   if (b->ibdiagvalid) {
639     if (values) *values = b->ibdiag;
640     PetscFunctionReturn(0);
641   }
642   if (!b->ibdiag) {
643     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
644     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
645   }
646   if (values) *values = b->ibdiag;
647   diag = b->ibdiag;
648 
649   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
650   for (i=0; i<m; i++) {
651     if (S) {
652       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
653     } else {
654       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
655     }
656     if (b->isTI) {
657       aval = 0;
658       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
659       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
660     } else if (T) {
661       aval = 0;
662       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
663       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
664     }
665     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
666     diag += dof2;
667   }
668   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
669 
670   b->ibdiagvalid = PETSC_TRUE;
671   PetscFunctionReturn(0);
672 }
673 
674 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
675 {
676   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
677 
678   PetscFunctionBegin;
679   *B = kaij->AIJ;
680   PetscFunctionReturn(0);
681 }
682 
683 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
684 {
685   PetscErrorCode    ierr;
686   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
687   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
688   const PetscScalar *aa = a->a, *T = kaij->T, *v;
689   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
690   const PetscScalar *b, *xb, *idiag;
691   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
692   PetscInt          i, j, k, i2, bs, bs2, nz;
693 
694   PetscFunctionBegin;
695   its = its*lits;
696   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
697   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
698   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
699   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
700   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
701   else        {bs = p; bs2 = bs*bs; }
702 
703   if (!m) PetscFunctionReturn(0);
704 
705   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
706   idiag = kaij->ibdiag;
707   diag  = a->diag;
708 
709   if (!kaij->sor.setup) {
710     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
711     kaij->sor.setup = PETSC_TRUE;
712   }
713   y     = kaij->sor.y;
714   w     = kaij->sor.w;
715   work  = kaij->sor.work;
716   t     = kaij->sor.t;
717   arr   = kaij->sor.arr;
718 
719   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
720   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
721 
722   if (flag & SOR_ZERO_INITIAL_GUESS) {
723     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
724       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
725       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
726       i2     =  bs;
727       idiag  += bs2;
728       for (i=1; i<m; i++) {
729         v  = aa + ai[i];
730         vi = aj + ai[i];
731         nz = diag[i] - ai[i];
732 
733         if (T) {                /* b - T (Arow * x) */
734           for (k=0; k<bs; k++) w[k] = 0;
735           for (j=0; j<nz; j++) {
736             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
737           }
738           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
739           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
740         } else if (kaij->isTI) {
741           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
742           for (j=0; j<nz; j++) {
743             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
744           }
745         } else {
746           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
747         }
748 
749         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
750         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
751 
752         idiag += bs2;
753         i2    += bs;
754       }
755       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
756       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
757       xb = t;
758     } else xb = b;
759     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
760       idiag = kaij->ibdiag+bs2*(m-1);
761       i2    = bs * (m-1);
762       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
763       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
764       i2    -= bs;
765       idiag -= bs2;
766       for (i=m-2; i>=0; i--) {
767         v  = aa + diag[i] + 1 ;
768         vi = aj + diag[i] + 1;
769         nz = ai[i+1] - diag[i] - 1;
770 
771         if (T) {                /* FIXME: This branch untested */
772           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
773           /* copy all rows of x that are needed into contiguous space */
774           workt = work;
775           for (j=0; j<nz; j++) {
776             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
777             workt += bs;
778           }
779           arrt = arr;
780           for (j=0; j<nz; j++) {
781             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
782             for (k=0; k<bs2; k++) arrt[k] *= v[j];
783             arrt += bs2;
784           }
785           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
786         } else if (kaij->isTI) {
787           for (k=0; k<bs; k++) w[k] = t[i2+k];
788           for (j=0; j<nz; j++) {
789             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
790           }
791         }
792 
793         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
794         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
795 
796         idiag -= bs2;
797         i2    -= bs;
798       }
799       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
800     }
801     its--;
802   }
803   while (its--) {               /* FIXME: This branch not updated */
804     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
805       i2     =  0;
806       idiag  = kaij->ibdiag;
807       for (i=0; i<m; i++) {
808         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
809 
810         v  = aa + ai[i];
811         vi = aj + ai[i];
812         nz = diag[i] - ai[i];
813         workt = work;
814         for (j=0; j<nz; j++) {
815           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
816           workt += bs;
817         }
818         arrt = arr;
819         if (T) {
820           for (j=0; j<nz; j++) {
821             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
822             for (k=0; k<bs2; k++) arrt[k] *= v[j];
823             arrt += bs2;
824           }
825           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
826         } else if (kaij->isTI) {
827           for (j=0; j<nz; j++) {
828             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
829             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
830             arrt += bs2;
831           }
832           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
833         }
834         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
835 
836         v  = aa + diag[i] + 1;
837         vi = aj + diag[i] + 1;
838         nz = ai[i+1] - diag[i] - 1;
839         workt = work;
840         for (j=0; j<nz; j++) {
841           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
842           workt += bs;
843         }
844         arrt = arr;
845         if (T) {
846           for (j=0; j<nz; j++) {
847             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
848             for (k=0; k<bs2; k++) arrt[k] *= v[j];
849             arrt += bs2;
850           }
851           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
852         } else if (kaij->isTI) {
853           for (j=0; j<nz; j++) {
854             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
855             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
856             arrt += bs2;
857           }
858           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
859         }
860 
861         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
862         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
863 
864         idiag += bs2;
865         i2    += bs;
866       }
867       xb = t;
868     }
869     else xb = b;
870     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
871       idiag = kaij->ibdiag+bs2*(m-1);
872       i2    = bs * (m-1);
873       if (xb == b) {
874         for (i=m-1; i>=0; i--) {
875           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
876 
877           v  = aa + ai[i];
878           vi = aj + ai[i];
879           nz = diag[i] - ai[i];
880           workt = work;
881           for (j=0; j<nz; j++) {
882             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
883             workt += bs;
884           }
885           arrt = arr;
886           if (T) {
887             for (j=0; j<nz; j++) {
888               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
889               for (k=0; k<bs2; k++) arrt[k] *= v[j];
890               arrt += bs2;
891             }
892             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
893           } else if (kaij->isTI) {
894             for (j=0; j<nz; j++) {
895               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
896               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
897               arrt += bs2;
898             }
899             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
900           }
901 
902           v  = aa + diag[i] + 1;
903           vi = aj + diag[i] + 1;
904           nz = ai[i+1] - diag[i] - 1;
905           workt = work;
906           for (j=0; j<nz; j++) {
907             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
908             workt += bs;
909           }
910           arrt = arr;
911           if (T) {
912             for (j=0; j<nz; j++) {
913               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
914               for (k=0; k<bs2; k++) arrt[k] *= v[j];
915               arrt += bs2;
916             }
917             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
918           } else if (kaij->isTI) {
919             for (j=0; j<nz; j++) {
920               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
921               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
922               arrt += bs2;
923             }
924             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
925           }
926 
927           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
928           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
929         }
930       } else {
931         for (i=m-1; i>=0; i--) {
932           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
933           v  = aa + diag[i] + 1;
934           vi = aj + diag[i] + 1;
935           nz = ai[i+1] - diag[i] - 1;
936           workt = work;
937           for (j=0; j<nz; j++) {
938             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
939             workt += bs;
940           }
941           arrt = arr;
942           if (T) {
943             for (j=0; j<nz; j++) {
944               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
945               for (k=0; k<bs2; k++) arrt[k] *= v[j];
946               arrt += bs2;
947             }
948             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
949           } else if (kaij->isTI) {
950             for (j=0; j<nz; j++) {
951               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
952               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
953               arrt += bs2;
954             }
955             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
956           }
957           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
958           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
959         }
960         idiag -= bs2;
961         i2    -= bs;
962       }
963       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
964     }
965   }
966 
967   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
968   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 /*===================================================================================*/
973 
974 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
975 {
976   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
977   PetscErrorCode ierr;
978 
979   PetscFunctionBegin;
980   if (!yy) {
981     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
982   } else {
983     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
984   }
985   /* start the scatter */
986   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
988   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
994 {
995   PetscErrorCode ierr;
996   PetscFunctionBegin;
997   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1002 {
1003   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1004   PetscErrorCode  ierr;
1005 
1006   PetscFunctionBegin;
1007   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 /* ----------------------------------------------------------------*/
1012 
1013 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1014 {
1015   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1016   PetscErrorCode  diag = PETSC_FALSE;
1017   PetscErrorCode  ierr;
1018   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1019   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1020 
1021   PetscFunctionBegin;
1022   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1023   b->getrowactive = PETSC_TRUE;
1024   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1025 
1026   if ((!S) && (!T) && (!b->isTI)) {
1027     if (ncols)    *ncols  = 0;
1028     if (cols)     *cols   = NULL;
1029     if (values)   *values = NULL;
1030     PetscFunctionReturn(0);
1031   }
1032 
1033   if (T || b->isTI) {
1034     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1035     c     = nzaij;
1036     for (i=0; i<nzaij; i++) {
1037       /* check if this row contains a diagonal entry */
1038       if (colsaij[i] == r) {
1039         diag = PETSC_TRUE;
1040         c = i;
1041       }
1042     }
1043   } else nzaij = c = 0;
1044 
1045   /* calculate size of row */
1046   nz = 0;
1047   if (S)            nz += q;
1048   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1049 
1050   if (cols || values) {
1051     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1052     for (i=0; i<q; i++) {
1053       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1054       v[i] = 0.0;
1055     }
1056     if (b->isTI) {
1057       for (i=0; i<nzaij; i++) {
1058         for (j=0; j<q; j++) {
1059           idx[i*q+j] = colsaij[i]*q+j;
1060           v[i*q+j]   = (j==s ? vaij[i] : 0);
1061         }
1062       }
1063     } else if (T) {
1064       for (i=0; i<nzaij; i++) {
1065         for (j=0; j<q; j++) {
1066           idx[i*q+j] = colsaij[i]*q+j;
1067           v[i*q+j]   = vaij[i]*T[s+j*p];
1068         }
1069       }
1070     }
1071     if (S) {
1072       for (j=0; j<q; j++) {
1073         idx[c*q+j] = r*q+j;
1074         v[c*q+j]  += S[s+j*p];
1075       }
1076     }
1077   }
1078 
1079   if (ncols)    *ncols  = nz;
1080   if (cols)     *cols   = idx;
1081   if (values)   *values = v;
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1086 {
1087   PetscErrorCode ierr;
1088   PetscFunctionBegin;
1089   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1090   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1095 {
1096   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1097   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1098   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1099   Mat             AIJ     = b->A;
1100   PetscBool       diag    = PETSC_FALSE;
1101   PetscErrorCode  ierr;
1102   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1103   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1104   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1105 
1106   PetscFunctionBegin;
1107   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1108   b->getrowactive = PETSC_TRUE;
1109   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1110   lrow = row - rstart;
1111 
1112   if ((!S) && (!T) && (!b->isTI)) {
1113     if (ncols)    *ncols  = 0;
1114     if (cols)     *cols   = NULL;
1115     if (values)   *values = NULL;
1116     PetscFunctionReturn(0);
1117   }
1118 
1119   r = lrow/p;
1120   s = lrow%p;
1121 
1122   if (T || b->isTI) {
1123     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1124     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1125     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1126 
1127     c     = ncolsaij + ncolsoaij;
1128     for (i=0; i<ncolsaij; i++) {
1129       /* check if this row contains a diagonal entry */
1130       if (colsaij[i] == r) {
1131         diag = PETSC_TRUE;
1132         c = i;
1133       }
1134     }
1135   } else c = 0;
1136 
1137   /* calculate size of row */
1138   nz = 0;
1139   if (S)            nz += q;
1140   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1141 
1142   if (cols || values) {
1143     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1144     for (i=0; i<q; i++) {
1145       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1146       v[i] = 0.0;
1147     }
1148     if (b->isTI) {
1149       for (i=0; i<ncolsaij; i++) {
1150         for (j=0; j<q; j++) {
1151           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1152           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1153         }
1154       }
1155       for (i=0; i<ncolsoaij; i++) {
1156         for (j=0; j<q; j++) {
1157           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1158           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1159         }
1160       }
1161     } else if (T) {
1162       for (i=0; i<ncolsaij; i++) {
1163         for (j=0; j<q; j++) {
1164           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1165           v[i*q+j]   = vals[i]*T[s+j*p];
1166         }
1167       }
1168       for (i=0; i<ncolsoaij; i++) {
1169         for (j=0; j<q; j++) {
1170           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1171           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1172         }
1173       }
1174     }
1175     if (S) {
1176       for (j=0; j<q; j++) {
1177         idx[c*q+j] = (r+rstart/p)*q+j;
1178         v[c*q+j]  += S[s+j*p];
1179       }
1180     }
1181   }
1182 
1183   if (ncols)  *ncols  = nz;
1184   if (cols)   *cols   = idx;
1185   if (values) *values = v;
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1190 {
1191   PetscErrorCode ierr;
1192   PetscFunctionBegin;
1193   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1194   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1199 {
1200   PetscErrorCode ierr;
1201   Mat            A;
1202 
1203   PetscFunctionBegin;
1204   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1205   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1206   ierr = MatDestroy(&A);CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 /* ---------------------------------------------------------------------------------- */
1211 /*@C
1212   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1213 
1214     [I \otimes S + A \otimes T]
1215 
1216   where
1217     S is a dense (p \times q) matrix
1218     T is a dense (p \times q) matrix
1219     A is an AIJ  (n \times n) matrix
1220     I is the identity matrix
1221   The resulting matrix is (np \times nq)
1222 
1223   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1224   S is always stored independently on all processes as a PetscScalar array in column-major format.
1225 
1226   Collective
1227 
1228   Input Parameters:
1229 + A - the AIJ matrix
1230 . S - the S matrix, stored as a PetscScalar array (column-major)
1231 . T - the T matrix, stored as a PetscScalar array (column-major)
1232 . p - number of rows in S and T
1233 - q - number of columns in S and T
1234 
1235   Output Parameter:
1236 . kaij - the new KAIJ matrix
1237 
1238   Operations provided:
1239 + MatMult
1240 . MatMultAdd
1241 . MatInvertBlockDiagonal
1242 - MatView
1243 
1244   Level: advanced
1245 
1246 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1247 @*/
1248 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1249 {
1250   PetscErrorCode ierr;
1251   PetscMPIInt    size;
1252 
1253   PetscFunctionBegin;
1254   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1255   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1256   if (size == 1) {
1257     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1258   } else {
1259     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1260   }
1261   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1262   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1263   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1264   ierr = MatSetUp(*kaij);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 /*MC
1269   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1270 
1271     [I \otimes S + A \otimes T]
1272 
1273   where
1274     S is a dense (p \times q) matrix
1275     T is a dense (p \times q) matrix
1276     A is an AIJ  (n \times n) matrix
1277     I is the identity matrix
1278   The resulting matrix is (np \times nq)
1279 
1280   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1281   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
1282 
1283   Operations provided:
1284 + MatMult
1285 . MatMultAdd
1286 . MatInvertBlockDiagonal
1287 - MatView
1288 
1289   Level: advanced
1290 
1291 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1292 M*/
1293 
1294 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1295 {
1296   PetscErrorCode ierr;
1297   Mat_MPIKAIJ    *b;
1298   PetscMPIInt    size;
1299 
1300   PetscFunctionBegin;
1301   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1302   A->data  = (void*)b;
1303 
1304   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1305 
1306   A->ops->setup = MatSetUp_KAIJ;
1307 
1308   b->w    = 0;
1309   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1310   if (size == 1) {
1311     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1312     A->ops->setup               = MatSetUp_KAIJ;
1313     A->ops->destroy             = MatDestroy_SeqKAIJ;
1314     A->ops->view                = MatView_SeqKAIJ;
1315     A->ops->mult                = MatMult_SeqKAIJ;
1316     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1317     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1318     A->ops->getrow              = MatGetRow_SeqKAIJ;
1319     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1320     A->ops->sor                 = MatSOR_SeqKAIJ;
1321   } else {
1322     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1323     A->ops->setup               = MatSetUp_KAIJ;
1324     A->ops->destroy             = MatDestroy_MPIKAIJ;
1325     A->ops->view                = MatView_MPIKAIJ;
1326     A->ops->mult                = MatMult_MPIKAIJ;
1327     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1328     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1329     A->ops->getrow              = MatGetRow_MPIKAIJ;
1330     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1331     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1332   }
1333   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1334   PetscFunctionReturn(0);
1335 }
1336