xref: /petsc/src/mat/impls/kaij/kaij.c (revision e6985dafcb1cbbb98fec7ef672f5d5c74f095d0e)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscFunctionBegin;
145   if (S) *S = NULL;
146   PetscObjectStateIncrease((PetscObject)A);
147   PetscFunctionReturn(0);
148 }
149 
150 /*@C
151   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
152 
153   Not collective
154 
155   Input Parameter:
156 . A - the KAIJ matrix
157 
158   Output Parameter:
159 . S - location of pointer to array obtained with MatKAIJGetS()
160 
161   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
162   If NULL is passed, it will not attempt to zero the array pointer.
163 
164   Level: advanced
165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
166 @*/
167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
168 {
169   PetscFunctionBegin;
170   if (S) *S = NULL;
171   PetscFunctionReturn(0);
172 }
173 
174 /*@C
175    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
176 
177    Not Collective; the entire T is stored and returned independently on all processes
178 
179    Input Parameter:
180 .  A - the KAIJ matrix
181 
182    Output Parameter:
183 +  m - the number of rows in T
184 .  n - the number of columns in T
185 -  T - the T matrix, in form of a scalar array in column-major format
186 
187    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
188 
189    Level: advanced
190 
191 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
192 @*/
193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
194 {
195   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
196   PetscFunctionBegin;
197   if (m) *m = b->p;
198   if (n) *n = b->q;
199   if (T) *T = b->T;
200   PetscFunctionReturn(0);
201 }
202 
203 /*@C
204    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
205 
206    Not Collective; the entire T is stored and returned independently on all processes
207 
208    Input Parameter:
209 .  A - the KAIJ matrix
210 
211    Output Parameter:
212 +  m - the number of rows in T
213 .  n - the number of columns in T
214 -  T - the T matrix, in form of a scalar array in column-major format
215 
216    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
217 
218    Level: advanced
219 
220 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
221 @*/
222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
223 {
224   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
225   PetscFunctionBegin;
226   if (m) *m = b->p;
227   if (n) *n = b->q;
228   if (T) *T = b->T;
229   PetscFunctionReturn(0);
230 }
231 
232 /*@C
233   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
234 
235   Not collective
236 
237   Input Parameter:
238 . A - the KAIJ matrix
239 
240   Output Parameter:
241 . T - location of pointer to array obtained with MatKAIJGetS()
242 
243   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
244   If NULL is passed, it will not attempt to zero the array pointer.
245 
246   Level: advanced
247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
248 @*/
249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
250 {
251   PetscFunctionBegin;
252   if (T) *T = NULL;
253   PetscObjectStateIncrease((PetscObject)A);
254   PetscFunctionReturn(0);
255 }
256 
257 /*@C
258   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
259 
260   Not collective
261 
262   Input Parameter:
263 . A - the KAIJ matrix
264 
265   Output Parameter:
266 . T - location of pointer to array obtained with MatKAIJGetS()
267 
268   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
269   If NULL is passed, it will not attempt to zero the array pointer.
270 
271   Level: advanced
272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
273 @*/
274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
275 {
276   PetscFunctionBegin;
277   if (T) *T = NULL;
278   PetscFunctionReturn(0);
279 }
280 
281 /*@
282    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
283 
284    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
285 
286    Input Parameters:
287 +  A - the KAIJ matrix
288 -  B - the AIJ matrix
289 
290    Notes:
291    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
292    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
293 
294    Level: advanced
295 
296 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
297 @*/
298 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
299 {
300   PetscErrorCode ierr;
301   PetscMPIInt    size;
302 
303   PetscFunctionBegin;
304   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
305   if (size == 1) {
306     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
307     a->AIJ = B;
308   } else {
309     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
310     a->A = B;
311   }
312   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 /*@C
317    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
318 
319    Logically Collective; the entire S is stored independently on all processes.
320 
321    Input Parameters:
322 +  A - the KAIJ matrix
323 .  p - the number of rows in S
324 .  q - the number of columns in S
325 -  S - the S matrix, in form of a scalar array in column-major format
326 
327    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
328    The S matrix is copied, so the user can destroy this array.
329 
330    Level: Advanced
331 
332 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
333 @*/
334 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
335 {
336   PetscErrorCode ierr;
337   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
338 
339   PetscFunctionBegin;
340   ierr = PetscFree(a->S);CHKERRQ(ierr);
341   if (S) {
342     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
343     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
344   } else  a->S = NULL;
345 
346   a->p = p;
347   a->q = q;
348   PetscFunctionReturn(0);
349 }
350 
351 /*@C
352    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
353 
354    Logically Collective; the entire T is stored independently on all processes.
355 
356    Input Parameters:
357 +  A - the KAIJ matrix
358 .  p - the number of rows in S
359 .  q - the number of columns in S
360 -  T - the T matrix, in form of a scalar array in column-major format
361 
362    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
363    The T matrix is copied, so the user can destroy this array.
364 
365    Level: Advanced
366 
367 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
368 @*/
369 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
370 {
371   PetscErrorCode ierr;
372   PetscInt       i,j;
373   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
374   PetscBool      isTI = PETSC_FALSE;
375 
376   PetscFunctionBegin;
377   /* check if T is an identity matrix */
378   if (T && (p == q)) {
379     isTI = PETSC_TRUE;
380     for (i=0; i<p; i++) {
381       for (j=0; j<q; j++) {
382         if (i == j) {
383           /* diagonal term must be 1 */
384           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
385         } else {
386           /* off-diagonal term must be 0 */
387           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
388         }
389       }
390     }
391   }
392   a->isTI = isTI;
393 
394   ierr = PetscFree(a->T);CHKERRQ(ierr);
395   if (T && (!isTI)) {
396     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
397     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
398   } else a->T = NULL;
399 
400   a->p = p;
401   a->q = q;
402   PetscFunctionReturn(0);
403 }
404 
405 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
406 {
407   PetscErrorCode ierr;
408   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
409 
410   PetscFunctionBegin;
411   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
412   ierr = PetscFree(b->S);CHKERRQ(ierr);
413   ierr = PetscFree(b->T);CHKERRQ(ierr);
414   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
415   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
416   ierr = PetscFree(A->data);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 PetscErrorCode MatSetUp_KAIJ(Mat A)
421 {
422   PetscErrorCode ierr;
423   PetscInt       n;
424   PetscMPIInt    size;
425   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
426 
427   PetscFunctionBegin;
428   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
429   if (size == 1) {
430     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
431     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
432     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
433     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
434     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
435   } else {
436     Mat_MPIKAIJ *a;
437     Mat_MPIAIJ  *mpiaij;
438     IS          from,to;
439     Vec         gvec;
440     PetscScalar *T;
441     PetscInt    i,j;
442 
443     a = (Mat_MPIKAIJ*)A->data;
444     mpiaij = (Mat_MPIAIJ*)a->A->data;
445     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
446     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
447     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
448     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
449     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
450 
451     if (a->isTI) {
452       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
453        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
454        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
455        * to pass in. */
456       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
457       for (i=0; i<a->p; i++) {
458         for (j=0; j<a->q; j++) {
459           if (i==j) T[i+j*a->p] = 1.0;
460           else      T[i+j*a->p] = 0.0;
461         }
462       }
463     } else T = a->T;
464     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
465     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
466     if (a->isTI) {
467       ierr = PetscFree(T);CHKERRQ(ierr);
468     }
469 
470     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
471     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
472     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
473     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
474     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
475 
476     /* create two temporary Index sets for build scatter gather */
477     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
478     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
479 
480     /* create temporary global vector to generate scatter context */
481     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
482 
483     /* generate the scatter context */
484     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
485 
486     ierr = ISDestroy(&from);CHKERRQ(ierr);
487     ierr = ISDestroy(&to);CHKERRQ(ierr);
488     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
489   }
490 
491   A->assembled = PETSC_TRUE;
492   PetscFunctionReturn(0);
493 }
494 
495 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
496 {
497   PetscViewerFormat format;
498   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
499   Mat               B;
500   PetscInt          i;
501   PetscErrorCode    ierr;
502   PetscBool         ismpikaij;
503 
504   PetscFunctionBegin;
505   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
506   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
507   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
508     ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr);
509 
510     /* Print appropriate details for S. */
511     if (!a->S) {
512       ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");
513     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
514       ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr);
515       for (i=0; i<(a->p * a->q); i++) {
516 #if defined(PETSC_USE_COMPLEX)
517         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr);
518 #else
519         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr);
520 #endif
521       }
522       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
523     }
524 
525     /* Print appropriate details for T. */
526     if (a->isTI) {
527       ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");
528     } else if (!a->T) {
529       ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");
530     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
531       ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr);
532       for (i=0; i<(a->p * a->q); i++) {
533 #if defined(PETSC_USE_COMPLEX)
534         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr);
535 #else
536         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr);
537 #endif
538       }
539       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
540     }
541 
542     /* Now print details for the AIJ matrix, using the AIJ viewer. */
543     ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr);
544     if (ismpikaij) {
545       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
546       ierr = MatView(b->A,viewer);CHKERRQ(ierr);
547     } else {
548       ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr);
549     }
550 
551   } else {
552     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
553     if (ismpikaij) {
554       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
555     } else {
556       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
557     }
558     ierr = MatView(B,viewer);CHKERRQ(ierr);
559     ierr = MatDestroy(&B);CHKERRQ(ierr);
560   }
561   PetscFunctionReturn(0);
562 }
563 
564 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
565 {
566   PetscErrorCode ierr;
567   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
568 
569   PetscFunctionBegin;
570   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
571   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
572   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
573   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
574   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
575   ierr = PetscFree(b->S);CHKERRQ(ierr);
576   ierr = PetscFree(b->T);CHKERRQ(ierr);
577   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
578   ierr = PetscFree(A->data);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 /* --------------------------------------------------------------------------------------*/
583 
584 /* zz = yy + Axx */
585 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
586 {
587   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
588   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
589   const PetscScalar *s = b->S, *t = b->T;
590   const PetscScalar *x,*v,*bx;
591   PetscScalar       *y,*sums;
592   PetscErrorCode    ierr;
593   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
594   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
595 
596   PetscFunctionBegin;
597   if (!yy) {
598     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
599   } else {
600     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
601   }
602   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
603 
604   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
605   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
606   idx  = a->j;
607   v    = a->a;
608   ii   = a->i;
609 
610   if (b->isTI) {
611     for (i=0; i<m; i++) {
612       jrow = ii[i];
613       n    = ii[i+1] - jrow;
614       sums = y + p*i;
615       for (j=0; j<n; j++) {
616         for (k=0; k<p; k++) {
617           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
618         }
619       }
620     }
621   } else if (t) {
622     for (i=0; i<m; i++) {
623       jrow = ii[i];
624       n    = ii[i+1] - jrow;
625       sums = y + p*i;
626       bx   = x + q*i;
627       for (j=0; j<n; j++) {
628         for (k=0; k<p; k++) {
629           for (l=0; l<q; l++) {
630             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
631           }
632         }
633       }
634     }
635   }
636   if (s) {
637     for (i=0; i<m; i++) {
638       sums = y + p*i;
639       bx   = x + q*i;
640       if (i < b->AIJ->cmap->n) {
641         for (j=0; j<q; j++) {
642           for (k=0; k<p; k++) {
643             sums[k] += s[k+j*p]*bx[j];
644           }
645         }
646       }
647     }
648   }
649 
650   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
651   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
652   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
657 {
658   PetscErrorCode ierr;
659   PetscFunctionBegin;
660   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
661   PetscFunctionReturn(0);
662 }
663 
664 #include <petsc/private/kernels/blockinvert.h>
665 
666 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
667 {
668   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
669   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
670   const PetscScalar *S  = b->S;
671   const PetscScalar *T  = b->T;
672   const PetscScalar *v  = a->a;
673   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
674   PetscErrorCode    ierr;
675   PetscInt          i,j,*v_pivots,dof,dof2;
676   PetscScalar       *diag,aval,*v_work;
677 
678   PetscFunctionBegin;
679   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
680   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
681 
682   dof  = p;
683   dof2 = dof*dof;
684 
685   if (b->ibdiagvalid) {
686     if (values) *values = b->ibdiag;
687     PetscFunctionReturn(0);
688   }
689   if (!b->ibdiag) {
690     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
691     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
692   }
693   if (values) *values = b->ibdiag;
694   diag = b->ibdiag;
695 
696   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
697   for (i=0; i<m; i++) {
698     if (S) {
699       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
700     } else {
701       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
702     }
703     if (b->isTI) {
704       aval = 0;
705       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
706       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
707     } else if (T) {
708       aval = 0;
709       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
710       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
711     }
712     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
713     diag += dof2;
714   }
715   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
716 
717   b->ibdiagvalid = PETSC_TRUE;
718   PetscFunctionReturn(0);
719 }
720 
721 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
722 {
723   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
724 
725   PetscFunctionBegin;
726   *B = kaij->AIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
731 {
732   PetscErrorCode    ierr;
733   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
734   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
735   const PetscScalar *aa = a->a, *T = kaij->T, *v;
736   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
737   const PetscScalar *b, *xb, *idiag;
738   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
739   PetscInt          i, j, k, i2, bs, bs2, nz;
740 
741   PetscFunctionBegin;
742   its = its*lits;
743   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
744   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
745   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
746   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
747   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
748   else        {bs = p; bs2 = bs*bs; }
749 
750   if (!m) PetscFunctionReturn(0);
751 
752   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
753   idiag = kaij->ibdiag;
754   diag  = a->diag;
755 
756   if (!kaij->sor.setup) {
757     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
758     kaij->sor.setup = PETSC_TRUE;
759   }
760   y     = kaij->sor.y;
761   w     = kaij->sor.w;
762   work  = kaij->sor.work;
763   t     = kaij->sor.t;
764   arr   = kaij->sor.arr;
765 
766   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
767   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
768 
769   if (flag & SOR_ZERO_INITIAL_GUESS) {
770     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
771       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
772       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
773       i2     =  bs;
774       idiag  += bs2;
775       for (i=1; i<m; i++) {
776         v  = aa + ai[i];
777         vi = aj + ai[i];
778         nz = diag[i] - ai[i];
779 
780         if (T) {                /* b - T (Arow * x) */
781           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));
782           for (j=0; j<nz; j++) {
783             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
784           }
785           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
786           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
787         } else if (kaij->isTI) {
788           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
789           for (j=0; j<nz; j++) {
790             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
791           }
792         } else {
793           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
794         }
795 
796         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
797         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
798 
799         idiag += bs2;
800         i2    += bs;
801       }
802       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
803       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
804       xb = t;
805     } else xb = b;
806     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
807       idiag = kaij->ibdiag+bs2*(m-1);
808       i2    = bs * (m-1);
809       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
810       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
811       i2    -= bs;
812       idiag -= bs2;
813       for (i=m-2; i>=0; i--) {
814         v  = aa + diag[i] + 1 ;
815         vi = aj + diag[i] + 1;
816         nz = ai[i+1] - diag[i] - 1;
817 
818         if (T) {                /* FIXME: This branch untested */
819           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
820           /* copy all rows of x that are needed into contiguous space */
821           workt = work;
822           for (j=0; j<nz; j++) {
823             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
824             workt += bs;
825           }
826           arrt = arr;
827           for (j=0; j<nz; j++) {
828             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
829             for (k=0; k<bs2; k++) arrt[k] *= v[j];
830             arrt += bs2;
831           }
832           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
833         } else if (kaij->isTI) {
834           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
835           for (j=0; j<nz; j++) {
836             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
837           }
838         }
839 
840         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
841         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
842 
843         idiag -= bs2;
844         i2    -= bs;
845       }
846       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
847     }
848     its--;
849   }
850   while (its--) {               /* FIXME: This branch not updated */
851     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
852       i2     =  0;
853       idiag  = kaij->ibdiag;
854       for (i=0; i<m; i++) {
855         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
856 
857         v  = aa + ai[i];
858         vi = aj + ai[i];
859         nz = diag[i] - ai[i];
860         workt = work;
861         for (j=0; j<nz; j++) {
862           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
863           workt += bs;
864         }
865         arrt = arr;
866         if (T) {
867           for (j=0; j<nz; j++) {
868             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
869             for (k=0; k<bs2; k++) arrt[k] *= v[j];
870             arrt += bs2;
871           }
872           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
873         } else if (kaij->isTI) {
874           for (j=0; j<nz; j++) {
875             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
876             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
877             arrt += bs2;
878           }
879           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
880         }
881         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
882 
883         v  = aa + diag[i] + 1;
884         vi = aj + diag[i] + 1;
885         nz = ai[i+1] - diag[i] - 1;
886         workt = work;
887         for (j=0; j<nz; j++) {
888           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
889           workt += bs;
890         }
891         arrt = arr;
892         if (T) {
893           for (j=0; j<nz; j++) {
894             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
895             for (k=0; k<bs2; k++) arrt[k] *= v[j];
896             arrt += bs2;
897           }
898           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
899         } else if (kaij->isTI) {
900           for (j=0; j<nz; j++) {
901             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
902             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
903             arrt += bs2;
904           }
905           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
906         }
907 
908         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
909         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
910 
911         idiag += bs2;
912         i2    += bs;
913       }
914       xb = t;
915     }
916     else xb = b;
917     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
918       idiag = kaij->ibdiag+bs2*(m-1);
919       i2    = bs * (m-1);
920       if (xb == b) {
921         for (i=m-1; i>=0; i--) {
922           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
923 
924           v  = aa + ai[i];
925           vi = aj + ai[i];
926           nz = diag[i] - ai[i];
927           workt = work;
928           for (j=0; j<nz; j++) {
929             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
930             workt += bs;
931           }
932           arrt = arr;
933           if (T) {
934             for (j=0; j<nz; j++) {
935               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
936               for (k=0; k<bs2; k++) arrt[k] *= v[j];
937               arrt += bs2;
938             }
939             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
940           } else if (kaij->isTI) {
941             for (j=0; j<nz; j++) {
942               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
943               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
944               arrt += bs2;
945             }
946             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
947           }
948 
949           v  = aa + diag[i] + 1;
950           vi = aj + diag[i] + 1;
951           nz = ai[i+1] - diag[i] - 1;
952           workt = work;
953           for (j=0; j<nz; j++) {
954             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
955             workt += bs;
956           }
957           arrt = arr;
958           if (T) {
959             for (j=0; j<nz; j++) {
960               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
961               for (k=0; k<bs2; k++) arrt[k] *= v[j];
962               arrt += bs2;
963             }
964             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
965           } else if (kaij->isTI) {
966             for (j=0; j<nz; j++) {
967               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
968               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
969               arrt += bs2;
970             }
971             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
972           }
973 
974           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
975           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
976         }
977       } else {
978         for (i=m-1; i>=0; i--) {
979           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
980           v  = aa + diag[i] + 1;
981           vi = aj + diag[i] + 1;
982           nz = ai[i+1] - diag[i] - 1;
983           workt = work;
984           for (j=0; j<nz; j++) {
985             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
986             workt += bs;
987           }
988           arrt = arr;
989           if (T) {
990             for (j=0; j<nz; j++) {
991               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
992               for (k=0; k<bs2; k++) arrt[k] *= v[j];
993               arrt += bs2;
994             }
995             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
996           } else if (kaij->isTI) {
997             for (j=0; j<nz; j++) {
998               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
999               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1000               arrt += bs2;
1001             }
1002             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1003           }
1004           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1005           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1006         }
1007         idiag -= bs2;
1008         i2    -= bs;
1009       }
1010       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1011     }
1012   }
1013 
1014   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
1015   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 /*===================================================================================*/
1020 
1021 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1022 {
1023   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
1024   PetscErrorCode ierr;
1025 
1026   PetscFunctionBegin;
1027   if (!yy) {
1028     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
1029   } else {
1030     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
1031   }
1032   /* start the scatter */
1033   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
1035   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1041 {
1042   PetscErrorCode ierr;
1043   PetscFunctionBegin;
1044   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1049 {
1050   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1051   PetscErrorCode  ierr;
1052 
1053   PetscFunctionBegin;
1054   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 /* ----------------------------------------------------------------*/
1059 
1060 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1061 {
1062   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1063   PetscErrorCode  diag = PETSC_FALSE;
1064   PetscErrorCode  ierr;
1065   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1066   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1067 
1068   PetscFunctionBegin;
1069   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1070   b->getrowactive = PETSC_TRUE;
1071   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1072 
1073   if ((!S) && (!T) && (!b->isTI)) {
1074     if (ncols)    *ncols  = 0;
1075     if (cols)     *cols   = NULL;
1076     if (values)   *values = NULL;
1077     PetscFunctionReturn(0);
1078   }
1079 
1080   if (T || b->isTI) {
1081     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1082     c     = nzaij;
1083     for (i=0; i<nzaij; i++) {
1084       /* check if this row contains a diagonal entry */
1085       if (colsaij[i] == r) {
1086         diag = PETSC_TRUE;
1087         c = i;
1088       }
1089     }
1090   } else nzaij = c = 0;
1091 
1092   /* calculate size of row */
1093   nz = 0;
1094   if (S)            nz += q;
1095   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1096 
1097   if (cols || values) {
1098     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1099     for (i=0; i<q; i++) {
1100       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1101       v[i] = 0.0;
1102     }
1103     if (b->isTI) {
1104       for (i=0; i<nzaij; i++) {
1105         for (j=0; j<q; j++) {
1106           idx[i*q+j] = colsaij[i]*q+j;
1107           v[i*q+j]   = (j==s ? vaij[i] : 0);
1108         }
1109       }
1110     } else if (T) {
1111       for (i=0; i<nzaij; i++) {
1112         for (j=0; j<q; j++) {
1113           idx[i*q+j] = colsaij[i]*q+j;
1114           v[i*q+j]   = vaij[i]*T[s+j*p];
1115         }
1116       }
1117     }
1118     if (S) {
1119       for (j=0; j<q; j++) {
1120         idx[c*q+j] = r*q+j;
1121         v[c*q+j]  += S[s+j*p];
1122       }
1123     }
1124   }
1125 
1126   if (ncols)    *ncols  = nz;
1127   if (cols)     *cols   = idx;
1128   if (values)   *values = v;
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1133 {
1134   PetscErrorCode ierr;
1135   PetscFunctionBegin;
1136   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1137   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1142 {
1143   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1144   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1145   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1146   Mat             AIJ     = b->A;
1147   PetscBool       diag    = PETSC_FALSE;
1148   PetscErrorCode  ierr;
1149   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1150   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1151   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1152 
1153   PetscFunctionBegin;
1154   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1155   b->getrowactive = PETSC_TRUE;
1156   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1157   lrow = row - rstart;
1158 
1159   if ((!S) && (!T) && (!b->isTI)) {
1160     if (ncols)    *ncols  = 0;
1161     if (cols)     *cols   = NULL;
1162     if (values)   *values = NULL;
1163     PetscFunctionReturn(0);
1164   }
1165 
1166   r = lrow/p;
1167   s = lrow%p;
1168 
1169   if (T || b->isTI) {
1170     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1171     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1172     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1173 
1174     c     = ncolsaij + ncolsoaij;
1175     for (i=0; i<ncolsaij; i++) {
1176       /* check if this row contains a diagonal entry */
1177       if (colsaij[i] == r) {
1178         diag = PETSC_TRUE;
1179         c = i;
1180       }
1181     }
1182   } else c = 0;
1183 
1184   /* calculate size of row */
1185   nz = 0;
1186   if (S)            nz += q;
1187   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1188 
1189   if (cols || values) {
1190     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1191     for (i=0; i<q; i++) {
1192       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1193       v[i] = 0.0;
1194     }
1195     if (b->isTI) {
1196       for (i=0; i<ncolsaij; i++) {
1197         for (j=0; j<q; j++) {
1198           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1199           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1200         }
1201       }
1202       for (i=0; i<ncolsoaij; i++) {
1203         for (j=0; j<q; j++) {
1204           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1205           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1206         }
1207       }
1208     } else if (T) {
1209       for (i=0; i<ncolsaij; i++) {
1210         for (j=0; j<q; j++) {
1211           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1212           v[i*q+j]   = vals[i]*T[s+j*p];
1213         }
1214       }
1215       for (i=0; i<ncolsoaij; i++) {
1216         for (j=0; j<q; j++) {
1217           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1218           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1219         }
1220       }
1221     }
1222     if (S) {
1223       for (j=0; j<q; j++) {
1224         idx[c*q+j] = (r+rstart/p)*q+j;
1225         v[c*q+j]  += S[s+j*p];
1226       }
1227     }
1228   }
1229 
1230   if (ncols)  *ncols  = nz;
1231   if (cols)   *cols   = idx;
1232   if (values) *values = v;
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1237 {
1238   PetscErrorCode ierr;
1239   PetscFunctionBegin;
1240   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1241   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1246 {
1247   PetscErrorCode ierr;
1248   Mat            A;
1249 
1250   PetscFunctionBegin;
1251   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1252   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1253   ierr = MatDestroy(&A);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 /* ---------------------------------------------------------------------------------- */
1258 /*@C
1259   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1260 
1261     [I \otimes S + A \otimes T]
1262 
1263   where
1264     S is a dense (p \times q) matrix
1265     T is a dense (p \times q) matrix
1266     A is an AIJ  (n \times n) matrix
1267     I is the identity matrix
1268   The resulting matrix is (np \times nq)
1269 
1270   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1271 
1272   Collective
1273 
1274   Input Parameters:
1275 + A - the AIJ matrix
1276 . p - number of rows in S and T
1277 . q - number of columns in S and T
1278 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1279 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1280 
1281   Output Parameter:
1282 . kaij - the new KAIJ matrix
1283 
1284   Notes:
1285   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1286   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1287 
1288   Level: advanced
1289 
1290 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1291 @*/
1292 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1293 {
1294   PetscErrorCode ierr;
1295   PetscMPIInt    size;
1296 
1297   PetscFunctionBegin;
1298   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1299   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1300   if (size == 1) {
1301     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1302   } else {
1303     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1304   }
1305   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1306   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1307   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1308   ierr = MatSetUp(*kaij);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 /*MC
1313   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1314 
1315     [I \otimes S + A \otimes T]
1316 
1317   where
1318     S is a dense (p \times q) matrix
1319     T is a dense (p \times q) matrix
1320     A is an AIJ  (n \times n) matrix
1321     I is the identity matrix
1322   The resulting matrix is (np \times nq)
1323 
1324   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1325 
1326   Level: advanced
1327 
1328 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1329 M*/
1330 
1331 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1332 {
1333   PetscErrorCode ierr;
1334   Mat_MPIKAIJ    *b;
1335   PetscMPIInt    size;
1336 
1337   PetscFunctionBegin;
1338   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1339   A->data  = (void*)b;
1340 
1341   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1342 
1343   A->ops->setup = MatSetUp_KAIJ;
1344 
1345   b->w    = 0;
1346   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1347   if (size == 1) {
1348     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1349     A->ops->setup               = MatSetUp_KAIJ;
1350     A->ops->destroy             = MatDestroy_SeqKAIJ;
1351     A->ops->view                = MatView_KAIJ;
1352     A->ops->mult                = MatMult_SeqKAIJ;
1353     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1354     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1355     A->ops->getrow              = MatGetRow_SeqKAIJ;
1356     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1357     A->ops->sor                 = MatSOR_SeqKAIJ;
1358   } else {
1359     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1360     A->ops->setup               = MatSetUp_KAIJ;
1361     A->ops->destroy             = MatDestroy_MPIKAIJ;
1362     A->ops->view                = MatView_KAIJ;
1363     A->ops->mult                = MatMult_MPIKAIJ;
1364     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1365     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1366     A->ops->getrow              = MatGetRow_MPIKAIJ;
1367     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1368     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1369   }
1370   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1371   PetscFunctionReturn(0);
1372 }
1373