xref: /petsc/src/mat/impls/kaij/kaij.c (revision 234d920406567b02445c67203b6cdfb781bb1ba5)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscFunctionBegin;
145   if (S) *S = NULL;
146   PetscObjectStateIncrease((PetscObject)A);
147   PetscFunctionReturn(0);
148 }
149 
150 /*@C
151   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
152 
153   Not collective
154 
155   Input Parameter:
156 . A - the KAIJ matrix
157 
158   Output Parameter:
159 . S - location of pointer to array obtained with MatKAIJGetS()
160 
161   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
162   If NULL is passed, it will not attempt to zero the array pointer.
163 
164   Level: advanced
165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
166 @*/
167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
168 {
169   PetscFunctionBegin;
170   if (S) *S = NULL;
171   PetscFunctionReturn(0);
172 }
173 
174 /*@C
175    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
176 
177    Not Collective; the entire T is stored and returned independently on all processes
178 
179    Input Parameter:
180 .  A - the KAIJ matrix
181 
182    Output Parameter:
183 +  m - the number of rows in T
184 .  n - the number of columns in T
185 -  T - the T matrix, in form of a scalar array in column-major format
186 
187    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
188 
189    Level: advanced
190 
191 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
192 @*/
193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
194 {
195   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
196   PetscFunctionBegin;
197   if (m) *m = b->p;
198   if (n) *n = b->q;
199   if (T) *T = b->T;
200   PetscFunctionReturn(0);
201 }
202 
203 /*@C
204    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
205 
206    Not Collective; the entire T is stored and returned independently on all processes
207 
208    Input Parameter:
209 .  A - the KAIJ matrix
210 
211    Output Parameter:
212 +  m - the number of rows in T
213 .  n - the number of columns in T
214 -  T - the T matrix, in form of a scalar array in column-major format
215 
216    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
217 
218    Level: advanced
219 
220 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
221 @*/
222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
223 {
224   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
225   PetscFunctionBegin;
226   if (m) *m = b->p;
227   if (n) *n = b->q;
228   if (T) *T = b->T;
229   PetscFunctionReturn(0);
230 }
231 
232 /*@C
233   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
234 
235   Not collective
236 
237   Input Parameter:
238 . A - the KAIJ matrix
239 
240   Output Parameter:
241 . T - location of pointer to array obtained with MatKAIJGetS()
242 
243   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
244   If NULL is passed, it will not attempt to zero the array pointer.
245 
246   Level: advanced
247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
248 @*/
249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
250 {
251   PetscFunctionBegin;
252   if (T) *T = NULL;
253   PetscObjectStateIncrease((PetscObject)A);
254   PetscFunctionReturn(0);
255 }
256 
257 /*@C
258   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
259 
260   Not collective
261 
262   Input Parameter:
263 . A - the KAIJ matrix
264 
265   Output Parameter:
266 . T - location of pointer to array obtained with MatKAIJGetS()
267 
268   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
269   If NULL is passed, it will not attempt to zero the array pointer.
270 
271   Level: advanced
272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
273 @*/
274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
275 {
276   PetscFunctionBegin;
277   if (T) *T = NULL;
278   PetscFunctionReturn(0);
279 }
280 
281 /*@
282    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
283 
284    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
285 
286    Input Parameters:
287 +  A - the KAIJ matrix
288 -  B - the AIJ matrix
289 
290    Notes:
291    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
292    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
293 
294    Level: advanced
295 
296 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
297 @*/
298 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
299 {
300   PetscErrorCode ierr;
301   PetscMPIInt    size;
302 
303   PetscFunctionBegin;
304   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
305   if (size == 1) {
306     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
307     a->AIJ = B;
308   } else {
309     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
310     a->A = B;
311   }
312   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 /*@C
317    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
318 
319    Logically Collective; the entire S is stored independently on all processes.
320 
321    Input Parameters:
322 +  A - the KAIJ matrix
323 .  p - the number of rows in S
324 .  q - the number of columns in S
325 -  S - the S matrix, in form of a scalar array in column-major format
326 
327    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
328    The S matrix is copied, so the user can destroy this array.
329 
330    Level: Advanced
331 
332 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
333 @*/
334 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
335 {
336   PetscErrorCode ierr;
337   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
338 
339   PetscFunctionBegin;
340   ierr = PetscFree(a->S);CHKERRQ(ierr);
341   if (S) {
342     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
343     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
344   } else  a->S = NULL;
345 
346   a->p = p;
347   a->q = q;
348   PetscFunctionReturn(0);
349 }
350 
351 /*@C
352    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
353 
354    Logically Collective; the entire T is stored independently on all processes.
355 
356    Input Parameters:
357 +  A - the KAIJ matrix
358 .  p - the number of rows in S
359 .  q - the number of columns in S
360 -  T - the T matrix, in form of a scalar array in column-major format
361 
362    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
363    The T matrix is copied, so the user can destroy this array.
364 
365    Level: Advanced
366 
367 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
368 @*/
369 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
370 {
371   PetscErrorCode ierr;
372   PetscInt       i,j;
373   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
374   PetscBool      isTI = PETSC_FALSE;
375 
376   PetscFunctionBegin;
377   /* check if T is an identity matrix */
378   if (T && (p == q)) {
379     isTI = PETSC_TRUE;
380     for (i=0; i<p; i++) {
381       for (j=0; j<q; j++) {
382         if (i == j) {
383           /* diagonal term must be 1 */
384           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
385         } else {
386           /* off-diagonal term must be 0 */
387           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
388         }
389       }
390     }
391   }
392   a->isTI = isTI;
393 
394   ierr = PetscFree(a->T);CHKERRQ(ierr);
395   if (T && (!isTI)) {
396     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
397     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
398   } else a->T = NULL;
399 
400   a->p = p;
401   a->q = q;
402   PetscFunctionReturn(0);
403 }
404 
405 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
406 {
407   PetscErrorCode ierr;
408   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
409 
410   PetscFunctionBegin;
411   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
412   ierr = PetscFree(b->S);CHKERRQ(ierr);
413   ierr = PetscFree(b->T);CHKERRQ(ierr);
414   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
415   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
416   ierr = PetscFree(A->data);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 PetscErrorCode MatSetUp_KAIJ(Mat A)
421 {
422   PetscErrorCode ierr;
423   PetscInt       n;
424   PetscMPIInt    size;
425   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
426 
427   PetscFunctionBegin;
428   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
429   if (size == 1) {
430     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
431     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
432     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
433     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
434     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
435   } else {
436     Mat_MPIKAIJ *a;
437     Mat_MPIAIJ  *mpiaij;
438     IS          from,to;
439     Vec         gvec;
440     PetscScalar *T;
441     PetscInt    i,j;
442 
443     a = (Mat_MPIKAIJ*)A->data;
444     mpiaij = (Mat_MPIAIJ*)a->A->data;
445     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
446     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
447     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
448     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
449     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
450 
451     if (a->isTI) {
452       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
453        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
454        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
455        * to pass in. */
456       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
457       for (i=0; i<a->p; i++) {
458         for (j=0; j<a->q; j++) {
459           if (i==j) T[i+j*a->p] = 1.0;
460           else      T[i+j*a->p] = 0.0;
461         }
462       }
463     } else T = a->T;
464     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
465     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
466     if (a->isTI) {
467       ierr = PetscFree(T);CHKERRQ(ierr);
468     }
469 
470     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
471     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
472     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
473     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
474     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
475 
476     /* create two temporary Index sets for build scatter gather */
477     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
478     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
479 
480     /* create temporary global vector to generate scatter context */
481     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
482 
483     /* generate the scatter context */
484     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
485 
486     ierr = ISDestroy(&from);CHKERRQ(ierr);
487     ierr = ISDestroy(&to);CHKERRQ(ierr);
488     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
489   }
490 
491   A->assembled = PETSC_TRUE;
492   PetscFunctionReturn(0);
493 }
494 
495 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
496 {
497   PetscViewerFormat format;
498   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
499   Mat               B;
500   PetscInt          i;
501   PetscErrorCode    ierr;
502   PetscBool         ismpikaij;
503 
504   PetscFunctionBegin;
505   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
506   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
507   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
508     ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr);
509 
510     /* Print appropriate details for S. */
511     if (!a->S) {
512       ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");
513     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
514       ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr);
515       for (i=0; i<(a->p * a->q); i++) {
516 #if defined(PETSC_USE_COMPLEX)
517         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr);
518 #else
519         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr);
520 #endif
521       }
522       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
523     }
524 
525     /* Print appropriate details for T. */
526     if (a->isTI) {
527       ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");
528     } else if (!a->T) {
529       ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");
530     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
531       ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr);
532       for (i=0; i<(a->p * a->q); i++) {
533 #if defined(PETSC_USE_COMPLEX)
534         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr);
535 #else
536         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr);
537 #endif
538       }
539       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
540     }
541 
542     /* Now print details for the AIJ matrix, using the AIJ viewer. */
543     ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr);
544     if (ismpikaij) {
545       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
546       ierr = MatView(b->A,viewer);CHKERRQ(ierr);
547     } else {
548       ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr);
549     }
550 
551   } else {
552     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
553     if (ismpikaij) {
554       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
555     } else {
556       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
557     }
558     ierr = MatView(B,viewer);CHKERRQ(ierr);
559     ierr = MatDestroy(&B);CHKERRQ(ierr);
560   }
561   PetscFunctionReturn(0);
562 }
563 
564 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
565 {
566   PetscErrorCode ierr;
567   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
568 
569   PetscFunctionBegin;
570   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
571   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
572   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
573   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
574   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
575   ierr = PetscFree(b->S);CHKERRQ(ierr);
576   ierr = PetscFree(b->T);CHKERRQ(ierr);
577   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
578   ierr = PetscFree(A->data);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 /* --------------------------------------------------------------------------------------*/
583 
584 /* zz = yy + Axx */
585 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
586 {
587   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
588   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
589   const PetscScalar *s = b->S, *t = b->T;
590   const PetscScalar *x,*v,*bx;
591   PetscScalar       *y,*sums;
592   PetscErrorCode    ierr;
593   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
594   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
595 
596   PetscFunctionBegin;
597   if (!yy) {
598     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
599   } else {
600     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
601   }
602   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
603 
604   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
605   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
606   idx  = a->j;
607   v    = a->a;
608   ii   = a->i;
609 
610   if (b->isTI) {
611     for (i=0; i<m; i++) {
612       jrow = ii[i];
613       n    = ii[i+1] - jrow;
614       sums = y + p*i;
615       for (j=0; j<n; j++) {
616         for (k=0; k<p; k++) {
617           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
618         }
619       }
620     }
621     ierr = PetscLogFlops((a->nz)*3*p);CHKERRQ(ierr);
622   } else if (t) {
623     for (i=0; i<m; i++) {
624       jrow = ii[i];
625       n    = ii[i+1] - jrow;
626       sums = y + p*i;
627       bx   = x + q*i;
628       for (j=0; j<n; j++) {
629         for (k=0; k<p; k++) {
630           for (l=0; l<q; l++) {
631             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
632           }
633         }
634       }
635     }
636     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
637      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
638      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
639      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
640     ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
641   }
642   if (s) {
643     for (i=0; i<m; i++) {
644       sums = y + p*i;
645       bx   = x + q*i;
646       if (i < b->AIJ->cmap->n) {
647         for (j=0; j<q; j++) {
648           for (k=0; k<p; k++) {
649             sums[k] += s[k+j*p]*bx[j];
650           }
651         }
652       }
653     }
654     ierr = PetscLogFlops(m*2*p*q);CHKERRQ(ierr);
655   }
656 
657   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
658   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
659   PetscFunctionReturn(0);
660 }
661 
662 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
663 {
664   PetscErrorCode ierr;
665   PetscFunctionBegin;
666   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 #include <petsc/private/kernels/blockinvert.h>
671 
672 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
673 {
674   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
675   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
676   const PetscScalar *S  = b->S;
677   const PetscScalar *T  = b->T;
678   const PetscScalar *v  = a->a;
679   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
680   PetscErrorCode    ierr;
681   PetscInt          i,j,*v_pivots,dof,dof2;
682   PetscScalar       *diag,aval,*v_work;
683 
684   PetscFunctionBegin;
685   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
686   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
687 
688   dof  = p;
689   dof2 = dof*dof;
690 
691   if (b->ibdiagvalid) {
692     if (values) *values = b->ibdiag;
693     PetscFunctionReturn(0);
694   }
695   if (!b->ibdiag) {
696     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
697     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
698   }
699   if (values) *values = b->ibdiag;
700   diag = b->ibdiag;
701 
702   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
703   for (i=0; i<m; i++) {
704     if (S) {
705       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
706     } else {
707       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
708     }
709     if (b->isTI) {
710       aval = 0;
711       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
712       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
713     } else if (T) {
714       aval = 0;
715       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
716       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
717     }
718     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
719     diag += dof2;
720   }
721   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
722 
723   b->ibdiagvalid = PETSC_TRUE;
724   PetscFunctionReturn(0);
725 }
726 
727 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
728 {
729   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
730 
731   PetscFunctionBegin;
732   *B = kaij->AIJ;
733   PetscFunctionReturn(0);
734 }
735 
736 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
737 {
738   PetscErrorCode    ierr;
739   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
740   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
741   const PetscScalar *aa = a->a, *T = kaij->T, *v;
742   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
743   const PetscScalar *b, *xb, *idiag;
744   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
745   PetscInt          i, j, k, i2, bs, bs2, nz;
746 
747   PetscFunctionBegin;
748   its = its*lits;
749   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
750   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
751   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
752   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
753   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
754   else        {bs = p; bs2 = bs*bs; }
755 
756   if (!m) PetscFunctionReturn(0);
757 
758   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
759   idiag = kaij->ibdiag;
760   diag  = a->diag;
761 
762   if (!kaij->sor.setup) {
763     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
764     kaij->sor.setup = PETSC_TRUE;
765   }
766   y     = kaij->sor.y;
767   w     = kaij->sor.w;
768   work  = kaij->sor.work;
769   t     = kaij->sor.t;
770   arr   = kaij->sor.arr;
771 
772   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
773   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
774 
775   if (flag & SOR_ZERO_INITIAL_GUESS) {
776     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
777       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
778       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
779       i2     =  bs;
780       idiag  += bs2;
781       for (i=1; i<m; i++) {
782         v  = aa + ai[i];
783         vi = aj + ai[i];
784         nz = diag[i] - ai[i];
785 
786         if (T) {                /* b - T (Arow * x) */
787           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));
788           for (j=0; j<nz; j++) {
789             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
790           }
791           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
792           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
793         } else if (kaij->isTI) {
794           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
795           for (j=0; j<nz; j++) {
796             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
797           }
798         } else {
799           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
800         }
801 
802         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
803         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
804 
805         idiag += bs2;
806         i2    += bs;
807       }
808       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
809       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
810       xb = t;
811     } else xb = b;
812     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
813       idiag = kaij->ibdiag+bs2*(m-1);
814       i2    = bs * (m-1);
815       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
816       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
817       i2    -= bs;
818       idiag -= bs2;
819       for (i=m-2; i>=0; i--) {
820         v  = aa + diag[i] + 1 ;
821         vi = aj + diag[i] + 1;
822         nz = ai[i+1] - diag[i] - 1;
823 
824         if (T) {                /* FIXME: This branch untested */
825           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
826           /* copy all rows of x that are needed into contiguous space */
827           workt = work;
828           for (j=0; j<nz; j++) {
829             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
830             workt += bs;
831           }
832           arrt = arr;
833           for (j=0; j<nz; j++) {
834             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
835             for (k=0; k<bs2; k++) arrt[k] *= v[j];
836             arrt += bs2;
837           }
838           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
839         } else if (kaij->isTI) {
840           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
841           for (j=0; j<nz; j++) {
842             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
843           }
844         }
845 
846         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
847         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
848 
849         idiag -= bs2;
850         i2    -= bs;
851       }
852       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
853     }
854     its--;
855   }
856   while (its--) {               /* FIXME: This branch not updated */
857     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
858       i2     =  0;
859       idiag  = kaij->ibdiag;
860       for (i=0; i<m; i++) {
861         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
862 
863         v  = aa + ai[i];
864         vi = aj + ai[i];
865         nz = diag[i] - ai[i];
866         workt = work;
867         for (j=0; j<nz; j++) {
868           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
869           workt += bs;
870         }
871         arrt = arr;
872         if (T) {
873           for (j=0; j<nz; j++) {
874             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
875             for (k=0; k<bs2; k++) arrt[k] *= v[j];
876             arrt += bs2;
877           }
878           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
879         } else if (kaij->isTI) {
880           for (j=0; j<nz; j++) {
881             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
882             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
883             arrt += bs2;
884           }
885           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
886         }
887         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
888 
889         v  = aa + diag[i] + 1;
890         vi = aj + diag[i] + 1;
891         nz = ai[i+1] - diag[i] - 1;
892         workt = work;
893         for (j=0; j<nz; j++) {
894           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
895           workt += bs;
896         }
897         arrt = arr;
898         if (T) {
899           for (j=0; j<nz; j++) {
900             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
901             for (k=0; k<bs2; k++) arrt[k] *= v[j];
902             arrt += bs2;
903           }
904           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
905         } else if (kaij->isTI) {
906           for (j=0; j<nz; j++) {
907             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
908             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
909             arrt += bs2;
910           }
911           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
912         }
913 
914         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
915         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
916 
917         idiag += bs2;
918         i2    += bs;
919       }
920       xb = t;
921     }
922     else xb = b;
923     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
924       idiag = kaij->ibdiag+bs2*(m-1);
925       i2    = bs * (m-1);
926       if (xb == b) {
927         for (i=m-1; i>=0; i--) {
928           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
929 
930           v  = aa + ai[i];
931           vi = aj + ai[i];
932           nz = diag[i] - ai[i];
933           workt = work;
934           for (j=0; j<nz; j++) {
935             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
936             workt += bs;
937           }
938           arrt = arr;
939           if (T) {
940             for (j=0; j<nz; j++) {
941               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
942               for (k=0; k<bs2; k++) arrt[k] *= v[j];
943               arrt += bs2;
944             }
945             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
946           } else if (kaij->isTI) {
947             for (j=0; j<nz; j++) {
948               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
949               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
950               arrt += bs2;
951             }
952             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
953           }
954 
955           v  = aa + diag[i] + 1;
956           vi = aj + diag[i] + 1;
957           nz = ai[i+1] - diag[i] - 1;
958           workt = work;
959           for (j=0; j<nz; j++) {
960             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
961             workt += bs;
962           }
963           arrt = arr;
964           if (T) {
965             for (j=0; j<nz; j++) {
966               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
967               for (k=0; k<bs2; k++) arrt[k] *= v[j];
968               arrt += bs2;
969             }
970             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
971           } else if (kaij->isTI) {
972             for (j=0; j<nz; j++) {
973               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
974               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
975               arrt += bs2;
976             }
977             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
978           }
979 
980           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
981           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
982         }
983       } else {
984         for (i=m-1; i>=0; i--) {
985           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
986           v  = aa + diag[i] + 1;
987           vi = aj + diag[i] + 1;
988           nz = ai[i+1] - diag[i] - 1;
989           workt = work;
990           for (j=0; j<nz; j++) {
991             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
992             workt += bs;
993           }
994           arrt = arr;
995           if (T) {
996             for (j=0; j<nz; j++) {
997               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
998               for (k=0; k<bs2; k++) arrt[k] *= v[j];
999               arrt += bs2;
1000             }
1001             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1002           } else if (kaij->isTI) {
1003             for (j=0; j<nz; j++) {
1004               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1005               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1006               arrt += bs2;
1007             }
1008             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1009           }
1010           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1011           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1012         }
1013         idiag -= bs2;
1014         i2    -= bs;
1015       }
1016       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1017     }
1018   }
1019 
1020   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
1021   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 /*===================================================================================*/
1026 
1027 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1028 {
1029   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
1030   PetscErrorCode ierr;
1031 
1032   PetscFunctionBegin;
1033   if (!yy) {
1034     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
1035   } else {
1036     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
1037   }
1038   /* start the scatter */
1039   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1040   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
1041   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1047 {
1048   PetscErrorCode ierr;
1049   PetscFunctionBegin;
1050   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1055 {
1056   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1057   PetscErrorCode  ierr;
1058 
1059   PetscFunctionBegin;
1060   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 /* ----------------------------------------------------------------*/
1065 
1066 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1067 {
1068   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1069   PetscErrorCode  diag = PETSC_FALSE;
1070   PetscErrorCode  ierr;
1071   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1072   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1073 
1074   PetscFunctionBegin;
1075   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1076   b->getrowactive = PETSC_TRUE;
1077   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1078 
1079   if ((!S) && (!T) && (!b->isTI)) {
1080     if (ncols)    *ncols  = 0;
1081     if (cols)     *cols   = NULL;
1082     if (values)   *values = NULL;
1083     PetscFunctionReturn(0);
1084   }
1085 
1086   if (T || b->isTI) {
1087     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1088     c     = nzaij;
1089     for (i=0; i<nzaij; i++) {
1090       /* check if this row contains a diagonal entry */
1091       if (colsaij[i] == r) {
1092         diag = PETSC_TRUE;
1093         c = i;
1094       }
1095     }
1096   } else nzaij = c = 0;
1097 
1098   /* calculate size of row */
1099   nz = 0;
1100   if (S)            nz += q;
1101   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1102 
1103   if (cols || values) {
1104     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1105     for (i=0; i<q; i++) {
1106       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1107       v[i] = 0.0;
1108     }
1109     if (b->isTI) {
1110       for (i=0; i<nzaij; i++) {
1111         for (j=0; j<q; j++) {
1112           idx[i*q+j] = colsaij[i]*q+j;
1113           v[i*q+j]   = (j==s ? vaij[i] : 0);
1114         }
1115       }
1116     } else if (T) {
1117       for (i=0; i<nzaij; i++) {
1118         for (j=0; j<q; j++) {
1119           idx[i*q+j] = colsaij[i]*q+j;
1120           v[i*q+j]   = vaij[i]*T[s+j*p];
1121         }
1122       }
1123     }
1124     if (S) {
1125       for (j=0; j<q; j++) {
1126         idx[c*q+j] = r*q+j;
1127         v[c*q+j]  += S[s+j*p];
1128       }
1129     }
1130   }
1131 
1132   if (ncols)    *ncols  = nz;
1133   if (cols)     *cols   = idx;
1134   if (values)   *values = v;
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1139 {
1140   PetscErrorCode ierr;
1141   PetscFunctionBegin;
1142   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1143   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1148 {
1149   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1150   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1151   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1152   Mat             AIJ     = b->A;
1153   PetscBool       diag    = PETSC_FALSE;
1154   PetscErrorCode  ierr;
1155   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1156   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1157   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1158 
1159   PetscFunctionBegin;
1160   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1161   b->getrowactive = PETSC_TRUE;
1162   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1163   lrow = row - rstart;
1164 
1165   if ((!S) && (!T) && (!b->isTI)) {
1166     if (ncols)    *ncols  = 0;
1167     if (cols)     *cols   = NULL;
1168     if (values)   *values = NULL;
1169     PetscFunctionReturn(0);
1170   }
1171 
1172   r = lrow/p;
1173   s = lrow%p;
1174 
1175   if (T || b->isTI) {
1176     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1177     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1178     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1179 
1180     c     = ncolsaij + ncolsoaij;
1181     for (i=0; i<ncolsaij; i++) {
1182       /* check if this row contains a diagonal entry */
1183       if (colsaij[i] == r) {
1184         diag = PETSC_TRUE;
1185         c = i;
1186       }
1187     }
1188   } else c = 0;
1189 
1190   /* calculate size of row */
1191   nz = 0;
1192   if (S)            nz += q;
1193   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1194 
1195   if (cols || values) {
1196     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1197     for (i=0; i<q; i++) {
1198       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1199       v[i] = 0.0;
1200     }
1201     if (b->isTI) {
1202       for (i=0; i<ncolsaij; i++) {
1203         for (j=0; j<q; j++) {
1204           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1205           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1206         }
1207       }
1208       for (i=0; i<ncolsoaij; i++) {
1209         for (j=0; j<q; j++) {
1210           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1211           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1212         }
1213       }
1214     } else if (T) {
1215       for (i=0; i<ncolsaij; i++) {
1216         for (j=0; j<q; j++) {
1217           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1218           v[i*q+j]   = vals[i]*T[s+j*p];
1219         }
1220       }
1221       for (i=0; i<ncolsoaij; i++) {
1222         for (j=0; j<q; j++) {
1223           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1224           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1225         }
1226       }
1227     }
1228     if (S) {
1229       for (j=0; j<q; j++) {
1230         idx[c*q+j] = (r+rstart/p)*q+j;
1231         v[c*q+j]  += S[s+j*p];
1232       }
1233     }
1234   }
1235 
1236   if (ncols)  *ncols  = nz;
1237   if (cols)   *cols   = idx;
1238   if (values) *values = v;
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1243 {
1244   PetscErrorCode ierr;
1245   PetscFunctionBegin;
1246   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1247   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1252 {
1253   PetscErrorCode ierr;
1254   Mat            A;
1255 
1256   PetscFunctionBegin;
1257   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1258   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1259   ierr = MatDestroy(&A);CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 /* ---------------------------------------------------------------------------------- */
1264 /*@C
1265   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1266 
1267     [I \otimes S + A \otimes T]
1268 
1269   where
1270     S is a dense (p \times q) matrix
1271     T is a dense (p \times q) matrix
1272     A is an AIJ  (n \times n) matrix
1273     I is the identity matrix
1274   The resulting matrix is (np \times nq)
1275 
1276   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1277 
1278   Collective
1279 
1280   Input Parameters:
1281 + A - the AIJ matrix
1282 . p - number of rows in S and T
1283 . q - number of columns in S and T
1284 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1285 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1286 
1287   Output Parameter:
1288 . kaij - the new KAIJ matrix
1289 
1290   Notes:
1291   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1292   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1293 
1294   Level: advanced
1295 
1296 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1297 @*/
1298 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1299 {
1300   PetscErrorCode ierr;
1301   PetscMPIInt    size;
1302 
1303   PetscFunctionBegin;
1304   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1305   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1306   if (size == 1) {
1307     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1308   } else {
1309     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1310   }
1311   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1312   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1313   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1314   ierr = MatSetUp(*kaij);
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 /*MC
1319   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1320 
1321     [I \otimes S + A \otimes T]
1322 
1323   where
1324     S is a dense (p \times q) matrix
1325     T is a dense (p \times q) matrix
1326     A is an AIJ  (n \times n) matrix
1327     I is the identity matrix
1328   The resulting matrix is (np \times nq)
1329 
1330   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1331 
1332   Level: advanced
1333 
1334 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1335 M*/
1336 
1337 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1338 {
1339   PetscErrorCode ierr;
1340   Mat_MPIKAIJ    *b;
1341   PetscMPIInt    size;
1342 
1343   PetscFunctionBegin;
1344   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1345   A->data  = (void*)b;
1346 
1347   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1348 
1349   A->ops->setup = MatSetUp_KAIJ;
1350 
1351   b->w    = 0;
1352   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1353   if (size == 1) {
1354     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1355     A->ops->setup               = MatSetUp_KAIJ;
1356     A->ops->destroy             = MatDestroy_SeqKAIJ;
1357     A->ops->view                = MatView_KAIJ;
1358     A->ops->mult                = MatMult_SeqKAIJ;
1359     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1360     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1361     A->ops->getrow              = MatGetRow_SeqKAIJ;
1362     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1363     A->ops->sor                 = MatSOR_SeqKAIJ;
1364   } else {
1365     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1366     A->ops->setup               = MatSetUp_KAIJ;
1367     A->ops->destroy             = MatDestroy_MPIKAIJ;
1368     A->ops->view                = MatView_KAIJ;
1369     A->ops->mult                = MatMult_MPIKAIJ;
1370     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1371     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1372     A->ops->getrow              = MatGetRow_MPIKAIJ;
1373     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1374     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1375   }
1376   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1377   PetscFunctionReturn(0);
1378 }
1379