xref: /petsc/src/mat/impls/maij/maij.c (revision bc2edf6e1f1306b3c8ad4520aae64d4f74a7ec64)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 
22 /*@
23    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24 
25    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26 
27    Input Parameter:
28 .  A - the MAIJ matrix
29 
30    Output Parameter:
31 .  B - the AIJ matrix
32 
33    Level: advanced
34 
35    Notes:
36     The reference count on the AIJ matrix is not increased so you should not destroy it.
37 
38 .seealso: MatCreateMAIJ()
39 @*/
40 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
41 {
42   PetscErrorCode ierr;
43   PetscBool      ismpimaij,isseqmaij;
44 
45   PetscFunctionBegin;
46   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
47   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
48   if (ismpimaij) {
49     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
50 
51     *B = b->A;
52   } else if (isseqmaij) {
53     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
54 
55     *B = b->AIJ;
56   } else {
57     *B = A;
58   }
59   PetscFunctionReturn(0);
60 }
61 
62 /*@
63    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
64 
65    Logically Collective
66 
67    Input Parameter:
68 +  A - the MAIJ matrix
69 -  dof - the block size for the new matrix
70 
71    Output Parameter:
72 .  B - the new MAIJ matrix
73 
74    Level: advanced
75 
76 .seealso: MatCreateMAIJ()
77 @*/
78 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
79 {
80   PetscErrorCode ierr;
81   Mat            Aij = NULL;
82 
83   PetscFunctionBegin;
84   PetscValidLogicalCollectiveInt(A,dof,2);
85   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
86   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
91 {
92   PetscErrorCode ierr;
93   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
94 
95   PetscFunctionBegin;
96   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
97   ierr = PetscFree(A->data);CHKERRQ(ierr);
98   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
99   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
100   PetscFunctionReturn(0);
101 }
102 
103 PetscErrorCode MatSetUp_MAIJ(Mat A)
104 {
105   PetscFunctionBegin;
106   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
107   PetscFunctionReturn(0);
108 }
109 
110 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
111 {
112   PetscErrorCode ierr;
113   Mat            B;
114 
115   PetscFunctionBegin;
116   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
117   ierr = MatView(B,viewer);CHKERRQ(ierr);
118   ierr = MatDestroy(&B);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
123 {
124   PetscErrorCode ierr;
125   Mat            B;
126 
127   PetscFunctionBegin;
128   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
129   ierr = MatView(B,viewer);CHKERRQ(ierr);
130   ierr = MatDestroy(&B);CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
135 {
136   PetscErrorCode ierr;
137   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
138 
139   PetscFunctionBegin;
140   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
141   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
142   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
143   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
144   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
145   ierr = PetscFree(A->data);CHKERRQ(ierr);
146   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
147   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
148   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 /*MC
153   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
154   multicomponent problems, interpolating or restricting each component the same way independently.
155   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
156 
157   Operations provided:
158 + MatMult
159 . MatMultTranspose
160 . MatMultAdd
161 - MatMultTransposeAdd
162 
163   Level: advanced
164 
165 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
166 M*/
167 
168 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
169 {
170   PetscErrorCode ierr;
171   Mat_MPIMAIJ    *b;
172   PetscMPIInt    size;
173 
174   PetscFunctionBegin;
175   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
176   A->data  = (void*)b;
177 
178   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
179 
180   A->ops->setup = MatSetUp_MAIJ;
181 
182   b->AIJ  = 0;
183   b->dof  = 0;
184   b->OAIJ = 0;
185   b->ctx  = 0;
186   b->w    = 0;
187   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
188   if (size == 1) {
189     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
190   } else {
191     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
192   }
193   A->preallocated  = PETSC_TRUE;
194   A->assembled     = PETSC_TRUE;
195   PetscFunctionReturn(0);
196 }
197 
198 /* --------------------------------------------------------------------------------------*/
199 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
200 {
201   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
202   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
203   const PetscScalar *x,*v;
204   PetscScalar       *y, sum1, sum2;
205   PetscErrorCode    ierr;
206   PetscInt          nonzerorow=0,n,i,jrow,j;
207   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
208 
209   PetscFunctionBegin;
210   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
211   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
212   idx  = a->j;
213   v    = a->a;
214   ii   = a->i;
215 
216   for (i=0; i<m; i++) {
217     jrow  = ii[i];
218     n     = ii[i+1] - jrow;
219     sum1  = 0.0;
220     sum2  = 0.0;
221 
222     nonzerorow += (n>0);
223     for (j=0; j<n; j++) {
224       sum1 += v[jrow]*x[2*idx[jrow]];
225       sum2 += v[jrow]*x[2*idx[jrow]+1];
226       jrow++;
227     }
228     y[2*i]   = sum1;
229     y[2*i+1] = sum2;
230   }
231 
232   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
233   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
234   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
235   PetscFunctionReturn(0);
236 }
237 
238 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
239 {
240   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
241   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
242   const PetscScalar *x,*v;
243   PetscScalar       *y,alpha1,alpha2;
244   PetscErrorCode    ierr;
245   PetscInt          n,i;
246   const PetscInt    m = b->AIJ->rmap->n,*idx;
247 
248   PetscFunctionBegin;
249   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
250   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
251   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
252 
253   for (i=0; i<m; i++) {
254     idx    = a->j + a->i[i];
255     v      = a->a + a->i[i];
256     n      = a->i[i+1] - a->i[i];
257     alpha1 = x[2*i];
258     alpha2 = x[2*i+1];
259     while (n-->0) {
260       y[2*(*idx)]   += alpha1*(*v);
261       y[2*(*idx)+1] += alpha2*(*v);
262       idx++; v++;
263     }
264   }
265   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
266   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
267   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
272 {
273   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
274   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
275   const PetscScalar *x,*v;
276   PetscScalar       *y,sum1, sum2;
277   PetscErrorCode    ierr;
278   PetscInt          n,i,jrow,j;
279   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
280 
281   PetscFunctionBegin;
282   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
283   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
284   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
285   idx  = a->j;
286   v    = a->a;
287   ii   = a->i;
288 
289   for (i=0; i<m; i++) {
290     jrow = ii[i];
291     n    = ii[i+1] - jrow;
292     sum1 = 0.0;
293     sum2 = 0.0;
294     for (j=0; j<n; j++) {
295       sum1 += v[jrow]*x[2*idx[jrow]];
296       sum2 += v[jrow]*x[2*idx[jrow]+1];
297       jrow++;
298     }
299     y[2*i]   += sum1;
300     y[2*i+1] += sum2;
301   }
302 
303   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
304   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
305   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
309 {
310   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
311   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
312   const PetscScalar *x,*v;
313   PetscScalar       *y,alpha1,alpha2;
314   PetscErrorCode    ierr;
315   PetscInt          n,i;
316   const PetscInt    m = b->AIJ->rmap->n,*idx;
317 
318   PetscFunctionBegin;
319   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
320   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
321   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
322 
323   for (i=0; i<m; i++) {
324     idx    = a->j + a->i[i];
325     v      = a->a + a->i[i];
326     n      = a->i[i+1] - a->i[i];
327     alpha1 = x[2*i];
328     alpha2 = x[2*i+1];
329     while (n-->0) {
330       y[2*(*idx)]   += alpha1*(*v);
331       y[2*(*idx)+1] += alpha2*(*v);
332       idx++; v++;
333     }
334   }
335   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
336   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
337   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
338   PetscFunctionReturn(0);
339 }
340 /* --------------------------------------------------------------------------------------*/
341 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
342 {
343   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
344   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
345   const PetscScalar *x,*v;
346   PetscScalar       *y,sum1, sum2, sum3;
347   PetscErrorCode    ierr;
348   PetscInt          nonzerorow=0,n,i,jrow,j;
349   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
350 
351   PetscFunctionBegin;
352   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
353   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
354   idx  = a->j;
355   v    = a->a;
356   ii   = a->i;
357 
358   for (i=0; i<m; i++) {
359     jrow  = ii[i];
360     n     = ii[i+1] - jrow;
361     sum1  = 0.0;
362     sum2  = 0.0;
363     sum3  = 0.0;
364 
365     nonzerorow += (n>0);
366     for (j=0; j<n; j++) {
367       sum1 += v[jrow]*x[3*idx[jrow]];
368       sum2 += v[jrow]*x[3*idx[jrow]+1];
369       sum3 += v[jrow]*x[3*idx[jrow]+2];
370       jrow++;
371      }
372     y[3*i]   = sum1;
373     y[3*i+1] = sum2;
374     y[3*i+2] = sum3;
375   }
376 
377   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
378   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
379   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
380   PetscFunctionReturn(0);
381 }
382 
383 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
384 {
385   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
386   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
387   const PetscScalar *x,*v;
388   PetscScalar       *y,alpha1,alpha2,alpha3;
389   PetscErrorCode    ierr;
390   PetscInt          n,i;
391   const PetscInt    m = b->AIJ->rmap->n,*idx;
392 
393   PetscFunctionBegin;
394   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
395   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
396   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
397 
398   for (i=0; i<m; i++) {
399     idx    = a->j + a->i[i];
400     v      = a->a + a->i[i];
401     n      = a->i[i+1] - a->i[i];
402     alpha1 = x[3*i];
403     alpha2 = x[3*i+1];
404     alpha3 = x[3*i+2];
405     while (n-->0) {
406       y[3*(*idx)]   += alpha1*(*v);
407       y[3*(*idx)+1] += alpha2*(*v);
408       y[3*(*idx)+2] += alpha3*(*v);
409       idx++; v++;
410     }
411   }
412   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
413   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
414   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
419 {
420   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
421   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
422   const PetscScalar *x,*v;
423   PetscScalar       *y,sum1, sum2, sum3;
424   PetscErrorCode    ierr;
425   PetscInt          n,i,jrow,j;
426   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
427 
428   PetscFunctionBegin;
429   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
430   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
431   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
432   idx  = a->j;
433   v    = a->a;
434   ii   = a->i;
435 
436   for (i=0; i<m; i++) {
437     jrow = ii[i];
438     n    = ii[i+1] - jrow;
439     sum1 = 0.0;
440     sum2 = 0.0;
441     sum3 = 0.0;
442     for (j=0; j<n; j++) {
443       sum1 += v[jrow]*x[3*idx[jrow]];
444       sum2 += v[jrow]*x[3*idx[jrow]+1];
445       sum3 += v[jrow]*x[3*idx[jrow]+2];
446       jrow++;
447     }
448     y[3*i]   += sum1;
449     y[3*i+1] += sum2;
450     y[3*i+2] += sum3;
451   }
452 
453   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
454   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
455   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
456   PetscFunctionReturn(0);
457 }
458 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
459 {
460   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
461   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
462   const PetscScalar *x,*v;
463   PetscScalar       *y,alpha1,alpha2,alpha3;
464   PetscErrorCode    ierr;
465   PetscInt          n,i;
466   const PetscInt    m = b->AIJ->rmap->n,*idx;
467 
468   PetscFunctionBegin;
469   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
470   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
471   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
472   for (i=0; i<m; i++) {
473     idx    = a->j + a->i[i];
474     v      = a->a + a->i[i];
475     n      = a->i[i+1] - a->i[i];
476     alpha1 = x[3*i];
477     alpha2 = x[3*i+1];
478     alpha3 = x[3*i+2];
479     while (n-->0) {
480       y[3*(*idx)]   += alpha1*(*v);
481       y[3*(*idx)+1] += alpha2*(*v);
482       y[3*(*idx)+2] += alpha3*(*v);
483       idx++; v++;
484     }
485   }
486   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
487   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
488   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
489   PetscFunctionReturn(0);
490 }
491 
492 /* ------------------------------------------------------------------------------*/
493 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
494 {
495   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
496   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
497   const PetscScalar *x,*v;
498   PetscScalar       *y,sum1, sum2, sum3, sum4;
499   PetscErrorCode    ierr;
500   PetscInt          nonzerorow=0,n,i,jrow,j;
501   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
502 
503   PetscFunctionBegin;
504   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
505   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
506   idx  = a->j;
507   v    = a->a;
508   ii   = a->i;
509 
510   for (i=0; i<m; i++) {
511     jrow        = ii[i];
512     n           = ii[i+1] - jrow;
513     sum1        = 0.0;
514     sum2        = 0.0;
515     sum3        = 0.0;
516     sum4        = 0.0;
517     nonzerorow += (n>0);
518     for (j=0; j<n; j++) {
519       sum1 += v[jrow]*x[4*idx[jrow]];
520       sum2 += v[jrow]*x[4*idx[jrow]+1];
521       sum3 += v[jrow]*x[4*idx[jrow]+2];
522       sum4 += v[jrow]*x[4*idx[jrow]+3];
523       jrow++;
524     }
525     y[4*i]   = sum1;
526     y[4*i+1] = sum2;
527     y[4*i+2] = sum3;
528     y[4*i+3] = sum4;
529   }
530 
531   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
532   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
533   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
534   PetscFunctionReturn(0);
535 }
536 
537 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
538 {
539   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
540   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
541   const PetscScalar *x,*v;
542   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
543   PetscErrorCode    ierr;
544   PetscInt          n,i;
545   const PetscInt    m = b->AIJ->rmap->n,*idx;
546 
547   PetscFunctionBegin;
548   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
549   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
550   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
551   for (i=0; i<m; i++) {
552     idx    = a->j + a->i[i];
553     v      = a->a + a->i[i];
554     n      = a->i[i+1] - a->i[i];
555     alpha1 = x[4*i];
556     alpha2 = x[4*i+1];
557     alpha3 = x[4*i+2];
558     alpha4 = x[4*i+3];
559     while (n-->0) {
560       y[4*(*idx)]   += alpha1*(*v);
561       y[4*(*idx)+1] += alpha2*(*v);
562       y[4*(*idx)+2] += alpha3*(*v);
563       y[4*(*idx)+3] += alpha4*(*v);
564       idx++; v++;
565     }
566   }
567   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
568   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
569   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
570   PetscFunctionReturn(0);
571 }
572 
573 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
574 {
575   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
576   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
577   const PetscScalar *x,*v;
578   PetscScalar       *y,sum1, sum2, sum3, sum4;
579   PetscErrorCode    ierr;
580   PetscInt          n,i,jrow,j;
581   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
582 
583   PetscFunctionBegin;
584   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
585   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
586   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
587   idx  = a->j;
588   v    = a->a;
589   ii   = a->i;
590 
591   for (i=0; i<m; i++) {
592     jrow = ii[i];
593     n    = ii[i+1] - jrow;
594     sum1 = 0.0;
595     sum2 = 0.0;
596     sum3 = 0.0;
597     sum4 = 0.0;
598     for (j=0; j<n; j++) {
599       sum1 += v[jrow]*x[4*idx[jrow]];
600       sum2 += v[jrow]*x[4*idx[jrow]+1];
601       sum3 += v[jrow]*x[4*idx[jrow]+2];
602       sum4 += v[jrow]*x[4*idx[jrow]+3];
603       jrow++;
604     }
605     y[4*i]   += sum1;
606     y[4*i+1] += sum2;
607     y[4*i+2] += sum3;
608     y[4*i+3] += sum4;
609   }
610 
611   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
612   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
613   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
614   PetscFunctionReturn(0);
615 }
616 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
617 {
618   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
619   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
620   const PetscScalar *x,*v;
621   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
622   PetscErrorCode    ierr;
623   PetscInt          n,i;
624   const PetscInt    m = b->AIJ->rmap->n,*idx;
625 
626   PetscFunctionBegin;
627   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
628   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
629   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
630 
631   for (i=0; i<m; i++) {
632     idx    = a->j + a->i[i];
633     v      = a->a + a->i[i];
634     n      = a->i[i+1] - a->i[i];
635     alpha1 = x[4*i];
636     alpha2 = x[4*i+1];
637     alpha3 = x[4*i+2];
638     alpha4 = x[4*i+3];
639     while (n-->0) {
640       y[4*(*idx)]   += alpha1*(*v);
641       y[4*(*idx)+1] += alpha2*(*v);
642       y[4*(*idx)+2] += alpha3*(*v);
643       y[4*(*idx)+3] += alpha4*(*v);
644       idx++; v++;
645     }
646   }
647   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
648   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
649   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 /* ------------------------------------------------------------------------------*/
653 
654 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
655 {
656   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
657   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
658   const PetscScalar *x,*v;
659   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
660   PetscErrorCode    ierr;
661   PetscInt          nonzerorow=0,n,i,jrow,j;
662   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
663 
664   PetscFunctionBegin;
665   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
666   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
667   idx  = a->j;
668   v    = a->a;
669   ii   = a->i;
670 
671   for (i=0; i<m; i++) {
672     jrow  = ii[i];
673     n     = ii[i+1] - jrow;
674     sum1  = 0.0;
675     sum2  = 0.0;
676     sum3  = 0.0;
677     sum4  = 0.0;
678     sum5  = 0.0;
679 
680     nonzerorow += (n>0);
681     for (j=0; j<n; j++) {
682       sum1 += v[jrow]*x[5*idx[jrow]];
683       sum2 += v[jrow]*x[5*idx[jrow]+1];
684       sum3 += v[jrow]*x[5*idx[jrow]+2];
685       sum4 += v[jrow]*x[5*idx[jrow]+3];
686       sum5 += v[jrow]*x[5*idx[jrow]+4];
687       jrow++;
688     }
689     y[5*i]   = sum1;
690     y[5*i+1] = sum2;
691     y[5*i+2] = sum3;
692     y[5*i+3] = sum4;
693     y[5*i+4] = sum5;
694   }
695 
696   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
697   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
698   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
699   PetscFunctionReturn(0);
700 }
701 
702 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
703 {
704   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
705   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
706   const PetscScalar *x,*v;
707   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
708   PetscErrorCode    ierr;
709   PetscInt          n,i;
710   const PetscInt    m = b->AIJ->rmap->n,*idx;
711 
712   PetscFunctionBegin;
713   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
714   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
715   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
716 
717   for (i=0; i<m; i++) {
718     idx    = a->j + a->i[i];
719     v      = a->a + a->i[i];
720     n      = a->i[i+1] - a->i[i];
721     alpha1 = x[5*i];
722     alpha2 = x[5*i+1];
723     alpha3 = x[5*i+2];
724     alpha4 = x[5*i+3];
725     alpha5 = x[5*i+4];
726     while (n-->0) {
727       y[5*(*idx)]   += alpha1*(*v);
728       y[5*(*idx)+1] += alpha2*(*v);
729       y[5*(*idx)+2] += alpha3*(*v);
730       y[5*(*idx)+3] += alpha4*(*v);
731       y[5*(*idx)+4] += alpha5*(*v);
732       idx++; v++;
733     }
734   }
735   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
736   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
737   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
738   PetscFunctionReturn(0);
739 }
740 
741 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
742 {
743   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
744   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
745   const PetscScalar *x,*v;
746   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
747   PetscErrorCode    ierr;
748   PetscInt          n,i,jrow,j;
749   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
750 
751   PetscFunctionBegin;
752   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
753   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
754   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
755   idx  = a->j;
756   v    = a->a;
757   ii   = a->i;
758 
759   for (i=0; i<m; i++) {
760     jrow = ii[i];
761     n    = ii[i+1] - jrow;
762     sum1 = 0.0;
763     sum2 = 0.0;
764     sum3 = 0.0;
765     sum4 = 0.0;
766     sum5 = 0.0;
767     for (j=0; j<n; j++) {
768       sum1 += v[jrow]*x[5*idx[jrow]];
769       sum2 += v[jrow]*x[5*idx[jrow]+1];
770       sum3 += v[jrow]*x[5*idx[jrow]+2];
771       sum4 += v[jrow]*x[5*idx[jrow]+3];
772       sum5 += v[jrow]*x[5*idx[jrow]+4];
773       jrow++;
774     }
775     y[5*i]   += sum1;
776     y[5*i+1] += sum2;
777     y[5*i+2] += sum3;
778     y[5*i+3] += sum4;
779     y[5*i+4] += sum5;
780   }
781 
782   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
783   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
784   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
789 {
790   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
791   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
792   const PetscScalar *x,*v;
793   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
794   PetscErrorCode    ierr;
795   PetscInt          n,i;
796   const PetscInt    m = b->AIJ->rmap->n,*idx;
797 
798   PetscFunctionBegin;
799   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
800   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
801   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
802 
803   for (i=0; i<m; i++) {
804     idx    = a->j + a->i[i];
805     v      = a->a + a->i[i];
806     n      = a->i[i+1] - a->i[i];
807     alpha1 = x[5*i];
808     alpha2 = x[5*i+1];
809     alpha3 = x[5*i+2];
810     alpha4 = x[5*i+3];
811     alpha5 = x[5*i+4];
812     while (n-->0) {
813       y[5*(*idx)]   += alpha1*(*v);
814       y[5*(*idx)+1] += alpha2*(*v);
815       y[5*(*idx)+2] += alpha3*(*v);
816       y[5*(*idx)+3] += alpha4*(*v);
817       y[5*(*idx)+4] += alpha5*(*v);
818       idx++; v++;
819     }
820   }
821   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
822   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
823   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
824   PetscFunctionReturn(0);
825 }
826 
827 /* ------------------------------------------------------------------------------*/
828 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
829 {
830   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
831   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
832   const PetscScalar *x,*v;
833   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
834   PetscErrorCode    ierr;
835   PetscInt          nonzerorow=0,n,i,jrow,j;
836   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
837 
838   PetscFunctionBegin;
839   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
840   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
841   idx  = a->j;
842   v    = a->a;
843   ii   = a->i;
844 
845   for (i=0; i<m; i++) {
846     jrow  = ii[i];
847     n     = ii[i+1] - jrow;
848     sum1  = 0.0;
849     sum2  = 0.0;
850     sum3  = 0.0;
851     sum4  = 0.0;
852     sum5  = 0.0;
853     sum6  = 0.0;
854 
855     nonzerorow += (n>0);
856     for (j=0; j<n; j++) {
857       sum1 += v[jrow]*x[6*idx[jrow]];
858       sum2 += v[jrow]*x[6*idx[jrow]+1];
859       sum3 += v[jrow]*x[6*idx[jrow]+2];
860       sum4 += v[jrow]*x[6*idx[jrow]+3];
861       sum5 += v[jrow]*x[6*idx[jrow]+4];
862       sum6 += v[jrow]*x[6*idx[jrow]+5];
863       jrow++;
864     }
865     y[6*i]   = sum1;
866     y[6*i+1] = sum2;
867     y[6*i+2] = sum3;
868     y[6*i+3] = sum4;
869     y[6*i+4] = sum5;
870     y[6*i+5] = sum6;
871   }
872 
873   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
874   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
875   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
876   PetscFunctionReturn(0);
877 }
878 
879 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
880 {
881   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
882   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
883   const PetscScalar *x,*v;
884   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
885   PetscErrorCode    ierr;
886   PetscInt          n,i;
887   const PetscInt    m = b->AIJ->rmap->n,*idx;
888 
889   PetscFunctionBegin;
890   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
891   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
892   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
893 
894   for (i=0; i<m; i++) {
895     idx    = a->j + a->i[i];
896     v      = a->a + a->i[i];
897     n      = a->i[i+1] - a->i[i];
898     alpha1 = x[6*i];
899     alpha2 = x[6*i+1];
900     alpha3 = x[6*i+2];
901     alpha4 = x[6*i+3];
902     alpha5 = x[6*i+4];
903     alpha6 = x[6*i+5];
904     while (n-->0) {
905       y[6*(*idx)]   += alpha1*(*v);
906       y[6*(*idx)+1] += alpha2*(*v);
907       y[6*(*idx)+2] += alpha3*(*v);
908       y[6*(*idx)+3] += alpha4*(*v);
909       y[6*(*idx)+4] += alpha5*(*v);
910       y[6*(*idx)+5] += alpha6*(*v);
911       idx++; v++;
912     }
913   }
914   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
915   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
916   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
917   PetscFunctionReturn(0);
918 }
919 
920 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
921 {
922   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
923   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
924   const PetscScalar *x,*v;
925   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
926   PetscErrorCode    ierr;
927   PetscInt          n,i,jrow,j;
928   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
929 
930   PetscFunctionBegin;
931   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
932   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
933   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
934   idx  = a->j;
935   v    = a->a;
936   ii   = a->i;
937 
938   for (i=0; i<m; i++) {
939     jrow = ii[i];
940     n    = ii[i+1] - jrow;
941     sum1 = 0.0;
942     sum2 = 0.0;
943     sum3 = 0.0;
944     sum4 = 0.0;
945     sum5 = 0.0;
946     sum6 = 0.0;
947     for (j=0; j<n; j++) {
948       sum1 += v[jrow]*x[6*idx[jrow]];
949       sum2 += v[jrow]*x[6*idx[jrow]+1];
950       sum3 += v[jrow]*x[6*idx[jrow]+2];
951       sum4 += v[jrow]*x[6*idx[jrow]+3];
952       sum5 += v[jrow]*x[6*idx[jrow]+4];
953       sum6 += v[jrow]*x[6*idx[jrow]+5];
954       jrow++;
955     }
956     y[6*i]   += sum1;
957     y[6*i+1] += sum2;
958     y[6*i+2] += sum3;
959     y[6*i+3] += sum4;
960     y[6*i+4] += sum5;
961     y[6*i+5] += sum6;
962   }
963 
964   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
965   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
966   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
967   PetscFunctionReturn(0);
968 }
969 
970 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
971 {
972   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
973   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
974   const PetscScalar *x,*v;
975   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
976   PetscErrorCode    ierr;
977   PetscInt          n,i;
978   const PetscInt    m = b->AIJ->rmap->n,*idx;
979 
980   PetscFunctionBegin;
981   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
982   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
984 
985   for (i=0; i<m; i++) {
986     idx    = a->j + a->i[i];
987     v      = a->a + a->i[i];
988     n      = a->i[i+1] - a->i[i];
989     alpha1 = x[6*i];
990     alpha2 = x[6*i+1];
991     alpha3 = x[6*i+2];
992     alpha4 = x[6*i+3];
993     alpha5 = x[6*i+4];
994     alpha6 = x[6*i+5];
995     while (n-->0) {
996       y[6*(*idx)]   += alpha1*(*v);
997       y[6*(*idx)+1] += alpha2*(*v);
998       y[6*(*idx)+2] += alpha3*(*v);
999       y[6*(*idx)+3] += alpha4*(*v);
1000       y[6*(*idx)+4] += alpha5*(*v);
1001       y[6*(*idx)+5] += alpha6*(*v);
1002       idx++; v++;
1003     }
1004   }
1005   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1006   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1007   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 /* ------------------------------------------------------------------------------*/
1012 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1013 {
1014   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1015   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1016   const PetscScalar *x,*v;
1017   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1018   PetscErrorCode    ierr;
1019   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1020   PetscInt          nonzerorow=0,n,i,jrow,j;
1021 
1022   PetscFunctionBegin;
1023   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1024   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1025   idx  = a->j;
1026   v    = a->a;
1027   ii   = a->i;
1028 
1029   for (i=0; i<m; i++) {
1030     jrow  = ii[i];
1031     n     = ii[i+1] - jrow;
1032     sum1  = 0.0;
1033     sum2  = 0.0;
1034     sum3  = 0.0;
1035     sum4  = 0.0;
1036     sum5  = 0.0;
1037     sum6  = 0.0;
1038     sum7  = 0.0;
1039 
1040     nonzerorow += (n>0);
1041     for (j=0; j<n; j++) {
1042       sum1 += v[jrow]*x[7*idx[jrow]];
1043       sum2 += v[jrow]*x[7*idx[jrow]+1];
1044       sum3 += v[jrow]*x[7*idx[jrow]+2];
1045       sum4 += v[jrow]*x[7*idx[jrow]+3];
1046       sum5 += v[jrow]*x[7*idx[jrow]+4];
1047       sum6 += v[jrow]*x[7*idx[jrow]+5];
1048       sum7 += v[jrow]*x[7*idx[jrow]+6];
1049       jrow++;
1050     }
1051     y[7*i]   = sum1;
1052     y[7*i+1] = sum2;
1053     y[7*i+2] = sum3;
1054     y[7*i+3] = sum4;
1055     y[7*i+4] = sum5;
1056     y[7*i+5] = sum6;
1057     y[7*i+6] = sum7;
1058   }
1059 
1060   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1061   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1062   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1067 {
1068   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1069   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1070   const PetscScalar *x,*v;
1071   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1072   PetscErrorCode    ierr;
1073   const PetscInt    m = b->AIJ->rmap->n,*idx;
1074   PetscInt          n,i;
1075 
1076   PetscFunctionBegin;
1077   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1078   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1079   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1080 
1081   for (i=0; i<m; i++) {
1082     idx    = a->j + a->i[i];
1083     v      = a->a + a->i[i];
1084     n      = a->i[i+1] - a->i[i];
1085     alpha1 = x[7*i];
1086     alpha2 = x[7*i+1];
1087     alpha3 = x[7*i+2];
1088     alpha4 = x[7*i+3];
1089     alpha5 = x[7*i+4];
1090     alpha6 = x[7*i+5];
1091     alpha7 = x[7*i+6];
1092     while (n-->0) {
1093       y[7*(*idx)]   += alpha1*(*v);
1094       y[7*(*idx)+1] += alpha2*(*v);
1095       y[7*(*idx)+2] += alpha3*(*v);
1096       y[7*(*idx)+3] += alpha4*(*v);
1097       y[7*(*idx)+4] += alpha5*(*v);
1098       y[7*(*idx)+5] += alpha6*(*v);
1099       y[7*(*idx)+6] += alpha7*(*v);
1100       idx++; v++;
1101     }
1102   }
1103   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1104   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1105   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1110 {
1111   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1112   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1113   const PetscScalar *x,*v;
1114   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1115   PetscErrorCode    ierr;
1116   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1117   PetscInt          n,i,jrow,j;
1118 
1119   PetscFunctionBegin;
1120   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1121   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1122   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1123   idx  = a->j;
1124   v    = a->a;
1125   ii   = a->i;
1126 
1127   for (i=0; i<m; i++) {
1128     jrow = ii[i];
1129     n    = ii[i+1] - jrow;
1130     sum1 = 0.0;
1131     sum2 = 0.0;
1132     sum3 = 0.0;
1133     sum4 = 0.0;
1134     sum5 = 0.0;
1135     sum6 = 0.0;
1136     sum7 = 0.0;
1137     for (j=0; j<n; j++) {
1138       sum1 += v[jrow]*x[7*idx[jrow]];
1139       sum2 += v[jrow]*x[7*idx[jrow]+1];
1140       sum3 += v[jrow]*x[7*idx[jrow]+2];
1141       sum4 += v[jrow]*x[7*idx[jrow]+3];
1142       sum5 += v[jrow]*x[7*idx[jrow]+4];
1143       sum6 += v[jrow]*x[7*idx[jrow]+5];
1144       sum7 += v[jrow]*x[7*idx[jrow]+6];
1145       jrow++;
1146     }
1147     y[7*i]   += sum1;
1148     y[7*i+1] += sum2;
1149     y[7*i+2] += sum3;
1150     y[7*i+3] += sum4;
1151     y[7*i+4] += sum5;
1152     y[7*i+5] += sum6;
1153     y[7*i+6] += sum7;
1154   }
1155 
1156   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1157   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1158   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1163 {
1164   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1165   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1166   const PetscScalar *x,*v;
1167   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1168   PetscErrorCode    ierr;
1169   const PetscInt    m = b->AIJ->rmap->n,*idx;
1170   PetscInt          n,i;
1171 
1172   PetscFunctionBegin;
1173   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1174   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1175   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1176   for (i=0; i<m; i++) {
1177     idx    = a->j + a->i[i];
1178     v      = a->a + a->i[i];
1179     n      = a->i[i+1] - a->i[i];
1180     alpha1 = x[7*i];
1181     alpha2 = x[7*i+1];
1182     alpha3 = x[7*i+2];
1183     alpha4 = x[7*i+3];
1184     alpha5 = x[7*i+4];
1185     alpha6 = x[7*i+5];
1186     alpha7 = x[7*i+6];
1187     while (n-->0) {
1188       y[7*(*idx)]   += alpha1*(*v);
1189       y[7*(*idx)+1] += alpha2*(*v);
1190       y[7*(*idx)+2] += alpha3*(*v);
1191       y[7*(*idx)+3] += alpha4*(*v);
1192       y[7*(*idx)+4] += alpha5*(*v);
1193       y[7*(*idx)+5] += alpha6*(*v);
1194       y[7*(*idx)+6] += alpha7*(*v);
1195       idx++; v++;
1196     }
1197   }
1198   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1199   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1200   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1205 {
1206   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1207   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1208   const PetscScalar *x,*v;
1209   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1210   PetscErrorCode    ierr;
1211   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1212   PetscInt          nonzerorow=0,n,i,jrow,j;
1213 
1214   PetscFunctionBegin;
1215   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1216   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1217   idx  = a->j;
1218   v    = a->a;
1219   ii   = a->i;
1220 
1221   for (i=0; i<m; i++) {
1222     jrow  = ii[i];
1223     n     = ii[i+1] - jrow;
1224     sum1  = 0.0;
1225     sum2  = 0.0;
1226     sum3  = 0.0;
1227     sum4  = 0.0;
1228     sum5  = 0.0;
1229     sum6  = 0.0;
1230     sum7  = 0.0;
1231     sum8  = 0.0;
1232 
1233     nonzerorow += (n>0);
1234     for (j=0; j<n; j++) {
1235       sum1 += v[jrow]*x[8*idx[jrow]];
1236       sum2 += v[jrow]*x[8*idx[jrow]+1];
1237       sum3 += v[jrow]*x[8*idx[jrow]+2];
1238       sum4 += v[jrow]*x[8*idx[jrow]+3];
1239       sum5 += v[jrow]*x[8*idx[jrow]+4];
1240       sum6 += v[jrow]*x[8*idx[jrow]+5];
1241       sum7 += v[jrow]*x[8*idx[jrow]+6];
1242       sum8 += v[jrow]*x[8*idx[jrow]+7];
1243       jrow++;
1244     }
1245     y[8*i]   = sum1;
1246     y[8*i+1] = sum2;
1247     y[8*i+2] = sum3;
1248     y[8*i+3] = sum4;
1249     y[8*i+4] = sum5;
1250     y[8*i+5] = sum6;
1251     y[8*i+6] = sum7;
1252     y[8*i+7] = sum8;
1253   }
1254 
1255   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1256   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1257   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1262 {
1263   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1264   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1265   const PetscScalar *x,*v;
1266   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1267   PetscErrorCode    ierr;
1268   const PetscInt    m = b->AIJ->rmap->n,*idx;
1269   PetscInt          n,i;
1270 
1271   PetscFunctionBegin;
1272   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1273   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1274   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1275 
1276   for (i=0; i<m; i++) {
1277     idx    = a->j + a->i[i];
1278     v      = a->a + a->i[i];
1279     n      = a->i[i+1] - a->i[i];
1280     alpha1 = x[8*i];
1281     alpha2 = x[8*i+1];
1282     alpha3 = x[8*i+2];
1283     alpha4 = x[8*i+3];
1284     alpha5 = x[8*i+4];
1285     alpha6 = x[8*i+5];
1286     alpha7 = x[8*i+6];
1287     alpha8 = x[8*i+7];
1288     while (n-->0) {
1289       y[8*(*idx)]   += alpha1*(*v);
1290       y[8*(*idx)+1] += alpha2*(*v);
1291       y[8*(*idx)+2] += alpha3*(*v);
1292       y[8*(*idx)+3] += alpha4*(*v);
1293       y[8*(*idx)+4] += alpha5*(*v);
1294       y[8*(*idx)+5] += alpha6*(*v);
1295       y[8*(*idx)+6] += alpha7*(*v);
1296       y[8*(*idx)+7] += alpha8*(*v);
1297       idx++; v++;
1298     }
1299   }
1300   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1301   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1302   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1307 {
1308   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1309   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1310   const PetscScalar *x,*v;
1311   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1312   PetscErrorCode    ierr;
1313   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1314   PetscInt          n,i,jrow,j;
1315 
1316   PetscFunctionBegin;
1317   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1318   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1319   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1320   idx  = a->j;
1321   v    = a->a;
1322   ii   = a->i;
1323 
1324   for (i=0; i<m; i++) {
1325     jrow = ii[i];
1326     n    = ii[i+1] - jrow;
1327     sum1 = 0.0;
1328     sum2 = 0.0;
1329     sum3 = 0.0;
1330     sum4 = 0.0;
1331     sum5 = 0.0;
1332     sum6 = 0.0;
1333     sum7 = 0.0;
1334     sum8 = 0.0;
1335     for (j=0; j<n; j++) {
1336       sum1 += v[jrow]*x[8*idx[jrow]];
1337       sum2 += v[jrow]*x[8*idx[jrow]+1];
1338       sum3 += v[jrow]*x[8*idx[jrow]+2];
1339       sum4 += v[jrow]*x[8*idx[jrow]+3];
1340       sum5 += v[jrow]*x[8*idx[jrow]+4];
1341       sum6 += v[jrow]*x[8*idx[jrow]+5];
1342       sum7 += v[jrow]*x[8*idx[jrow]+6];
1343       sum8 += v[jrow]*x[8*idx[jrow]+7];
1344       jrow++;
1345     }
1346     y[8*i]   += sum1;
1347     y[8*i+1] += sum2;
1348     y[8*i+2] += sum3;
1349     y[8*i+3] += sum4;
1350     y[8*i+4] += sum5;
1351     y[8*i+5] += sum6;
1352     y[8*i+6] += sum7;
1353     y[8*i+7] += sum8;
1354   }
1355 
1356   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1357   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1358   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1363 {
1364   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1365   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1366   const PetscScalar *x,*v;
1367   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1368   PetscErrorCode    ierr;
1369   const PetscInt    m = b->AIJ->rmap->n,*idx;
1370   PetscInt          n,i;
1371 
1372   PetscFunctionBegin;
1373   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1374   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1375   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1376   for (i=0; i<m; i++) {
1377     idx    = a->j + a->i[i];
1378     v      = a->a + a->i[i];
1379     n      = a->i[i+1] - a->i[i];
1380     alpha1 = x[8*i];
1381     alpha2 = x[8*i+1];
1382     alpha3 = x[8*i+2];
1383     alpha4 = x[8*i+3];
1384     alpha5 = x[8*i+4];
1385     alpha6 = x[8*i+5];
1386     alpha7 = x[8*i+6];
1387     alpha8 = x[8*i+7];
1388     while (n-->0) {
1389       y[8*(*idx)]   += alpha1*(*v);
1390       y[8*(*idx)+1] += alpha2*(*v);
1391       y[8*(*idx)+2] += alpha3*(*v);
1392       y[8*(*idx)+3] += alpha4*(*v);
1393       y[8*(*idx)+4] += alpha5*(*v);
1394       y[8*(*idx)+5] += alpha6*(*v);
1395       y[8*(*idx)+6] += alpha7*(*v);
1396       y[8*(*idx)+7] += alpha8*(*v);
1397       idx++; v++;
1398     }
1399   }
1400   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1401   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1402   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 /* ------------------------------------------------------------------------------*/
1407 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1408 {
1409   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1410   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1411   const PetscScalar *x,*v;
1412   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1413   PetscErrorCode    ierr;
1414   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1415   PetscInt          nonzerorow=0,n,i,jrow,j;
1416 
1417   PetscFunctionBegin;
1418   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1419   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1420   idx  = a->j;
1421   v    = a->a;
1422   ii   = a->i;
1423 
1424   for (i=0; i<m; i++) {
1425     jrow  = ii[i];
1426     n     = ii[i+1] - jrow;
1427     sum1  = 0.0;
1428     sum2  = 0.0;
1429     sum3  = 0.0;
1430     sum4  = 0.0;
1431     sum5  = 0.0;
1432     sum6  = 0.0;
1433     sum7  = 0.0;
1434     sum8  = 0.0;
1435     sum9  = 0.0;
1436 
1437     nonzerorow += (n>0);
1438     for (j=0; j<n; j++) {
1439       sum1 += v[jrow]*x[9*idx[jrow]];
1440       sum2 += v[jrow]*x[9*idx[jrow]+1];
1441       sum3 += v[jrow]*x[9*idx[jrow]+2];
1442       sum4 += v[jrow]*x[9*idx[jrow]+3];
1443       sum5 += v[jrow]*x[9*idx[jrow]+4];
1444       sum6 += v[jrow]*x[9*idx[jrow]+5];
1445       sum7 += v[jrow]*x[9*idx[jrow]+6];
1446       sum8 += v[jrow]*x[9*idx[jrow]+7];
1447       sum9 += v[jrow]*x[9*idx[jrow]+8];
1448       jrow++;
1449     }
1450     y[9*i]   = sum1;
1451     y[9*i+1] = sum2;
1452     y[9*i+2] = sum3;
1453     y[9*i+3] = sum4;
1454     y[9*i+4] = sum5;
1455     y[9*i+5] = sum6;
1456     y[9*i+6] = sum7;
1457     y[9*i+7] = sum8;
1458     y[9*i+8] = sum9;
1459   }
1460 
1461   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1462   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1463   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 /* ------------------------------------------------------------------------------*/
1468 
1469 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1470 {
1471   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1472   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1473   const PetscScalar *x,*v;
1474   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1475   PetscErrorCode    ierr;
1476   const PetscInt    m = b->AIJ->rmap->n,*idx;
1477   PetscInt          n,i;
1478 
1479   PetscFunctionBegin;
1480   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1481   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1482   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1483 
1484   for (i=0; i<m; i++) {
1485     idx    = a->j + a->i[i];
1486     v      = a->a + a->i[i];
1487     n      = a->i[i+1] - a->i[i];
1488     alpha1 = x[9*i];
1489     alpha2 = x[9*i+1];
1490     alpha3 = x[9*i+2];
1491     alpha4 = x[9*i+3];
1492     alpha5 = x[9*i+4];
1493     alpha6 = x[9*i+5];
1494     alpha7 = x[9*i+6];
1495     alpha8 = x[9*i+7];
1496     alpha9 = x[9*i+8];
1497     while (n-->0) {
1498       y[9*(*idx)]   += alpha1*(*v);
1499       y[9*(*idx)+1] += alpha2*(*v);
1500       y[9*(*idx)+2] += alpha3*(*v);
1501       y[9*(*idx)+3] += alpha4*(*v);
1502       y[9*(*idx)+4] += alpha5*(*v);
1503       y[9*(*idx)+5] += alpha6*(*v);
1504       y[9*(*idx)+6] += alpha7*(*v);
1505       y[9*(*idx)+7] += alpha8*(*v);
1506       y[9*(*idx)+8] += alpha9*(*v);
1507       idx++; v++;
1508     }
1509   }
1510   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1511   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1512   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1517 {
1518   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1519   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1520   const PetscScalar *x,*v;
1521   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1522   PetscErrorCode    ierr;
1523   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1524   PetscInt          n,i,jrow,j;
1525 
1526   PetscFunctionBegin;
1527   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1528   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1529   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1530   idx  = a->j;
1531   v    = a->a;
1532   ii   = a->i;
1533 
1534   for (i=0; i<m; i++) {
1535     jrow = ii[i];
1536     n    = ii[i+1] - jrow;
1537     sum1 = 0.0;
1538     sum2 = 0.0;
1539     sum3 = 0.0;
1540     sum4 = 0.0;
1541     sum5 = 0.0;
1542     sum6 = 0.0;
1543     sum7 = 0.0;
1544     sum8 = 0.0;
1545     sum9 = 0.0;
1546     for (j=0; j<n; j++) {
1547       sum1 += v[jrow]*x[9*idx[jrow]];
1548       sum2 += v[jrow]*x[9*idx[jrow]+1];
1549       sum3 += v[jrow]*x[9*idx[jrow]+2];
1550       sum4 += v[jrow]*x[9*idx[jrow]+3];
1551       sum5 += v[jrow]*x[9*idx[jrow]+4];
1552       sum6 += v[jrow]*x[9*idx[jrow]+5];
1553       sum7 += v[jrow]*x[9*idx[jrow]+6];
1554       sum8 += v[jrow]*x[9*idx[jrow]+7];
1555       sum9 += v[jrow]*x[9*idx[jrow]+8];
1556       jrow++;
1557     }
1558     y[9*i]   += sum1;
1559     y[9*i+1] += sum2;
1560     y[9*i+2] += sum3;
1561     y[9*i+3] += sum4;
1562     y[9*i+4] += sum5;
1563     y[9*i+5] += sum6;
1564     y[9*i+6] += sum7;
1565     y[9*i+7] += sum8;
1566     y[9*i+8] += sum9;
1567   }
1568 
1569   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1570   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1571   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1576 {
1577   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1578   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1579   const PetscScalar *x,*v;
1580   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1581   PetscErrorCode    ierr;
1582   const PetscInt    m = b->AIJ->rmap->n,*idx;
1583   PetscInt          n,i;
1584 
1585   PetscFunctionBegin;
1586   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1587   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1588   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1589   for (i=0; i<m; i++) {
1590     idx    = a->j + a->i[i];
1591     v      = a->a + a->i[i];
1592     n      = a->i[i+1] - a->i[i];
1593     alpha1 = x[9*i];
1594     alpha2 = x[9*i+1];
1595     alpha3 = x[9*i+2];
1596     alpha4 = x[9*i+3];
1597     alpha5 = x[9*i+4];
1598     alpha6 = x[9*i+5];
1599     alpha7 = x[9*i+6];
1600     alpha8 = x[9*i+7];
1601     alpha9 = x[9*i+8];
1602     while (n-->0) {
1603       y[9*(*idx)]   += alpha1*(*v);
1604       y[9*(*idx)+1] += alpha2*(*v);
1605       y[9*(*idx)+2] += alpha3*(*v);
1606       y[9*(*idx)+3] += alpha4*(*v);
1607       y[9*(*idx)+4] += alpha5*(*v);
1608       y[9*(*idx)+5] += alpha6*(*v);
1609       y[9*(*idx)+6] += alpha7*(*v);
1610       y[9*(*idx)+7] += alpha8*(*v);
1611       y[9*(*idx)+8] += alpha9*(*v);
1612       idx++; v++;
1613     }
1614   }
1615   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1616   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1617   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1618   PetscFunctionReturn(0);
1619 }
1620 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1621 {
1622   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1623   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1624   const PetscScalar *x,*v;
1625   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1626   PetscErrorCode    ierr;
1627   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1628   PetscInt          nonzerorow=0,n,i,jrow,j;
1629 
1630   PetscFunctionBegin;
1631   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1632   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1633   idx  = a->j;
1634   v    = a->a;
1635   ii   = a->i;
1636 
1637   for (i=0; i<m; i++) {
1638     jrow  = ii[i];
1639     n     = ii[i+1] - jrow;
1640     sum1  = 0.0;
1641     sum2  = 0.0;
1642     sum3  = 0.0;
1643     sum4  = 0.0;
1644     sum5  = 0.0;
1645     sum6  = 0.0;
1646     sum7  = 0.0;
1647     sum8  = 0.0;
1648     sum9  = 0.0;
1649     sum10 = 0.0;
1650 
1651     nonzerorow += (n>0);
1652     for (j=0; j<n; j++) {
1653       sum1  += v[jrow]*x[10*idx[jrow]];
1654       sum2  += v[jrow]*x[10*idx[jrow]+1];
1655       sum3  += v[jrow]*x[10*idx[jrow]+2];
1656       sum4  += v[jrow]*x[10*idx[jrow]+3];
1657       sum5  += v[jrow]*x[10*idx[jrow]+4];
1658       sum6  += v[jrow]*x[10*idx[jrow]+5];
1659       sum7  += v[jrow]*x[10*idx[jrow]+6];
1660       sum8  += v[jrow]*x[10*idx[jrow]+7];
1661       sum9  += v[jrow]*x[10*idx[jrow]+8];
1662       sum10 += v[jrow]*x[10*idx[jrow]+9];
1663       jrow++;
1664     }
1665     y[10*i]   = sum1;
1666     y[10*i+1] = sum2;
1667     y[10*i+2] = sum3;
1668     y[10*i+3] = sum4;
1669     y[10*i+4] = sum5;
1670     y[10*i+5] = sum6;
1671     y[10*i+6] = sum7;
1672     y[10*i+7] = sum8;
1673     y[10*i+8] = sum9;
1674     y[10*i+9] = sum10;
1675   }
1676 
1677   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1678   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1679   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1684 {
1685   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1686   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1687   const PetscScalar *x,*v;
1688   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1689   PetscErrorCode    ierr;
1690   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1691   PetscInt          n,i,jrow,j;
1692 
1693   PetscFunctionBegin;
1694   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1695   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1696   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1697   idx  = a->j;
1698   v    = a->a;
1699   ii   = a->i;
1700 
1701   for (i=0; i<m; i++) {
1702     jrow  = ii[i];
1703     n     = ii[i+1] - jrow;
1704     sum1  = 0.0;
1705     sum2  = 0.0;
1706     sum3  = 0.0;
1707     sum4  = 0.0;
1708     sum5  = 0.0;
1709     sum6  = 0.0;
1710     sum7  = 0.0;
1711     sum8  = 0.0;
1712     sum9  = 0.0;
1713     sum10 = 0.0;
1714     for (j=0; j<n; j++) {
1715       sum1  += v[jrow]*x[10*idx[jrow]];
1716       sum2  += v[jrow]*x[10*idx[jrow]+1];
1717       sum3  += v[jrow]*x[10*idx[jrow]+2];
1718       sum4  += v[jrow]*x[10*idx[jrow]+3];
1719       sum5  += v[jrow]*x[10*idx[jrow]+4];
1720       sum6  += v[jrow]*x[10*idx[jrow]+5];
1721       sum7  += v[jrow]*x[10*idx[jrow]+6];
1722       sum8  += v[jrow]*x[10*idx[jrow]+7];
1723       sum9  += v[jrow]*x[10*idx[jrow]+8];
1724       sum10 += v[jrow]*x[10*idx[jrow]+9];
1725       jrow++;
1726     }
1727     y[10*i]   += sum1;
1728     y[10*i+1] += sum2;
1729     y[10*i+2] += sum3;
1730     y[10*i+3] += sum4;
1731     y[10*i+4] += sum5;
1732     y[10*i+5] += sum6;
1733     y[10*i+6] += sum7;
1734     y[10*i+7] += sum8;
1735     y[10*i+8] += sum9;
1736     y[10*i+9] += sum10;
1737   }
1738 
1739   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1740   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1741   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1746 {
1747   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1748   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1749   const PetscScalar *x,*v;
1750   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1751   PetscErrorCode    ierr;
1752   const PetscInt    m = b->AIJ->rmap->n,*idx;
1753   PetscInt          n,i;
1754 
1755   PetscFunctionBegin;
1756   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1757   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1758   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1759 
1760   for (i=0; i<m; i++) {
1761     idx     = a->j + a->i[i];
1762     v       = a->a + a->i[i];
1763     n       = a->i[i+1] - a->i[i];
1764     alpha1  = x[10*i];
1765     alpha2  = x[10*i+1];
1766     alpha3  = x[10*i+2];
1767     alpha4  = x[10*i+3];
1768     alpha5  = x[10*i+4];
1769     alpha6  = x[10*i+5];
1770     alpha7  = x[10*i+6];
1771     alpha8  = x[10*i+7];
1772     alpha9  = x[10*i+8];
1773     alpha10 = x[10*i+9];
1774     while (n-->0) {
1775       y[10*(*idx)]   += alpha1*(*v);
1776       y[10*(*idx)+1] += alpha2*(*v);
1777       y[10*(*idx)+2] += alpha3*(*v);
1778       y[10*(*idx)+3] += alpha4*(*v);
1779       y[10*(*idx)+4] += alpha5*(*v);
1780       y[10*(*idx)+5] += alpha6*(*v);
1781       y[10*(*idx)+6] += alpha7*(*v);
1782       y[10*(*idx)+7] += alpha8*(*v);
1783       y[10*(*idx)+8] += alpha9*(*v);
1784       y[10*(*idx)+9] += alpha10*(*v);
1785       idx++; v++;
1786     }
1787   }
1788   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1789   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1790   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1795 {
1796   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1797   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1798   const PetscScalar *x,*v;
1799   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1800   PetscErrorCode    ierr;
1801   const PetscInt    m = b->AIJ->rmap->n,*idx;
1802   PetscInt          n,i;
1803 
1804   PetscFunctionBegin;
1805   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1806   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1807   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1808   for (i=0; i<m; i++) {
1809     idx     = a->j + a->i[i];
1810     v       = a->a + a->i[i];
1811     n       = a->i[i+1] - a->i[i];
1812     alpha1  = x[10*i];
1813     alpha2  = x[10*i+1];
1814     alpha3  = x[10*i+2];
1815     alpha4  = x[10*i+3];
1816     alpha5  = x[10*i+4];
1817     alpha6  = x[10*i+5];
1818     alpha7  = x[10*i+6];
1819     alpha8  = x[10*i+7];
1820     alpha9  = x[10*i+8];
1821     alpha10 = x[10*i+9];
1822     while (n-->0) {
1823       y[10*(*idx)]   += alpha1*(*v);
1824       y[10*(*idx)+1] += alpha2*(*v);
1825       y[10*(*idx)+2] += alpha3*(*v);
1826       y[10*(*idx)+3] += alpha4*(*v);
1827       y[10*(*idx)+4] += alpha5*(*v);
1828       y[10*(*idx)+5] += alpha6*(*v);
1829       y[10*(*idx)+6] += alpha7*(*v);
1830       y[10*(*idx)+7] += alpha8*(*v);
1831       y[10*(*idx)+8] += alpha9*(*v);
1832       y[10*(*idx)+9] += alpha10*(*v);
1833       idx++; v++;
1834     }
1835   }
1836   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1837   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1838   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 
1843 /*--------------------------------------------------------------------------------------------*/
1844 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1845 {
1846   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1847   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1848   const PetscScalar *x,*v;
1849   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1850   PetscErrorCode    ierr;
1851   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1852   PetscInt          nonzerorow=0,n,i,jrow,j;
1853 
1854   PetscFunctionBegin;
1855   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1856   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1857   idx  = a->j;
1858   v    = a->a;
1859   ii   = a->i;
1860 
1861   for (i=0; i<m; i++) {
1862     jrow  = ii[i];
1863     n     = ii[i+1] - jrow;
1864     sum1  = 0.0;
1865     sum2  = 0.0;
1866     sum3  = 0.0;
1867     sum4  = 0.0;
1868     sum5  = 0.0;
1869     sum6  = 0.0;
1870     sum7  = 0.0;
1871     sum8  = 0.0;
1872     sum9  = 0.0;
1873     sum10 = 0.0;
1874     sum11 = 0.0;
1875 
1876     nonzerorow += (n>0);
1877     for (j=0; j<n; j++) {
1878       sum1  += v[jrow]*x[11*idx[jrow]];
1879       sum2  += v[jrow]*x[11*idx[jrow]+1];
1880       sum3  += v[jrow]*x[11*idx[jrow]+2];
1881       sum4  += v[jrow]*x[11*idx[jrow]+3];
1882       sum5  += v[jrow]*x[11*idx[jrow]+4];
1883       sum6  += v[jrow]*x[11*idx[jrow]+5];
1884       sum7  += v[jrow]*x[11*idx[jrow]+6];
1885       sum8  += v[jrow]*x[11*idx[jrow]+7];
1886       sum9  += v[jrow]*x[11*idx[jrow]+8];
1887       sum10 += v[jrow]*x[11*idx[jrow]+9];
1888       sum11 += v[jrow]*x[11*idx[jrow]+10];
1889       jrow++;
1890     }
1891     y[11*i]    = sum1;
1892     y[11*i+1]  = sum2;
1893     y[11*i+2]  = sum3;
1894     y[11*i+3]  = sum4;
1895     y[11*i+4]  = sum5;
1896     y[11*i+5]  = sum6;
1897     y[11*i+6]  = sum7;
1898     y[11*i+7]  = sum8;
1899     y[11*i+8]  = sum9;
1900     y[11*i+9]  = sum10;
1901     y[11*i+10] = sum11;
1902   }
1903 
1904   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1905   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1906   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1911 {
1912   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1913   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1914   const PetscScalar *x,*v;
1915   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1916   PetscErrorCode    ierr;
1917   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1918   PetscInt          n,i,jrow,j;
1919 
1920   PetscFunctionBegin;
1921   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1922   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1923   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1924   idx  = a->j;
1925   v    = a->a;
1926   ii   = a->i;
1927 
1928   for (i=0; i<m; i++) {
1929     jrow  = ii[i];
1930     n     = ii[i+1] - jrow;
1931     sum1  = 0.0;
1932     sum2  = 0.0;
1933     sum3  = 0.0;
1934     sum4  = 0.0;
1935     sum5  = 0.0;
1936     sum6  = 0.0;
1937     sum7  = 0.0;
1938     sum8  = 0.0;
1939     sum9  = 0.0;
1940     sum10 = 0.0;
1941     sum11 = 0.0;
1942     for (j=0; j<n; j++) {
1943       sum1  += v[jrow]*x[11*idx[jrow]];
1944       sum2  += v[jrow]*x[11*idx[jrow]+1];
1945       sum3  += v[jrow]*x[11*idx[jrow]+2];
1946       sum4  += v[jrow]*x[11*idx[jrow]+3];
1947       sum5  += v[jrow]*x[11*idx[jrow]+4];
1948       sum6  += v[jrow]*x[11*idx[jrow]+5];
1949       sum7  += v[jrow]*x[11*idx[jrow]+6];
1950       sum8  += v[jrow]*x[11*idx[jrow]+7];
1951       sum9  += v[jrow]*x[11*idx[jrow]+8];
1952       sum10 += v[jrow]*x[11*idx[jrow]+9];
1953       sum11 += v[jrow]*x[11*idx[jrow]+10];
1954       jrow++;
1955     }
1956     y[11*i]    += sum1;
1957     y[11*i+1]  += sum2;
1958     y[11*i+2]  += sum3;
1959     y[11*i+3]  += sum4;
1960     y[11*i+4]  += sum5;
1961     y[11*i+5]  += sum6;
1962     y[11*i+6]  += sum7;
1963     y[11*i+7]  += sum8;
1964     y[11*i+8]  += sum9;
1965     y[11*i+9]  += sum10;
1966     y[11*i+10] += sum11;
1967   }
1968 
1969   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
1970   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1971   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1976 {
1977   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1978   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1979   const PetscScalar *x,*v;
1980   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1981   PetscErrorCode    ierr;
1982   const PetscInt    m = b->AIJ->rmap->n,*idx;
1983   PetscInt          n,i;
1984 
1985   PetscFunctionBegin;
1986   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1987   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1988   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1989 
1990   for (i=0; i<m; i++) {
1991     idx     = a->j + a->i[i];
1992     v       = a->a + a->i[i];
1993     n       = a->i[i+1] - a->i[i];
1994     alpha1  = x[11*i];
1995     alpha2  = x[11*i+1];
1996     alpha3  = x[11*i+2];
1997     alpha4  = x[11*i+3];
1998     alpha5  = x[11*i+4];
1999     alpha6  = x[11*i+5];
2000     alpha7  = x[11*i+6];
2001     alpha8  = x[11*i+7];
2002     alpha9  = x[11*i+8];
2003     alpha10 = x[11*i+9];
2004     alpha11 = x[11*i+10];
2005     while (n-->0) {
2006       y[11*(*idx)]    += alpha1*(*v);
2007       y[11*(*idx)+1]  += alpha2*(*v);
2008       y[11*(*idx)+2]  += alpha3*(*v);
2009       y[11*(*idx)+3]  += alpha4*(*v);
2010       y[11*(*idx)+4]  += alpha5*(*v);
2011       y[11*(*idx)+5]  += alpha6*(*v);
2012       y[11*(*idx)+6]  += alpha7*(*v);
2013       y[11*(*idx)+7]  += alpha8*(*v);
2014       y[11*(*idx)+8]  += alpha9*(*v);
2015       y[11*(*idx)+9]  += alpha10*(*v);
2016       y[11*(*idx)+10] += alpha11*(*v);
2017       idx++; v++;
2018     }
2019   }
2020   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2021   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2022   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2027 {
2028   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2029   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2030   const PetscScalar *x,*v;
2031   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2032   PetscErrorCode    ierr;
2033   const PetscInt    m = b->AIJ->rmap->n,*idx;
2034   PetscInt          n,i;
2035 
2036   PetscFunctionBegin;
2037   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2038   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2039   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2040   for (i=0; i<m; i++) {
2041     idx     = a->j + a->i[i];
2042     v       = a->a + a->i[i];
2043     n       = a->i[i+1] - a->i[i];
2044     alpha1  = x[11*i];
2045     alpha2  = x[11*i+1];
2046     alpha3  = x[11*i+2];
2047     alpha4  = x[11*i+3];
2048     alpha5  = x[11*i+4];
2049     alpha6  = x[11*i+5];
2050     alpha7  = x[11*i+6];
2051     alpha8  = x[11*i+7];
2052     alpha9  = x[11*i+8];
2053     alpha10 = x[11*i+9];
2054     alpha11 = x[11*i+10];
2055     while (n-->0) {
2056       y[11*(*idx)]    += alpha1*(*v);
2057       y[11*(*idx)+1]  += alpha2*(*v);
2058       y[11*(*idx)+2]  += alpha3*(*v);
2059       y[11*(*idx)+3]  += alpha4*(*v);
2060       y[11*(*idx)+4]  += alpha5*(*v);
2061       y[11*(*idx)+5]  += alpha6*(*v);
2062       y[11*(*idx)+6]  += alpha7*(*v);
2063       y[11*(*idx)+7]  += alpha8*(*v);
2064       y[11*(*idx)+8]  += alpha9*(*v);
2065       y[11*(*idx)+9]  += alpha10*(*v);
2066       y[11*(*idx)+10] += alpha11*(*v);
2067       idx++; v++;
2068     }
2069   }
2070   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2071   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2072   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 
2077 /*--------------------------------------------------------------------------------------------*/
2078 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2079 {
2080   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2081   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2082   const PetscScalar *x,*v;
2083   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2084   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2085   PetscErrorCode    ierr;
2086   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2087   PetscInt          nonzerorow=0,n,i,jrow,j;
2088 
2089   PetscFunctionBegin;
2090   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2091   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2092   idx  = a->j;
2093   v    = a->a;
2094   ii   = a->i;
2095 
2096   for (i=0; i<m; i++) {
2097     jrow  = ii[i];
2098     n     = ii[i+1] - jrow;
2099     sum1  = 0.0;
2100     sum2  = 0.0;
2101     sum3  = 0.0;
2102     sum4  = 0.0;
2103     sum5  = 0.0;
2104     sum6  = 0.0;
2105     sum7  = 0.0;
2106     sum8  = 0.0;
2107     sum9  = 0.0;
2108     sum10 = 0.0;
2109     sum11 = 0.0;
2110     sum12 = 0.0;
2111     sum13 = 0.0;
2112     sum14 = 0.0;
2113     sum15 = 0.0;
2114     sum16 = 0.0;
2115 
2116     nonzerorow += (n>0);
2117     for (j=0; j<n; j++) {
2118       sum1  += v[jrow]*x[16*idx[jrow]];
2119       sum2  += v[jrow]*x[16*idx[jrow]+1];
2120       sum3  += v[jrow]*x[16*idx[jrow]+2];
2121       sum4  += v[jrow]*x[16*idx[jrow]+3];
2122       sum5  += v[jrow]*x[16*idx[jrow]+4];
2123       sum6  += v[jrow]*x[16*idx[jrow]+5];
2124       sum7  += v[jrow]*x[16*idx[jrow]+6];
2125       sum8  += v[jrow]*x[16*idx[jrow]+7];
2126       sum9  += v[jrow]*x[16*idx[jrow]+8];
2127       sum10 += v[jrow]*x[16*idx[jrow]+9];
2128       sum11 += v[jrow]*x[16*idx[jrow]+10];
2129       sum12 += v[jrow]*x[16*idx[jrow]+11];
2130       sum13 += v[jrow]*x[16*idx[jrow]+12];
2131       sum14 += v[jrow]*x[16*idx[jrow]+13];
2132       sum15 += v[jrow]*x[16*idx[jrow]+14];
2133       sum16 += v[jrow]*x[16*idx[jrow]+15];
2134       jrow++;
2135     }
2136     y[16*i]    = sum1;
2137     y[16*i+1]  = sum2;
2138     y[16*i+2]  = sum3;
2139     y[16*i+3]  = sum4;
2140     y[16*i+4]  = sum5;
2141     y[16*i+5]  = sum6;
2142     y[16*i+6]  = sum7;
2143     y[16*i+7]  = sum8;
2144     y[16*i+8]  = sum9;
2145     y[16*i+9]  = sum10;
2146     y[16*i+10] = sum11;
2147     y[16*i+11] = sum12;
2148     y[16*i+12] = sum13;
2149     y[16*i+13] = sum14;
2150     y[16*i+14] = sum15;
2151     y[16*i+15] = sum16;
2152   }
2153 
2154   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2155   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2156   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2161 {
2162   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2163   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2164   const PetscScalar *x,*v;
2165   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2166   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2167   PetscErrorCode    ierr;
2168   const PetscInt    m = b->AIJ->rmap->n,*idx;
2169   PetscInt          n,i;
2170 
2171   PetscFunctionBegin;
2172   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2173   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2174   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2175 
2176   for (i=0; i<m; i++) {
2177     idx     = a->j + a->i[i];
2178     v       = a->a + a->i[i];
2179     n       = a->i[i+1] - a->i[i];
2180     alpha1  = x[16*i];
2181     alpha2  = x[16*i+1];
2182     alpha3  = x[16*i+2];
2183     alpha4  = x[16*i+3];
2184     alpha5  = x[16*i+4];
2185     alpha6  = x[16*i+5];
2186     alpha7  = x[16*i+6];
2187     alpha8  = x[16*i+7];
2188     alpha9  = x[16*i+8];
2189     alpha10 = x[16*i+9];
2190     alpha11 = x[16*i+10];
2191     alpha12 = x[16*i+11];
2192     alpha13 = x[16*i+12];
2193     alpha14 = x[16*i+13];
2194     alpha15 = x[16*i+14];
2195     alpha16 = x[16*i+15];
2196     while (n-->0) {
2197       y[16*(*idx)]    += alpha1*(*v);
2198       y[16*(*idx)+1]  += alpha2*(*v);
2199       y[16*(*idx)+2]  += alpha3*(*v);
2200       y[16*(*idx)+3]  += alpha4*(*v);
2201       y[16*(*idx)+4]  += alpha5*(*v);
2202       y[16*(*idx)+5]  += alpha6*(*v);
2203       y[16*(*idx)+6]  += alpha7*(*v);
2204       y[16*(*idx)+7]  += alpha8*(*v);
2205       y[16*(*idx)+8]  += alpha9*(*v);
2206       y[16*(*idx)+9]  += alpha10*(*v);
2207       y[16*(*idx)+10] += alpha11*(*v);
2208       y[16*(*idx)+11] += alpha12*(*v);
2209       y[16*(*idx)+12] += alpha13*(*v);
2210       y[16*(*idx)+13] += alpha14*(*v);
2211       y[16*(*idx)+14] += alpha15*(*v);
2212       y[16*(*idx)+15] += alpha16*(*v);
2213       idx++; v++;
2214     }
2215   }
2216   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2217   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2218   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2223 {
2224   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2225   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2226   const PetscScalar *x,*v;
2227   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2228   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2229   PetscErrorCode    ierr;
2230   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2231   PetscInt          n,i,jrow,j;
2232 
2233   PetscFunctionBegin;
2234   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2235   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2236   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2237   idx  = a->j;
2238   v    = a->a;
2239   ii   = a->i;
2240 
2241   for (i=0; i<m; i++) {
2242     jrow  = ii[i];
2243     n     = ii[i+1] - jrow;
2244     sum1  = 0.0;
2245     sum2  = 0.0;
2246     sum3  = 0.0;
2247     sum4  = 0.0;
2248     sum5  = 0.0;
2249     sum6  = 0.0;
2250     sum7  = 0.0;
2251     sum8  = 0.0;
2252     sum9  = 0.0;
2253     sum10 = 0.0;
2254     sum11 = 0.0;
2255     sum12 = 0.0;
2256     sum13 = 0.0;
2257     sum14 = 0.0;
2258     sum15 = 0.0;
2259     sum16 = 0.0;
2260     for (j=0; j<n; j++) {
2261       sum1  += v[jrow]*x[16*idx[jrow]];
2262       sum2  += v[jrow]*x[16*idx[jrow]+1];
2263       sum3  += v[jrow]*x[16*idx[jrow]+2];
2264       sum4  += v[jrow]*x[16*idx[jrow]+3];
2265       sum5  += v[jrow]*x[16*idx[jrow]+4];
2266       sum6  += v[jrow]*x[16*idx[jrow]+5];
2267       sum7  += v[jrow]*x[16*idx[jrow]+6];
2268       sum8  += v[jrow]*x[16*idx[jrow]+7];
2269       sum9  += v[jrow]*x[16*idx[jrow]+8];
2270       sum10 += v[jrow]*x[16*idx[jrow]+9];
2271       sum11 += v[jrow]*x[16*idx[jrow]+10];
2272       sum12 += v[jrow]*x[16*idx[jrow]+11];
2273       sum13 += v[jrow]*x[16*idx[jrow]+12];
2274       sum14 += v[jrow]*x[16*idx[jrow]+13];
2275       sum15 += v[jrow]*x[16*idx[jrow]+14];
2276       sum16 += v[jrow]*x[16*idx[jrow]+15];
2277       jrow++;
2278     }
2279     y[16*i]    += sum1;
2280     y[16*i+1]  += sum2;
2281     y[16*i+2]  += sum3;
2282     y[16*i+3]  += sum4;
2283     y[16*i+4]  += sum5;
2284     y[16*i+5]  += sum6;
2285     y[16*i+6]  += sum7;
2286     y[16*i+7]  += sum8;
2287     y[16*i+8]  += sum9;
2288     y[16*i+9]  += sum10;
2289     y[16*i+10] += sum11;
2290     y[16*i+11] += sum12;
2291     y[16*i+12] += sum13;
2292     y[16*i+13] += sum14;
2293     y[16*i+14] += sum15;
2294     y[16*i+15] += sum16;
2295   }
2296 
2297   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2298   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2299   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2304 {
2305   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2306   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2307   const PetscScalar *x,*v;
2308   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2309   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2310   PetscErrorCode    ierr;
2311   const PetscInt    m = b->AIJ->rmap->n,*idx;
2312   PetscInt          n,i;
2313 
2314   PetscFunctionBegin;
2315   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2316   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2317   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2318   for (i=0; i<m; i++) {
2319     idx     = a->j + a->i[i];
2320     v       = a->a + a->i[i];
2321     n       = a->i[i+1] - a->i[i];
2322     alpha1  = x[16*i];
2323     alpha2  = x[16*i+1];
2324     alpha3  = x[16*i+2];
2325     alpha4  = x[16*i+3];
2326     alpha5  = x[16*i+4];
2327     alpha6  = x[16*i+5];
2328     alpha7  = x[16*i+6];
2329     alpha8  = x[16*i+7];
2330     alpha9  = x[16*i+8];
2331     alpha10 = x[16*i+9];
2332     alpha11 = x[16*i+10];
2333     alpha12 = x[16*i+11];
2334     alpha13 = x[16*i+12];
2335     alpha14 = x[16*i+13];
2336     alpha15 = x[16*i+14];
2337     alpha16 = x[16*i+15];
2338     while (n-->0) {
2339       y[16*(*idx)]    += alpha1*(*v);
2340       y[16*(*idx)+1]  += alpha2*(*v);
2341       y[16*(*idx)+2]  += alpha3*(*v);
2342       y[16*(*idx)+3]  += alpha4*(*v);
2343       y[16*(*idx)+4]  += alpha5*(*v);
2344       y[16*(*idx)+5]  += alpha6*(*v);
2345       y[16*(*idx)+6]  += alpha7*(*v);
2346       y[16*(*idx)+7]  += alpha8*(*v);
2347       y[16*(*idx)+8]  += alpha9*(*v);
2348       y[16*(*idx)+9]  += alpha10*(*v);
2349       y[16*(*idx)+10] += alpha11*(*v);
2350       y[16*(*idx)+11] += alpha12*(*v);
2351       y[16*(*idx)+12] += alpha13*(*v);
2352       y[16*(*idx)+13] += alpha14*(*v);
2353       y[16*(*idx)+14] += alpha15*(*v);
2354       y[16*(*idx)+15] += alpha16*(*v);
2355       idx++; v++;
2356     }
2357   }
2358   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2359   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2360   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 /*--------------------------------------------------------------------------------------------*/
2365 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2366 {
2367   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2368   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2369   const PetscScalar *x,*v;
2370   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2371   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2372   PetscErrorCode    ierr;
2373   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2374   PetscInt          nonzerorow=0,n,i,jrow,j;
2375 
2376   PetscFunctionBegin;
2377   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2378   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2379   idx  = a->j;
2380   v    = a->a;
2381   ii   = a->i;
2382 
2383   for (i=0; i<m; i++) {
2384     jrow  = ii[i];
2385     n     = ii[i+1] - jrow;
2386     sum1  = 0.0;
2387     sum2  = 0.0;
2388     sum3  = 0.0;
2389     sum4  = 0.0;
2390     sum5  = 0.0;
2391     sum6  = 0.0;
2392     sum7  = 0.0;
2393     sum8  = 0.0;
2394     sum9  = 0.0;
2395     sum10 = 0.0;
2396     sum11 = 0.0;
2397     sum12 = 0.0;
2398     sum13 = 0.0;
2399     sum14 = 0.0;
2400     sum15 = 0.0;
2401     sum16 = 0.0;
2402     sum17 = 0.0;
2403     sum18 = 0.0;
2404 
2405     nonzerorow += (n>0);
2406     for (j=0; j<n; j++) {
2407       sum1  += v[jrow]*x[18*idx[jrow]];
2408       sum2  += v[jrow]*x[18*idx[jrow]+1];
2409       sum3  += v[jrow]*x[18*idx[jrow]+2];
2410       sum4  += v[jrow]*x[18*idx[jrow]+3];
2411       sum5  += v[jrow]*x[18*idx[jrow]+4];
2412       sum6  += v[jrow]*x[18*idx[jrow]+5];
2413       sum7  += v[jrow]*x[18*idx[jrow]+6];
2414       sum8  += v[jrow]*x[18*idx[jrow]+7];
2415       sum9  += v[jrow]*x[18*idx[jrow]+8];
2416       sum10 += v[jrow]*x[18*idx[jrow]+9];
2417       sum11 += v[jrow]*x[18*idx[jrow]+10];
2418       sum12 += v[jrow]*x[18*idx[jrow]+11];
2419       sum13 += v[jrow]*x[18*idx[jrow]+12];
2420       sum14 += v[jrow]*x[18*idx[jrow]+13];
2421       sum15 += v[jrow]*x[18*idx[jrow]+14];
2422       sum16 += v[jrow]*x[18*idx[jrow]+15];
2423       sum17 += v[jrow]*x[18*idx[jrow]+16];
2424       sum18 += v[jrow]*x[18*idx[jrow]+17];
2425       jrow++;
2426     }
2427     y[18*i]    = sum1;
2428     y[18*i+1]  = sum2;
2429     y[18*i+2]  = sum3;
2430     y[18*i+3]  = sum4;
2431     y[18*i+4]  = sum5;
2432     y[18*i+5]  = sum6;
2433     y[18*i+6]  = sum7;
2434     y[18*i+7]  = sum8;
2435     y[18*i+8]  = sum9;
2436     y[18*i+9]  = sum10;
2437     y[18*i+10] = sum11;
2438     y[18*i+11] = sum12;
2439     y[18*i+12] = sum13;
2440     y[18*i+13] = sum14;
2441     y[18*i+14] = sum15;
2442     y[18*i+15] = sum16;
2443     y[18*i+16] = sum17;
2444     y[18*i+17] = sum18;
2445   }
2446 
2447   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2448   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2449   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2454 {
2455   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2456   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2457   const PetscScalar *x,*v;
2458   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2459   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2460   PetscErrorCode    ierr;
2461   const PetscInt    m = b->AIJ->rmap->n,*idx;
2462   PetscInt          n,i;
2463 
2464   PetscFunctionBegin;
2465   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2466   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2467   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2468 
2469   for (i=0; i<m; i++) {
2470     idx     = a->j + a->i[i];
2471     v       = a->a + a->i[i];
2472     n       = a->i[i+1] - a->i[i];
2473     alpha1  = x[18*i];
2474     alpha2  = x[18*i+1];
2475     alpha3  = x[18*i+2];
2476     alpha4  = x[18*i+3];
2477     alpha5  = x[18*i+4];
2478     alpha6  = x[18*i+5];
2479     alpha7  = x[18*i+6];
2480     alpha8  = x[18*i+7];
2481     alpha9  = x[18*i+8];
2482     alpha10 = x[18*i+9];
2483     alpha11 = x[18*i+10];
2484     alpha12 = x[18*i+11];
2485     alpha13 = x[18*i+12];
2486     alpha14 = x[18*i+13];
2487     alpha15 = x[18*i+14];
2488     alpha16 = x[18*i+15];
2489     alpha17 = x[18*i+16];
2490     alpha18 = x[18*i+17];
2491     while (n-->0) {
2492       y[18*(*idx)]    += alpha1*(*v);
2493       y[18*(*idx)+1]  += alpha2*(*v);
2494       y[18*(*idx)+2]  += alpha3*(*v);
2495       y[18*(*idx)+3]  += alpha4*(*v);
2496       y[18*(*idx)+4]  += alpha5*(*v);
2497       y[18*(*idx)+5]  += alpha6*(*v);
2498       y[18*(*idx)+6]  += alpha7*(*v);
2499       y[18*(*idx)+7]  += alpha8*(*v);
2500       y[18*(*idx)+8]  += alpha9*(*v);
2501       y[18*(*idx)+9]  += alpha10*(*v);
2502       y[18*(*idx)+10] += alpha11*(*v);
2503       y[18*(*idx)+11] += alpha12*(*v);
2504       y[18*(*idx)+12] += alpha13*(*v);
2505       y[18*(*idx)+13] += alpha14*(*v);
2506       y[18*(*idx)+14] += alpha15*(*v);
2507       y[18*(*idx)+15] += alpha16*(*v);
2508       y[18*(*idx)+16] += alpha17*(*v);
2509       y[18*(*idx)+17] += alpha18*(*v);
2510       idx++; v++;
2511     }
2512   }
2513   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2514   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2515   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2520 {
2521   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2522   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2523   const PetscScalar *x,*v;
2524   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2525   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2526   PetscErrorCode    ierr;
2527   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2528   PetscInt          n,i,jrow,j;
2529 
2530   PetscFunctionBegin;
2531   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2532   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2533   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2534   idx  = a->j;
2535   v    = a->a;
2536   ii   = a->i;
2537 
2538   for (i=0; i<m; i++) {
2539     jrow  = ii[i];
2540     n     = ii[i+1] - jrow;
2541     sum1  = 0.0;
2542     sum2  = 0.0;
2543     sum3  = 0.0;
2544     sum4  = 0.0;
2545     sum5  = 0.0;
2546     sum6  = 0.0;
2547     sum7  = 0.0;
2548     sum8  = 0.0;
2549     sum9  = 0.0;
2550     sum10 = 0.0;
2551     sum11 = 0.0;
2552     sum12 = 0.0;
2553     sum13 = 0.0;
2554     sum14 = 0.0;
2555     sum15 = 0.0;
2556     sum16 = 0.0;
2557     sum17 = 0.0;
2558     sum18 = 0.0;
2559     for (j=0; j<n; j++) {
2560       sum1  += v[jrow]*x[18*idx[jrow]];
2561       sum2  += v[jrow]*x[18*idx[jrow]+1];
2562       sum3  += v[jrow]*x[18*idx[jrow]+2];
2563       sum4  += v[jrow]*x[18*idx[jrow]+3];
2564       sum5  += v[jrow]*x[18*idx[jrow]+4];
2565       sum6  += v[jrow]*x[18*idx[jrow]+5];
2566       sum7  += v[jrow]*x[18*idx[jrow]+6];
2567       sum8  += v[jrow]*x[18*idx[jrow]+7];
2568       sum9  += v[jrow]*x[18*idx[jrow]+8];
2569       sum10 += v[jrow]*x[18*idx[jrow]+9];
2570       sum11 += v[jrow]*x[18*idx[jrow]+10];
2571       sum12 += v[jrow]*x[18*idx[jrow]+11];
2572       sum13 += v[jrow]*x[18*idx[jrow]+12];
2573       sum14 += v[jrow]*x[18*idx[jrow]+13];
2574       sum15 += v[jrow]*x[18*idx[jrow]+14];
2575       sum16 += v[jrow]*x[18*idx[jrow]+15];
2576       sum17 += v[jrow]*x[18*idx[jrow]+16];
2577       sum18 += v[jrow]*x[18*idx[jrow]+17];
2578       jrow++;
2579     }
2580     y[18*i]    += sum1;
2581     y[18*i+1]  += sum2;
2582     y[18*i+2]  += sum3;
2583     y[18*i+3]  += sum4;
2584     y[18*i+4]  += sum5;
2585     y[18*i+5]  += sum6;
2586     y[18*i+6]  += sum7;
2587     y[18*i+7]  += sum8;
2588     y[18*i+8]  += sum9;
2589     y[18*i+9]  += sum10;
2590     y[18*i+10] += sum11;
2591     y[18*i+11] += sum12;
2592     y[18*i+12] += sum13;
2593     y[18*i+13] += sum14;
2594     y[18*i+14] += sum15;
2595     y[18*i+15] += sum16;
2596     y[18*i+16] += sum17;
2597     y[18*i+17] += sum18;
2598   }
2599 
2600   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2601   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2602   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2607 {
2608   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2609   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2610   const PetscScalar *x,*v;
2611   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2612   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2613   PetscErrorCode    ierr;
2614   const PetscInt    m = b->AIJ->rmap->n,*idx;
2615   PetscInt          n,i;
2616 
2617   PetscFunctionBegin;
2618   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2619   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2620   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2621   for (i=0; i<m; i++) {
2622     idx     = a->j + a->i[i];
2623     v       = a->a + a->i[i];
2624     n       = a->i[i+1] - a->i[i];
2625     alpha1  = x[18*i];
2626     alpha2  = x[18*i+1];
2627     alpha3  = x[18*i+2];
2628     alpha4  = x[18*i+3];
2629     alpha5  = x[18*i+4];
2630     alpha6  = x[18*i+5];
2631     alpha7  = x[18*i+6];
2632     alpha8  = x[18*i+7];
2633     alpha9  = x[18*i+8];
2634     alpha10 = x[18*i+9];
2635     alpha11 = x[18*i+10];
2636     alpha12 = x[18*i+11];
2637     alpha13 = x[18*i+12];
2638     alpha14 = x[18*i+13];
2639     alpha15 = x[18*i+14];
2640     alpha16 = x[18*i+15];
2641     alpha17 = x[18*i+16];
2642     alpha18 = x[18*i+17];
2643     while (n-->0) {
2644       y[18*(*idx)]    += alpha1*(*v);
2645       y[18*(*idx)+1]  += alpha2*(*v);
2646       y[18*(*idx)+2]  += alpha3*(*v);
2647       y[18*(*idx)+3]  += alpha4*(*v);
2648       y[18*(*idx)+4]  += alpha5*(*v);
2649       y[18*(*idx)+5]  += alpha6*(*v);
2650       y[18*(*idx)+6]  += alpha7*(*v);
2651       y[18*(*idx)+7]  += alpha8*(*v);
2652       y[18*(*idx)+8]  += alpha9*(*v);
2653       y[18*(*idx)+9]  += alpha10*(*v);
2654       y[18*(*idx)+10] += alpha11*(*v);
2655       y[18*(*idx)+11] += alpha12*(*v);
2656       y[18*(*idx)+12] += alpha13*(*v);
2657       y[18*(*idx)+13] += alpha14*(*v);
2658       y[18*(*idx)+14] += alpha15*(*v);
2659       y[18*(*idx)+15] += alpha16*(*v);
2660       y[18*(*idx)+16] += alpha17*(*v);
2661       y[18*(*idx)+17] += alpha18*(*v);
2662       idx++; v++;
2663     }
2664   }
2665   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2666   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2667   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2672 {
2673   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2674   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2675   const PetscScalar *x,*v;
2676   PetscScalar       *y,*sums;
2677   PetscErrorCode    ierr;
2678   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2679   PetscInt          n,i,jrow,j,dof = b->dof,k;
2680 
2681   PetscFunctionBegin;
2682   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2683   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2684   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2685   idx  = a->j;
2686   v    = a->a;
2687   ii   = a->i;
2688 
2689   for (i=0; i<m; i++) {
2690     jrow = ii[i];
2691     n    = ii[i+1] - jrow;
2692     sums = y + dof*i;
2693     for (j=0; j<n; j++) {
2694       for (k=0; k<dof; k++) {
2695         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2696       }
2697       jrow++;
2698     }
2699   }
2700 
2701   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2702   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2703   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2704   PetscFunctionReturn(0);
2705 }
2706 
2707 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2708 {
2709   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2710   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2711   const PetscScalar *x,*v;
2712   PetscScalar       *y,*sums;
2713   PetscErrorCode    ierr;
2714   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2715   PetscInt          n,i,jrow,j,dof = b->dof,k;
2716 
2717   PetscFunctionBegin;
2718   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2719   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2720   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2721   idx  = a->j;
2722   v    = a->a;
2723   ii   = a->i;
2724 
2725   for (i=0; i<m; i++) {
2726     jrow = ii[i];
2727     n    = ii[i+1] - jrow;
2728     sums = y + dof*i;
2729     for (j=0; j<n; j++) {
2730       for (k=0; k<dof; k++) {
2731         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2732       }
2733       jrow++;
2734     }
2735   }
2736 
2737   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2738   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2739   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2744 {
2745   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2746   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2747   const PetscScalar *x,*v,*alpha;
2748   PetscScalar       *y;
2749   PetscErrorCode    ierr;
2750   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2751   PetscInt          n,i,k;
2752 
2753   PetscFunctionBegin;
2754   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2755   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2756   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2757   for (i=0; i<m; i++) {
2758     idx   = a->j + a->i[i];
2759     v     = a->a + a->i[i];
2760     n     = a->i[i+1] - a->i[i];
2761     alpha = x + dof*i;
2762     while (n-->0) {
2763       for (k=0; k<dof; k++) {
2764         y[dof*(*idx)+k] += alpha[k]*(*v);
2765       }
2766       idx++; v++;
2767     }
2768   }
2769   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2770   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2771   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2776 {
2777   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2778   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2779   const PetscScalar *x,*v,*alpha;
2780   PetscScalar       *y;
2781   PetscErrorCode    ierr;
2782   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2783   PetscInt          n,i,k;
2784 
2785   PetscFunctionBegin;
2786   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2787   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2788   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2789   for (i=0; i<m; i++) {
2790     idx   = a->j + a->i[i];
2791     v     = a->a + a->i[i];
2792     n     = a->i[i+1] - a->i[i];
2793     alpha = x + dof*i;
2794     while (n-->0) {
2795       for (k=0; k<dof; k++) {
2796         y[dof*(*idx)+k] += alpha[k]*(*v);
2797       }
2798       idx++; v++;
2799     }
2800   }
2801   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2802   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2803   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2804   PetscFunctionReturn(0);
2805 }
2806 
2807 /*===================================================================================*/
2808 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2809 {
2810   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2811   PetscErrorCode ierr;
2812 
2813   PetscFunctionBegin;
2814   /* start the scatter */
2815   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2816   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2817   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2818   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2819   PetscFunctionReturn(0);
2820 }
2821 
2822 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2823 {
2824   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2825   PetscErrorCode ierr;
2826 
2827   PetscFunctionBegin;
2828   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2829   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2830   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2831   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2836 {
2837   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2838   PetscErrorCode ierr;
2839 
2840   PetscFunctionBegin;
2841   /* start the scatter */
2842   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2843   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2844   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2845   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2846   PetscFunctionReturn(0);
2847 }
2848 
2849 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2850 {
2851   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2852   PetscErrorCode ierr;
2853 
2854   PetscFunctionBegin;
2855   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2856   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2857   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2858   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2859   PetscFunctionReturn(0);
2860 }
2861 
2862 /* ----------------------------------------------------------------*/
2863 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2864 {
2865   PetscErrorCode ierr;
2866   Mat_Product    *product = C->product;
2867 
2868   PetscFunctionBegin;
2869   ierr = MatSetType(C,MATSEQAIJ);CHKERRQ(ierr);
2870   if (product->type == MATPRODUCT_PtAP) {
2871     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2872   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2873   PetscFunctionReturn(0);
2874 }
2875 
2876 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2877 {
2878   PetscErrorCode ierr;
2879   Mat_Product    *product = C->product;
2880   PetscBool      flg = PETSC_FALSE;
2881   Mat            A=product->A,P=product->B;
2882   PetscInt       alg=1; /* set default algorithm */
2883 #if !defined(PETSC_HAVE_HYPRE)
2884   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2885   PetscInt       nalg=4;
2886 #else
2887   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2888   PetscInt       nalg=5;
2889 #endif
2890 
2891   PetscFunctionBegin;
2892   ierr = MatSetType(C,MATMPIAIJ);CHKERRQ(ierr);
2893   if (product->type != MATPRODUCT_PtAP) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);
2894 
2895   /* PtAP */
2896   /* Check matrix local sizes */
2897   if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
2898   if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
2899 
2900   /* Set the default algorithm */
2901   ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr);
2902   if (flg) {
2903     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2904   }
2905 
2906   /* Get runtime option */
2907   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
2908   ierr = PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2909   if (flg) {
2910     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2911   }
2912   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2913 
2914   ierr = PetscStrcmp(C->product->alg,"allatonce",&flg);CHKERRQ(ierr);
2915   if (flg) {
2916     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2917     PetscFunctionReturn(0);
2918   }
2919 
2920   ierr = PetscStrcmp(C->product->alg,"allatonce_merged",&flg);CHKERRQ(ierr);
2921   if (flg) {
2922     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2923     PetscFunctionReturn(0);
2924   }
2925 
2926   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2927   ierr = PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ");
2928   ierr = MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);CHKERRQ(ierr);
2929   ierr = (C->ops->productsetfromoptions)(C);CHKERRQ(ierr);
2930   PetscFunctionReturn(0);
2931 }
2932 
2933 /* ----------------------------------------------------------------*/
2934 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2935 {
2936   PetscErrorCode     ierr;
2937   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2938   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2939   Mat                P         =pp->AIJ;
2940   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2941   PetscInt           *pti,*ptj,*ptJ;
2942   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2943   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2944   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2945   MatScalar          *ca;
2946   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2947 
2948   PetscFunctionBegin;
2949   /* Get ij structure of P^T */
2950   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2951 
2952   cn = pn*ppdof;
2953   /* Allocate ci array, arrays for fill computation and */
2954   /* free space for accumulating nonzero column info */
2955   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
2956   ci[0] = 0;
2957 
2958   /* Work arrays for rows of P^T*A */
2959   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
2960   ierr = PetscArrayzero(ptadenserow,an);CHKERRQ(ierr);
2961   ierr = PetscArrayzero(denserow,cn);CHKERRQ(ierr);
2962 
2963   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2964   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2965   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2966   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
2967   current_space = free_space;
2968 
2969   /* Determine symbolic info for each row of C: */
2970   for (i=0; i<pn; i++) {
2971     ptnzi = pti[i+1] - pti[i];
2972     ptJ   = ptj + pti[i];
2973     for (dof=0; dof<ppdof; dof++) {
2974       ptanzi = 0;
2975       /* Determine symbolic row of PtA: */
2976       for (j=0; j<ptnzi; j++) {
2977         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2978         arow = ptJ[j]*ppdof + dof;
2979         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2980         anzj = ai[arow+1] - ai[arow];
2981         ajj  = aj + ai[arow];
2982         for (k=0; k<anzj; k++) {
2983           if (!ptadenserow[ajj[k]]) {
2984             ptadenserow[ajj[k]]    = -1;
2985             ptasparserow[ptanzi++] = ajj[k];
2986           }
2987         }
2988       }
2989       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2990       ptaj = ptasparserow;
2991       cnzi = 0;
2992       for (j=0; j<ptanzi; j++) {
2993         /* Get offset within block of P */
2994         pshift = *ptaj%ppdof;
2995         /* Get block row of P */
2996         prow = (*ptaj++)/ppdof; /* integer division */
2997         /* P has same number of nonzeros per row as the compressed form */
2998         pnzj = pi[prow+1] - pi[prow];
2999         pjj  = pj + pi[prow];
3000         for (k=0;k<pnzj;k++) {
3001           /* Locations in C are shifted by the offset within the block */
3002           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3003           if (!denserow[pjj[k]*ppdof+pshift]) {
3004             denserow[pjj[k]*ppdof+pshift] = -1;
3005             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3006           }
3007         }
3008       }
3009 
3010       /* sort sparserow */
3011       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
3012 
3013       /* If free space is not available, make more free space */
3014       /* Double the amount of total space in the list */
3015       if (current_space->local_remaining<cnzi) {
3016         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
3017       }
3018 
3019       /* Copy data into free space, and zero out denserows */
3020       ierr = PetscArraycpy(current_space->array,sparserow,cnzi);CHKERRQ(ierr);
3021 
3022       current_space->array           += cnzi;
3023       current_space->local_used      += cnzi;
3024       current_space->local_remaining -= cnzi;
3025 
3026       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3027       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
3028 
3029       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3030       /*        For now, we will recompute what is needed. */
3031       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3032     }
3033   }
3034   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3035   /* Allocate space for cj, initialize cj, and */
3036   /* destroy list of free space and other temporary array(s) */
3037   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
3038   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
3039   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
3040 
3041   /* Allocate space for ca */
3042   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
3043 
3044   /* put together the new matrix */
3045   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
3046   ierr = MatSetBlockSize(C,pp->dof);CHKERRQ(ierr);
3047 
3048   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3049   /* Since these are PETSc arrays, change flags to free them as necessary. */
3050   c          = (Mat_SeqAIJ*)(C->data);
3051   c->free_a  = PETSC_TRUE;
3052   c->free_ij = PETSC_TRUE;
3053   c->nonew   = 0;
3054 
3055   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3056   C->ops->productnumeric = MatProductNumeric_PtAP;
3057 
3058   /* Clean up. */
3059   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3060   PetscFunctionReturn(0);
3061 }
3062 
3063 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3064 {
3065   /* This routine requires testing -- first draft only */
3066   PetscErrorCode  ierr;
3067   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3068   Mat             P  =pp->AIJ;
3069   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3070   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3071   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3072   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3073   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3074   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3075   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3076   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3077   MatScalar       *ca=c->a,*caj,*apa;
3078 
3079   PetscFunctionBegin;
3080   /* Allocate temporary array for storage of one row of A*P */
3081   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
3082 
3083   /* Clear old values in C */
3084   ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr);
3085 
3086   for (i=0; i<am; i++) {
3087     /* Form sparse row of A*P */
3088     anzi  = ai[i+1] - ai[i];
3089     apnzj = 0;
3090     for (j=0; j<anzi; j++) {
3091       /* Get offset within block of P */
3092       pshift = *aj%ppdof;
3093       /* Get block row of P */
3094       prow = *aj++/ppdof;   /* integer division */
3095       pnzj = pi[prow+1] - pi[prow];
3096       pjj  = pj + pi[prow];
3097       paj  = pa + pi[prow];
3098       for (k=0; k<pnzj; k++) {
3099         poffset = pjj[k]*ppdof+pshift;
3100         if (!apjdense[poffset]) {
3101           apjdense[poffset] = -1;
3102           apj[apnzj++]      = poffset;
3103         }
3104         apa[poffset] += (*aa)*paj[k];
3105       }
3106       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3107       aa++;
3108     }
3109 
3110     /* Sort the j index array for quick sparse axpy. */
3111     /* Note: a array does not need sorting as it is in dense storage locations. */
3112     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3113 
3114     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3115     prow    = i/ppdof; /* integer division */
3116     pshift  = i%ppdof;
3117     poffset = pi[prow];
3118     pnzi    = pi[prow+1] - poffset;
3119     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3120     pJ = pj+poffset;
3121     pA = pa+poffset;
3122     for (j=0; j<pnzi; j++) {
3123       crow = (*pJ)*ppdof+pshift;
3124       cjj  = cj + ci[crow];
3125       caj  = ca + ci[crow];
3126       pJ++;
3127       /* Perform sparse axpy operation.  Note cjj includes apj. */
3128       for (k=0,nextap=0; nextap<apnzj; k++) {
3129         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3130       }
3131       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3132       pA++;
3133     }
3134 
3135     /* Zero the current row info for A*P */
3136     for (j=0; j<apnzj; j++) {
3137       apa[apj[j]]      = 0.;
3138       apjdense[apj[j]] = 0;
3139     }
3140   }
3141 
3142   /* Assemble the final matrix and clean up */
3143   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3144   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3145   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3146   PetscFunctionReturn(0);
3147 }
3148 
3149 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3150 {
3151   PetscErrorCode      ierr;
3152   Mat_Product         *product = C->product;
3153   Mat                 A=product->A,P=product->B;
3154 
3155   PetscFunctionBegin;
3156   ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);CHKERRQ(ierr);
3157   PetscFunctionReturn(0);
3158 }
3159 
3160 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3161 {
3162   PetscFunctionBegin;
3163   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3164   PetscFunctionReturn(0);
3165 }
3166 
3167 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3168 {
3169   PetscFunctionBegin;
3170   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3171   PetscFunctionReturn(0);
3172 }
3173 
3174 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3175 
3176 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3177 {
3178   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3179   PetscErrorCode  ierr;
3180 
3181   PetscFunctionBegin;
3182 
3183   ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);CHKERRQ(ierr);
3184   PetscFunctionReturn(0);
3185 }
3186 
3187 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3188 
3189 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3190 {
3191   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3192   PetscErrorCode  ierr;
3193 
3194   PetscFunctionBegin;
3195   ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr);
3196   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3197   PetscFunctionReturn(0);
3198 }
3199 
3200 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3201 
3202 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3203 {
3204   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3205   PetscErrorCode  ierr;
3206 
3207   PetscFunctionBegin;
3208 
3209   ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);CHKERRQ(ierr);
3210   PetscFunctionReturn(0);
3211 }
3212 
3213 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3214 
3215 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3216 {
3217   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3218   PetscErrorCode  ierr;
3219 
3220   PetscFunctionBegin;
3221 
3222   ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr);
3223   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3224   PetscFunctionReturn(0);
3225 }
3226 
3227 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3228 {
3229   PetscErrorCode      ierr;
3230   Mat_Product         *product = C->product;
3231   Mat                 A=product->A,P=product->B;
3232   PetscBool           flg;
3233 
3234   PetscFunctionBegin;
3235   ierr = PetscStrcmp(product->alg,"allatonce",&flg);CHKERRQ(ierr);
3236   if (flg) {
3237     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);CHKERRQ(ierr);
3238     C->ops->productnumeric = MatProductNumeric_PtAP;
3239     PetscFunctionReturn(0);
3240   }
3241 
3242   ierr = PetscStrcmp(product->alg,"allatonce_merged",&flg);CHKERRQ(ierr);
3243   if (flg) {
3244     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);CHKERRQ(ierr);
3245     C->ops->productnumeric = MatProductNumeric_PtAP;
3246     PetscFunctionReturn(0);
3247   }
3248 
3249   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3250   PetscFunctionReturn(0);
3251 }
3252 
3253 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3254 {
3255   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3256   Mat            a    = b->AIJ,B;
3257   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3258   PetscErrorCode ierr;
3259   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3260   PetscInt       *cols;
3261   PetscScalar    *vals;
3262 
3263   PetscFunctionBegin;
3264   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3265   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
3266   for (i=0; i<m; i++) {
3267     nmax = PetscMax(nmax,aij->ilen[i]);
3268     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3269   }
3270   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3271   ierr = MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);CHKERRQ(ierr);
3272   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3273   ierr = MatSeqAIJSetPreallocation(B,0,ilen);CHKERRQ(ierr);
3274   ierr = PetscFree(ilen);CHKERRQ(ierr);
3275   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
3276   ii   = 0;
3277   for (i=0; i<m; i++) {
3278     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3279     for (j=0; j<dof; j++) {
3280       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3281       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3282       ii++;
3283     }
3284     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3285   }
3286   ierr = PetscFree(icols);CHKERRQ(ierr);
3287   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3288   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289 
3290   if (reuse == MAT_INPLACE_MATRIX) {
3291     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3292   } else {
3293     *newmat = B;
3294   }
3295   PetscFunctionReturn(0);
3296 }
3297 
3298 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3299 
3300 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3301 {
3302   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3303   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3304   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3305   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3306   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3307   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3308   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3309   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3310   PetscInt       rstart,cstart,*garray,ii,k;
3311   PetscErrorCode ierr;
3312   PetscScalar    *vals,*ovals;
3313 
3314   PetscFunctionBegin;
3315   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3316   for (i=0; i<A->rmap->n/dof; i++) {
3317     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3318     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3319     for (j=0; j<dof; j++) {
3320       dnz[dof*i+j] = AIJ->ilen[i];
3321       onz[dof*i+j] = OAIJ->ilen[i];
3322     }
3323   }
3324   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3325   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3326   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3327   ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr);
3328   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
3329   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3330 
3331   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3332   rstart = dof*maij->A->rmap->rstart;
3333   cstart = dof*maij->A->cmap->rstart;
3334   garray = mpiaij->garray;
3335 
3336   ii = rstart;
3337   for (i=0; i<A->rmap->n/dof; i++) {
3338     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3339     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3340     for (j=0; j<dof; j++) {
3341       for (k=0; k<ncols; k++) {
3342         icols[k] = cstart + dof*cols[k]+j;
3343       }
3344       for (k=0; k<oncols; k++) {
3345         oicols[k] = dof*garray[ocols[k]]+j;
3346       }
3347       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3348       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3349       ii++;
3350     }
3351     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3352     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3353   }
3354   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3355 
3356   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3357   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3358 
3359   if (reuse == MAT_INPLACE_MATRIX) {
3360     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3361     ((PetscObject)A)->refct = 1;
3362 
3363     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3364 
3365     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3366   } else {
3367     *newmat = B;
3368   }
3369   PetscFunctionReturn(0);
3370 }
3371 
3372 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3373 {
3374   PetscErrorCode ierr;
3375   Mat            A;
3376 
3377   PetscFunctionBegin;
3378   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3379   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
3380   ierr = MatDestroy(&A);CHKERRQ(ierr);
3381   PetscFunctionReturn(0);
3382 }
3383 
3384 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3385 {
3386   PetscErrorCode ierr;
3387   Mat            A;
3388 
3389   PetscFunctionBegin;
3390   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3391   ierr = MatCreateSubMatrices(A,n,irow,icol,scall,submat);CHKERRQ(ierr);
3392   ierr = MatDestroy(&A);CHKERRQ(ierr);
3393   PetscFunctionReturn(0);
3394 }
3395 
3396 /* ---------------------------------------------------------------------------------- */
3397 /*@
3398   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3399   operations for multicomponent problems.  It interpolates each component the same
3400   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3401   and MATMPIAIJ for distributed matrices.
3402 
3403   Collective
3404 
3405   Input Parameters:
3406 + A - the AIJ matrix describing the action on blocks
3407 - dof - the block size (number of components per node)
3408 
3409   Output Parameter:
3410 . maij - the new MAIJ matrix
3411 
3412   Operations provided:
3413 + MatMult
3414 . MatMultTranspose
3415 . MatMultAdd
3416 . MatMultTransposeAdd
3417 - MatView
3418 
3419   Level: advanced
3420 
3421 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3422 @*/
3423 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3424 {
3425   PetscErrorCode ierr;
3426   PetscMPIInt    size;
3427   PetscInt       n;
3428   Mat            B;
3429 #if defined(PETSC_HAVE_CUDA)
3430   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3431   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3432 #endif
3433 
3434   PetscFunctionBegin;
3435   dof  = PetscAbs(dof);
3436   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3437 
3438   if (dof == 1) *maij = A;
3439   else {
3440     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3441     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3442     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3443     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3444     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3445     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3446 
3447     B->assembled = PETSC_TRUE;
3448 
3449     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3450     if (size == 1) {
3451       Mat_SeqMAIJ *b;
3452 
3453       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3454 
3455       B->ops->setup   = NULL;
3456       B->ops->destroy = MatDestroy_SeqMAIJ;
3457       B->ops->view    = MatView_SeqMAIJ;
3458 
3459       b               = (Mat_SeqMAIJ*)B->data;
3460       b->dof          = dof;
3461       b->AIJ          = A;
3462 
3463       if (dof == 2) {
3464         B->ops->mult             = MatMult_SeqMAIJ_2;
3465         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3466         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3467         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3468       } else if (dof == 3) {
3469         B->ops->mult             = MatMult_SeqMAIJ_3;
3470         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3471         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3472         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3473       } else if (dof == 4) {
3474         B->ops->mult             = MatMult_SeqMAIJ_4;
3475         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3476         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3477         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3478       } else if (dof == 5) {
3479         B->ops->mult             = MatMult_SeqMAIJ_5;
3480         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3481         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3482         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3483       } else if (dof == 6) {
3484         B->ops->mult             = MatMult_SeqMAIJ_6;
3485         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3486         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3487         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3488       } else if (dof == 7) {
3489         B->ops->mult             = MatMult_SeqMAIJ_7;
3490         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3491         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3492         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3493       } else if (dof == 8) {
3494         B->ops->mult             = MatMult_SeqMAIJ_8;
3495         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3496         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3497         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3498       } else if (dof == 9) {
3499         B->ops->mult             = MatMult_SeqMAIJ_9;
3500         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3501         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3502         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3503       } else if (dof == 10) {
3504         B->ops->mult             = MatMult_SeqMAIJ_10;
3505         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3506         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3507         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3508       } else if (dof == 11) {
3509         B->ops->mult             = MatMult_SeqMAIJ_11;
3510         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3511         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3512         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3513       } else if (dof == 16) {
3514         B->ops->mult             = MatMult_SeqMAIJ_16;
3515         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3516         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3517         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3518       } else if (dof == 18) {
3519         B->ops->mult             = MatMult_SeqMAIJ_18;
3520         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3521         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3522         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3523       } else {
3524         B->ops->mult             = MatMult_SeqMAIJ_N;
3525         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3526         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3527         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3528       }
3529 #if defined(PETSC_HAVE_CUDA)
3530       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3531 #endif
3532       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3533       ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3534     } else {
3535       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3536       Mat_MPIMAIJ *b;
3537       IS          from,to;
3538       Vec         gvec;
3539 
3540       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3541 
3542       B->ops->setup            = NULL;
3543       B->ops->destroy          = MatDestroy_MPIMAIJ;
3544       B->ops->view             = MatView_MPIMAIJ;
3545 
3546       b      = (Mat_MPIMAIJ*)B->data;
3547       b->dof = dof;
3548       b->A   = A;
3549 
3550       ierr = MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);CHKERRQ(ierr);
3551       ierr = MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);CHKERRQ(ierr);
3552 
3553       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3554       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3555       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
3556       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3557       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3558 
3559       /* create two temporary Index sets for build scatter gather */
3560       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3561       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3562 
3563       /* create temporary global vector to generate scatter context */
3564       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3565 
3566       /* generate the scatter context */
3567       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3568 
3569       ierr = ISDestroy(&from);CHKERRQ(ierr);
3570       ierr = ISDestroy(&to);CHKERRQ(ierr);
3571       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3572 
3573       B->ops->mult             = MatMult_MPIMAIJ_dof;
3574       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3575       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3576       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3577 
3578 #if defined(PETSC_HAVE_CUDA)
3579       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3580 #endif
3581       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3582       ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3583     }
3584     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3585     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3586     ierr  = MatSetFromOptions(B);CHKERRQ(ierr);
3587     ierr  = MatSetUp(B);CHKERRQ(ierr);
3588 #if defined(PETSC_HAVE_CUDA)
3589     /* temporary until we have CUDA implementation of MAIJ */
3590     {
3591       PetscBool flg;
3592       if (convert) {
3593         ierr = PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");CHKERRQ(ierr);
3594         if (flg) {
3595           ierr = MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
3596         }
3597       }
3598     }
3599 #endif
3600     *maij = B;
3601     ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
3602   }
3603   PetscFunctionReturn(0);
3604 }
3605