xref: /petsc/src/mat/impls/maij/maij.c (revision 40cbb1a031ea8f2be4fe2b92dc842b003ad37be3)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 
22 /*@
23    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24 
25    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26 
27    Input Parameter:
28 .  A - the MAIJ matrix
29 
30    Output Parameter:
31 .  B - the AIJ matrix
32 
33    Level: advanced
34 
35    Notes:
36     The reference count on the AIJ matrix is not increased so you should not destroy it.
37 
38 .seealso: `MatCreateMAIJ()`
39 @*/
40 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
41 {
42   PetscBool      ismpimaij,isseqmaij;
43 
44   PetscFunctionBegin;
45   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij));
46   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij));
47   if (ismpimaij) {
48     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
49 
50     *B = b->A;
51   } else if (isseqmaij) {
52     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
53 
54     *B = b->AIJ;
55   } else {
56     *B = A;
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 /*@
62    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
63 
64    Logically Collective
65 
66    Input Parameters:
67 +  A - the MAIJ matrix
68 -  dof - the block size for the new matrix
69 
70    Output Parameter:
71 .  B - the new MAIJ matrix
72 
73    Level: advanced
74 
75 .seealso: `MatCreateMAIJ()`
76 @*/
77 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
78 {
79   Mat            Aij = NULL;
80 
81   PetscFunctionBegin;
82   PetscValidLogicalCollectiveInt(A,dof,2);
83   PetscCall(MatMAIJGetAIJ(A,&Aij));
84   PetscCall(MatCreateMAIJ(Aij,dof,B));
85   PetscFunctionReturn(0);
86 }
87 
88 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
89 {
90   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
91 
92   PetscFunctionBegin;
93   PetscCall(MatDestroy(&b->AIJ));
94   PetscCall(PetscFree(A->data));
95   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaijcusparse_C",NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL));
98   PetscFunctionReturn(0);
99 }
100 
101 PetscErrorCode MatSetUp_MAIJ(Mat A)
102 {
103   PetscFunctionBegin;
104   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
105 }
106 
107 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
108 {
109   Mat            B;
110 
111   PetscFunctionBegin;
112   PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
113   PetscCall(MatView(B,viewer));
114   PetscCall(MatDestroy(&B));
115   PetscFunctionReturn(0);
116 }
117 
118 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
119 {
120   Mat            B;
121 
122   PetscFunctionBegin;
123   PetscCall(MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B));
124   PetscCall(MatView(B,viewer));
125   PetscCall(MatDestroy(&B));
126   PetscFunctionReturn(0);
127 }
128 
129 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
130 {
131   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
132 
133   PetscFunctionBegin;
134   PetscCall(MatDestroy(&b->AIJ));
135   PetscCall(MatDestroy(&b->OAIJ));
136   PetscCall(MatDestroy(&b->A));
137   PetscCall(VecScatterDestroy(&b->ctx));
138   PetscCall(VecDestroy(&b->w));
139   PetscCall(PetscFree(A->data));
140   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaijcusparse_C",NULL));
141   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL));
142   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL));
143   PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL));
144   PetscFunctionReturn(0);
145 }
146 
147 /*MC
148   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
149   multicomponent problems, interpolating or restricting each component the same way independently.
150   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
151 
152   Operations provided:
153 .vb
154     MatMult
155     MatMultTranspose
156     MatMultAdd
157     MatMultTransposeAdd
158 .ve
159 
160   Level: advanced
161 
162 .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
163 M*/
164 
165 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
166 {
167   Mat_MPIMAIJ    *b;
168   PetscMPIInt    size;
169 
170   PetscFunctionBegin;
171   PetscCall(PetscNewLog(A,&b));
172   A->data  = (void*)b;
173 
174   PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps)));
175 
176   A->ops->setup = MatSetUp_MAIJ;
177 
178   b->AIJ  = NULL;
179   b->dof  = 0;
180   b->OAIJ = NULL;
181   b->ctx  = NULL;
182   b->w    = NULL;
183   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
184   if (size == 1) {
185     PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ));
186   } else {
187     PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ));
188   }
189   A->preallocated  = PETSC_TRUE;
190   A->assembled     = PETSC_TRUE;
191   PetscFunctionReturn(0);
192 }
193 
194 /* --------------------------------------------------------------------------------------*/
195 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
196 {
197   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
198   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
199   const PetscScalar *x,*v;
200   PetscScalar       *y, sum1, sum2;
201   PetscInt          nonzerorow=0,n,i,jrow,j;
202   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
203 
204   PetscFunctionBegin;
205   PetscCall(VecGetArrayRead(xx,&x));
206   PetscCall(VecGetArray(yy,&y));
207   idx  = a->j;
208   v    = a->a;
209   ii   = a->i;
210 
211   for (i=0; i<m; i++) {
212     jrow  = ii[i];
213     n     = ii[i+1] - jrow;
214     sum1  = 0.0;
215     sum2  = 0.0;
216 
217     nonzerorow += (n>0);
218     for (j=0; j<n; j++) {
219       sum1 += v[jrow]*x[2*idx[jrow]];
220       sum2 += v[jrow]*x[2*idx[jrow]+1];
221       jrow++;
222     }
223     y[2*i]   = sum1;
224     y[2*i+1] = sum2;
225   }
226 
227   PetscCall(PetscLogFlops(4.0*a->nz - 2.0*nonzerorow));
228   PetscCall(VecRestoreArrayRead(xx,&x));
229   PetscCall(VecRestoreArray(yy,&y));
230   PetscFunctionReturn(0);
231 }
232 
233 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
234 {
235   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
236   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
237   const PetscScalar *x,*v;
238   PetscScalar       *y,alpha1,alpha2;
239   PetscInt          n,i;
240   const PetscInt    m = b->AIJ->rmap->n,*idx;
241 
242   PetscFunctionBegin;
243   PetscCall(VecSet(yy,0.0));
244   PetscCall(VecGetArrayRead(xx,&x));
245   PetscCall(VecGetArray(yy,&y));
246 
247   for (i=0; i<m; i++) {
248     idx    = a->j + a->i[i];
249     v      = a->a + a->i[i];
250     n      = a->i[i+1] - a->i[i];
251     alpha1 = x[2*i];
252     alpha2 = x[2*i+1];
253     while (n-->0) {
254       y[2*(*idx)]   += alpha1*(*v);
255       y[2*(*idx)+1] += alpha2*(*v);
256       idx++; v++;
257     }
258   }
259   PetscCall(PetscLogFlops(4.0*a->nz));
260   PetscCall(VecRestoreArrayRead(xx,&x));
261   PetscCall(VecRestoreArray(yy,&y));
262   PetscFunctionReturn(0);
263 }
264 
265 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
266 {
267   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
268   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
269   const PetscScalar *x,*v;
270   PetscScalar       *y,sum1, sum2;
271   PetscInt          n,i,jrow,j;
272   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
273 
274   PetscFunctionBegin;
275   if (yy != zz) PetscCall(VecCopy(yy,zz));
276   PetscCall(VecGetArrayRead(xx,&x));
277   PetscCall(VecGetArray(zz,&y));
278   idx  = a->j;
279   v    = a->a;
280   ii   = a->i;
281 
282   for (i=0; i<m; i++) {
283     jrow = ii[i];
284     n    = ii[i+1] - jrow;
285     sum1 = 0.0;
286     sum2 = 0.0;
287     for (j=0; j<n; j++) {
288       sum1 += v[jrow]*x[2*idx[jrow]];
289       sum2 += v[jrow]*x[2*idx[jrow]+1];
290       jrow++;
291     }
292     y[2*i]   += sum1;
293     y[2*i+1] += sum2;
294   }
295 
296   PetscCall(PetscLogFlops(4.0*a->nz));
297   PetscCall(VecRestoreArrayRead(xx,&x));
298   PetscCall(VecRestoreArray(zz,&y));
299   PetscFunctionReturn(0);
300 }
301 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
302 {
303   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
304   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
305   const PetscScalar *x,*v;
306   PetscScalar       *y,alpha1,alpha2;
307   PetscInt          n,i;
308   const PetscInt    m = b->AIJ->rmap->n,*idx;
309 
310   PetscFunctionBegin;
311   if (yy != zz) PetscCall(VecCopy(yy,zz));
312   PetscCall(VecGetArrayRead(xx,&x));
313   PetscCall(VecGetArray(zz,&y));
314 
315   for (i=0; i<m; i++) {
316     idx    = a->j + a->i[i];
317     v      = a->a + a->i[i];
318     n      = a->i[i+1] - a->i[i];
319     alpha1 = x[2*i];
320     alpha2 = x[2*i+1];
321     while (n-->0) {
322       y[2*(*idx)]   += alpha1*(*v);
323       y[2*(*idx)+1] += alpha2*(*v);
324       idx++; v++;
325     }
326   }
327   PetscCall(PetscLogFlops(4.0*a->nz));
328   PetscCall(VecRestoreArrayRead(xx,&x));
329   PetscCall(VecRestoreArray(zz,&y));
330   PetscFunctionReturn(0);
331 }
332 /* --------------------------------------------------------------------------------------*/
333 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
334 {
335   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
336   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
337   const PetscScalar *x,*v;
338   PetscScalar       *y,sum1, sum2, sum3;
339   PetscInt          nonzerorow=0,n,i,jrow,j;
340   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
341 
342   PetscFunctionBegin;
343   PetscCall(VecGetArrayRead(xx,&x));
344   PetscCall(VecGetArray(yy,&y));
345   idx  = a->j;
346   v    = a->a;
347   ii   = a->i;
348 
349   for (i=0; i<m; i++) {
350     jrow  = ii[i];
351     n     = ii[i+1] - jrow;
352     sum1  = 0.0;
353     sum2  = 0.0;
354     sum3  = 0.0;
355 
356     nonzerorow += (n>0);
357     for (j=0; j<n; j++) {
358       sum1 += v[jrow]*x[3*idx[jrow]];
359       sum2 += v[jrow]*x[3*idx[jrow]+1];
360       sum3 += v[jrow]*x[3*idx[jrow]+2];
361       jrow++;
362      }
363     y[3*i]   = sum1;
364     y[3*i+1] = sum2;
365     y[3*i+2] = sum3;
366   }
367 
368   PetscCall(PetscLogFlops(6.0*a->nz - 3.0*nonzerorow));
369   PetscCall(VecRestoreArrayRead(xx,&x));
370   PetscCall(VecRestoreArray(yy,&y));
371   PetscFunctionReturn(0);
372 }
373 
374 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
375 {
376   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
377   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
378   const PetscScalar *x,*v;
379   PetscScalar       *y,alpha1,alpha2,alpha3;
380   PetscInt          n,i;
381   const PetscInt    m = b->AIJ->rmap->n,*idx;
382 
383   PetscFunctionBegin;
384   PetscCall(VecSet(yy,0.0));
385   PetscCall(VecGetArrayRead(xx,&x));
386   PetscCall(VecGetArray(yy,&y));
387 
388   for (i=0; i<m; i++) {
389     idx    = a->j + a->i[i];
390     v      = a->a + a->i[i];
391     n      = a->i[i+1] - a->i[i];
392     alpha1 = x[3*i];
393     alpha2 = x[3*i+1];
394     alpha3 = x[3*i+2];
395     while (n-->0) {
396       y[3*(*idx)]   += alpha1*(*v);
397       y[3*(*idx)+1] += alpha2*(*v);
398       y[3*(*idx)+2] += alpha3*(*v);
399       idx++; v++;
400     }
401   }
402   PetscCall(PetscLogFlops(6.0*a->nz));
403   PetscCall(VecRestoreArrayRead(xx,&x));
404   PetscCall(VecRestoreArray(yy,&y));
405   PetscFunctionReturn(0);
406 }
407 
408 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
409 {
410   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
411   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
412   const PetscScalar *x,*v;
413   PetscScalar       *y,sum1, sum2, sum3;
414   PetscInt          n,i,jrow,j;
415   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
416 
417   PetscFunctionBegin;
418   if (yy != zz) PetscCall(VecCopy(yy,zz));
419   PetscCall(VecGetArrayRead(xx,&x));
420   PetscCall(VecGetArray(zz,&y));
421   idx  = a->j;
422   v    = a->a;
423   ii   = a->i;
424 
425   for (i=0; i<m; i++) {
426     jrow = ii[i];
427     n    = ii[i+1] - jrow;
428     sum1 = 0.0;
429     sum2 = 0.0;
430     sum3 = 0.0;
431     for (j=0; j<n; j++) {
432       sum1 += v[jrow]*x[3*idx[jrow]];
433       sum2 += v[jrow]*x[3*idx[jrow]+1];
434       sum3 += v[jrow]*x[3*idx[jrow]+2];
435       jrow++;
436     }
437     y[3*i]   += sum1;
438     y[3*i+1] += sum2;
439     y[3*i+2] += sum3;
440   }
441 
442   PetscCall(PetscLogFlops(6.0*a->nz));
443   PetscCall(VecRestoreArrayRead(xx,&x));
444   PetscCall(VecRestoreArray(zz,&y));
445   PetscFunctionReturn(0);
446 }
447 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
448 {
449   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
450   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
451   const PetscScalar *x,*v;
452   PetscScalar       *y,alpha1,alpha2,alpha3;
453   PetscInt          n,i;
454   const PetscInt    m = b->AIJ->rmap->n,*idx;
455 
456   PetscFunctionBegin;
457   if (yy != zz) PetscCall(VecCopy(yy,zz));
458   PetscCall(VecGetArrayRead(xx,&x));
459   PetscCall(VecGetArray(zz,&y));
460   for (i=0; i<m; i++) {
461     idx    = a->j + a->i[i];
462     v      = a->a + a->i[i];
463     n      = a->i[i+1] - a->i[i];
464     alpha1 = x[3*i];
465     alpha2 = x[3*i+1];
466     alpha3 = x[3*i+2];
467     while (n-->0) {
468       y[3*(*idx)]   += alpha1*(*v);
469       y[3*(*idx)+1] += alpha2*(*v);
470       y[3*(*idx)+2] += alpha3*(*v);
471       idx++; v++;
472     }
473   }
474   PetscCall(PetscLogFlops(6.0*a->nz));
475   PetscCall(VecRestoreArrayRead(xx,&x));
476   PetscCall(VecRestoreArray(zz,&y));
477   PetscFunctionReturn(0);
478 }
479 
480 /* ------------------------------------------------------------------------------*/
481 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
482 {
483   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
484   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
485   const PetscScalar *x,*v;
486   PetscScalar       *y,sum1, sum2, sum3, sum4;
487   PetscInt          nonzerorow=0,n,i,jrow,j;
488   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
489 
490   PetscFunctionBegin;
491   PetscCall(VecGetArrayRead(xx,&x));
492   PetscCall(VecGetArray(yy,&y));
493   idx  = a->j;
494   v    = a->a;
495   ii   = a->i;
496 
497   for (i=0; i<m; i++) {
498     jrow        = ii[i];
499     n           = ii[i+1] - jrow;
500     sum1        = 0.0;
501     sum2        = 0.0;
502     sum3        = 0.0;
503     sum4        = 0.0;
504     nonzerorow += (n>0);
505     for (j=0; j<n; j++) {
506       sum1 += v[jrow]*x[4*idx[jrow]];
507       sum2 += v[jrow]*x[4*idx[jrow]+1];
508       sum3 += v[jrow]*x[4*idx[jrow]+2];
509       sum4 += v[jrow]*x[4*idx[jrow]+3];
510       jrow++;
511     }
512     y[4*i]   = sum1;
513     y[4*i+1] = sum2;
514     y[4*i+2] = sum3;
515     y[4*i+3] = sum4;
516   }
517 
518   PetscCall(PetscLogFlops(8.0*a->nz - 4.0*nonzerorow));
519   PetscCall(VecRestoreArrayRead(xx,&x));
520   PetscCall(VecRestoreArray(yy,&y));
521   PetscFunctionReturn(0);
522 }
523 
524 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
525 {
526   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
527   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
528   const PetscScalar *x,*v;
529   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
530   PetscInt          n,i;
531   const PetscInt    m = b->AIJ->rmap->n,*idx;
532 
533   PetscFunctionBegin;
534   PetscCall(VecSet(yy,0.0));
535   PetscCall(VecGetArrayRead(xx,&x));
536   PetscCall(VecGetArray(yy,&y));
537   for (i=0; i<m; i++) {
538     idx    = a->j + a->i[i];
539     v      = a->a + a->i[i];
540     n      = a->i[i+1] - a->i[i];
541     alpha1 = x[4*i];
542     alpha2 = x[4*i+1];
543     alpha3 = x[4*i+2];
544     alpha4 = x[4*i+3];
545     while (n-->0) {
546       y[4*(*idx)]   += alpha1*(*v);
547       y[4*(*idx)+1] += alpha2*(*v);
548       y[4*(*idx)+2] += alpha3*(*v);
549       y[4*(*idx)+3] += alpha4*(*v);
550       idx++; v++;
551     }
552   }
553   PetscCall(PetscLogFlops(8.0*a->nz));
554   PetscCall(VecRestoreArrayRead(xx,&x));
555   PetscCall(VecRestoreArray(yy,&y));
556   PetscFunctionReturn(0);
557 }
558 
559 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
560 {
561   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
562   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
563   const PetscScalar *x,*v;
564   PetscScalar       *y,sum1, sum2, sum3, sum4;
565   PetscInt          n,i,jrow,j;
566   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
567 
568   PetscFunctionBegin;
569   if (yy != zz) PetscCall(VecCopy(yy,zz));
570   PetscCall(VecGetArrayRead(xx,&x));
571   PetscCall(VecGetArray(zz,&y));
572   idx  = a->j;
573   v    = a->a;
574   ii   = a->i;
575 
576   for (i=0; i<m; i++) {
577     jrow = ii[i];
578     n    = ii[i+1] - jrow;
579     sum1 = 0.0;
580     sum2 = 0.0;
581     sum3 = 0.0;
582     sum4 = 0.0;
583     for (j=0; j<n; j++) {
584       sum1 += v[jrow]*x[4*idx[jrow]];
585       sum2 += v[jrow]*x[4*idx[jrow]+1];
586       sum3 += v[jrow]*x[4*idx[jrow]+2];
587       sum4 += v[jrow]*x[4*idx[jrow]+3];
588       jrow++;
589     }
590     y[4*i]   += sum1;
591     y[4*i+1] += sum2;
592     y[4*i+2] += sum3;
593     y[4*i+3] += sum4;
594   }
595 
596   PetscCall(PetscLogFlops(8.0*a->nz));
597   PetscCall(VecRestoreArrayRead(xx,&x));
598   PetscCall(VecRestoreArray(zz,&y));
599   PetscFunctionReturn(0);
600 }
601 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
602 {
603   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
604   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
605   const PetscScalar *x,*v;
606   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
607   PetscInt          n,i;
608   const PetscInt    m = b->AIJ->rmap->n,*idx;
609 
610   PetscFunctionBegin;
611   if (yy != zz) PetscCall(VecCopy(yy,zz));
612   PetscCall(VecGetArrayRead(xx,&x));
613   PetscCall(VecGetArray(zz,&y));
614 
615   for (i=0; i<m; i++) {
616     idx    = a->j + a->i[i];
617     v      = a->a + a->i[i];
618     n      = a->i[i+1] - a->i[i];
619     alpha1 = x[4*i];
620     alpha2 = x[4*i+1];
621     alpha3 = x[4*i+2];
622     alpha4 = x[4*i+3];
623     while (n-->0) {
624       y[4*(*idx)]   += alpha1*(*v);
625       y[4*(*idx)+1] += alpha2*(*v);
626       y[4*(*idx)+2] += alpha3*(*v);
627       y[4*(*idx)+3] += alpha4*(*v);
628       idx++; v++;
629     }
630   }
631   PetscCall(PetscLogFlops(8.0*a->nz));
632   PetscCall(VecRestoreArrayRead(xx,&x));
633   PetscCall(VecRestoreArray(zz,&y));
634   PetscFunctionReturn(0);
635 }
636 /* ------------------------------------------------------------------------------*/
637 
638 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
639 {
640   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
641   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
642   const PetscScalar *x,*v;
643   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
644   PetscInt          nonzerorow=0,n,i,jrow,j;
645   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
646 
647   PetscFunctionBegin;
648   PetscCall(VecGetArrayRead(xx,&x));
649   PetscCall(VecGetArray(yy,&y));
650   idx  = a->j;
651   v    = a->a;
652   ii   = a->i;
653 
654   for (i=0; i<m; i++) {
655     jrow  = ii[i];
656     n     = ii[i+1] - jrow;
657     sum1  = 0.0;
658     sum2  = 0.0;
659     sum3  = 0.0;
660     sum4  = 0.0;
661     sum5  = 0.0;
662 
663     nonzerorow += (n>0);
664     for (j=0; j<n; j++) {
665       sum1 += v[jrow]*x[5*idx[jrow]];
666       sum2 += v[jrow]*x[5*idx[jrow]+1];
667       sum3 += v[jrow]*x[5*idx[jrow]+2];
668       sum4 += v[jrow]*x[5*idx[jrow]+3];
669       sum5 += v[jrow]*x[5*idx[jrow]+4];
670       jrow++;
671     }
672     y[5*i]   = sum1;
673     y[5*i+1] = sum2;
674     y[5*i+2] = sum3;
675     y[5*i+3] = sum4;
676     y[5*i+4] = sum5;
677   }
678 
679   PetscCall(PetscLogFlops(10.0*a->nz - 5.0*nonzerorow));
680   PetscCall(VecRestoreArrayRead(xx,&x));
681   PetscCall(VecRestoreArray(yy,&y));
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
686 {
687   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
688   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
689   const PetscScalar *x,*v;
690   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
691   PetscInt          n,i;
692   const PetscInt    m = b->AIJ->rmap->n,*idx;
693 
694   PetscFunctionBegin;
695   PetscCall(VecSet(yy,0.0));
696   PetscCall(VecGetArrayRead(xx,&x));
697   PetscCall(VecGetArray(yy,&y));
698 
699   for (i=0; i<m; i++) {
700     idx    = a->j + a->i[i];
701     v      = a->a + a->i[i];
702     n      = a->i[i+1] - a->i[i];
703     alpha1 = x[5*i];
704     alpha2 = x[5*i+1];
705     alpha3 = x[5*i+2];
706     alpha4 = x[5*i+3];
707     alpha5 = x[5*i+4];
708     while (n-->0) {
709       y[5*(*idx)]   += alpha1*(*v);
710       y[5*(*idx)+1] += alpha2*(*v);
711       y[5*(*idx)+2] += alpha3*(*v);
712       y[5*(*idx)+3] += alpha4*(*v);
713       y[5*(*idx)+4] += alpha5*(*v);
714       idx++; v++;
715     }
716   }
717   PetscCall(PetscLogFlops(10.0*a->nz));
718   PetscCall(VecRestoreArrayRead(xx,&x));
719   PetscCall(VecRestoreArray(yy,&y));
720   PetscFunctionReturn(0);
721 }
722 
723 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
724 {
725   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
726   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
727   const PetscScalar *x,*v;
728   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
729   PetscInt          n,i,jrow,j;
730   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
731 
732   PetscFunctionBegin;
733   if (yy != zz) PetscCall(VecCopy(yy,zz));
734   PetscCall(VecGetArrayRead(xx,&x));
735   PetscCall(VecGetArray(zz,&y));
736   idx  = a->j;
737   v    = a->a;
738   ii   = a->i;
739 
740   for (i=0; i<m; i++) {
741     jrow = ii[i];
742     n    = ii[i+1] - jrow;
743     sum1 = 0.0;
744     sum2 = 0.0;
745     sum3 = 0.0;
746     sum4 = 0.0;
747     sum5 = 0.0;
748     for (j=0; j<n; j++) {
749       sum1 += v[jrow]*x[5*idx[jrow]];
750       sum2 += v[jrow]*x[5*idx[jrow]+1];
751       sum3 += v[jrow]*x[5*idx[jrow]+2];
752       sum4 += v[jrow]*x[5*idx[jrow]+3];
753       sum5 += v[jrow]*x[5*idx[jrow]+4];
754       jrow++;
755     }
756     y[5*i]   += sum1;
757     y[5*i+1] += sum2;
758     y[5*i+2] += sum3;
759     y[5*i+3] += sum4;
760     y[5*i+4] += sum5;
761   }
762 
763   PetscCall(PetscLogFlops(10.0*a->nz));
764   PetscCall(VecRestoreArrayRead(xx,&x));
765   PetscCall(VecRestoreArray(zz,&y));
766   PetscFunctionReturn(0);
767 }
768 
769 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
770 {
771   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
772   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
773   const PetscScalar *x,*v;
774   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
775   PetscInt          n,i;
776   const PetscInt    m = b->AIJ->rmap->n,*idx;
777 
778   PetscFunctionBegin;
779   if (yy != zz) PetscCall(VecCopy(yy,zz));
780   PetscCall(VecGetArrayRead(xx,&x));
781   PetscCall(VecGetArray(zz,&y));
782 
783   for (i=0; i<m; i++) {
784     idx    = a->j + a->i[i];
785     v      = a->a + a->i[i];
786     n      = a->i[i+1] - a->i[i];
787     alpha1 = x[5*i];
788     alpha2 = x[5*i+1];
789     alpha3 = x[5*i+2];
790     alpha4 = x[5*i+3];
791     alpha5 = x[5*i+4];
792     while (n-->0) {
793       y[5*(*idx)]   += alpha1*(*v);
794       y[5*(*idx)+1] += alpha2*(*v);
795       y[5*(*idx)+2] += alpha3*(*v);
796       y[5*(*idx)+3] += alpha4*(*v);
797       y[5*(*idx)+4] += alpha5*(*v);
798       idx++; v++;
799     }
800   }
801   PetscCall(PetscLogFlops(10.0*a->nz));
802   PetscCall(VecRestoreArrayRead(xx,&x));
803   PetscCall(VecRestoreArray(zz,&y));
804   PetscFunctionReturn(0);
805 }
806 
807 /* ------------------------------------------------------------------------------*/
808 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
809 {
810   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
811   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
812   const PetscScalar *x,*v;
813   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
814   PetscInt          nonzerorow=0,n,i,jrow,j;
815   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
816 
817   PetscFunctionBegin;
818   PetscCall(VecGetArrayRead(xx,&x));
819   PetscCall(VecGetArray(yy,&y));
820   idx  = a->j;
821   v    = a->a;
822   ii   = a->i;
823 
824   for (i=0; i<m; i++) {
825     jrow  = ii[i];
826     n     = ii[i+1] - jrow;
827     sum1  = 0.0;
828     sum2  = 0.0;
829     sum3  = 0.0;
830     sum4  = 0.0;
831     sum5  = 0.0;
832     sum6  = 0.0;
833 
834     nonzerorow += (n>0);
835     for (j=0; j<n; j++) {
836       sum1 += v[jrow]*x[6*idx[jrow]];
837       sum2 += v[jrow]*x[6*idx[jrow]+1];
838       sum3 += v[jrow]*x[6*idx[jrow]+2];
839       sum4 += v[jrow]*x[6*idx[jrow]+3];
840       sum5 += v[jrow]*x[6*idx[jrow]+4];
841       sum6 += v[jrow]*x[6*idx[jrow]+5];
842       jrow++;
843     }
844     y[6*i]   = sum1;
845     y[6*i+1] = sum2;
846     y[6*i+2] = sum3;
847     y[6*i+3] = sum4;
848     y[6*i+4] = sum5;
849     y[6*i+5] = sum6;
850   }
851 
852   PetscCall(PetscLogFlops(12.0*a->nz - 6.0*nonzerorow));
853   PetscCall(VecRestoreArrayRead(xx,&x));
854   PetscCall(VecRestoreArray(yy,&y));
855   PetscFunctionReturn(0);
856 }
857 
858 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
859 {
860   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
861   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
862   const PetscScalar *x,*v;
863   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
864   PetscInt          n,i;
865   const PetscInt    m = b->AIJ->rmap->n,*idx;
866 
867   PetscFunctionBegin;
868   PetscCall(VecSet(yy,0.0));
869   PetscCall(VecGetArrayRead(xx,&x));
870   PetscCall(VecGetArray(yy,&y));
871 
872   for (i=0; i<m; i++) {
873     idx    = a->j + a->i[i];
874     v      = a->a + a->i[i];
875     n      = a->i[i+1] - a->i[i];
876     alpha1 = x[6*i];
877     alpha2 = x[6*i+1];
878     alpha3 = x[6*i+2];
879     alpha4 = x[6*i+3];
880     alpha5 = x[6*i+4];
881     alpha6 = x[6*i+5];
882     while (n-->0) {
883       y[6*(*idx)]   += alpha1*(*v);
884       y[6*(*idx)+1] += alpha2*(*v);
885       y[6*(*idx)+2] += alpha3*(*v);
886       y[6*(*idx)+3] += alpha4*(*v);
887       y[6*(*idx)+4] += alpha5*(*v);
888       y[6*(*idx)+5] += alpha6*(*v);
889       idx++; v++;
890     }
891   }
892   PetscCall(PetscLogFlops(12.0*a->nz));
893   PetscCall(VecRestoreArrayRead(xx,&x));
894   PetscCall(VecRestoreArray(yy,&y));
895   PetscFunctionReturn(0);
896 }
897 
898 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
899 {
900   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
901   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
902   const PetscScalar *x,*v;
903   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
904   PetscInt          n,i,jrow,j;
905   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
906 
907   PetscFunctionBegin;
908   if (yy != zz) PetscCall(VecCopy(yy,zz));
909   PetscCall(VecGetArrayRead(xx,&x));
910   PetscCall(VecGetArray(zz,&y));
911   idx  = a->j;
912   v    = a->a;
913   ii   = a->i;
914 
915   for (i=0; i<m; i++) {
916     jrow = ii[i];
917     n    = ii[i+1] - jrow;
918     sum1 = 0.0;
919     sum2 = 0.0;
920     sum3 = 0.0;
921     sum4 = 0.0;
922     sum5 = 0.0;
923     sum6 = 0.0;
924     for (j=0; j<n; j++) {
925       sum1 += v[jrow]*x[6*idx[jrow]];
926       sum2 += v[jrow]*x[6*idx[jrow]+1];
927       sum3 += v[jrow]*x[6*idx[jrow]+2];
928       sum4 += v[jrow]*x[6*idx[jrow]+3];
929       sum5 += v[jrow]*x[6*idx[jrow]+4];
930       sum6 += v[jrow]*x[6*idx[jrow]+5];
931       jrow++;
932     }
933     y[6*i]   += sum1;
934     y[6*i+1] += sum2;
935     y[6*i+2] += sum3;
936     y[6*i+3] += sum4;
937     y[6*i+4] += sum5;
938     y[6*i+5] += sum6;
939   }
940 
941   PetscCall(PetscLogFlops(12.0*a->nz));
942   PetscCall(VecRestoreArrayRead(xx,&x));
943   PetscCall(VecRestoreArray(zz,&y));
944   PetscFunctionReturn(0);
945 }
946 
947 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
948 {
949   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
950   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
951   const PetscScalar *x,*v;
952   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
953   PetscInt          n,i;
954   const PetscInt    m = b->AIJ->rmap->n,*idx;
955 
956   PetscFunctionBegin;
957   if (yy != zz) PetscCall(VecCopy(yy,zz));
958   PetscCall(VecGetArrayRead(xx,&x));
959   PetscCall(VecGetArray(zz,&y));
960 
961   for (i=0; i<m; i++) {
962     idx    = a->j + a->i[i];
963     v      = a->a + a->i[i];
964     n      = a->i[i+1] - a->i[i];
965     alpha1 = x[6*i];
966     alpha2 = x[6*i+1];
967     alpha3 = x[6*i+2];
968     alpha4 = x[6*i+3];
969     alpha5 = x[6*i+4];
970     alpha6 = x[6*i+5];
971     while (n-->0) {
972       y[6*(*idx)]   += alpha1*(*v);
973       y[6*(*idx)+1] += alpha2*(*v);
974       y[6*(*idx)+2] += alpha3*(*v);
975       y[6*(*idx)+3] += alpha4*(*v);
976       y[6*(*idx)+4] += alpha5*(*v);
977       y[6*(*idx)+5] += alpha6*(*v);
978       idx++; v++;
979     }
980   }
981   PetscCall(PetscLogFlops(12.0*a->nz));
982   PetscCall(VecRestoreArrayRead(xx,&x));
983   PetscCall(VecRestoreArray(zz,&y));
984   PetscFunctionReturn(0);
985 }
986 
987 /* ------------------------------------------------------------------------------*/
988 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
989 {
990   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
991   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
992   const PetscScalar *x,*v;
993   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
994   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
995   PetscInt          nonzerorow=0,n,i,jrow,j;
996 
997   PetscFunctionBegin;
998   PetscCall(VecGetArrayRead(xx,&x));
999   PetscCall(VecGetArray(yy,&y));
1000   idx  = a->j;
1001   v    = a->a;
1002   ii   = a->i;
1003 
1004   for (i=0; i<m; i++) {
1005     jrow  = ii[i];
1006     n     = ii[i+1] - jrow;
1007     sum1  = 0.0;
1008     sum2  = 0.0;
1009     sum3  = 0.0;
1010     sum4  = 0.0;
1011     sum5  = 0.0;
1012     sum6  = 0.0;
1013     sum7  = 0.0;
1014 
1015     nonzerorow += (n>0);
1016     for (j=0; j<n; j++) {
1017       sum1 += v[jrow]*x[7*idx[jrow]];
1018       sum2 += v[jrow]*x[7*idx[jrow]+1];
1019       sum3 += v[jrow]*x[7*idx[jrow]+2];
1020       sum4 += v[jrow]*x[7*idx[jrow]+3];
1021       sum5 += v[jrow]*x[7*idx[jrow]+4];
1022       sum6 += v[jrow]*x[7*idx[jrow]+5];
1023       sum7 += v[jrow]*x[7*idx[jrow]+6];
1024       jrow++;
1025     }
1026     y[7*i]   = sum1;
1027     y[7*i+1] = sum2;
1028     y[7*i+2] = sum3;
1029     y[7*i+3] = sum4;
1030     y[7*i+4] = sum5;
1031     y[7*i+5] = sum6;
1032     y[7*i+6] = sum7;
1033   }
1034 
1035   PetscCall(PetscLogFlops(14.0*a->nz - 7.0*nonzerorow));
1036   PetscCall(VecRestoreArrayRead(xx,&x));
1037   PetscCall(VecRestoreArray(yy,&y));
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1042 {
1043   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1044   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1045   const PetscScalar *x,*v;
1046   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1047   const PetscInt    m = b->AIJ->rmap->n,*idx;
1048   PetscInt          n,i;
1049 
1050   PetscFunctionBegin;
1051   PetscCall(VecSet(yy,0.0));
1052   PetscCall(VecGetArrayRead(xx,&x));
1053   PetscCall(VecGetArray(yy,&y));
1054 
1055   for (i=0; i<m; i++) {
1056     idx    = a->j + a->i[i];
1057     v      = a->a + a->i[i];
1058     n      = a->i[i+1] - a->i[i];
1059     alpha1 = x[7*i];
1060     alpha2 = x[7*i+1];
1061     alpha3 = x[7*i+2];
1062     alpha4 = x[7*i+3];
1063     alpha5 = x[7*i+4];
1064     alpha6 = x[7*i+5];
1065     alpha7 = x[7*i+6];
1066     while (n-->0) {
1067       y[7*(*idx)]   += alpha1*(*v);
1068       y[7*(*idx)+1] += alpha2*(*v);
1069       y[7*(*idx)+2] += alpha3*(*v);
1070       y[7*(*idx)+3] += alpha4*(*v);
1071       y[7*(*idx)+4] += alpha5*(*v);
1072       y[7*(*idx)+5] += alpha6*(*v);
1073       y[7*(*idx)+6] += alpha7*(*v);
1074       idx++; v++;
1075     }
1076   }
1077   PetscCall(PetscLogFlops(14.0*a->nz));
1078   PetscCall(VecRestoreArrayRead(xx,&x));
1079   PetscCall(VecRestoreArray(yy,&y));
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1084 {
1085   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1086   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1087   const PetscScalar *x,*v;
1088   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1089   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1090   PetscInt          n,i,jrow,j;
1091 
1092   PetscFunctionBegin;
1093   if (yy != zz) PetscCall(VecCopy(yy,zz));
1094   PetscCall(VecGetArrayRead(xx,&x));
1095   PetscCall(VecGetArray(zz,&y));
1096   idx  = a->j;
1097   v    = a->a;
1098   ii   = a->i;
1099 
1100   for (i=0; i<m; i++) {
1101     jrow = ii[i];
1102     n    = ii[i+1] - jrow;
1103     sum1 = 0.0;
1104     sum2 = 0.0;
1105     sum3 = 0.0;
1106     sum4 = 0.0;
1107     sum5 = 0.0;
1108     sum6 = 0.0;
1109     sum7 = 0.0;
1110     for (j=0; j<n; j++) {
1111       sum1 += v[jrow]*x[7*idx[jrow]];
1112       sum2 += v[jrow]*x[7*idx[jrow]+1];
1113       sum3 += v[jrow]*x[7*idx[jrow]+2];
1114       sum4 += v[jrow]*x[7*idx[jrow]+3];
1115       sum5 += v[jrow]*x[7*idx[jrow]+4];
1116       sum6 += v[jrow]*x[7*idx[jrow]+5];
1117       sum7 += v[jrow]*x[7*idx[jrow]+6];
1118       jrow++;
1119     }
1120     y[7*i]   += sum1;
1121     y[7*i+1] += sum2;
1122     y[7*i+2] += sum3;
1123     y[7*i+3] += sum4;
1124     y[7*i+4] += sum5;
1125     y[7*i+5] += sum6;
1126     y[7*i+6] += sum7;
1127   }
1128 
1129   PetscCall(PetscLogFlops(14.0*a->nz));
1130   PetscCall(VecRestoreArrayRead(xx,&x));
1131   PetscCall(VecRestoreArray(zz,&y));
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1136 {
1137   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1138   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1139   const PetscScalar *x,*v;
1140   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1141   const PetscInt    m = b->AIJ->rmap->n,*idx;
1142   PetscInt          n,i;
1143 
1144   PetscFunctionBegin;
1145   if (yy != zz) PetscCall(VecCopy(yy,zz));
1146   PetscCall(VecGetArrayRead(xx,&x));
1147   PetscCall(VecGetArray(zz,&y));
1148   for (i=0; i<m; i++) {
1149     idx    = a->j + a->i[i];
1150     v      = a->a + a->i[i];
1151     n      = a->i[i+1] - a->i[i];
1152     alpha1 = x[7*i];
1153     alpha2 = x[7*i+1];
1154     alpha3 = x[7*i+2];
1155     alpha4 = x[7*i+3];
1156     alpha5 = x[7*i+4];
1157     alpha6 = x[7*i+5];
1158     alpha7 = x[7*i+6];
1159     while (n-->0) {
1160       y[7*(*idx)]   += alpha1*(*v);
1161       y[7*(*idx)+1] += alpha2*(*v);
1162       y[7*(*idx)+2] += alpha3*(*v);
1163       y[7*(*idx)+3] += alpha4*(*v);
1164       y[7*(*idx)+4] += alpha5*(*v);
1165       y[7*(*idx)+5] += alpha6*(*v);
1166       y[7*(*idx)+6] += alpha7*(*v);
1167       idx++; v++;
1168     }
1169   }
1170   PetscCall(PetscLogFlops(14.0*a->nz));
1171   PetscCall(VecRestoreArrayRead(xx,&x));
1172   PetscCall(VecRestoreArray(zz,&y));
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1177 {
1178   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1179   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1180   const PetscScalar *x,*v;
1181   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1182   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1183   PetscInt          nonzerorow=0,n,i,jrow,j;
1184 
1185   PetscFunctionBegin;
1186   PetscCall(VecGetArrayRead(xx,&x));
1187   PetscCall(VecGetArray(yy,&y));
1188   idx  = a->j;
1189   v    = a->a;
1190   ii   = a->i;
1191 
1192   for (i=0; i<m; i++) {
1193     jrow  = ii[i];
1194     n     = ii[i+1] - jrow;
1195     sum1  = 0.0;
1196     sum2  = 0.0;
1197     sum3  = 0.0;
1198     sum4  = 0.0;
1199     sum5  = 0.0;
1200     sum6  = 0.0;
1201     sum7  = 0.0;
1202     sum8  = 0.0;
1203 
1204     nonzerorow += (n>0);
1205     for (j=0; j<n; j++) {
1206       sum1 += v[jrow]*x[8*idx[jrow]];
1207       sum2 += v[jrow]*x[8*idx[jrow]+1];
1208       sum3 += v[jrow]*x[8*idx[jrow]+2];
1209       sum4 += v[jrow]*x[8*idx[jrow]+3];
1210       sum5 += v[jrow]*x[8*idx[jrow]+4];
1211       sum6 += v[jrow]*x[8*idx[jrow]+5];
1212       sum7 += v[jrow]*x[8*idx[jrow]+6];
1213       sum8 += v[jrow]*x[8*idx[jrow]+7];
1214       jrow++;
1215     }
1216     y[8*i]   = sum1;
1217     y[8*i+1] = sum2;
1218     y[8*i+2] = sum3;
1219     y[8*i+3] = sum4;
1220     y[8*i+4] = sum5;
1221     y[8*i+5] = sum6;
1222     y[8*i+6] = sum7;
1223     y[8*i+7] = sum8;
1224   }
1225 
1226   PetscCall(PetscLogFlops(16.0*a->nz - 8.0*nonzerorow));
1227   PetscCall(VecRestoreArrayRead(xx,&x));
1228   PetscCall(VecRestoreArray(yy,&y));
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1233 {
1234   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1235   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1236   const PetscScalar *x,*v;
1237   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1238   const PetscInt    m = b->AIJ->rmap->n,*idx;
1239   PetscInt          n,i;
1240 
1241   PetscFunctionBegin;
1242   PetscCall(VecSet(yy,0.0));
1243   PetscCall(VecGetArrayRead(xx,&x));
1244   PetscCall(VecGetArray(yy,&y));
1245 
1246   for (i=0; i<m; i++) {
1247     idx    = a->j + a->i[i];
1248     v      = a->a + a->i[i];
1249     n      = a->i[i+1] - a->i[i];
1250     alpha1 = x[8*i];
1251     alpha2 = x[8*i+1];
1252     alpha3 = x[8*i+2];
1253     alpha4 = x[8*i+3];
1254     alpha5 = x[8*i+4];
1255     alpha6 = x[8*i+5];
1256     alpha7 = x[8*i+6];
1257     alpha8 = x[8*i+7];
1258     while (n-->0) {
1259       y[8*(*idx)]   += alpha1*(*v);
1260       y[8*(*idx)+1] += alpha2*(*v);
1261       y[8*(*idx)+2] += alpha3*(*v);
1262       y[8*(*idx)+3] += alpha4*(*v);
1263       y[8*(*idx)+4] += alpha5*(*v);
1264       y[8*(*idx)+5] += alpha6*(*v);
1265       y[8*(*idx)+6] += alpha7*(*v);
1266       y[8*(*idx)+7] += alpha8*(*v);
1267       idx++; v++;
1268     }
1269   }
1270   PetscCall(PetscLogFlops(16.0*a->nz));
1271   PetscCall(VecRestoreArrayRead(xx,&x));
1272   PetscCall(VecRestoreArray(yy,&y));
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1277 {
1278   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1279   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1280   const PetscScalar *x,*v;
1281   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1282   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1283   PetscInt          n,i,jrow,j;
1284 
1285   PetscFunctionBegin;
1286   if (yy != zz) PetscCall(VecCopy(yy,zz));
1287   PetscCall(VecGetArrayRead(xx,&x));
1288   PetscCall(VecGetArray(zz,&y));
1289   idx  = a->j;
1290   v    = a->a;
1291   ii   = a->i;
1292 
1293   for (i=0; i<m; i++) {
1294     jrow = ii[i];
1295     n    = ii[i+1] - jrow;
1296     sum1 = 0.0;
1297     sum2 = 0.0;
1298     sum3 = 0.0;
1299     sum4 = 0.0;
1300     sum5 = 0.0;
1301     sum6 = 0.0;
1302     sum7 = 0.0;
1303     sum8 = 0.0;
1304     for (j=0; j<n; j++) {
1305       sum1 += v[jrow]*x[8*idx[jrow]];
1306       sum2 += v[jrow]*x[8*idx[jrow]+1];
1307       sum3 += v[jrow]*x[8*idx[jrow]+2];
1308       sum4 += v[jrow]*x[8*idx[jrow]+3];
1309       sum5 += v[jrow]*x[8*idx[jrow]+4];
1310       sum6 += v[jrow]*x[8*idx[jrow]+5];
1311       sum7 += v[jrow]*x[8*idx[jrow]+6];
1312       sum8 += v[jrow]*x[8*idx[jrow]+7];
1313       jrow++;
1314     }
1315     y[8*i]   += sum1;
1316     y[8*i+1] += sum2;
1317     y[8*i+2] += sum3;
1318     y[8*i+3] += sum4;
1319     y[8*i+4] += sum5;
1320     y[8*i+5] += sum6;
1321     y[8*i+6] += sum7;
1322     y[8*i+7] += sum8;
1323   }
1324 
1325   PetscCall(PetscLogFlops(16.0*a->nz));
1326   PetscCall(VecRestoreArrayRead(xx,&x));
1327   PetscCall(VecRestoreArray(zz,&y));
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1332 {
1333   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1334   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1335   const PetscScalar *x,*v;
1336   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1337   const PetscInt    m = b->AIJ->rmap->n,*idx;
1338   PetscInt          n,i;
1339 
1340   PetscFunctionBegin;
1341   if (yy != zz) PetscCall(VecCopy(yy,zz));
1342   PetscCall(VecGetArrayRead(xx,&x));
1343   PetscCall(VecGetArray(zz,&y));
1344   for (i=0; i<m; i++) {
1345     idx    = a->j + a->i[i];
1346     v      = a->a + a->i[i];
1347     n      = a->i[i+1] - a->i[i];
1348     alpha1 = x[8*i];
1349     alpha2 = x[8*i+1];
1350     alpha3 = x[8*i+2];
1351     alpha4 = x[8*i+3];
1352     alpha5 = x[8*i+4];
1353     alpha6 = x[8*i+5];
1354     alpha7 = x[8*i+6];
1355     alpha8 = x[8*i+7];
1356     while (n-->0) {
1357       y[8*(*idx)]   += alpha1*(*v);
1358       y[8*(*idx)+1] += alpha2*(*v);
1359       y[8*(*idx)+2] += alpha3*(*v);
1360       y[8*(*idx)+3] += alpha4*(*v);
1361       y[8*(*idx)+4] += alpha5*(*v);
1362       y[8*(*idx)+5] += alpha6*(*v);
1363       y[8*(*idx)+6] += alpha7*(*v);
1364       y[8*(*idx)+7] += alpha8*(*v);
1365       idx++; v++;
1366     }
1367   }
1368   PetscCall(PetscLogFlops(16.0*a->nz));
1369   PetscCall(VecRestoreArrayRead(xx,&x));
1370   PetscCall(VecRestoreArray(zz,&y));
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 /* ------------------------------------------------------------------------------*/
1375 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1376 {
1377   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1378   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1379   const PetscScalar *x,*v;
1380   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1381   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1382   PetscInt          nonzerorow=0,n,i,jrow,j;
1383 
1384   PetscFunctionBegin;
1385   PetscCall(VecGetArrayRead(xx,&x));
1386   PetscCall(VecGetArray(yy,&y));
1387   idx  = a->j;
1388   v    = a->a;
1389   ii   = a->i;
1390 
1391   for (i=0; i<m; i++) {
1392     jrow  = ii[i];
1393     n     = ii[i+1] - jrow;
1394     sum1  = 0.0;
1395     sum2  = 0.0;
1396     sum3  = 0.0;
1397     sum4  = 0.0;
1398     sum5  = 0.0;
1399     sum6  = 0.0;
1400     sum7  = 0.0;
1401     sum8  = 0.0;
1402     sum9  = 0.0;
1403 
1404     nonzerorow += (n>0);
1405     for (j=0; j<n; j++) {
1406       sum1 += v[jrow]*x[9*idx[jrow]];
1407       sum2 += v[jrow]*x[9*idx[jrow]+1];
1408       sum3 += v[jrow]*x[9*idx[jrow]+2];
1409       sum4 += v[jrow]*x[9*idx[jrow]+3];
1410       sum5 += v[jrow]*x[9*idx[jrow]+4];
1411       sum6 += v[jrow]*x[9*idx[jrow]+5];
1412       sum7 += v[jrow]*x[9*idx[jrow]+6];
1413       sum8 += v[jrow]*x[9*idx[jrow]+7];
1414       sum9 += v[jrow]*x[9*idx[jrow]+8];
1415       jrow++;
1416     }
1417     y[9*i]   = sum1;
1418     y[9*i+1] = sum2;
1419     y[9*i+2] = sum3;
1420     y[9*i+3] = sum4;
1421     y[9*i+4] = sum5;
1422     y[9*i+5] = sum6;
1423     y[9*i+6] = sum7;
1424     y[9*i+7] = sum8;
1425     y[9*i+8] = sum9;
1426   }
1427 
1428   PetscCall(PetscLogFlops(18.0*a->nz - 9*nonzerorow));
1429   PetscCall(VecRestoreArrayRead(xx,&x));
1430   PetscCall(VecRestoreArray(yy,&y));
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 /* ------------------------------------------------------------------------------*/
1435 
1436 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1437 {
1438   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1439   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1440   const PetscScalar *x,*v;
1441   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1442   const PetscInt    m = b->AIJ->rmap->n,*idx;
1443   PetscInt          n,i;
1444 
1445   PetscFunctionBegin;
1446   PetscCall(VecSet(yy,0.0));
1447   PetscCall(VecGetArrayRead(xx,&x));
1448   PetscCall(VecGetArray(yy,&y));
1449 
1450   for (i=0; i<m; i++) {
1451     idx    = a->j + a->i[i];
1452     v      = a->a + a->i[i];
1453     n      = a->i[i+1] - a->i[i];
1454     alpha1 = x[9*i];
1455     alpha2 = x[9*i+1];
1456     alpha3 = x[9*i+2];
1457     alpha4 = x[9*i+3];
1458     alpha5 = x[9*i+4];
1459     alpha6 = x[9*i+5];
1460     alpha7 = x[9*i+6];
1461     alpha8 = x[9*i+7];
1462     alpha9 = x[9*i+8];
1463     while (n-->0) {
1464       y[9*(*idx)]   += alpha1*(*v);
1465       y[9*(*idx)+1] += alpha2*(*v);
1466       y[9*(*idx)+2] += alpha3*(*v);
1467       y[9*(*idx)+3] += alpha4*(*v);
1468       y[9*(*idx)+4] += alpha5*(*v);
1469       y[9*(*idx)+5] += alpha6*(*v);
1470       y[9*(*idx)+6] += alpha7*(*v);
1471       y[9*(*idx)+7] += alpha8*(*v);
1472       y[9*(*idx)+8] += alpha9*(*v);
1473       idx++; v++;
1474     }
1475   }
1476   PetscCall(PetscLogFlops(18.0*a->nz));
1477   PetscCall(VecRestoreArrayRead(xx,&x));
1478   PetscCall(VecRestoreArray(yy,&y));
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1483 {
1484   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1485   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1486   const PetscScalar *x,*v;
1487   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1488   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1489   PetscInt          n,i,jrow,j;
1490 
1491   PetscFunctionBegin;
1492   if (yy != zz) PetscCall(VecCopy(yy,zz));
1493   PetscCall(VecGetArrayRead(xx,&x));
1494   PetscCall(VecGetArray(zz,&y));
1495   idx  = a->j;
1496   v    = a->a;
1497   ii   = a->i;
1498 
1499   for (i=0; i<m; i++) {
1500     jrow = ii[i];
1501     n    = ii[i+1] - jrow;
1502     sum1 = 0.0;
1503     sum2 = 0.0;
1504     sum3 = 0.0;
1505     sum4 = 0.0;
1506     sum5 = 0.0;
1507     sum6 = 0.0;
1508     sum7 = 0.0;
1509     sum8 = 0.0;
1510     sum9 = 0.0;
1511     for (j=0; j<n; j++) {
1512       sum1 += v[jrow]*x[9*idx[jrow]];
1513       sum2 += v[jrow]*x[9*idx[jrow]+1];
1514       sum3 += v[jrow]*x[9*idx[jrow]+2];
1515       sum4 += v[jrow]*x[9*idx[jrow]+3];
1516       sum5 += v[jrow]*x[9*idx[jrow]+4];
1517       sum6 += v[jrow]*x[9*idx[jrow]+5];
1518       sum7 += v[jrow]*x[9*idx[jrow]+6];
1519       sum8 += v[jrow]*x[9*idx[jrow]+7];
1520       sum9 += v[jrow]*x[9*idx[jrow]+8];
1521       jrow++;
1522     }
1523     y[9*i]   += sum1;
1524     y[9*i+1] += sum2;
1525     y[9*i+2] += sum3;
1526     y[9*i+3] += sum4;
1527     y[9*i+4] += sum5;
1528     y[9*i+5] += sum6;
1529     y[9*i+6] += sum7;
1530     y[9*i+7] += sum8;
1531     y[9*i+8] += sum9;
1532   }
1533 
1534   PetscCall(PetscLogFlops(18.0*a->nz));
1535   PetscCall(VecRestoreArrayRead(xx,&x));
1536   PetscCall(VecRestoreArray(zz,&y));
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1541 {
1542   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1543   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1544   const PetscScalar *x,*v;
1545   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1546   const PetscInt    m = b->AIJ->rmap->n,*idx;
1547   PetscInt          n,i;
1548 
1549   PetscFunctionBegin;
1550   if (yy != zz) PetscCall(VecCopy(yy,zz));
1551   PetscCall(VecGetArrayRead(xx,&x));
1552   PetscCall(VecGetArray(zz,&y));
1553   for (i=0; i<m; i++) {
1554     idx    = a->j + a->i[i];
1555     v      = a->a + a->i[i];
1556     n      = a->i[i+1] - a->i[i];
1557     alpha1 = x[9*i];
1558     alpha2 = x[9*i+1];
1559     alpha3 = x[9*i+2];
1560     alpha4 = x[9*i+3];
1561     alpha5 = x[9*i+4];
1562     alpha6 = x[9*i+5];
1563     alpha7 = x[9*i+6];
1564     alpha8 = x[9*i+7];
1565     alpha9 = x[9*i+8];
1566     while (n-->0) {
1567       y[9*(*idx)]   += alpha1*(*v);
1568       y[9*(*idx)+1] += alpha2*(*v);
1569       y[9*(*idx)+2] += alpha3*(*v);
1570       y[9*(*idx)+3] += alpha4*(*v);
1571       y[9*(*idx)+4] += alpha5*(*v);
1572       y[9*(*idx)+5] += alpha6*(*v);
1573       y[9*(*idx)+6] += alpha7*(*v);
1574       y[9*(*idx)+7] += alpha8*(*v);
1575       y[9*(*idx)+8] += alpha9*(*v);
1576       idx++; v++;
1577     }
1578   }
1579   PetscCall(PetscLogFlops(18.0*a->nz));
1580   PetscCall(VecRestoreArrayRead(xx,&x));
1581   PetscCall(VecRestoreArray(zz,&y));
1582   PetscFunctionReturn(0);
1583 }
1584 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1585 {
1586   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1587   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1588   const PetscScalar *x,*v;
1589   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1590   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1591   PetscInt          nonzerorow=0,n,i,jrow,j;
1592 
1593   PetscFunctionBegin;
1594   PetscCall(VecGetArrayRead(xx,&x));
1595   PetscCall(VecGetArray(yy,&y));
1596   idx  = a->j;
1597   v    = a->a;
1598   ii   = a->i;
1599 
1600   for (i=0; i<m; i++) {
1601     jrow  = ii[i];
1602     n     = ii[i+1] - jrow;
1603     sum1  = 0.0;
1604     sum2  = 0.0;
1605     sum3  = 0.0;
1606     sum4  = 0.0;
1607     sum5  = 0.0;
1608     sum6  = 0.0;
1609     sum7  = 0.0;
1610     sum8  = 0.0;
1611     sum9  = 0.0;
1612     sum10 = 0.0;
1613 
1614     nonzerorow += (n>0);
1615     for (j=0; j<n; j++) {
1616       sum1  += v[jrow]*x[10*idx[jrow]];
1617       sum2  += v[jrow]*x[10*idx[jrow]+1];
1618       sum3  += v[jrow]*x[10*idx[jrow]+2];
1619       sum4  += v[jrow]*x[10*idx[jrow]+3];
1620       sum5  += v[jrow]*x[10*idx[jrow]+4];
1621       sum6  += v[jrow]*x[10*idx[jrow]+5];
1622       sum7  += v[jrow]*x[10*idx[jrow]+6];
1623       sum8  += v[jrow]*x[10*idx[jrow]+7];
1624       sum9  += v[jrow]*x[10*idx[jrow]+8];
1625       sum10 += v[jrow]*x[10*idx[jrow]+9];
1626       jrow++;
1627     }
1628     y[10*i]   = sum1;
1629     y[10*i+1] = sum2;
1630     y[10*i+2] = sum3;
1631     y[10*i+3] = sum4;
1632     y[10*i+4] = sum5;
1633     y[10*i+5] = sum6;
1634     y[10*i+6] = sum7;
1635     y[10*i+7] = sum8;
1636     y[10*i+8] = sum9;
1637     y[10*i+9] = sum10;
1638   }
1639 
1640   PetscCall(PetscLogFlops(20.0*a->nz - 10.0*nonzerorow));
1641   PetscCall(VecRestoreArrayRead(xx,&x));
1642   PetscCall(VecRestoreArray(yy,&y));
1643   PetscFunctionReturn(0);
1644 }
1645 
1646 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1647 {
1648   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1649   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1650   const PetscScalar *x,*v;
1651   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1652   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1653   PetscInt          n,i,jrow,j;
1654 
1655   PetscFunctionBegin;
1656   if (yy != zz) PetscCall(VecCopy(yy,zz));
1657   PetscCall(VecGetArrayRead(xx,&x));
1658   PetscCall(VecGetArray(zz,&y));
1659   idx  = a->j;
1660   v    = a->a;
1661   ii   = a->i;
1662 
1663   for (i=0; i<m; i++) {
1664     jrow  = ii[i];
1665     n     = ii[i+1] - jrow;
1666     sum1  = 0.0;
1667     sum2  = 0.0;
1668     sum3  = 0.0;
1669     sum4  = 0.0;
1670     sum5  = 0.0;
1671     sum6  = 0.0;
1672     sum7  = 0.0;
1673     sum8  = 0.0;
1674     sum9  = 0.0;
1675     sum10 = 0.0;
1676     for (j=0; j<n; j++) {
1677       sum1  += v[jrow]*x[10*idx[jrow]];
1678       sum2  += v[jrow]*x[10*idx[jrow]+1];
1679       sum3  += v[jrow]*x[10*idx[jrow]+2];
1680       sum4  += v[jrow]*x[10*idx[jrow]+3];
1681       sum5  += v[jrow]*x[10*idx[jrow]+4];
1682       sum6  += v[jrow]*x[10*idx[jrow]+5];
1683       sum7  += v[jrow]*x[10*idx[jrow]+6];
1684       sum8  += v[jrow]*x[10*idx[jrow]+7];
1685       sum9  += v[jrow]*x[10*idx[jrow]+8];
1686       sum10 += v[jrow]*x[10*idx[jrow]+9];
1687       jrow++;
1688     }
1689     y[10*i]   += sum1;
1690     y[10*i+1] += sum2;
1691     y[10*i+2] += sum3;
1692     y[10*i+3] += sum4;
1693     y[10*i+4] += sum5;
1694     y[10*i+5] += sum6;
1695     y[10*i+6] += sum7;
1696     y[10*i+7] += sum8;
1697     y[10*i+8] += sum9;
1698     y[10*i+9] += sum10;
1699   }
1700 
1701   PetscCall(PetscLogFlops(20.0*a->nz));
1702   PetscCall(VecRestoreArrayRead(xx,&x));
1703   PetscCall(VecRestoreArray(yy,&y));
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1708 {
1709   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1710   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1711   const PetscScalar *x,*v;
1712   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1713   const PetscInt    m = b->AIJ->rmap->n,*idx;
1714   PetscInt          n,i;
1715 
1716   PetscFunctionBegin;
1717   PetscCall(VecSet(yy,0.0));
1718   PetscCall(VecGetArrayRead(xx,&x));
1719   PetscCall(VecGetArray(yy,&y));
1720 
1721   for (i=0; i<m; i++) {
1722     idx     = a->j + a->i[i];
1723     v       = a->a + a->i[i];
1724     n       = a->i[i+1] - a->i[i];
1725     alpha1  = x[10*i];
1726     alpha2  = x[10*i+1];
1727     alpha3  = x[10*i+2];
1728     alpha4  = x[10*i+3];
1729     alpha5  = x[10*i+4];
1730     alpha6  = x[10*i+5];
1731     alpha7  = x[10*i+6];
1732     alpha8  = x[10*i+7];
1733     alpha9  = x[10*i+8];
1734     alpha10 = x[10*i+9];
1735     while (n-->0) {
1736       y[10*(*idx)]   += alpha1*(*v);
1737       y[10*(*idx)+1] += alpha2*(*v);
1738       y[10*(*idx)+2] += alpha3*(*v);
1739       y[10*(*idx)+3] += alpha4*(*v);
1740       y[10*(*idx)+4] += alpha5*(*v);
1741       y[10*(*idx)+5] += alpha6*(*v);
1742       y[10*(*idx)+6] += alpha7*(*v);
1743       y[10*(*idx)+7] += alpha8*(*v);
1744       y[10*(*idx)+8] += alpha9*(*v);
1745       y[10*(*idx)+9] += alpha10*(*v);
1746       idx++; v++;
1747     }
1748   }
1749   PetscCall(PetscLogFlops(20.0*a->nz));
1750   PetscCall(VecRestoreArrayRead(xx,&x));
1751   PetscCall(VecRestoreArray(yy,&y));
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1756 {
1757   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1758   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1759   const PetscScalar *x,*v;
1760   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1761   const PetscInt    m = b->AIJ->rmap->n,*idx;
1762   PetscInt          n,i;
1763 
1764   PetscFunctionBegin;
1765   if (yy != zz) PetscCall(VecCopy(yy,zz));
1766   PetscCall(VecGetArrayRead(xx,&x));
1767   PetscCall(VecGetArray(zz,&y));
1768   for (i=0; i<m; i++) {
1769     idx     = a->j + a->i[i];
1770     v       = a->a + a->i[i];
1771     n       = a->i[i+1] - a->i[i];
1772     alpha1  = x[10*i];
1773     alpha2  = x[10*i+1];
1774     alpha3  = x[10*i+2];
1775     alpha4  = x[10*i+3];
1776     alpha5  = x[10*i+4];
1777     alpha6  = x[10*i+5];
1778     alpha7  = x[10*i+6];
1779     alpha8  = x[10*i+7];
1780     alpha9  = x[10*i+8];
1781     alpha10 = x[10*i+9];
1782     while (n-->0) {
1783       y[10*(*idx)]   += alpha1*(*v);
1784       y[10*(*idx)+1] += alpha2*(*v);
1785       y[10*(*idx)+2] += alpha3*(*v);
1786       y[10*(*idx)+3] += alpha4*(*v);
1787       y[10*(*idx)+4] += alpha5*(*v);
1788       y[10*(*idx)+5] += alpha6*(*v);
1789       y[10*(*idx)+6] += alpha7*(*v);
1790       y[10*(*idx)+7] += alpha8*(*v);
1791       y[10*(*idx)+8] += alpha9*(*v);
1792       y[10*(*idx)+9] += alpha10*(*v);
1793       idx++; v++;
1794     }
1795   }
1796   PetscCall(PetscLogFlops(20.0*a->nz));
1797   PetscCall(VecRestoreArrayRead(xx,&x));
1798   PetscCall(VecRestoreArray(zz,&y));
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 /*--------------------------------------------------------------------------------------------*/
1803 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1804 {
1805   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1806   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1807   const PetscScalar *x,*v;
1808   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1809   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1810   PetscInt          nonzerorow=0,n,i,jrow,j;
1811 
1812   PetscFunctionBegin;
1813   PetscCall(VecGetArrayRead(xx,&x));
1814   PetscCall(VecGetArray(yy,&y));
1815   idx  = a->j;
1816   v    = a->a;
1817   ii   = a->i;
1818 
1819   for (i=0; i<m; i++) {
1820     jrow  = ii[i];
1821     n     = ii[i+1] - jrow;
1822     sum1  = 0.0;
1823     sum2  = 0.0;
1824     sum3  = 0.0;
1825     sum4  = 0.0;
1826     sum5  = 0.0;
1827     sum6  = 0.0;
1828     sum7  = 0.0;
1829     sum8  = 0.0;
1830     sum9  = 0.0;
1831     sum10 = 0.0;
1832     sum11 = 0.0;
1833 
1834     nonzerorow += (n>0);
1835     for (j=0; j<n; j++) {
1836       sum1  += v[jrow]*x[11*idx[jrow]];
1837       sum2  += v[jrow]*x[11*idx[jrow]+1];
1838       sum3  += v[jrow]*x[11*idx[jrow]+2];
1839       sum4  += v[jrow]*x[11*idx[jrow]+3];
1840       sum5  += v[jrow]*x[11*idx[jrow]+4];
1841       sum6  += v[jrow]*x[11*idx[jrow]+5];
1842       sum7  += v[jrow]*x[11*idx[jrow]+6];
1843       sum8  += v[jrow]*x[11*idx[jrow]+7];
1844       sum9  += v[jrow]*x[11*idx[jrow]+8];
1845       sum10 += v[jrow]*x[11*idx[jrow]+9];
1846       sum11 += v[jrow]*x[11*idx[jrow]+10];
1847       jrow++;
1848     }
1849     y[11*i]    = sum1;
1850     y[11*i+1]  = sum2;
1851     y[11*i+2]  = sum3;
1852     y[11*i+3]  = sum4;
1853     y[11*i+4]  = sum5;
1854     y[11*i+5]  = sum6;
1855     y[11*i+6]  = sum7;
1856     y[11*i+7]  = sum8;
1857     y[11*i+8]  = sum9;
1858     y[11*i+9]  = sum10;
1859     y[11*i+10] = sum11;
1860   }
1861 
1862   PetscCall(PetscLogFlops(22.0*a->nz - 11*nonzerorow));
1863   PetscCall(VecRestoreArrayRead(xx,&x));
1864   PetscCall(VecRestoreArray(yy,&y));
1865   PetscFunctionReturn(0);
1866 }
1867 
1868 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1869 {
1870   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1871   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1872   const PetscScalar *x,*v;
1873   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1874   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1875   PetscInt          n,i,jrow,j;
1876 
1877   PetscFunctionBegin;
1878   if (yy != zz) PetscCall(VecCopy(yy,zz));
1879   PetscCall(VecGetArrayRead(xx,&x));
1880   PetscCall(VecGetArray(zz,&y));
1881   idx  = a->j;
1882   v    = a->a;
1883   ii   = a->i;
1884 
1885   for (i=0; i<m; i++) {
1886     jrow  = ii[i];
1887     n     = ii[i+1] - jrow;
1888     sum1  = 0.0;
1889     sum2  = 0.0;
1890     sum3  = 0.0;
1891     sum4  = 0.0;
1892     sum5  = 0.0;
1893     sum6  = 0.0;
1894     sum7  = 0.0;
1895     sum8  = 0.0;
1896     sum9  = 0.0;
1897     sum10 = 0.0;
1898     sum11 = 0.0;
1899     for (j=0; j<n; j++) {
1900       sum1  += v[jrow]*x[11*idx[jrow]];
1901       sum2  += v[jrow]*x[11*idx[jrow]+1];
1902       sum3  += v[jrow]*x[11*idx[jrow]+2];
1903       sum4  += v[jrow]*x[11*idx[jrow]+3];
1904       sum5  += v[jrow]*x[11*idx[jrow]+4];
1905       sum6  += v[jrow]*x[11*idx[jrow]+5];
1906       sum7  += v[jrow]*x[11*idx[jrow]+6];
1907       sum8  += v[jrow]*x[11*idx[jrow]+7];
1908       sum9  += v[jrow]*x[11*idx[jrow]+8];
1909       sum10 += v[jrow]*x[11*idx[jrow]+9];
1910       sum11 += v[jrow]*x[11*idx[jrow]+10];
1911       jrow++;
1912     }
1913     y[11*i]    += sum1;
1914     y[11*i+1]  += sum2;
1915     y[11*i+2]  += sum3;
1916     y[11*i+3]  += sum4;
1917     y[11*i+4]  += sum5;
1918     y[11*i+5]  += sum6;
1919     y[11*i+6]  += sum7;
1920     y[11*i+7]  += sum8;
1921     y[11*i+8]  += sum9;
1922     y[11*i+9]  += sum10;
1923     y[11*i+10] += sum11;
1924   }
1925 
1926   PetscCall(PetscLogFlops(22.0*a->nz));
1927   PetscCall(VecRestoreArrayRead(xx,&x));
1928   PetscCall(VecRestoreArray(yy,&y));
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1933 {
1934   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1935   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1936   const PetscScalar *x,*v;
1937   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1938   const PetscInt    m = b->AIJ->rmap->n,*idx;
1939   PetscInt          n,i;
1940 
1941   PetscFunctionBegin;
1942   PetscCall(VecSet(yy,0.0));
1943   PetscCall(VecGetArrayRead(xx,&x));
1944   PetscCall(VecGetArray(yy,&y));
1945 
1946   for (i=0; i<m; i++) {
1947     idx     = a->j + a->i[i];
1948     v       = a->a + a->i[i];
1949     n       = a->i[i+1] - a->i[i];
1950     alpha1  = x[11*i];
1951     alpha2  = x[11*i+1];
1952     alpha3  = x[11*i+2];
1953     alpha4  = x[11*i+3];
1954     alpha5  = x[11*i+4];
1955     alpha6  = x[11*i+5];
1956     alpha7  = x[11*i+6];
1957     alpha8  = x[11*i+7];
1958     alpha9  = x[11*i+8];
1959     alpha10 = x[11*i+9];
1960     alpha11 = x[11*i+10];
1961     while (n-->0) {
1962       y[11*(*idx)]    += alpha1*(*v);
1963       y[11*(*idx)+1]  += alpha2*(*v);
1964       y[11*(*idx)+2]  += alpha3*(*v);
1965       y[11*(*idx)+3]  += alpha4*(*v);
1966       y[11*(*idx)+4]  += alpha5*(*v);
1967       y[11*(*idx)+5]  += alpha6*(*v);
1968       y[11*(*idx)+6]  += alpha7*(*v);
1969       y[11*(*idx)+7]  += alpha8*(*v);
1970       y[11*(*idx)+8]  += alpha9*(*v);
1971       y[11*(*idx)+9]  += alpha10*(*v);
1972       y[11*(*idx)+10] += alpha11*(*v);
1973       idx++; v++;
1974     }
1975   }
1976   PetscCall(PetscLogFlops(22.0*a->nz));
1977   PetscCall(VecRestoreArrayRead(xx,&x));
1978   PetscCall(VecRestoreArray(yy,&y));
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1983 {
1984   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1985   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1986   const PetscScalar *x,*v;
1987   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1988   const PetscInt    m = b->AIJ->rmap->n,*idx;
1989   PetscInt          n,i;
1990 
1991   PetscFunctionBegin;
1992   if (yy != zz) PetscCall(VecCopy(yy,zz));
1993   PetscCall(VecGetArrayRead(xx,&x));
1994   PetscCall(VecGetArray(zz,&y));
1995   for (i=0; i<m; i++) {
1996     idx     = a->j + a->i[i];
1997     v       = a->a + a->i[i];
1998     n       = a->i[i+1] - a->i[i];
1999     alpha1  = x[11*i];
2000     alpha2  = x[11*i+1];
2001     alpha3  = x[11*i+2];
2002     alpha4  = x[11*i+3];
2003     alpha5  = x[11*i+4];
2004     alpha6  = x[11*i+5];
2005     alpha7  = x[11*i+6];
2006     alpha8  = x[11*i+7];
2007     alpha9  = x[11*i+8];
2008     alpha10 = x[11*i+9];
2009     alpha11 = x[11*i+10];
2010     while (n-->0) {
2011       y[11*(*idx)]    += alpha1*(*v);
2012       y[11*(*idx)+1]  += alpha2*(*v);
2013       y[11*(*idx)+2]  += alpha3*(*v);
2014       y[11*(*idx)+3]  += alpha4*(*v);
2015       y[11*(*idx)+4]  += alpha5*(*v);
2016       y[11*(*idx)+5]  += alpha6*(*v);
2017       y[11*(*idx)+6]  += alpha7*(*v);
2018       y[11*(*idx)+7]  += alpha8*(*v);
2019       y[11*(*idx)+8]  += alpha9*(*v);
2020       y[11*(*idx)+9]  += alpha10*(*v);
2021       y[11*(*idx)+10] += alpha11*(*v);
2022       idx++; v++;
2023     }
2024   }
2025   PetscCall(PetscLogFlops(22.0*a->nz));
2026   PetscCall(VecRestoreArrayRead(xx,&x));
2027   PetscCall(VecRestoreArray(zz,&y));
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 /*--------------------------------------------------------------------------------------------*/
2032 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2033 {
2034   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2035   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2036   const PetscScalar *x,*v;
2037   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2038   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2039   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2040   PetscInt          nonzerorow=0,n,i,jrow,j;
2041 
2042   PetscFunctionBegin;
2043   PetscCall(VecGetArrayRead(xx,&x));
2044   PetscCall(VecGetArray(yy,&y));
2045   idx  = a->j;
2046   v    = a->a;
2047   ii   = a->i;
2048 
2049   for (i=0; i<m; i++) {
2050     jrow  = ii[i];
2051     n     = ii[i+1] - jrow;
2052     sum1  = 0.0;
2053     sum2  = 0.0;
2054     sum3  = 0.0;
2055     sum4  = 0.0;
2056     sum5  = 0.0;
2057     sum6  = 0.0;
2058     sum7  = 0.0;
2059     sum8  = 0.0;
2060     sum9  = 0.0;
2061     sum10 = 0.0;
2062     sum11 = 0.0;
2063     sum12 = 0.0;
2064     sum13 = 0.0;
2065     sum14 = 0.0;
2066     sum15 = 0.0;
2067     sum16 = 0.0;
2068 
2069     nonzerorow += (n>0);
2070     for (j=0; j<n; j++) {
2071       sum1  += v[jrow]*x[16*idx[jrow]];
2072       sum2  += v[jrow]*x[16*idx[jrow]+1];
2073       sum3  += v[jrow]*x[16*idx[jrow]+2];
2074       sum4  += v[jrow]*x[16*idx[jrow]+3];
2075       sum5  += v[jrow]*x[16*idx[jrow]+4];
2076       sum6  += v[jrow]*x[16*idx[jrow]+5];
2077       sum7  += v[jrow]*x[16*idx[jrow]+6];
2078       sum8  += v[jrow]*x[16*idx[jrow]+7];
2079       sum9  += v[jrow]*x[16*idx[jrow]+8];
2080       sum10 += v[jrow]*x[16*idx[jrow]+9];
2081       sum11 += v[jrow]*x[16*idx[jrow]+10];
2082       sum12 += v[jrow]*x[16*idx[jrow]+11];
2083       sum13 += v[jrow]*x[16*idx[jrow]+12];
2084       sum14 += v[jrow]*x[16*idx[jrow]+13];
2085       sum15 += v[jrow]*x[16*idx[jrow]+14];
2086       sum16 += v[jrow]*x[16*idx[jrow]+15];
2087       jrow++;
2088     }
2089     y[16*i]    = sum1;
2090     y[16*i+1]  = sum2;
2091     y[16*i+2]  = sum3;
2092     y[16*i+3]  = sum4;
2093     y[16*i+4]  = sum5;
2094     y[16*i+5]  = sum6;
2095     y[16*i+6]  = sum7;
2096     y[16*i+7]  = sum8;
2097     y[16*i+8]  = sum9;
2098     y[16*i+9]  = sum10;
2099     y[16*i+10] = sum11;
2100     y[16*i+11] = sum12;
2101     y[16*i+12] = sum13;
2102     y[16*i+13] = sum14;
2103     y[16*i+14] = sum15;
2104     y[16*i+15] = sum16;
2105   }
2106 
2107   PetscCall(PetscLogFlops(32.0*a->nz - 16.0*nonzerorow));
2108   PetscCall(VecRestoreArrayRead(xx,&x));
2109   PetscCall(VecRestoreArray(yy,&y));
2110   PetscFunctionReturn(0);
2111 }
2112 
2113 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2114 {
2115   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2116   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2117   const PetscScalar *x,*v;
2118   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2119   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2120   const PetscInt    m = b->AIJ->rmap->n,*idx;
2121   PetscInt          n,i;
2122 
2123   PetscFunctionBegin;
2124   PetscCall(VecSet(yy,0.0));
2125   PetscCall(VecGetArrayRead(xx,&x));
2126   PetscCall(VecGetArray(yy,&y));
2127 
2128   for (i=0; i<m; i++) {
2129     idx     = a->j + a->i[i];
2130     v       = a->a + a->i[i];
2131     n       = a->i[i+1] - a->i[i];
2132     alpha1  = x[16*i];
2133     alpha2  = x[16*i+1];
2134     alpha3  = x[16*i+2];
2135     alpha4  = x[16*i+3];
2136     alpha5  = x[16*i+4];
2137     alpha6  = x[16*i+5];
2138     alpha7  = x[16*i+6];
2139     alpha8  = x[16*i+7];
2140     alpha9  = x[16*i+8];
2141     alpha10 = x[16*i+9];
2142     alpha11 = x[16*i+10];
2143     alpha12 = x[16*i+11];
2144     alpha13 = x[16*i+12];
2145     alpha14 = x[16*i+13];
2146     alpha15 = x[16*i+14];
2147     alpha16 = x[16*i+15];
2148     while (n-->0) {
2149       y[16*(*idx)]    += alpha1*(*v);
2150       y[16*(*idx)+1]  += alpha2*(*v);
2151       y[16*(*idx)+2]  += alpha3*(*v);
2152       y[16*(*idx)+3]  += alpha4*(*v);
2153       y[16*(*idx)+4]  += alpha5*(*v);
2154       y[16*(*idx)+5]  += alpha6*(*v);
2155       y[16*(*idx)+6]  += alpha7*(*v);
2156       y[16*(*idx)+7]  += alpha8*(*v);
2157       y[16*(*idx)+8]  += alpha9*(*v);
2158       y[16*(*idx)+9]  += alpha10*(*v);
2159       y[16*(*idx)+10] += alpha11*(*v);
2160       y[16*(*idx)+11] += alpha12*(*v);
2161       y[16*(*idx)+12] += alpha13*(*v);
2162       y[16*(*idx)+13] += alpha14*(*v);
2163       y[16*(*idx)+14] += alpha15*(*v);
2164       y[16*(*idx)+15] += alpha16*(*v);
2165       idx++; v++;
2166     }
2167   }
2168   PetscCall(PetscLogFlops(32.0*a->nz));
2169   PetscCall(VecRestoreArrayRead(xx,&x));
2170   PetscCall(VecRestoreArray(yy,&y));
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2175 {
2176   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2177   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2178   const PetscScalar *x,*v;
2179   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2180   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2181   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2182   PetscInt          n,i,jrow,j;
2183 
2184   PetscFunctionBegin;
2185   if (yy != zz) PetscCall(VecCopy(yy,zz));
2186   PetscCall(VecGetArrayRead(xx,&x));
2187   PetscCall(VecGetArray(zz,&y));
2188   idx  = a->j;
2189   v    = a->a;
2190   ii   = a->i;
2191 
2192   for (i=0; i<m; i++) {
2193     jrow  = ii[i];
2194     n     = ii[i+1] - jrow;
2195     sum1  = 0.0;
2196     sum2  = 0.0;
2197     sum3  = 0.0;
2198     sum4  = 0.0;
2199     sum5  = 0.0;
2200     sum6  = 0.0;
2201     sum7  = 0.0;
2202     sum8  = 0.0;
2203     sum9  = 0.0;
2204     sum10 = 0.0;
2205     sum11 = 0.0;
2206     sum12 = 0.0;
2207     sum13 = 0.0;
2208     sum14 = 0.0;
2209     sum15 = 0.0;
2210     sum16 = 0.0;
2211     for (j=0; j<n; j++) {
2212       sum1  += v[jrow]*x[16*idx[jrow]];
2213       sum2  += v[jrow]*x[16*idx[jrow]+1];
2214       sum3  += v[jrow]*x[16*idx[jrow]+2];
2215       sum4  += v[jrow]*x[16*idx[jrow]+3];
2216       sum5  += v[jrow]*x[16*idx[jrow]+4];
2217       sum6  += v[jrow]*x[16*idx[jrow]+5];
2218       sum7  += v[jrow]*x[16*idx[jrow]+6];
2219       sum8  += v[jrow]*x[16*idx[jrow]+7];
2220       sum9  += v[jrow]*x[16*idx[jrow]+8];
2221       sum10 += v[jrow]*x[16*idx[jrow]+9];
2222       sum11 += v[jrow]*x[16*idx[jrow]+10];
2223       sum12 += v[jrow]*x[16*idx[jrow]+11];
2224       sum13 += v[jrow]*x[16*idx[jrow]+12];
2225       sum14 += v[jrow]*x[16*idx[jrow]+13];
2226       sum15 += v[jrow]*x[16*idx[jrow]+14];
2227       sum16 += v[jrow]*x[16*idx[jrow]+15];
2228       jrow++;
2229     }
2230     y[16*i]    += sum1;
2231     y[16*i+1]  += sum2;
2232     y[16*i+2]  += sum3;
2233     y[16*i+3]  += sum4;
2234     y[16*i+4]  += sum5;
2235     y[16*i+5]  += sum6;
2236     y[16*i+6]  += sum7;
2237     y[16*i+7]  += sum8;
2238     y[16*i+8]  += sum9;
2239     y[16*i+9]  += sum10;
2240     y[16*i+10] += sum11;
2241     y[16*i+11] += sum12;
2242     y[16*i+12] += sum13;
2243     y[16*i+13] += sum14;
2244     y[16*i+14] += sum15;
2245     y[16*i+15] += sum16;
2246   }
2247 
2248   PetscCall(PetscLogFlops(32.0*a->nz));
2249   PetscCall(VecRestoreArrayRead(xx,&x));
2250   PetscCall(VecRestoreArray(zz,&y));
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2255 {
2256   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2257   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2258   const PetscScalar *x,*v;
2259   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2260   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2261   const PetscInt    m = b->AIJ->rmap->n,*idx;
2262   PetscInt          n,i;
2263 
2264   PetscFunctionBegin;
2265   if (yy != zz) PetscCall(VecCopy(yy,zz));
2266   PetscCall(VecGetArrayRead(xx,&x));
2267   PetscCall(VecGetArray(zz,&y));
2268   for (i=0; i<m; i++) {
2269     idx     = a->j + a->i[i];
2270     v       = a->a + a->i[i];
2271     n       = a->i[i+1] - a->i[i];
2272     alpha1  = x[16*i];
2273     alpha2  = x[16*i+1];
2274     alpha3  = x[16*i+2];
2275     alpha4  = x[16*i+3];
2276     alpha5  = x[16*i+4];
2277     alpha6  = x[16*i+5];
2278     alpha7  = x[16*i+6];
2279     alpha8  = x[16*i+7];
2280     alpha9  = x[16*i+8];
2281     alpha10 = x[16*i+9];
2282     alpha11 = x[16*i+10];
2283     alpha12 = x[16*i+11];
2284     alpha13 = x[16*i+12];
2285     alpha14 = x[16*i+13];
2286     alpha15 = x[16*i+14];
2287     alpha16 = x[16*i+15];
2288     while (n-->0) {
2289       y[16*(*idx)]    += alpha1*(*v);
2290       y[16*(*idx)+1]  += alpha2*(*v);
2291       y[16*(*idx)+2]  += alpha3*(*v);
2292       y[16*(*idx)+3]  += alpha4*(*v);
2293       y[16*(*idx)+4]  += alpha5*(*v);
2294       y[16*(*idx)+5]  += alpha6*(*v);
2295       y[16*(*idx)+6]  += alpha7*(*v);
2296       y[16*(*idx)+7]  += alpha8*(*v);
2297       y[16*(*idx)+8]  += alpha9*(*v);
2298       y[16*(*idx)+9]  += alpha10*(*v);
2299       y[16*(*idx)+10] += alpha11*(*v);
2300       y[16*(*idx)+11] += alpha12*(*v);
2301       y[16*(*idx)+12] += alpha13*(*v);
2302       y[16*(*idx)+13] += alpha14*(*v);
2303       y[16*(*idx)+14] += alpha15*(*v);
2304       y[16*(*idx)+15] += alpha16*(*v);
2305       idx++; v++;
2306     }
2307   }
2308   PetscCall(PetscLogFlops(32.0*a->nz));
2309   PetscCall(VecRestoreArrayRead(xx,&x));
2310   PetscCall(VecRestoreArray(zz,&y));
2311   PetscFunctionReturn(0);
2312 }
2313 
2314 /*--------------------------------------------------------------------------------------------*/
2315 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2316 {
2317   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2318   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2319   const PetscScalar *x,*v;
2320   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2321   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2322   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2323   PetscInt          nonzerorow=0,n,i,jrow,j;
2324 
2325   PetscFunctionBegin;
2326   PetscCall(VecGetArrayRead(xx,&x));
2327   PetscCall(VecGetArray(yy,&y));
2328   idx  = a->j;
2329   v    = a->a;
2330   ii   = a->i;
2331 
2332   for (i=0; i<m; i++) {
2333     jrow  = ii[i];
2334     n     = ii[i+1] - jrow;
2335     sum1  = 0.0;
2336     sum2  = 0.0;
2337     sum3  = 0.0;
2338     sum4  = 0.0;
2339     sum5  = 0.0;
2340     sum6  = 0.0;
2341     sum7  = 0.0;
2342     sum8  = 0.0;
2343     sum9  = 0.0;
2344     sum10 = 0.0;
2345     sum11 = 0.0;
2346     sum12 = 0.0;
2347     sum13 = 0.0;
2348     sum14 = 0.0;
2349     sum15 = 0.0;
2350     sum16 = 0.0;
2351     sum17 = 0.0;
2352     sum18 = 0.0;
2353 
2354     nonzerorow += (n>0);
2355     for (j=0; j<n; j++) {
2356       sum1  += v[jrow]*x[18*idx[jrow]];
2357       sum2  += v[jrow]*x[18*idx[jrow]+1];
2358       sum3  += v[jrow]*x[18*idx[jrow]+2];
2359       sum4  += v[jrow]*x[18*idx[jrow]+3];
2360       sum5  += v[jrow]*x[18*idx[jrow]+4];
2361       sum6  += v[jrow]*x[18*idx[jrow]+5];
2362       sum7  += v[jrow]*x[18*idx[jrow]+6];
2363       sum8  += v[jrow]*x[18*idx[jrow]+7];
2364       sum9  += v[jrow]*x[18*idx[jrow]+8];
2365       sum10 += v[jrow]*x[18*idx[jrow]+9];
2366       sum11 += v[jrow]*x[18*idx[jrow]+10];
2367       sum12 += v[jrow]*x[18*idx[jrow]+11];
2368       sum13 += v[jrow]*x[18*idx[jrow]+12];
2369       sum14 += v[jrow]*x[18*idx[jrow]+13];
2370       sum15 += v[jrow]*x[18*idx[jrow]+14];
2371       sum16 += v[jrow]*x[18*idx[jrow]+15];
2372       sum17 += v[jrow]*x[18*idx[jrow]+16];
2373       sum18 += v[jrow]*x[18*idx[jrow]+17];
2374       jrow++;
2375     }
2376     y[18*i]    = sum1;
2377     y[18*i+1]  = sum2;
2378     y[18*i+2]  = sum3;
2379     y[18*i+3]  = sum4;
2380     y[18*i+4]  = sum5;
2381     y[18*i+5]  = sum6;
2382     y[18*i+6]  = sum7;
2383     y[18*i+7]  = sum8;
2384     y[18*i+8]  = sum9;
2385     y[18*i+9]  = sum10;
2386     y[18*i+10] = sum11;
2387     y[18*i+11] = sum12;
2388     y[18*i+12] = sum13;
2389     y[18*i+13] = sum14;
2390     y[18*i+14] = sum15;
2391     y[18*i+15] = sum16;
2392     y[18*i+16] = sum17;
2393     y[18*i+17] = sum18;
2394   }
2395 
2396   PetscCall(PetscLogFlops(36.0*a->nz - 18.0*nonzerorow));
2397   PetscCall(VecRestoreArrayRead(xx,&x));
2398   PetscCall(VecRestoreArray(yy,&y));
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2403 {
2404   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2405   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2406   const PetscScalar *x,*v;
2407   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2408   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2409   const PetscInt    m = b->AIJ->rmap->n,*idx;
2410   PetscInt          n,i;
2411 
2412   PetscFunctionBegin;
2413   PetscCall(VecSet(yy,0.0));
2414   PetscCall(VecGetArrayRead(xx,&x));
2415   PetscCall(VecGetArray(yy,&y));
2416 
2417   for (i=0; i<m; i++) {
2418     idx     = a->j + a->i[i];
2419     v       = a->a + a->i[i];
2420     n       = a->i[i+1] - a->i[i];
2421     alpha1  = x[18*i];
2422     alpha2  = x[18*i+1];
2423     alpha3  = x[18*i+2];
2424     alpha4  = x[18*i+3];
2425     alpha5  = x[18*i+4];
2426     alpha6  = x[18*i+5];
2427     alpha7  = x[18*i+6];
2428     alpha8  = x[18*i+7];
2429     alpha9  = x[18*i+8];
2430     alpha10 = x[18*i+9];
2431     alpha11 = x[18*i+10];
2432     alpha12 = x[18*i+11];
2433     alpha13 = x[18*i+12];
2434     alpha14 = x[18*i+13];
2435     alpha15 = x[18*i+14];
2436     alpha16 = x[18*i+15];
2437     alpha17 = x[18*i+16];
2438     alpha18 = x[18*i+17];
2439     while (n-->0) {
2440       y[18*(*idx)]    += alpha1*(*v);
2441       y[18*(*idx)+1]  += alpha2*(*v);
2442       y[18*(*idx)+2]  += alpha3*(*v);
2443       y[18*(*idx)+3]  += alpha4*(*v);
2444       y[18*(*idx)+4]  += alpha5*(*v);
2445       y[18*(*idx)+5]  += alpha6*(*v);
2446       y[18*(*idx)+6]  += alpha7*(*v);
2447       y[18*(*idx)+7]  += alpha8*(*v);
2448       y[18*(*idx)+8]  += alpha9*(*v);
2449       y[18*(*idx)+9]  += alpha10*(*v);
2450       y[18*(*idx)+10] += alpha11*(*v);
2451       y[18*(*idx)+11] += alpha12*(*v);
2452       y[18*(*idx)+12] += alpha13*(*v);
2453       y[18*(*idx)+13] += alpha14*(*v);
2454       y[18*(*idx)+14] += alpha15*(*v);
2455       y[18*(*idx)+15] += alpha16*(*v);
2456       y[18*(*idx)+16] += alpha17*(*v);
2457       y[18*(*idx)+17] += alpha18*(*v);
2458       idx++; v++;
2459     }
2460   }
2461   PetscCall(PetscLogFlops(36.0*a->nz));
2462   PetscCall(VecRestoreArrayRead(xx,&x));
2463   PetscCall(VecRestoreArray(yy,&y));
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2468 {
2469   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2470   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2471   const PetscScalar *x,*v;
2472   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2473   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2474   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2475   PetscInt          n,i,jrow,j;
2476 
2477   PetscFunctionBegin;
2478   if (yy != zz) PetscCall(VecCopy(yy,zz));
2479   PetscCall(VecGetArrayRead(xx,&x));
2480   PetscCall(VecGetArray(zz,&y));
2481   idx  = a->j;
2482   v    = a->a;
2483   ii   = a->i;
2484 
2485   for (i=0; i<m; i++) {
2486     jrow  = ii[i];
2487     n     = ii[i+1] - jrow;
2488     sum1  = 0.0;
2489     sum2  = 0.0;
2490     sum3  = 0.0;
2491     sum4  = 0.0;
2492     sum5  = 0.0;
2493     sum6  = 0.0;
2494     sum7  = 0.0;
2495     sum8  = 0.0;
2496     sum9  = 0.0;
2497     sum10 = 0.0;
2498     sum11 = 0.0;
2499     sum12 = 0.0;
2500     sum13 = 0.0;
2501     sum14 = 0.0;
2502     sum15 = 0.0;
2503     sum16 = 0.0;
2504     sum17 = 0.0;
2505     sum18 = 0.0;
2506     for (j=0; j<n; j++) {
2507       sum1  += v[jrow]*x[18*idx[jrow]];
2508       sum2  += v[jrow]*x[18*idx[jrow]+1];
2509       sum3  += v[jrow]*x[18*idx[jrow]+2];
2510       sum4  += v[jrow]*x[18*idx[jrow]+3];
2511       sum5  += v[jrow]*x[18*idx[jrow]+4];
2512       sum6  += v[jrow]*x[18*idx[jrow]+5];
2513       sum7  += v[jrow]*x[18*idx[jrow]+6];
2514       sum8  += v[jrow]*x[18*idx[jrow]+7];
2515       sum9  += v[jrow]*x[18*idx[jrow]+8];
2516       sum10 += v[jrow]*x[18*idx[jrow]+9];
2517       sum11 += v[jrow]*x[18*idx[jrow]+10];
2518       sum12 += v[jrow]*x[18*idx[jrow]+11];
2519       sum13 += v[jrow]*x[18*idx[jrow]+12];
2520       sum14 += v[jrow]*x[18*idx[jrow]+13];
2521       sum15 += v[jrow]*x[18*idx[jrow]+14];
2522       sum16 += v[jrow]*x[18*idx[jrow]+15];
2523       sum17 += v[jrow]*x[18*idx[jrow]+16];
2524       sum18 += v[jrow]*x[18*idx[jrow]+17];
2525       jrow++;
2526     }
2527     y[18*i]    += sum1;
2528     y[18*i+1]  += sum2;
2529     y[18*i+2]  += sum3;
2530     y[18*i+3]  += sum4;
2531     y[18*i+4]  += sum5;
2532     y[18*i+5]  += sum6;
2533     y[18*i+6]  += sum7;
2534     y[18*i+7]  += sum8;
2535     y[18*i+8]  += sum9;
2536     y[18*i+9]  += sum10;
2537     y[18*i+10] += sum11;
2538     y[18*i+11] += sum12;
2539     y[18*i+12] += sum13;
2540     y[18*i+13] += sum14;
2541     y[18*i+14] += sum15;
2542     y[18*i+15] += sum16;
2543     y[18*i+16] += sum17;
2544     y[18*i+17] += sum18;
2545   }
2546 
2547   PetscCall(PetscLogFlops(36.0*a->nz));
2548   PetscCall(VecRestoreArrayRead(xx,&x));
2549   PetscCall(VecRestoreArray(zz,&y));
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2554 {
2555   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2556   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2557   const PetscScalar *x,*v;
2558   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2559   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2560   const PetscInt    m = b->AIJ->rmap->n,*idx;
2561   PetscInt          n,i;
2562 
2563   PetscFunctionBegin;
2564   if (yy != zz) PetscCall(VecCopy(yy,zz));
2565   PetscCall(VecGetArrayRead(xx,&x));
2566   PetscCall(VecGetArray(zz,&y));
2567   for (i=0; i<m; i++) {
2568     idx     = a->j + a->i[i];
2569     v       = a->a + a->i[i];
2570     n       = a->i[i+1] - a->i[i];
2571     alpha1  = x[18*i];
2572     alpha2  = x[18*i+1];
2573     alpha3  = x[18*i+2];
2574     alpha4  = x[18*i+3];
2575     alpha5  = x[18*i+4];
2576     alpha6  = x[18*i+5];
2577     alpha7  = x[18*i+6];
2578     alpha8  = x[18*i+7];
2579     alpha9  = x[18*i+8];
2580     alpha10 = x[18*i+9];
2581     alpha11 = x[18*i+10];
2582     alpha12 = x[18*i+11];
2583     alpha13 = x[18*i+12];
2584     alpha14 = x[18*i+13];
2585     alpha15 = x[18*i+14];
2586     alpha16 = x[18*i+15];
2587     alpha17 = x[18*i+16];
2588     alpha18 = x[18*i+17];
2589     while (n-->0) {
2590       y[18*(*idx)]    += alpha1*(*v);
2591       y[18*(*idx)+1]  += alpha2*(*v);
2592       y[18*(*idx)+2]  += alpha3*(*v);
2593       y[18*(*idx)+3]  += alpha4*(*v);
2594       y[18*(*idx)+4]  += alpha5*(*v);
2595       y[18*(*idx)+5]  += alpha6*(*v);
2596       y[18*(*idx)+6]  += alpha7*(*v);
2597       y[18*(*idx)+7]  += alpha8*(*v);
2598       y[18*(*idx)+8]  += alpha9*(*v);
2599       y[18*(*idx)+9]  += alpha10*(*v);
2600       y[18*(*idx)+10] += alpha11*(*v);
2601       y[18*(*idx)+11] += alpha12*(*v);
2602       y[18*(*idx)+12] += alpha13*(*v);
2603       y[18*(*idx)+13] += alpha14*(*v);
2604       y[18*(*idx)+14] += alpha15*(*v);
2605       y[18*(*idx)+15] += alpha16*(*v);
2606       y[18*(*idx)+16] += alpha17*(*v);
2607       y[18*(*idx)+17] += alpha18*(*v);
2608       idx++; v++;
2609     }
2610   }
2611   PetscCall(PetscLogFlops(36.0*a->nz));
2612   PetscCall(VecRestoreArrayRead(xx,&x));
2613   PetscCall(VecRestoreArray(zz,&y));
2614   PetscFunctionReturn(0);
2615 }
2616 
2617 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2618 {
2619   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2620   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2621   const PetscScalar *x,*v;
2622   PetscScalar       *y,*sums;
2623   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2624   PetscInt          n,i,jrow,j,dof = b->dof,k;
2625 
2626   PetscFunctionBegin;
2627   PetscCall(VecGetArrayRead(xx,&x));
2628   PetscCall(VecSet(yy,0.0));
2629   PetscCall(VecGetArray(yy,&y));
2630   idx  = a->j;
2631   v    = a->a;
2632   ii   = a->i;
2633 
2634   for (i=0; i<m; i++) {
2635     jrow = ii[i];
2636     n    = ii[i+1] - jrow;
2637     sums = y + dof*i;
2638     for (j=0; j<n; j++) {
2639       for (k=0; k<dof; k++) {
2640         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2641       }
2642       jrow++;
2643     }
2644   }
2645 
2646   PetscCall(PetscLogFlops(2.0*dof*a->nz));
2647   PetscCall(VecRestoreArrayRead(xx,&x));
2648   PetscCall(VecRestoreArray(yy,&y));
2649   PetscFunctionReturn(0);
2650 }
2651 
2652 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2653 {
2654   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2655   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2656   const PetscScalar *x,*v;
2657   PetscScalar       *y,*sums;
2658   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2659   PetscInt          n,i,jrow,j,dof = b->dof,k;
2660 
2661   PetscFunctionBegin;
2662   if (yy != zz) PetscCall(VecCopy(yy,zz));
2663   PetscCall(VecGetArrayRead(xx,&x));
2664   PetscCall(VecGetArray(zz,&y));
2665   idx  = a->j;
2666   v    = a->a;
2667   ii   = a->i;
2668 
2669   for (i=0; i<m; i++) {
2670     jrow = ii[i];
2671     n    = ii[i+1] - jrow;
2672     sums = y + dof*i;
2673     for (j=0; j<n; j++) {
2674       for (k=0; k<dof; k++) {
2675         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2676       }
2677       jrow++;
2678     }
2679   }
2680 
2681   PetscCall(PetscLogFlops(2.0*dof*a->nz));
2682   PetscCall(VecRestoreArrayRead(xx,&x));
2683   PetscCall(VecRestoreArray(zz,&y));
2684   PetscFunctionReturn(0);
2685 }
2686 
2687 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2688 {
2689   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2690   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2691   const PetscScalar *x,*v,*alpha;
2692   PetscScalar       *y;
2693   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2694   PetscInt          n,i,k;
2695 
2696   PetscFunctionBegin;
2697   PetscCall(VecGetArrayRead(xx,&x));
2698   PetscCall(VecSet(yy,0.0));
2699   PetscCall(VecGetArray(yy,&y));
2700   for (i=0; i<m; i++) {
2701     idx   = a->j + a->i[i];
2702     v     = a->a + a->i[i];
2703     n     = a->i[i+1] - a->i[i];
2704     alpha = x + dof*i;
2705     while (n-->0) {
2706       for (k=0; k<dof; k++) {
2707         y[dof*(*idx)+k] += alpha[k]*(*v);
2708       }
2709       idx++; v++;
2710     }
2711   }
2712   PetscCall(PetscLogFlops(2.0*dof*a->nz));
2713   PetscCall(VecRestoreArrayRead(xx,&x));
2714   PetscCall(VecRestoreArray(yy,&y));
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2719 {
2720   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2721   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2722   const PetscScalar *x,*v,*alpha;
2723   PetscScalar       *y;
2724   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2725   PetscInt          n,i,k;
2726 
2727   PetscFunctionBegin;
2728   if (yy != zz) PetscCall(VecCopy(yy,zz));
2729   PetscCall(VecGetArrayRead(xx,&x));
2730   PetscCall(VecGetArray(zz,&y));
2731   for (i=0; i<m; i++) {
2732     idx   = a->j + a->i[i];
2733     v     = a->a + a->i[i];
2734     n     = a->i[i+1] - a->i[i];
2735     alpha = x + dof*i;
2736     while (n-->0) {
2737       for (k=0; k<dof; k++) {
2738         y[dof*(*idx)+k] += alpha[k]*(*v);
2739       }
2740       idx++; v++;
2741     }
2742   }
2743   PetscCall(PetscLogFlops(2.0*dof*a->nz));
2744   PetscCall(VecRestoreArrayRead(xx,&x));
2745   PetscCall(VecRestoreArray(zz,&y));
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 /*===================================================================================*/
2750 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2751 {
2752   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2753 
2754   PetscFunctionBegin;
2755   /* start the scatter */
2756   PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
2757   PetscCall((*b->AIJ->ops->mult)(b->AIJ,xx,yy));
2758   PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
2759   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy));
2760   PetscFunctionReturn(0);
2761 }
2762 
2763 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2764 {
2765   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2766 
2767   PetscFunctionBegin;
2768   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w));
2769   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy));
2770   PetscCall(VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE));
2771   PetscCall(VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE));
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2776 {
2777   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2778 
2779   PetscFunctionBegin;
2780   /* start the scatter */
2781   PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
2782   PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz));
2783   PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
2784   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz));
2785   PetscFunctionReturn(0);
2786 }
2787 
2788 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2789 {
2790   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2791 
2792   PetscFunctionBegin;
2793   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w));
2794   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz));
2795   PetscCall(VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE));
2796   PetscCall(VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE));
2797   PetscFunctionReturn(0);
2798 }
2799 
2800 /* ----------------------------------------------------------------*/
2801 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2802 {
2803   Mat_Product    *product = C->product;
2804 
2805   PetscFunctionBegin;
2806   if (product->type == MATPRODUCT_PtAP) {
2807     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2808   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2809   PetscFunctionReturn(0);
2810 }
2811 
2812 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2813 {
2814   Mat_Product    *product = C->product;
2815   PetscBool      flg = PETSC_FALSE;
2816   Mat            A=product->A,P=product->B;
2817   PetscInt       alg=1; /* set default algorithm */
2818 #if !defined(PETSC_HAVE_HYPRE)
2819   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2820   PetscInt       nalg=4;
2821 #else
2822   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2823   PetscInt       nalg=5;
2824 #endif
2825 
2826   PetscFunctionBegin;
2827   PetscCheck(product->type == MATPRODUCT_PtAP,PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);
2828 
2829   /* PtAP */
2830   /* Check matrix local sizes */
2831   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
2832   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
2833 
2834   /* Set the default algorithm */
2835   PetscCall(PetscStrcmp(C->product->alg,"default",&flg));
2836   if (flg) {
2837     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2838   }
2839 
2840   /* Get runtime option */
2841   PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
2842   PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg));
2843   if (flg) {
2844     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2845   }
2846   PetscOptionsEnd();
2847 
2848   PetscCall(PetscStrcmp(C->product->alg,"allatonce",&flg));
2849   if (flg) {
2850     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2851     PetscFunctionReturn(0);
2852   }
2853 
2854   PetscCall(PetscStrcmp(C->product->alg,"allatonce_merged",&flg));
2855   if (flg) {
2856     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2857     PetscFunctionReturn(0);
2858   }
2859 
2860   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2861   PetscCall(PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
2862   PetscCall(MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P));
2863   PetscCall(MatProductSetFromOptions(C));
2864   PetscFunctionReturn(0);
2865 }
2866 
2867 /* ----------------------------------------------------------------*/
2868 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2869 {
2870   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2871   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2872   Mat                P         =pp->AIJ;
2873   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2874   PetscInt           *pti,*ptj,*ptJ;
2875   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2876   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2877   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2878   MatScalar          *ca;
2879   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2880 
2881   PetscFunctionBegin;
2882   /* Get ij structure of P^T */
2883   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj));
2884 
2885   cn = pn*ppdof;
2886   /* Allocate ci array, arrays for fill computation and */
2887   /* free space for accumulating nonzero column info */
2888   PetscCall(PetscMalloc1(cn+1,&ci));
2889   ci[0] = 0;
2890 
2891   /* Work arrays for rows of P^T*A */
2892   PetscCall(PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow));
2893   PetscCall(PetscArrayzero(ptadenserow,an));
2894   PetscCall(PetscArrayzero(denserow,cn));
2895 
2896   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2897   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2898   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2899   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space));
2900   current_space = free_space;
2901 
2902   /* Determine symbolic info for each row of C: */
2903   for (i=0; i<pn; i++) {
2904     ptnzi = pti[i+1] - pti[i];
2905     ptJ   = ptj + pti[i];
2906     for (dof=0; dof<ppdof; dof++) {
2907       ptanzi = 0;
2908       /* Determine symbolic row of PtA: */
2909       for (j=0; j<ptnzi; j++) {
2910         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2911         arow = ptJ[j]*ppdof + dof;
2912         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2913         anzj = ai[arow+1] - ai[arow];
2914         ajj  = aj + ai[arow];
2915         for (k=0; k<anzj; k++) {
2916           if (!ptadenserow[ajj[k]]) {
2917             ptadenserow[ajj[k]]    = -1;
2918             ptasparserow[ptanzi++] = ajj[k];
2919           }
2920         }
2921       }
2922       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2923       ptaj = ptasparserow;
2924       cnzi = 0;
2925       for (j=0; j<ptanzi; j++) {
2926         /* Get offset within block of P */
2927         pshift = *ptaj%ppdof;
2928         /* Get block row of P */
2929         prow = (*ptaj++)/ppdof; /* integer division */
2930         /* P has same number of nonzeros per row as the compressed form */
2931         pnzj = pi[prow+1] - pi[prow];
2932         pjj  = pj + pi[prow];
2933         for (k=0;k<pnzj;k++) {
2934           /* Locations in C are shifted by the offset within the block */
2935           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2936           if (!denserow[pjj[k]*ppdof+pshift]) {
2937             denserow[pjj[k]*ppdof+pshift] = -1;
2938             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2939           }
2940         }
2941       }
2942 
2943       /* sort sparserow */
2944       PetscCall(PetscSortInt(cnzi,sparserow));
2945 
2946       /* If free space is not available, make more free space */
2947       /* Double the amount of total space in the list */
2948       if (current_space->local_remaining<cnzi) {
2949         PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space));
2950       }
2951 
2952       /* Copy data into free space, and zero out denserows */
2953       PetscCall(PetscArraycpy(current_space->array,sparserow,cnzi));
2954 
2955       current_space->array           += cnzi;
2956       current_space->local_used      += cnzi;
2957       current_space->local_remaining -= cnzi;
2958 
2959       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2960       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
2961 
2962       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2963       /*        For now, we will recompute what is needed. */
2964       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2965     }
2966   }
2967   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2968   /* Allocate space for cj, initialize cj, and */
2969   /* destroy list of free space and other temporary array(s) */
2970   PetscCall(PetscMalloc1(ci[cn]+1,&cj));
2971   PetscCall(PetscFreeSpaceContiguous(&free_space,cj));
2972   PetscCall(PetscFree4(ptadenserow,ptasparserow,denserow,sparserow));
2973 
2974   /* Allocate space for ca */
2975   PetscCall(PetscCalloc1(ci[cn]+1,&ca));
2976 
2977   /* put together the new matrix */
2978   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C));
2979   PetscCall(MatSetBlockSize(C,pp->dof));
2980 
2981   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2982   /* Since these are PETSc arrays, change flags to free them as necessary. */
2983   c          = (Mat_SeqAIJ*)(C->data);
2984   c->free_a  = PETSC_TRUE;
2985   c->free_ij = PETSC_TRUE;
2986   c->nonew   = 0;
2987 
2988   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2989   C->ops->productnumeric = MatProductNumeric_PtAP;
2990 
2991   /* Clean up. */
2992   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj));
2993   PetscFunctionReturn(0);
2994 }
2995 
2996 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2997 {
2998   /* This routine requires testing -- first draft only */
2999   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3000   Mat             P  =pp->AIJ;
3001   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3002   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3003   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3004   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3005   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3006   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3007   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3008   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3009   MatScalar       *ca=c->a,*caj,*apa;
3010 
3011   PetscFunctionBegin;
3012   /* Allocate temporary array for storage of one row of A*P */
3013   PetscCall(PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense));
3014 
3015   /* Clear old values in C */
3016   PetscCall(PetscArrayzero(ca,ci[cm]));
3017 
3018   for (i=0; i<am; i++) {
3019     /* Form sparse row of A*P */
3020     anzi  = ai[i+1] - ai[i];
3021     apnzj = 0;
3022     for (j=0; j<anzi; j++) {
3023       /* Get offset within block of P */
3024       pshift = *aj%ppdof;
3025       /* Get block row of P */
3026       prow = *aj++/ppdof;   /* integer division */
3027       pnzj = pi[prow+1] - pi[prow];
3028       pjj  = pj + pi[prow];
3029       paj  = pa + pi[prow];
3030       for (k=0; k<pnzj; k++) {
3031         poffset = pjj[k]*ppdof+pshift;
3032         if (!apjdense[poffset]) {
3033           apjdense[poffset] = -1;
3034           apj[apnzj++]      = poffset;
3035         }
3036         apa[poffset] += (*aa)*paj[k];
3037       }
3038       PetscCall(PetscLogFlops(2.0*pnzj));
3039       aa++;
3040     }
3041 
3042     /* Sort the j index array for quick sparse axpy. */
3043     /* Note: a array does not need sorting as it is in dense storage locations. */
3044     PetscCall(PetscSortInt(apnzj,apj));
3045 
3046     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3047     prow    = i/ppdof; /* integer division */
3048     pshift  = i%ppdof;
3049     poffset = pi[prow];
3050     pnzi    = pi[prow+1] - poffset;
3051     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3052     pJ = pj+poffset;
3053     pA = pa+poffset;
3054     for (j=0; j<pnzi; j++) {
3055       crow = (*pJ)*ppdof+pshift;
3056       cjj  = cj + ci[crow];
3057       caj  = ca + ci[crow];
3058       pJ++;
3059       /* Perform sparse axpy operation.  Note cjj includes apj. */
3060       for (k=0,nextap=0; nextap<apnzj; k++) {
3061         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3062       }
3063       PetscCall(PetscLogFlops(2.0*apnzj));
3064       pA++;
3065     }
3066 
3067     /* Zero the current row info for A*P */
3068     for (j=0; j<apnzj; j++) {
3069       apa[apj[j]]      = 0.;
3070       apjdense[apj[j]] = 0;
3071     }
3072   }
3073 
3074   /* Assemble the final matrix and clean up */
3075   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
3076   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
3077   PetscCall(PetscFree3(apa,apj,apjdense));
3078   PetscFunctionReturn(0);
3079 }
3080 
3081 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3082 {
3083   Mat_Product         *product = C->product;
3084   Mat                 A=product->A,P=product->B;
3085 
3086   PetscFunctionBegin;
3087   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C));
3088   PetscFunctionReturn(0);
3089 }
3090 
3091 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3092 {
3093   PetscFunctionBegin;
3094   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3095 }
3096 
3097 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3098 {
3099   PetscFunctionBegin;
3100   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3101 }
3102 
3103 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3104 
3105 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3106 {
3107   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3108 
3109   PetscFunctionBegin;
3110 
3111   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C));
3112   PetscFunctionReturn(0);
3113 }
3114 
3115 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3116 
3117 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3118 {
3119   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3120 
3121   PetscFunctionBegin;
3122   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C));
3123   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3124   PetscFunctionReturn(0);
3125 }
3126 
3127 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3128 
3129 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3130 {
3131   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3132 
3133   PetscFunctionBegin;
3134 
3135   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C));
3136   PetscFunctionReturn(0);
3137 }
3138 
3139 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3140 
3141 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3142 {
3143   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3144 
3145   PetscFunctionBegin;
3146 
3147   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C));
3148   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3149   PetscFunctionReturn(0);
3150 }
3151 
3152 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3153 {
3154   Mat_Product         *product = C->product;
3155   Mat                 A=product->A,P=product->B;
3156   PetscBool           flg;
3157 
3158   PetscFunctionBegin;
3159   PetscCall(PetscStrcmp(product->alg,"allatonce",&flg));
3160   if (flg) {
3161     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C));
3162     C->ops->productnumeric = MatProductNumeric_PtAP;
3163     PetscFunctionReturn(0);
3164   }
3165 
3166   PetscCall(PetscStrcmp(product->alg,"allatonce_merged",&flg));
3167   if (flg) {
3168     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C));
3169     C->ops->productnumeric = MatProductNumeric_PtAP;
3170     PetscFunctionReturn(0);
3171   }
3172 
3173   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3174 }
3175 
3176 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3177 {
3178   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3179   Mat            a    = b->AIJ,B;
3180   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3181   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3182   PetscInt       *cols;
3183   PetscScalar    *vals;
3184 
3185   PetscFunctionBegin;
3186   PetscCall(MatGetSize(a,&m,&n));
3187   PetscCall(PetscMalloc1(dof*m,&ilen));
3188   for (i=0; i<m; i++) {
3189     nmax = PetscMax(nmax,aij->ilen[i]);
3190     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3191   }
3192   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
3193   PetscCall(MatSetSizes(B,dof*m,dof*n,dof*m,dof*n));
3194   PetscCall(MatSetType(B,newtype));
3195   PetscCall(MatSeqAIJSetPreallocation(B,0,ilen));
3196   PetscCall(PetscFree(ilen));
3197   PetscCall(PetscMalloc1(nmax,&icols));
3198   ii   = 0;
3199   for (i=0; i<m; i++) {
3200     PetscCall(MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals));
3201     for (j=0; j<dof; j++) {
3202       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3203       PetscCall(MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES));
3204       ii++;
3205     }
3206     PetscCall(MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals));
3207   }
3208   PetscCall(PetscFree(icols));
3209   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3210   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3211 
3212   if (reuse == MAT_INPLACE_MATRIX) {
3213     PetscCall(MatHeaderReplace(A,&B));
3214   } else {
3215     *newmat = B;
3216   }
3217   PetscFunctionReturn(0);
3218 }
3219 
3220 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3221 
3222 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3223 {
3224   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3225   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3226   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3227   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3228   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3229   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3230   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3231   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3232   PetscInt       rstart,cstart,*garray,ii,k;
3233   PetscScalar    *vals,*ovals;
3234 
3235   PetscFunctionBegin;
3236   PetscCall(PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz));
3237   for (i=0; i<A->rmap->n/dof; i++) {
3238     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3239     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3240     for (j=0; j<dof; j++) {
3241       dnz[dof*i+j] = AIJ->ilen[i];
3242       onz[dof*i+j] = OAIJ->ilen[i];
3243     }
3244   }
3245   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3246   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3247   PetscCall(MatSetType(B,newtype));
3248   PetscCall(MatMPIAIJSetPreallocation(B,0,dnz,0,onz));
3249   PetscCall(MatSetBlockSize(B,dof));
3250   PetscCall(PetscFree2(dnz,onz));
3251 
3252   PetscCall(PetscMalloc2(nmax,&icols,onmax,&oicols));
3253   rstart = dof*maij->A->rmap->rstart;
3254   cstart = dof*maij->A->cmap->rstart;
3255   garray = mpiaij->garray;
3256 
3257   ii = rstart;
3258   for (i=0; i<A->rmap->n/dof; i++) {
3259     PetscCall(MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals));
3260     PetscCall(MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals));
3261     for (j=0; j<dof; j++) {
3262       for (k=0; k<ncols; k++) {
3263         icols[k] = cstart + dof*cols[k]+j;
3264       }
3265       for (k=0; k<oncols; k++) {
3266         oicols[k] = dof*garray[ocols[k]]+j;
3267       }
3268       PetscCall(MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES));
3269       PetscCall(MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES));
3270       ii++;
3271     }
3272     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals));
3273     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals));
3274   }
3275   PetscCall(PetscFree2(icols,oicols));
3276 
3277   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3278   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3279 
3280   if (reuse == MAT_INPLACE_MATRIX) {
3281     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3282     ((PetscObject)A)->refct = 1;
3283 
3284     PetscCall(MatHeaderReplace(A,&B));
3285 
3286     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3287   } else {
3288     *newmat = B;
3289   }
3290   PetscFunctionReturn(0);
3291 }
3292 
3293 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3294 {
3295   Mat            A;
3296 
3297   PetscFunctionBegin;
3298   PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A));
3299   PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat));
3300   PetscCall(MatDestroy(&A));
3301   PetscFunctionReturn(0);
3302 }
3303 
3304 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3305 {
3306   Mat            A;
3307 
3308   PetscFunctionBegin;
3309   PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A));
3310   PetscCall(MatCreateSubMatrices(A,n,irow,icol,scall,submat));
3311   PetscCall(MatDestroy(&A));
3312   PetscFunctionReturn(0);
3313 }
3314 
3315 /* ---------------------------------------------------------------------------------- */
3316 /*@
3317   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3318   operations for multicomponent problems.  It interpolates each component the same
3319   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3320   and MATMPIAIJ for distributed matrices.
3321 
3322   Collective
3323 
3324   Input Parameters:
3325 + A - the AIJ matrix describing the action on blocks
3326 - dof - the block size (number of components per node)
3327 
3328   Output Parameter:
3329 . maij - the new MAIJ matrix
3330 
3331   Operations provided:
3332 .vb
3333     MatMult
3334     MatMultTranspose
3335     MatMultAdd
3336     MatMultTransposeAdd
3337     MatView
3338 .ve
3339 
3340   Level: advanced
3341 
3342 .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
3343 @*/
3344 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3345 {
3346   PetscMPIInt    size;
3347   PetscInt       n;
3348   Mat            B;
3349 #if defined(PETSC_HAVE_CUDA)
3350   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3351   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3352 #endif
3353 
3354   PetscFunctionBegin;
3355   dof  = PetscAbs(dof);
3356   PetscCall(PetscObjectReference((PetscObject)A));
3357 
3358   if (dof == 1) *maij = A;
3359   else {
3360     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3361     /* propagate vec type */
3362     PetscCall(MatSetVecType(B,A->defaultvectype));
3363     PetscCall(MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N));
3364     PetscCall(PetscLayoutSetBlockSize(B->rmap,dof));
3365     PetscCall(PetscLayoutSetBlockSize(B->cmap,dof));
3366     PetscCall(PetscLayoutSetUp(B->rmap));
3367     PetscCall(PetscLayoutSetUp(B->cmap));
3368 
3369     B->assembled = PETSC_TRUE;
3370 
3371     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3372     if (size == 1) {
3373       Mat_SeqMAIJ *b;
3374 
3375       PetscCall(MatSetType(B,MATSEQMAIJ));
3376 
3377       B->ops->setup   = NULL;
3378       B->ops->destroy = MatDestroy_SeqMAIJ;
3379       B->ops->view    = MatView_SeqMAIJ;
3380 
3381       b               = (Mat_SeqMAIJ*)B->data;
3382       b->dof          = dof;
3383       b->AIJ          = A;
3384 
3385       if (dof == 2) {
3386         B->ops->mult             = MatMult_SeqMAIJ_2;
3387         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3388         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3389         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3390       } else if (dof == 3) {
3391         B->ops->mult             = MatMult_SeqMAIJ_3;
3392         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3393         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3394         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3395       } else if (dof == 4) {
3396         B->ops->mult             = MatMult_SeqMAIJ_4;
3397         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3398         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3399         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3400       } else if (dof == 5) {
3401         B->ops->mult             = MatMult_SeqMAIJ_5;
3402         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3403         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3404         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3405       } else if (dof == 6) {
3406         B->ops->mult             = MatMult_SeqMAIJ_6;
3407         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3408         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3409         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3410       } else if (dof == 7) {
3411         B->ops->mult             = MatMult_SeqMAIJ_7;
3412         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3413         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3414         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3415       } else if (dof == 8) {
3416         B->ops->mult             = MatMult_SeqMAIJ_8;
3417         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3418         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3419         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3420       } else if (dof == 9) {
3421         B->ops->mult             = MatMult_SeqMAIJ_9;
3422         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3423         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3424         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3425       } else if (dof == 10) {
3426         B->ops->mult             = MatMult_SeqMAIJ_10;
3427         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3428         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3429         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3430       } else if (dof == 11) {
3431         B->ops->mult             = MatMult_SeqMAIJ_11;
3432         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3433         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3434         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3435       } else if (dof == 16) {
3436         B->ops->mult             = MatMult_SeqMAIJ_16;
3437         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3438         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3439         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3440       } else if (dof == 18) {
3441         B->ops->mult             = MatMult_SeqMAIJ_18;
3442         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3443         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3444         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3445       } else {
3446         B->ops->mult             = MatMult_SeqMAIJ_N;
3447         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3448         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3449         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3450       }
3451 #if defined(PETSC_HAVE_CUDA)
3452       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ));
3453 #endif
3454       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ));
3455       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
3456     } else {
3457       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3458       Mat_MPIMAIJ *b;
3459       IS          from,to;
3460       Vec         gvec;
3461 
3462       PetscCall(MatSetType(B,MATMPIMAIJ));
3463 
3464       B->ops->setup            = NULL;
3465       B->ops->destroy          = MatDestroy_MPIMAIJ;
3466       B->ops->view             = MatView_MPIMAIJ;
3467 
3468       b      = (Mat_MPIMAIJ*)B->data;
3469       b->dof = dof;
3470       b->A   = A;
3471 
3472       PetscCall(MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ));
3473       PetscCall(MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ));
3474 
3475       PetscCall(VecGetSize(mpiaij->lvec,&n));
3476       PetscCall(VecCreate(PETSC_COMM_SELF,&b->w));
3477       PetscCall(VecSetSizes(b->w,n*dof,n*dof));
3478       PetscCall(VecSetBlockSize(b->w,dof));
3479       PetscCall(VecSetType(b->w,VECSEQ));
3480 
3481       /* create two temporary Index sets for build scatter gather */
3482       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from));
3483       PetscCall(ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to));
3484 
3485       /* create temporary global vector to generate scatter context */
3486       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec));
3487 
3488       /* generate the scatter context */
3489       PetscCall(VecScatterCreate(gvec,from,b->w,to,&b->ctx));
3490 
3491       PetscCall(ISDestroy(&from));
3492       PetscCall(ISDestroy(&to));
3493       PetscCall(VecDestroy(&gvec));
3494 
3495       B->ops->mult             = MatMult_MPIMAIJ_dof;
3496       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3497       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3498       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3499 
3500 #if defined(PETSC_HAVE_CUDA)
3501       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ));
3502 #endif
3503       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ));
3504       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
3505     }
3506     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3507     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3508     PetscCall(MatSetUp(B));
3509 #if defined(PETSC_HAVE_CUDA)
3510     /* temporary until we have CUDA implementation of MAIJ */
3511     {
3512       PetscBool flg;
3513       if (convert) {
3514         PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,""));
3515         if (flg) {
3516           PetscCall(MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B));
3517         }
3518       }
3519     }
3520 #endif
3521     *maij = B;
3522   }
3523   PetscFunctionReturn(0);
3524 }
3525