xref: /petsc/src/mat/impls/maij/maij.c (revision d45987f3cb8da7dca4e7c09f0fedfc3d8f6afad7)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 #include <petsc-private/vecimpl.h>
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatMAIJGetAIJ"
25 /*@C
26    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
27 
28    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
29 
30    Input Parameter:
31 .  A - the MAIJ matrix
32 
33    Output Parameter:
34 .  B - the AIJ matrix
35 
36    Level: advanced
37 
38    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
39 
40 .seealso: MatCreateMAIJ()
41 @*/
42 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
43 {
44   PetscErrorCode ierr;
45   PetscBool      ismpimaij,isseqmaij;
46 
47   PetscFunctionBegin;
48   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
49   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
50   if (ismpimaij) {
51     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
52 
53     *B = b->A;
54   } else if (isseqmaij) {
55     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
56 
57     *B = b->AIJ;
58   } else {
59     *B = A;
60   }
61   PetscFunctionReturn(0);
62 }
63 
64 #undef __FUNCT__
65 #define __FUNCT__ "MatMAIJRedimension"
66 /*@C
67    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
68 
69    Logically Collective
70 
71    Input Parameter:
72 +  A - the MAIJ matrix
73 -  dof - the block size for the new matrix
74 
75    Output Parameter:
76 .  B - the new MAIJ matrix
77 
78    Level: advanced
79 
80 .seealso: MatCreateMAIJ()
81 @*/
82 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
83 {
84   PetscErrorCode ierr;
85   Mat            Aij = PETSC_NULL;
86 
87   PetscFunctionBegin;
88   PetscValidLogicalCollectiveInt(A,dof,2);
89   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
90   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "MatDestroy_SeqMAIJ"
96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
97 {
98   PetscErrorCode ierr;
99   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
100 
101   PetscFunctionBegin;
102   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
103   ierr = PetscFree(A->data);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatSetUp_MAIJ"
109 PetscErrorCode MatSetUp_MAIJ(Mat A)
110 {
111   PetscFunctionBegin;
112   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatView_SeqMAIJ"
118 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
119 {
120   PetscErrorCode ierr;
121   Mat            B;
122 
123   PetscFunctionBegin;
124   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
125   ierr = MatView(B,viewer);CHKERRQ(ierr);
126   ierr = MatDestroy(&B);CHKERRQ(ierr);
127   PetscFunctionReturn(0);
128 }
129 
130 #undef __FUNCT__
131 #define __FUNCT__ "MatView_MPIMAIJ"
132 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
133 {
134   PetscErrorCode ierr;
135   Mat            B;
136 
137   PetscFunctionBegin;
138   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
139   ierr = MatView(B,viewer);CHKERRQ(ierr);
140   ierr = MatDestroy(&B);CHKERRQ(ierr);
141   PetscFunctionReturn(0);
142 }
143 
144 #undef __FUNCT__
145 #define __FUNCT__ "MatDestroy_MPIMAIJ"
146 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
147 {
148   PetscErrorCode ierr;
149   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
150 
151   PetscFunctionBegin;
152   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
153   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
154   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
155   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
156   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
157   ierr = PetscFree(A->data);CHKERRQ(ierr);
158   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
159   PetscFunctionReturn(0);
160 }
161 
162 /*MC
163   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
164   multicomponent problems, interpolating or restricting each component the same way independently.
165   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
166 
167   Operations provided:
168 . MatMult
169 . MatMultTranspose
170 . MatMultAdd
171 . MatMultTransposeAdd
172 
173   Level: advanced
174 
175 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
176 M*/
177 
178 EXTERN_C_BEGIN
179 #undef __FUNCT__
180 #define __FUNCT__ "MatCreate_MAIJ"
181 PetscErrorCode  MatCreate_MAIJ(Mat A)
182 {
183   PetscErrorCode ierr;
184   Mat_MPIMAIJ    *b;
185   PetscMPIInt    size;
186 
187   PetscFunctionBegin;
188   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
189   A->data  = (void*)b;
190   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
191   A->ops->setup = MatSetUp_MAIJ;
192 
193   b->AIJ  = 0;
194   b->dof  = 0;
195   b->OAIJ = 0;
196   b->ctx  = 0;
197   b->w    = 0;
198   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
199   if (size == 1){
200     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
201   } else {
202     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
203   }
204   PetscFunctionReturn(0);
205 }
206 EXTERN_C_END
207 
208 /* --------------------------------------------------------------------------------------*/
209 #undef __FUNCT__
210 #define __FUNCT__ "MatMult_SeqMAIJ_2"
211 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
212 {
213   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
214   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
215   const PetscScalar *x,*v;
216   PetscScalar       *y, sum1, sum2;
217   PetscErrorCode    ierr;
218   PetscInt          nonzerorow=0,n,i,jrow,j;
219   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
220 
221   PetscFunctionBegin;
222   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
223   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
224   idx  = a->j;
225   v    = a->a;
226   ii   = a->i;
227 
228   for (i=0; i<m; i++) {
229     jrow = ii[i];
230     n    = ii[i+1] - jrow;
231     sum1  = 0.0;
232     sum2  = 0.0;
233     nonzerorow += (n>0);
234     for (j=0; j<n; j++) {
235       sum1 += v[jrow]*x[2*idx[jrow]];
236       sum2 += v[jrow]*x[2*idx[jrow]+1];
237       jrow++;
238      }
239     y[2*i]   = sum1;
240     y[2*i+1] = sum2;
241   }
242 
243   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
244   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
245   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
251 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
252 {
253   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
254   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
255   const PetscScalar *x,*v;
256   PetscScalar       *y,alpha1,alpha2;
257   PetscErrorCode    ierr;
258   PetscInt          n,i;
259   const PetscInt    m = b->AIJ->rmap->n,*idx;
260 
261   PetscFunctionBegin;
262   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
263   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
264   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
265 
266   for (i=0; i<m; i++) {
267     idx    = a->j + a->i[i] ;
268     v      = a->a + a->i[i] ;
269     n      = a->i[i+1] - a->i[i];
270     alpha1 = x[2*i];
271     alpha2 = x[2*i+1];
272     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
273   }
274   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
275   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
276   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
277   PetscFunctionReturn(0);
278 }
279 
280 #undef __FUNCT__
281 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
282 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
283 {
284   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
285   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
286   const PetscScalar *x,*v;
287   PetscScalar       *y,sum1, sum2;
288   PetscErrorCode    ierr;
289   PetscInt          n,i,jrow,j;
290   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
291 
292   PetscFunctionBegin;
293   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
294   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
295   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
296   idx  = a->j;
297   v    = a->a;
298   ii   = a->i;
299 
300   for (i=0; i<m; i++) {
301     jrow = ii[i];
302     n    = ii[i+1] - jrow;
303     sum1  = 0.0;
304     sum2  = 0.0;
305     for (j=0; j<n; j++) {
306       sum1 += v[jrow]*x[2*idx[jrow]];
307       sum2 += v[jrow]*x[2*idx[jrow]+1];
308       jrow++;
309      }
310     y[2*i]   += sum1;
311     y[2*i+1] += sum2;
312   }
313 
314   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
315   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
316   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
317   PetscFunctionReturn(0);
318 }
319 #undef __FUNCT__
320 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
321 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
322 {
323   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
324   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
325   const PetscScalar *x,*v;
326   PetscScalar       *y,alpha1,alpha2;
327   PetscErrorCode    ierr;
328   PetscInt          n,i;
329   const PetscInt    m = b->AIJ->rmap->n,*idx;
330 
331   PetscFunctionBegin;
332   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
333   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
334   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
335 
336   for (i=0; i<m; i++) {
337     idx   = a->j + a->i[i] ;
338     v     = a->a + a->i[i] ;
339     n     = a->i[i+1] - a->i[i];
340     alpha1 = x[2*i];
341     alpha2 = x[2*i+1];
342     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
343   }
344   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
345   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
346   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 /* --------------------------------------------------------------------------------------*/
350 #undef __FUNCT__
351 #define __FUNCT__ "MatMult_SeqMAIJ_3"
352 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
353 {
354   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
355   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
356   const PetscScalar *x,*v;
357   PetscScalar       *y,sum1, sum2, sum3;
358   PetscErrorCode    ierr;
359   PetscInt          nonzerorow=0,n,i,jrow,j;
360   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
361 
362   PetscFunctionBegin;
363   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
364   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
365   idx  = a->j;
366   v    = a->a;
367   ii   = a->i;
368 
369   for (i=0; i<m; i++) {
370     jrow = ii[i];
371     n    = ii[i+1] - jrow;
372     sum1  = 0.0;
373     sum2  = 0.0;
374     sum3  = 0.0;
375     nonzerorow += (n>0);
376     for (j=0; j<n; j++) {
377       sum1 += v[jrow]*x[3*idx[jrow]];
378       sum2 += v[jrow]*x[3*idx[jrow]+1];
379       sum3 += v[jrow]*x[3*idx[jrow]+2];
380       jrow++;
381      }
382     y[3*i]   = sum1;
383     y[3*i+1] = sum2;
384     y[3*i+2] = sum3;
385   }
386 
387   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
388   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
389   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
395 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
396 {
397   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
398   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
399   const PetscScalar *x,*v;
400   PetscScalar       *y,alpha1,alpha2,alpha3;
401   PetscErrorCode    ierr;
402   PetscInt          n,i;
403   const PetscInt    m = b->AIJ->rmap->n,*idx;
404 
405   PetscFunctionBegin;
406   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
407   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
408   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
409 
410   for (i=0; i<m; i++) {
411     idx    = a->j + a->i[i];
412     v      = a->a + a->i[i];
413     n      = a->i[i+1] - a->i[i];
414     alpha1 = x[3*i];
415     alpha2 = x[3*i+1];
416     alpha3 = x[3*i+2];
417     while (n-->0) {
418       y[3*(*idx)]   += alpha1*(*v);
419       y[3*(*idx)+1] += alpha2*(*v);
420       y[3*(*idx)+2] += alpha3*(*v);
421       idx++; v++;
422     }
423   }
424   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
425   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
426   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
427   PetscFunctionReturn(0);
428 }
429 
430 #undef __FUNCT__
431 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
432 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
433 {
434   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
435   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
436   const PetscScalar *x,*v;
437   PetscScalar       *y,sum1, sum2, sum3;
438   PetscErrorCode    ierr;
439   PetscInt          n,i,jrow,j;
440   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
441 
442   PetscFunctionBegin;
443   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
444   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
445   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
446   idx  = a->j;
447   v    = a->a;
448   ii   = a->i;
449 
450   for (i=0; i<m; i++) {
451     jrow = ii[i];
452     n    = ii[i+1] - jrow;
453     sum1  = 0.0;
454     sum2  = 0.0;
455     sum3  = 0.0;
456     for (j=0; j<n; j++) {
457       sum1 += v[jrow]*x[3*idx[jrow]];
458       sum2 += v[jrow]*x[3*idx[jrow]+1];
459       sum3 += v[jrow]*x[3*idx[jrow]+2];
460       jrow++;
461      }
462     y[3*i]   += sum1;
463     y[3*i+1] += sum2;
464     y[3*i+2] += sum3;
465   }
466 
467   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
468   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
469   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
470   PetscFunctionReturn(0);
471 }
472 #undef __FUNCT__
473 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
474 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
475 {
476   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
477   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
478   const PetscScalar *x,*v;
479   PetscScalar       *y,alpha1,alpha2,alpha3;
480   PetscErrorCode    ierr;
481   PetscInt          n,i;
482   const PetscInt    m = b->AIJ->rmap->n,*idx;
483 
484   PetscFunctionBegin;
485   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
486   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
487   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
488   for (i=0; i<m; i++) {
489     idx    = a->j + a->i[i] ;
490     v      = a->a + a->i[i] ;
491     n      = a->i[i+1] - a->i[i];
492     alpha1 = x[3*i];
493     alpha2 = x[3*i+1];
494     alpha3 = x[3*i+2];
495     while (n-->0) {
496       y[3*(*idx)]   += alpha1*(*v);
497       y[3*(*idx)+1] += alpha2*(*v);
498       y[3*(*idx)+2] += alpha3*(*v);
499       idx++; v++;
500     }
501   }
502   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
503   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
504   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
505   PetscFunctionReturn(0);
506 }
507 
508 /* ------------------------------------------------------------------------------*/
509 #undef __FUNCT__
510 #define __FUNCT__ "MatMult_SeqMAIJ_4"
511 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
512 {
513   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
514   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
515   const PetscScalar *x,*v;
516   PetscScalar       *y,sum1, sum2, sum3, sum4;
517   PetscErrorCode    ierr;
518   PetscInt          nonzerorow=0,n,i,jrow,j;
519   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
520 
521   PetscFunctionBegin;
522   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
523   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
524   idx  = a->j;
525   v    = a->a;
526   ii   = a->i;
527 
528   for (i=0; i<m; i++) {
529     jrow = ii[i];
530     n    = ii[i+1] - jrow;
531     sum1  = 0.0;
532     sum2  = 0.0;
533     sum3  = 0.0;
534     sum4  = 0.0;
535     nonzerorow += (n>0);
536     for (j=0; j<n; j++) {
537       sum1 += v[jrow]*x[4*idx[jrow]];
538       sum2 += v[jrow]*x[4*idx[jrow]+1];
539       sum3 += v[jrow]*x[4*idx[jrow]+2];
540       sum4 += v[jrow]*x[4*idx[jrow]+3];
541       jrow++;
542      }
543     y[4*i]   = sum1;
544     y[4*i+1] = sum2;
545     y[4*i+2] = sum3;
546     y[4*i+3] = sum4;
547   }
548 
549   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
550   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
551   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
557 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
558 {
559   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
560   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
561   const PetscScalar *x,*v;
562   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
563   PetscErrorCode    ierr;
564   PetscInt          n,i;
565   const PetscInt    m = b->AIJ->rmap->n,*idx;
566 
567   PetscFunctionBegin;
568   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
569   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
570   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
571   for (i=0; i<m; i++) {
572     idx    = a->j + a->i[i] ;
573     v      = a->a + a->i[i] ;
574     n      = a->i[i+1] - a->i[i];
575     alpha1 = x[4*i];
576     alpha2 = x[4*i+1];
577     alpha3 = x[4*i+2];
578     alpha4 = x[4*i+3];
579     while (n-->0) {
580       y[4*(*idx)]   += alpha1*(*v);
581       y[4*(*idx)+1] += alpha2*(*v);
582       y[4*(*idx)+2] += alpha3*(*v);
583       y[4*(*idx)+3] += alpha4*(*v);
584       idx++; v++;
585     }
586   }
587   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
588   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
589   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
590   PetscFunctionReturn(0);
591 }
592 
593 #undef __FUNCT__
594 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
595 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
596 {
597   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
598   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
599   const PetscScalar *x,*v;
600   PetscScalar       *y,sum1, sum2, sum3, sum4;
601   PetscErrorCode    ierr;
602   PetscInt          n,i,jrow,j;
603   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
604 
605   PetscFunctionBegin;
606   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
607   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
608   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
609   idx  = a->j;
610   v    = a->a;
611   ii   = a->i;
612 
613   for (i=0; i<m; i++) {
614     jrow = ii[i];
615     n    = ii[i+1] - jrow;
616     sum1  = 0.0;
617     sum2  = 0.0;
618     sum3  = 0.0;
619     sum4  = 0.0;
620     for (j=0; j<n; j++) {
621       sum1 += v[jrow]*x[4*idx[jrow]];
622       sum2 += v[jrow]*x[4*idx[jrow]+1];
623       sum3 += v[jrow]*x[4*idx[jrow]+2];
624       sum4 += v[jrow]*x[4*idx[jrow]+3];
625       jrow++;
626      }
627     y[4*i]   += sum1;
628     y[4*i+1] += sum2;
629     y[4*i+2] += sum3;
630     y[4*i+3] += sum4;
631   }
632 
633   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
634   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
635   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
636   PetscFunctionReturn(0);
637 }
638 #undef __FUNCT__
639 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
640 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
641 {
642   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
643   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
644   const PetscScalar *x,*v;
645   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
646   PetscErrorCode    ierr;
647   PetscInt          n,i;
648   const PetscInt    m = b->AIJ->rmap->n,*idx;
649 
650   PetscFunctionBegin;
651   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
652   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
653   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
654 
655   for (i=0; i<m; i++) {
656     idx    = a->j + a->i[i] ;
657     v      = a->a + a->i[i] ;
658     n      = a->i[i+1] - a->i[i];
659     alpha1 = x[4*i];
660     alpha2 = x[4*i+1];
661     alpha3 = x[4*i+2];
662     alpha4 = x[4*i+3];
663     while (n-->0) {
664       y[4*(*idx)]   += alpha1*(*v);
665       y[4*(*idx)+1] += alpha2*(*v);
666       y[4*(*idx)+2] += alpha3*(*v);
667       y[4*(*idx)+3] += alpha4*(*v);
668       idx++; v++;
669     }
670   }
671   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
672   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
673   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
674   PetscFunctionReturn(0);
675 }
676 /* ------------------------------------------------------------------------------*/
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatMult_SeqMAIJ_5"
680 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
681 {
682   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
683   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
684   const PetscScalar *x,*v;
685   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
686   PetscErrorCode    ierr;
687   PetscInt          nonzerorow=0,n,i,jrow,j;
688   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
689 
690   PetscFunctionBegin;
691   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
692   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
693   idx  = a->j;
694   v    = a->a;
695   ii   = a->i;
696 
697   for (i=0; i<m; i++) {
698     jrow = ii[i];
699     n    = ii[i+1] - jrow;
700     sum1  = 0.0;
701     sum2  = 0.0;
702     sum3  = 0.0;
703     sum4  = 0.0;
704     sum5  = 0.0;
705     nonzerorow += (n>0);
706     for (j=0; j<n; j++) {
707       sum1 += v[jrow]*x[5*idx[jrow]];
708       sum2 += v[jrow]*x[5*idx[jrow]+1];
709       sum3 += v[jrow]*x[5*idx[jrow]+2];
710       sum4 += v[jrow]*x[5*idx[jrow]+3];
711       sum5 += v[jrow]*x[5*idx[jrow]+4];
712       jrow++;
713      }
714     y[5*i]   = sum1;
715     y[5*i+1] = sum2;
716     y[5*i+2] = sum3;
717     y[5*i+3] = sum4;
718     y[5*i+4] = sum5;
719   }
720 
721   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
722   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
723   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
729 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
730 {
731   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
732   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
733   const PetscScalar *x,*v;
734   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
735   PetscErrorCode    ierr;
736   PetscInt          n,i;
737   const PetscInt    m = b->AIJ->rmap->n,*idx;
738 
739   PetscFunctionBegin;
740   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
741   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
742   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
743 
744   for (i=0; i<m; i++) {
745     idx    = a->j + a->i[i] ;
746     v      = a->a + a->i[i] ;
747     n      = a->i[i+1] - a->i[i];
748     alpha1 = x[5*i];
749     alpha2 = x[5*i+1];
750     alpha3 = x[5*i+2];
751     alpha4 = x[5*i+3];
752     alpha5 = x[5*i+4];
753     while (n-->0) {
754       y[5*(*idx)]   += alpha1*(*v);
755       y[5*(*idx)+1] += alpha2*(*v);
756       y[5*(*idx)+2] += alpha3*(*v);
757       y[5*(*idx)+3] += alpha4*(*v);
758       y[5*(*idx)+4] += alpha5*(*v);
759       idx++; v++;
760     }
761   }
762   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
763   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
764   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
770 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
771 {
772   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
773   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
774   const PetscScalar *x,*v;
775   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
776   PetscErrorCode    ierr;
777   PetscInt          n,i,jrow,j;
778   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
779 
780   PetscFunctionBegin;
781   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
782   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
783   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
784   idx  = a->j;
785   v    = a->a;
786   ii   = a->i;
787 
788   for (i=0; i<m; i++) {
789     jrow = ii[i];
790     n    = ii[i+1] - jrow;
791     sum1  = 0.0;
792     sum2  = 0.0;
793     sum3  = 0.0;
794     sum4  = 0.0;
795     sum5  = 0.0;
796     for (j=0; j<n; j++) {
797       sum1 += v[jrow]*x[5*idx[jrow]];
798       sum2 += v[jrow]*x[5*idx[jrow]+1];
799       sum3 += v[jrow]*x[5*idx[jrow]+2];
800       sum4 += v[jrow]*x[5*idx[jrow]+3];
801       sum5 += v[jrow]*x[5*idx[jrow]+4];
802       jrow++;
803      }
804     y[5*i]   += sum1;
805     y[5*i+1] += sum2;
806     y[5*i+2] += sum3;
807     y[5*i+3] += sum4;
808     y[5*i+4] += sum5;
809   }
810 
811   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
812   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
813   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNCT__
818 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
819 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
820 {
821   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
822   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
823   const PetscScalar *x,*v;
824   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
825   PetscErrorCode    ierr;
826   PetscInt          n,i;
827   const PetscInt    m = b->AIJ->rmap->n,*idx;
828 
829   PetscFunctionBegin;
830   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
831   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
832   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
833 
834   for (i=0; i<m; i++) {
835     idx    = a->j + a->i[i] ;
836     v      = a->a + a->i[i] ;
837     n      = a->i[i+1] - a->i[i];
838     alpha1 = x[5*i];
839     alpha2 = x[5*i+1];
840     alpha3 = x[5*i+2];
841     alpha4 = x[5*i+3];
842     alpha5 = x[5*i+4];
843     while (n-->0) {
844       y[5*(*idx)]   += alpha1*(*v);
845       y[5*(*idx)+1] += alpha2*(*v);
846       y[5*(*idx)+2] += alpha3*(*v);
847       y[5*(*idx)+3] += alpha4*(*v);
848       y[5*(*idx)+4] += alpha5*(*v);
849       idx++; v++;
850     }
851   }
852   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
853   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
854   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
855   PetscFunctionReturn(0);
856 }
857 
858 /* ------------------------------------------------------------------------------*/
859 #undef __FUNCT__
860 #define __FUNCT__ "MatMult_SeqMAIJ_6"
861 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
862 {
863   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
864   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
865   const PetscScalar *x,*v;
866   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
867   PetscErrorCode    ierr;
868   PetscInt          nonzerorow=0,n,i,jrow,j;
869   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
870 
871   PetscFunctionBegin;
872   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
873   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
874   idx  = a->j;
875   v    = a->a;
876   ii   = a->i;
877 
878   for (i=0; i<m; i++) {
879     jrow = ii[i];
880     n    = ii[i+1] - jrow;
881     sum1  = 0.0;
882     sum2  = 0.0;
883     sum3  = 0.0;
884     sum4  = 0.0;
885     sum5  = 0.0;
886     sum6  = 0.0;
887     nonzerorow += (n>0);
888     for (j=0; j<n; j++) {
889       sum1 += v[jrow]*x[6*idx[jrow]];
890       sum2 += v[jrow]*x[6*idx[jrow]+1];
891       sum3 += v[jrow]*x[6*idx[jrow]+2];
892       sum4 += v[jrow]*x[6*idx[jrow]+3];
893       sum5 += v[jrow]*x[6*idx[jrow]+4];
894       sum6 += v[jrow]*x[6*idx[jrow]+5];
895       jrow++;
896      }
897     y[6*i]   = sum1;
898     y[6*i+1] = sum2;
899     y[6*i+2] = sum3;
900     y[6*i+3] = sum4;
901     y[6*i+4] = sum5;
902     y[6*i+5] = sum6;
903   }
904 
905   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
906   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
907   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
913 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
914 {
915   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
916   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
917   const PetscScalar *x,*v;
918   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
919   PetscErrorCode    ierr;
920   PetscInt          n,i;
921   const PetscInt    m = b->AIJ->rmap->n,*idx;
922 
923   PetscFunctionBegin;
924   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
925   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
926   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
927 
928   for (i=0; i<m; i++) {
929     idx    = a->j + a->i[i] ;
930     v      = a->a + a->i[i] ;
931     n      = a->i[i+1] - a->i[i];
932     alpha1 = x[6*i];
933     alpha2 = x[6*i+1];
934     alpha3 = x[6*i+2];
935     alpha4 = x[6*i+3];
936     alpha5 = x[6*i+4];
937     alpha6 = x[6*i+5];
938     while (n-->0) {
939       y[6*(*idx)]   += alpha1*(*v);
940       y[6*(*idx)+1] += alpha2*(*v);
941       y[6*(*idx)+2] += alpha3*(*v);
942       y[6*(*idx)+3] += alpha4*(*v);
943       y[6*(*idx)+4] += alpha5*(*v);
944       y[6*(*idx)+5] += alpha6*(*v);
945       idx++; v++;
946     }
947   }
948   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
949   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
950   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
951   PetscFunctionReturn(0);
952 }
953 
954 #undef __FUNCT__
955 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
956 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
957 {
958   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
959   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
960   const PetscScalar *x,*v;
961   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
962   PetscErrorCode    ierr;
963   PetscInt          n,i,jrow,j;
964   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
965 
966   PetscFunctionBegin;
967   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
968   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
969   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
970   idx  = a->j;
971   v    = a->a;
972   ii   = a->i;
973 
974   for (i=0; i<m; i++) {
975     jrow = ii[i];
976     n    = ii[i+1] - jrow;
977     sum1  = 0.0;
978     sum2  = 0.0;
979     sum3  = 0.0;
980     sum4  = 0.0;
981     sum5  = 0.0;
982     sum6  = 0.0;
983     for (j=0; j<n; j++) {
984       sum1 += v[jrow]*x[6*idx[jrow]];
985       sum2 += v[jrow]*x[6*idx[jrow]+1];
986       sum3 += v[jrow]*x[6*idx[jrow]+2];
987       sum4 += v[jrow]*x[6*idx[jrow]+3];
988       sum5 += v[jrow]*x[6*idx[jrow]+4];
989       sum6 += v[jrow]*x[6*idx[jrow]+5];
990       jrow++;
991      }
992     y[6*i]   += sum1;
993     y[6*i+1] += sum2;
994     y[6*i+2] += sum3;
995     y[6*i+3] += sum4;
996     y[6*i+4] += sum5;
997     y[6*i+5] += sum6;
998   }
999 
1000   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1001   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1002   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
1008 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1009 {
1010   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1011   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1012   const PetscScalar *x,*v;
1013   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1014   PetscErrorCode    ierr;
1015   PetscInt          n,i;
1016   const PetscInt    m = b->AIJ->rmap->n,*idx;
1017 
1018   PetscFunctionBegin;
1019   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1020   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1021   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1022 
1023   for (i=0; i<m; i++) {
1024     idx    = a->j + a->i[i] ;
1025     v      = a->a + a->i[i] ;
1026     n      = a->i[i+1] - a->i[i];
1027     alpha1 = x[6*i];
1028     alpha2 = x[6*i+1];
1029     alpha3 = x[6*i+2];
1030     alpha4 = x[6*i+3];
1031     alpha5 = x[6*i+4];
1032     alpha6 = x[6*i+5];
1033     while (n-->0) {
1034       y[6*(*idx)]   += alpha1*(*v);
1035       y[6*(*idx)+1] += alpha2*(*v);
1036       y[6*(*idx)+2] += alpha3*(*v);
1037       y[6*(*idx)+3] += alpha4*(*v);
1038       y[6*(*idx)+4] += alpha5*(*v);
1039       y[6*(*idx)+5] += alpha6*(*v);
1040       idx++; v++;
1041     }
1042   }
1043   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1044   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1045   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 /* ------------------------------------------------------------------------------*/
1050 #undef __FUNCT__
1051 #define __FUNCT__ "MatMult_SeqMAIJ_7"
1052 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1053 {
1054   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1055   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1056   const PetscScalar *x,*v;
1057   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1058   PetscErrorCode    ierr;
1059   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1060   PetscInt          nonzerorow=0,n,i,jrow,j;
1061 
1062   PetscFunctionBegin;
1063   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1064   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1065   idx  = a->j;
1066   v    = a->a;
1067   ii   = a->i;
1068 
1069   for (i=0; i<m; i++) {
1070     jrow = ii[i];
1071     n    = ii[i+1] - jrow;
1072     sum1  = 0.0;
1073     sum2  = 0.0;
1074     sum3  = 0.0;
1075     sum4  = 0.0;
1076     sum5  = 0.0;
1077     sum6  = 0.0;
1078     sum7  = 0.0;
1079     nonzerorow += (n>0);
1080     for (j=0; j<n; j++) {
1081       sum1 += v[jrow]*x[7*idx[jrow]];
1082       sum2 += v[jrow]*x[7*idx[jrow]+1];
1083       sum3 += v[jrow]*x[7*idx[jrow]+2];
1084       sum4 += v[jrow]*x[7*idx[jrow]+3];
1085       sum5 += v[jrow]*x[7*idx[jrow]+4];
1086       sum6 += v[jrow]*x[7*idx[jrow]+5];
1087       sum7 += v[jrow]*x[7*idx[jrow]+6];
1088       jrow++;
1089      }
1090     y[7*i]   = sum1;
1091     y[7*i+1] = sum2;
1092     y[7*i+2] = sum3;
1093     y[7*i+3] = sum4;
1094     y[7*i+4] = sum5;
1095     y[7*i+5] = sum6;
1096     y[7*i+6] = sum7;
1097   }
1098 
1099   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1100   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1101   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1107 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1108 {
1109   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1110   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1111   const PetscScalar *x,*v;
1112   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1113   PetscErrorCode    ierr;
1114   const PetscInt    m = b->AIJ->rmap->n,*idx;
1115   PetscInt          n,i;
1116 
1117   PetscFunctionBegin;
1118   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1119   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1120   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1121 
1122   for (i=0; i<m; i++) {
1123     idx    = a->j + a->i[i] ;
1124     v      = a->a + a->i[i] ;
1125     n      = a->i[i+1] - a->i[i];
1126     alpha1 = x[7*i];
1127     alpha2 = x[7*i+1];
1128     alpha3 = x[7*i+2];
1129     alpha4 = x[7*i+3];
1130     alpha5 = x[7*i+4];
1131     alpha6 = x[7*i+5];
1132     alpha7 = x[7*i+6];
1133     while (n-->0) {
1134       y[7*(*idx)]   += alpha1*(*v);
1135       y[7*(*idx)+1] += alpha2*(*v);
1136       y[7*(*idx)+2] += alpha3*(*v);
1137       y[7*(*idx)+3] += alpha4*(*v);
1138       y[7*(*idx)+4] += alpha5*(*v);
1139       y[7*(*idx)+5] += alpha6*(*v);
1140       y[7*(*idx)+6] += alpha7*(*v);
1141       idx++; v++;
1142     }
1143   }
1144   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1145   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1146   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1152 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1153 {
1154   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1155   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1156   const PetscScalar *x,*v;
1157   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1158   PetscErrorCode    ierr;
1159   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1160   PetscInt          n,i,jrow,j;
1161 
1162   PetscFunctionBegin;
1163   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1164   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1165   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1166   idx  = a->j;
1167   v    = a->a;
1168   ii   = a->i;
1169 
1170   for (i=0; i<m; i++) {
1171     jrow = ii[i];
1172     n    = ii[i+1] - jrow;
1173     sum1  = 0.0;
1174     sum2  = 0.0;
1175     sum3  = 0.0;
1176     sum4  = 0.0;
1177     sum5  = 0.0;
1178     sum6  = 0.0;
1179     sum7  = 0.0;
1180     for (j=0; j<n; j++) {
1181       sum1 += v[jrow]*x[7*idx[jrow]];
1182       sum2 += v[jrow]*x[7*idx[jrow]+1];
1183       sum3 += v[jrow]*x[7*idx[jrow]+2];
1184       sum4 += v[jrow]*x[7*idx[jrow]+3];
1185       sum5 += v[jrow]*x[7*idx[jrow]+4];
1186       sum6 += v[jrow]*x[7*idx[jrow]+5];
1187       sum7 += v[jrow]*x[7*idx[jrow]+6];
1188       jrow++;
1189      }
1190     y[7*i]   += sum1;
1191     y[7*i+1] += sum2;
1192     y[7*i+2] += sum3;
1193     y[7*i+3] += sum4;
1194     y[7*i+4] += sum5;
1195     y[7*i+5] += sum6;
1196     y[7*i+6] += sum7;
1197   }
1198 
1199   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1200   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1201   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1207 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1208 {
1209   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1210   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1211   const PetscScalar *x,*v;
1212   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1213   PetscErrorCode    ierr;
1214   const PetscInt    m = b->AIJ->rmap->n,*idx;
1215   PetscInt          n,i;
1216 
1217   PetscFunctionBegin;
1218   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1219   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1220   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1221   for (i=0; i<m; i++) {
1222     idx    = a->j + a->i[i] ;
1223     v      = a->a + a->i[i] ;
1224     n      = a->i[i+1] - a->i[i];
1225     alpha1 = x[7*i];
1226     alpha2 = x[7*i+1];
1227     alpha3 = x[7*i+2];
1228     alpha4 = x[7*i+3];
1229     alpha5 = x[7*i+4];
1230     alpha6 = x[7*i+5];
1231     alpha7 = x[7*i+6];
1232     while (n-->0) {
1233       y[7*(*idx)]   += alpha1*(*v);
1234       y[7*(*idx)+1] += alpha2*(*v);
1235       y[7*(*idx)+2] += alpha3*(*v);
1236       y[7*(*idx)+3] += alpha4*(*v);
1237       y[7*(*idx)+4] += alpha5*(*v);
1238       y[7*(*idx)+5] += alpha6*(*v);
1239       y[7*(*idx)+6] += alpha7*(*v);
1240       idx++; v++;
1241     }
1242   }
1243   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1244   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1245   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1251 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1252 {
1253   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1254   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1255   const PetscScalar *x,*v;
1256   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1257   PetscErrorCode    ierr;
1258   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1259   PetscInt          nonzerorow=0,n,i,jrow,j;
1260 
1261   PetscFunctionBegin;
1262   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1263   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1264   idx  = a->j;
1265   v    = a->a;
1266   ii   = a->i;
1267 
1268   for (i=0; i<m; i++) {
1269     jrow = ii[i];
1270     n    = ii[i+1] - jrow;
1271     sum1  = 0.0;
1272     sum2  = 0.0;
1273     sum3  = 0.0;
1274     sum4  = 0.0;
1275     sum5  = 0.0;
1276     sum6  = 0.0;
1277     sum7  = 0.0;
1278     sum8  = 0.0;
1279     nonzerorow += (n>0);
1280     for (j=0; j<n; j++) {
1281       sum1 += v[jrow]*x[8*idx[jrow]];
1282       sum2 += v[jrow]*x[8*idx[jrow]+1];
1283       sum3 += v[jrow]*x[8*idx[jrow]+2];
1284       sum4 += v[jrow]*x[8*idx[jrow]+3];
1285       sum5 += v[jrow]*x[8*idx[jrow]+4];
1286       sum6 += v[jrow]*x[8*idx[jrow]+5];
1287       sum7 += v[jrow]*x[8*idx[jrow]+6];
1288       sum8 += v[jrow]*x[8*idx[jrow]+7];
1289       jrow++;
1290      }
1291     y[8*i]   = sum1;
1292     y[8*i+1] = sum2;
1293     y[8*i+2] = sum3;
1294     y[8*i+3] = sum4;
1295     y[8*i+4] = sum5;
1296     y[8*i+5] = sum6;
1297     y[8*i+6] = sum7;
1298     y[8*i+7] = sum8;
1299   }
1300 
1301   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1302   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1303   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1309 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1310 {
1311   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1312   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1313   const PetscScalar *x,*v;
1314   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1315   PetscErrorCode    ierr;
1316   const PetscInt    m = b->AIJ->rmap->n,*idx;
1317   PetscInt          n,i;
1318 
1319   PetscFunctionBegin;
1320   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1321   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1322   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1323 
1324   for (i=0; i<m; i++) {
1325     idx    = a->j + a->i[i] ;
1326     v      = a->a + a->i[i] ;
1327     n      = a->i[i+1] - a->i[i];
1328     alpha1 = x[8*i];
1329     alpha2 = x[8*i+1];
1330     alpha3 = x[8*i+2];
1331     alpha4 = x[8*i+3];
1332     alpha5 = x[8*i+4];
1333     alpha6 = x[8*i+5];
1334     alpha7 = x[8*i+6];
1335     alpha8 = x[8*i+7];
1336     while (n-->0) {
1337       y[8*(*idx)]   += alpha1*(*v);
1338       y[8*(*idx)+1] += alpha2*(*v);
1339       y[8*(*idx)+2] += alpha3*(*v);
1340       y[8*(*idx)+3] += alpha4*(*v);
1341       y[8*(*idx)+4] += alpha5*(*v);
1342       y[8*(*idx)+5] += alpha6*(*v);
1343       y[8*(*idx)+6] += alpha7*(*v);
1344       y[8*(*idx)+7] += alpha8*(*v);
1345       idx++; v++;
1346     }
1347   }
1348   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1349   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1350   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1356 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1357 {
1358   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1359   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1360   const PetscScalar *x,*v;
1361   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1362   PetscErrorCode    ierr;
1363   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1364   PetscInt          n,i,jrow,j;
1365 
1366   PetscFunctionBegin;
1367   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1368   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1369   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1370   idx  = a->j;
1371   v    = a->a;
1372   ii   = a->i;
1373 
1374   for (i=0; i<m; i++) {
1375     jrow = ii[i];
1376     n    = ii[i+1] - jrow;
1377     sum1  = 0.0;
1378     sum2  = 0.0;
1379     sum3  = 0.0;
1380     sum4  = 0.0;
1381     sum5  = 0.0;
1382     sum6  = 0.0;
1383     sum7  = 0.0;
1384     sum8  = 0.0;
1385     for (j=0; j<n; j++) {
1386       sum1 += v[jrow]*x[8*idx[jrow]];
1387       sum2 += v[jrow]*x[8*idx[jrow]+1];
1388       sum3 += v[jrow]*x[8*idx[jrow]+2];
1389       sum4 += v[jrow]*x[8*idx[jrow]+3];
1390       sum5 += v[jrow]*x[8*idx[jrow]+4];
1391       sum6 += v[jrow]*x[8*idx[jrow]+5];
1392       sum7 += v[jrow]*x[8*idx[jrow]+6];
1393       sum8 += v[jrow]*x[8*idx[jrow]+7];
1394       jrow++;
1395      }
1396     y[8*i]   += sum1;
1397     y[8*i+1] += sum2;
1398     y[8*i+2] += sum3;
1399     y[8*i+3] += sum4;
1400     y[8*i+4] += sum5;
1401     y[8*i+5] += sum6;
1402     y[8*i+6] += sum7;
1403     y[8*i+7] += sum8;
1404   }
1405 
1406   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1407   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1408   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1414 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1415 {
1416   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1417   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1418   const PetscScalar *x,*v;
1419   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1420   PetscErrorCode    ierr;
1421   const PetscInt    m = b->AIJ->rmap->n,*idx;
1422   PetscInt          n,i;
1423 
1424   PetscFunctionBegin;
1425   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1426   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1427   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1428   for (i=0; i<m; i++) {
1429     idx    = a->j + a->i[i] ;
1430     v      = a->a + a->i[i] ;
1431     n      = a->i[i+1] - a->i[i];
1432     alpha1 = x[8*i];
1433     alpha2 = x[8*i+1];
1434     alpha3 = x[8*i+2];
1435     alpha4 = x[8*i+3];
1436     alpha5 = x[8*i+4];
1437     alpha6 = x[8*i+5];
1438     alpha7 = x[8*i+6];
1439     alpha8 = x[8*i+7];
1440     while (n-->0) {
1441       y[8*(*idx)]   += alpha1*(*v);
1442       y[8*(*idx)+1] += alpha2*(*v);
1443       y[8*(*idx)+2] += alpha3*(*v);
1444       y[8*(*idx)+3] += alpha4*(*v);
1445       y[8*(*idx)+4] += alpha5*(*v);
1446       y[8*(*idx)+5] += alpha6*(*v);
1447       y[8*(*idx)+6] += alpha7*(*v);
1448       y[8*(*idx)+7] += alpha8*(*v);
1449       idx++; v++;
1450     }
1451   }
1452   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1453   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1454   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 /* ------------------------------------------------------------------------------*/
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1461 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1462 {
1463   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1464   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1465   const PetscScalar *x,*v;
1466   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1467   PetscErrorCode    ierr;
1468   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1469   PetscInt          nonzerorow=0,n,i,jrow,j;
1470 
1471   PetscFunctionBegin;
1472   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1473   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1474   idx  = a->j;
1475   v    = a->a;
1476   ii   = a->i;
1477 
1478   for (i=0; i<m; i++) {
1479     jrow = ii[i];
1480     n    = ii[i+1] - jrow;
1481     sum1  = 0.0;
1482     sum2  = 0.0;
1483     sum3  = 0.0;
1484     sum4  = 0.0;
1485     sum5  = 0.0;
1486     sum6  = 0.0;
1487     sum7  = 0.0;
1488     sum8  = 0.0;
1489     sum9  = 0.0;
1490     nonzerorow += (n>0);
1491     for (j=0; j<n; j++) {
1492       sum1 += v[jrow]*x[9*idx[jrow]];
1493       sum2 += v[jrow]*x[9*idx[jrow]+1];
1494       sum3 += v[jrow]*x[9*idx[jrow]+2];
1495       sum4 += v[jrow]*x[9*idx[jrow]+3];
1496       sum5 += v[jrow]*x[9*idx[jrow]+4];
1497       sum6 += v[jrow]*x[9*idx[jrow]+5];
1498       sum7 += v[jrow]*x[9*idx[jrow]+6];
1499       sum8 += v[jrow]*x[9*idx[jrow]+7];
1500       sum9 += v[jrow]*x[9*idx[jrow]+8];
1501       jrow++;
1502      }
1503     y[9*i]   = sum1;
1504     y[9*i+1] = sum2;
1505     y[9*i+2] = sum3;
1506     y[9*i+3] = sum4;
1507     y[9*i+4] = sum5;
1508     y[9*i+5] = sum6;
1509     y[9*i+6] = sum7;
1510     y[9*i+7] = sum8;
1511     y[9*i+8] = sum9;
1512   }
1513 
1514   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1515   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1516   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 /* ------------------------------------------------------------------------------*/
1521 
1522 #undef __FUNCT__
1523 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1524 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1525 {
1526   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1527   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1528   const PetscScalar *x,*v;
1529   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1530   PetscErrorCode    ierr;
1531   const PetscInt    m = b->AIJ->rmap->n,*idx;
1532   PetscInt          n,i;
1533 
1534   PetscFunctionBegin;
1535   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1536   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1537   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1538 
1539   for (i=0; i<m; i++) {
1540     idx    = a->j + a->i[i] ;
1541     v      = a->a + a->i[i] ;
1542     n      = a->i[i+1] - a->i[i];
1543     alpha1 = x[9*i];
1544     alpha2 = x[9*i+1];
1545     alpha3 = x[9*i+2];
1546     alpha4 = x[9*i+3];
1547     alpha5 = x[9*i+4];
1548     alpha6 = x[9*i+5];
1549     alpha7 = x[9*i+6];
1550     alpha8 = x[9*i+7];
1551     alpha9 = x[9*i+8];
1552     while (n-->0) {
1553       y[9*(*idx)]   += alpha1*(*v);
1554       y[9*(*idx)+1] += alpha2*(*v);
1555       y[9*(*idx)+2] += alpha3*(*v);
1556       y[9*(*idx)+3] += alpha4*(*v);
1557       y[9*(*idx)+4] += alpha5*(*v);
1558       y[9*(*idx)+5] += alpha6*(*v);
1559       y[9*(*idx)+6] += alpha7*(*v);
1560       y[9*(*idx)+7] += alpha8*(*v);
1561       y[9*(*idx)+8] += alpha9*(*v);
1562       idx++; v++;
1563     }
1564   }
1565   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1566   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1567   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1573 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1574 {
1575   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1576   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1577   const PetscScalar *x,*v;
1578   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1579   PetscErrorCode    ierr;
1580   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1581   PetscInt          n,i,jrow,j;
1582 
1583   PetscFunctionBegin;
1584   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1585   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1586   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1587   idx  = a->j;
1588   v    = a->a;
1589   ii   = a->i;
1590 
1591   for (i=0; i<m; i++) {
1592     jrow = ii[i];
1593     n    = ii[i+1] - jrow;
1594     sum1  = 0.0;
1595     sum2  = 0.0;
1596     sum3  = 0.0;
1597     sum4  = 0.0;
1598     sum5  = 0.0;
1599     sum6  = 0.0;
1600     sum7  = 0.0;
1601     sum8  = 0.0;
1602     sum9  = 0.0;
1603     for (j=0; j<n; j++) {
1604       sum1 += v[jrow]*x[9*idx[jrow]];
1605       sum2 += v[jrow]*x[9*idx[jrow]+1];
1606       sum3 += v[jrow]*x[9*idx[jrow]+2];
1607       sum4 += v[jrow]*x[9*idx[jrow]+3];
1608       sum5 += v[jrow]*x[9*idx[jrow]+4];
1609       sum6 += v[jrow]*x[9*idx[jrow]+5];
1610       sum7 += v[jrow]*x[9*idx[jrow]+6];
1611       sum8 += v[jrow]*x[9*idx[jrow]+7];
1612       sum9 += v[jrow]*x[9*idx[jrow]+8];
1613       jrow++;
1614      }
1615     y[9*i]   += sum1;
1616     y[9*i+1] += sum2;
1617     y[9*i+2] += sum3;
1618     y[9*i+3] += sum4;
1619     y[9*i+4] += sum5;
1620     y[9*i+5] += sum6;
1621     y[9*i+6] += sum7;
1622     y[9*i+7] += sum8;
1623     y[9*i+8] += sum9;
1624   }
1625 
1626   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1627   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1628   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1634 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1635 {
1636   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1637   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1638   const PetscScalar *x,*v;
1639   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1640   PetscErrorCode    ierr;
1641   const PetscInt    m = b->AIJ->rmap->n,*idx;
1642   PetscInt          n,i;
1643 
1644   PetscFunctionBegin;
1645   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1646   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1647   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1648   for (i=0; i<m; i++) {
1649     idx    = a->j + a->i[i] ;
1650     v      = a->a + a->i[i] ;
1651     n      = a->i[i+1] - a->i[i];
1652     alpha1 = x[9*i];
1653     alpha2 = x[9*i+1];
1654     alpha3 = x[9*i+2];
1655     alpha4 = x[9*i+3];
1656     alpha5 = x[9*i+4];
1657     alpha6 = x[9*i+5];
1658     alpha7 = x[9*i+6];
1659     alpha8 = x[9*i+7];
1660     alpha9 = x[9*i+8];
1661     while (n-->0) {
1662       y[9*(*idx)]   += alpha1*(*v);
1663       y[9*(*idx)+1] += alpha2*(*v);
1664       y[9*(*idx)+2] += alpha3*(*v);
1665       y[9*(*idx)+3] += alpha4*(*v);
1666       y[9*(*idx)+4] += alpha5*(*v);
1667       y[9*(*idx)+5] += alpha6*(*v);
1668       y[9*(*idx)+6] += alpha7*(*v);
1669       y[9*(*idx)+7] += alpha8*(*v);
1670       y[9*(*idx)+8] += alpha9*(*v);
1671       idx++; v++;
1672     }
1673   }
1674   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1675   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1676   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1677   PetscFunctionReturn(0);
1678 }
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1681 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1682 {
1683   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1684   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1685   const PetscScalar *x,*v;
1686   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1687   PetscErrorCode    ierr;
1688   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1689   PetscInt          nonzerorow=0,n,i,jrow,j;
1690 
1691   PetscFunctionBegin;
1692   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1693   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1694   idx  = a->j;
1695   v    = a->a;
1696   ii   = a->i;
1697 
1698   for (i=0; i<m; i++) {
1699     jrow = ii[i];
1700     n    = ii[i+1] - jrow;
1701     sum1  = 0.0;
1702     sum2  = 0.0;
1703     sum3  = 0.0;
1704     sum4  = 0.0;
1705     sum5  = 0.0;
1706     sum6  = 0.0;
1707     sum7  = 0.0;
1708     sum8  = 0.0;
1709     sum9  = 0.0;
1710     sum10 = 0.0;
1711     nonzerorow += (n>0);
1712     for (j=0; j<n; j++) {
1713       sum1  += v[jrow]*x[10*idx[jrow]];
1714       sum2  += v[jrow]*x[10*idx[jrow]+1];
1715       sum3  += v[jrow]*x[10*idx[jrow]+2];
1716       sum4  += v[jrow]*x[10*idx[jrow]+3];
1717       sum5  += v[jrow]*x[10*idx[jrow]+4];
1718       sum6  += v[jrow]*x[10*idx[jrow]+5];
1719       sum7  += v[jrow]*x[10*idx[jrow]+6];
1720       sum8  += v[jrow]*x[10*idx[jrow]+7];
1721       sum9  += v[jrow]*x[10*idx[jrow]+8];
1722       sum10 += v[jrow]*x[10*idx[jrow]+9];
1723       jrow++;
1724      }
1725     y[10*i]   = sum1;
1726     y[10*i+1] = sum2;
1727     y[10*i+2] = sum3;
1728     y[10*i+3] = sum4;
1729     y[10*i+4] = sum5;
1730     y[10*i+5] = sum6;
1731     y[10*i+6] = sum7;
1732     y[10*i+7] = sum8;
1733     y[10*i+8] = sum9;
1734     y[10*i+9] = sum10;
1735   }
1736 
1737   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1738   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1739   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1745 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1746 {
1747   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1748   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1749   const PetscScalar *x,*v;
1750   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1751   PetscErrorCode    ierr;
1752   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1753   PetscInt          n,i,jrow,j;
1754 
1755   PetscFunctionBegin;
1756   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1757   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1758   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1759   idx  = a->j;
1760   v    = a->a;
1761   ii   = a->i;
1762 
1763   for (i=0; i<m; i++) {
1764     jrow = ii[i];
1765     n    = ii[i+1] - jrow;
1766     sum1  = 0.0;
1767     sum2  = 0.0;
1768     sum3  = 0.0;
1769     sum4  = 0.0;
1770     sum5  = 0.0;
1771     sum6  = 0.0;
1772     sum7  = 0.0;
1773     sum8  = 0.0;
1774     sum9  = 0.0;
1775     sum10 = 0.0;
1776     for (j=0; j<n; j++) {
1777       sum1  += v[jrow]*x[10*idx[jrow]];
1778       sum2  += v[jrow]*x[10*idx[jrow]+1];
1779       sum3  += v[jrow]*x[10*idx[jrow]+2];
1780       sum4  += v[jrow]*x[10*idx[jrow]+3];
1781       sum5  += v[jrow]*x[10*idx[jrow]+4];
1782       sum6  += v[jrow]*x[10*idx[jrow]+5];
1783       sum7  += v[jrow]*x[10*idx[jrow]+6];
1784       sum8  += v[jrow]*x[10*idx[jrow]+7];
1785       sum9  += v[jrow]*x[10*idx[jrow]+8];
1786       sum10 += v[jrow]*x[10*idx[jrow]+9];
1787       jrow++;
1788      }
1789     y[10*i]   += sum1;
1790     y[10*i+1] += sum2;
1791     y[10*i+2] += sum3;
1792     y[10*i+3] += sum4;
1793     y[10*i+4] += sum5;
1794     y[10*i+5] += sum6;
1795     y[10*i+6] += sum7;
1796     y[10*i+7] += sum8;
1797     y[10*i+8] += sum9;
1798     y[10*i+9] += sum10;
1799   }
1800 
1801   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1802   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1803   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 #undef __FUNCT__
1808 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1809 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1810 {
1811   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1812   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1813   const PetscScalar *x,*v;
1814   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1815   PetscErrorCode    ierr;
1816   const PetscInt    m = b->AIJ->rmap->n,*idx;
1817   PetscInt          n,i;
1818 
1819   PetscFunctionBegin;
1820   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1821   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1822   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1823 
1824   for (i=0; i<m; i++) {
1825     idx    = a->j + a->i[i] ;
1826     v      = a->a + a->i[i] ;
1827     n      = a->i[i+1] - a->i[i];
1828     alpha1 = x[10*i];
1829     alpha2 = x[10*i+1];
1830     alpha3 = x[10*i+2];
1831     alpha4 = x[10*i+3];
1832     alpha5 = x[10*i+4];
1833     alpha6 = x[10*i+5];
1834     alpha7 = x[10*i+6];
1835     alpha8 = x[10*i+7];
1836     alpha9 = x[10*i+8];
1837     alpha10 = x[10*i+9];
1838     while (n-->0) {
1839       y[10*(*idx)]   += alpha1*(*v);
1840       y[10*(*idx)+1] += alpha2*(*v);
1841       y[10*(*idx)+2] += alpha3*(*v);
1842       y[10*(*idx)+3] += alpha4*(*v);
1843       y[10*(*idx)+4] += alpha5*(*v);
1844       y[10*(*idx)+5] += alpha6*(*v);
1845       y[10*(*idx)+6] += alpha7*(*v);
1846       y[10*(*idx)+7] += alpha8*(*v);
1847       y[10*(*idx)+8] += alpha9*(*v);
1848       y[10*(*idx)+9] += alpha10*(*v);
1849       idx++; v++;
1850     }
1851   }
1852   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1853   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1854   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 #undef __FUNCT__
1859 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1860 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1861 {
1862   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1863   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1864   const PetscScalar *x,*v;
1865   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1866   PetscErrorCode    ierr;
1867   const PetscInt    m = b->AIJ->rmap->n,*idx;
1868   PetscInt          n,i;
1869 
1870   PetscFunctionBegin;
1871   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1872   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1873   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1874   for (i=0; i<m; i++) {
1875     idx    = a->j + a->i[i] ;
1876     v      = a->a + a->i[i] ;
1877     n      = a->i[i+1] - a->i[i];
1878     alpha1 = x[10*i];
1879     alpha2 = x[10*i+1];
1880     alpha3 = x[10*i+2];
1881     alpha4 = x[10*i+3];
1882     alpha5 = x[10*i+4];
1883     alpha6 = x[10*i+5];
1884     alpha7 = x[10*i+6];
1885     alpha8 = x[10*i+7];
1886     alpha9 = x[10*i+8];
1887     alpha10 = x[10*i+9];
1888     while (n-->0) {
1889       y[10*(*idx)]   += alpha1*(*v);
1890       y[10*(*idx)+1] += alpha2*(*v);
1891       y[10*(*idx)+2] += alpha3*(*v);
1892       y[10*(*idx)+3] += alpha4*(*v);
1893       y[10*(*idx)+4] += alpha5*(*v);
1894       y[10*(*idx)+5] += alpha6*(*v);
1895       y[10*(*idx)+6] += alpha7*(*v);
1896       y[10*(*idx)+7] += alpha8*(*v);
1897       y[10*(*idx)+8] += alpha9*(*v);
1898       y[10*(*idx)+9] += alpha10*(*v);
1899       idx++; v++;
1900     }
1901   }
1902   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1903   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1904   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 
1909 /*--------------------------------------------------------------------------------------------*/
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatMult_SeqMAIJ_11"
1912 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1913 {
1914   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1915   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1916   const PetscScalar *x,*v;
1917   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1918   PetscErrorCode    ierr;
1919   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1920   PetscInt          nonzerorow=0,n,i,jrow,j;
1921 
1922   PetscFunctionBegin;
1923   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1924   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1925   idx  = a->j;
1926   v    = a->a;
1927   ii   = a->i;
1928 
1929   for (i=0; i<m; i++) {
1930     jrow = ii[i];
1931     n    = ii[i+1] - jrow;
1932     sum1  = 0.0;
1933     sum2  = 0.0;
1934     sum3  = 0.0;
1935     sum4  = 0.0;
1936     sum5  = 0.0;
1937     sum6  = 0.0;
1938     sum7  = 0.0;
1939     sum8  = 0.0;
1940     sum9  = 0.0;
1941     sum10 = 0.0;
1942     sum11 = 0.0;
1943     nonzerorow += (n>0);
1944     for (j=0; j<n; j++) {
1945       sum1  += v[jrow]*x[11*idx[jrow]];
1946       sum2  += v[jrow]*x[11*idx[jrow]+1];
1947       sum3  += v[jrow]*x[11*idx[jrow]+2];
1948       sum4  += v[jrow]*x[11*idx[jrow]+3];
1949       sum5  += v[jrow]*x[11*idx[jrow]+4];
1950       sum6  += v[jrow]*x[11*idx[jrow]+5];
1951       sum7  += v[jrow]*x[11*idx[jrow]+6];
1952       sum8  += v[jrow]*x[11*idx[jrow]+7];
1953       sum9  += v[jrow]*x[11*idx[jrow]+8];
1954       sum10 += v[jrow]*x[11*idx[jrow]+9];
1955       sum11 += v[jrow]*x[11*idx[jrow]+10];
1956       jrow++;
1957      }
1958     y[11*i]   = sum1;
1959     y[11*i+1] = sum2;
1960     y[11*i+2] = sum3;
1961     y[11*i+3] = sum4;
1962     y[11*i+4] = sum5;
1963     y[11*i+5] = sum6;
1964     y[11*i+6] = sum7;
1965     y[11*i+7] = sum8;
1966     y[11*i+8] = sum9;
1967     y[11*i+9] = sum10;
1968     y[11*i+10] = sum11;
1969   }
1970 
1971   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1972   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1973   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1974   PetscFunctionReturn(0);
1975 }
1976 
1977 #undef __FUNCT__
1978 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
1979 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1980 {
1981   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1982   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1983   const PetscScalar *x,*v;
1984   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1985   PetscErrorCode    ierr;
1986   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1987   PetscInt          n,i,jrow,j;
1988 
1989   PetscFunctionBegin;
1990   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1991   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1992   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1993   idx  = a->j;
1994   v    = a->a;
1995   ii   = a->i;
1996 
1997   for (i=0; i<m; i++) {
1998     jrow = ii[i];
1999     n    = ii[i+1] - jrow;
2000     sum1  = 0.0;
2001     sum2  = 0.0;
2002     sum3  = 0.0;
2003     sum4  = 0.0;
2004     sum5  = 0.0;
2005     sum6  = 0.0;
2006     sum7  = 0.0;
2007     sum8  = 0.0;
2008     sum9  = 0.0;
2009     sum10 = 0.0;
2010     sum11 = 0.0;
2011     for (j=0; j<n; j++) {
2012       sum1  += v[jrow]*x[11*idx[jrow]];
2013       sum2  += v[jrow]*x[11*idx[jrow]+1];
2014       sum3  += v[jrow]*x[11*idx[jrow]+2];
2015       sum4  += v[jrow]*x[11*idx[jrow]+3];
2016       sum5  += v[jrow]*x[11*idx[jrow]+4];
2017       sum6  += v[jrow]*x[11*idx[jrow]+5];
2018       sum7  += v[jrow]*x[11*idx[jrow]+6];
2019       sum8  += v[jrow]*x[11*idx[jrow]+7];
2020       sum9  += v[jrow]*x[11*idx[jrow]+8];
2021       sum10 += v[jrow]*x[11*idx[jrow]+9];
2022       sum11 += v[jrow]*x[11*idx[jrow]+10];
2023       jrow++;
2024      }
2025     y[11*i]   += sum1;
2026     y[11*i+1] += sum2;
2027     y[11*i+2] += sum3;
2028     y[11*i+3] += sum4;
2029     y[11*i+4] += sum5;
2030     y[11*i+5] += sum6;
2031     y[11*i+6] += sum7;
2032     y[11*i+7] += sum8;
2033     y[11*i+8] += sum9;
2034     y[11*i+9] += sum10;
2035     y[11*i+10] += sum11;
2036   }
2037 
2038   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2039   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2040   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2041   PetscFunctionReturn(0);
2042 }
2043 
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2046 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2047 {
2048   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2049   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2050   const PetscScalar *x,*v;
2051   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2052   PetscErrorCode    ierr;
2053   const PetscInt    m = b->AIJ->rmap->n,*idx;
2054   PetscInt          n,i;
2055 
2056   PetscFunctionBegin;
2057   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2058   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2059   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2060 
2061   for (i=0; i<m; i++) {
2062     idx    = a->j + a->i[i] ;
2063     v      = a->a + a->i[i] ;
2064     n      = a->i[i+1] - a->i[i];
2065     alpha1 = x[11*i];
2066     alpha2 = x[11*i+1];
2067     alpha3 = x[11*i+2];
2068     alpha4 = x[11*i+3];
2069     alpha5 = x[11*i+4];
2070     alpha6 = x[11*i+5];
2071     alpha7 = x[11*i+6];
2072     alpha8 = x[11*i+7];
2073     alpha9 = x[11*i+8];
2074     alpha10 = x[11*i+9];
2075     alpha11 = x[11*i+10];
2076     while (n-->0) {
2077       y[11*(*idx)]   += alpha1*(*v);
2078       y[11*(*idx)+1] += alpha2*(*v);
2079       y[11*(*idx)+2] += alpha3*(*v);
2080       y[11*(*idx)+3] += alpha4*(*v);
2081       y[11*(*idx)+4] += alpha5*(*v);
2082       y[11*(*idx)+5] += alpha6*(*v);
2083       y[11*(*idx)+6] += alpha7*(*v);
2084       y[11*(*idx)+7] += alpha8*(*v);
2085       y[11*(*idx)+8] += alpha9*(*v);
2086       y[11*(*idx)+9] += alpha10*(*v);
2087       y[11*(*idx)+10] += alpha11*(*v);
2088       idx++; v++;
2089     }
2090   }
2091   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2092   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2093   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2099 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2100 {
2101   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2102   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2103   const PetscScalar *x,*v;
2104   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2105   PetscErrorCode    ierr;
2106   const PetscInt    m = b->AIJ->rmap->n,*idx;
2107   PetscInt          n,i;
2108 
2109   PetscFunctionBegin;
2110   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2111   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2112   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2113   for (i=0; i<m; i++) {
2114     idx    = a->j + a->i[i] ;
2115     v      = a->a + a->i[i] ;
2116     n      = a->i[i+1] - a->i[i];
2117     alpha1 = x[11*i];
2118     alpha2 = x[11*i+1];
2119     alpha3 = x[11*i+2];
2120     alpha4 = x[11*i+3];
2121     alpha5 = x[11*i+4];
2122     alpha6 = x[11*i+5];
2123     alpha7 = x[11*i+6];
2124     alpha8 = x[11*i+7];
2125     alpha9 = x[11*i+8];
2126     alpha10 = x[11*i+9];
2127     alpha11 = x[11*i+10];
2128     while (n-->0) {
2129       y[11*(*idx)]   += alpha1*(*v);
2130       y[11*(*idx)+1] += alpha2*(*v);
2131       y[11*(*idx)+2] += alpha3*(*v);
2132       y[11*(*idx)+3] += alpha4*(*v);
2133       y[11*(*idx)+4] += alpha5*(*v);
2134       y[11*(*idx)+5] += alpha6*(*v);
2135       y[11*(*idx)+6] += alpha7*(*v);
2136       y[11*(*idx)+7] += alpha8*(*v);
2137       y[11*(*idx)+8] += alpha9*(*v);
2138       y[11*(*idx)+9] += alpha10*(*v);
2139       y[11*(*idx)+10] += alpha11*(*v);
2140       idx++; v++;
2141     }
2142   }
2143   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2144   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2145   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2146   PetscFunctionReturn(0);
2147 }
2148 
2149 
2150 /*--------------------------------------------------------------------------------------------*/
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatMult_SeqMAIJ_16"
2153 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2154 {
2155   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2156   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2157   const PetscScalar *x,*v;
2158   PetscScalar        *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2159   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2160   PetscErrorCode     ierr;
2161   const PetscInt     m = b->AIJ->rmap->n,*idx,*ii;
2162   PetscInt           nonzerorow=0,n,i,jrow,j;
2163 
2164   PetscFunctionBegin;
2165   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2166   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2167   idx  = a->j;
2168   v    = a->a;
2169   ii   = a->i;
2170 
2171   for (i=0; i<m; i++) {
2172     jrow = ii[i];
2173     n    = ii[i+1] - jrow;
2174     sum1  = 0.0;
2175     sum2  = 0.0;
2176     sum3  = 0.0;
2177     sum4  = 0.0;
2178     sum5  = 0.0;
2179     sum6  = 0.0;
2180     sum7  = 0.0;
2181     sum8  = 0.0;
2182     sum9  = 0.0;
2183     sum10 = 0.0;
2184     sum11 = 0.0;
2185     sum12 = 0.0;
2186     sum13 = 0.0;
2187     sum14 = 0.0;
2188     sum15 = 0.0;
2189     sum16 = 0.0;
2190     nonzerorow += (n>0);
2191     for (j=0; j<n; j++) {
2192       sum1  += v[jrow]*x[16*idx[jrow]];
2193       sum2  += v[jrow]*x[16*idx[jrow]+1];
2194       sum3  += v[jrow]*x[16*idx[jrow]+2];
2195       sum4  += v[jrow]*x[16*idx[jrow]+3];
2196       sum5  += v[jrow]*x[16*idx[jrow]+4];
2197       sum6  += v[jrow]*x[16*idx[jrow]+5];
2198       sum7  += v[jrow]*x[16*idx[jrow]+6];
2199       sum8  += v[jrow]*x[16*idx[jrow]+7];
2200       sum9  += v[jrow]*x[16*idx[jrow]+8];
2201       sum10 += v[jrow]*x[16*idx[jrow]+9];
2202       sum11 += v[jrow]*x[16*idx[jrow]+10];
2203       sum12 += v[jrow]*x[16*idx[jrow]+11];
2204       sum13 += v[jrow]*x[16*idx[jrow]+12];
2205       sum14 += v[jrow]*x[16*idx[jrow]+13];
2206       sum15 += v[jrow]*x[16*idx[jrow]+14];
2207       sum16 += v[jrow]*x[16*idx[jrow]+15];
2208       jrow++;
2209      }
2210     y[16*i]    = sum1;
2211     y[16*i+1]  = sum2;
2212     y[16*i+2]  = sum3;
2213     y[16*i+3]  = sum4;
2214     y[16*i+4]  = sum5;
2215     y[16*i+5]  = sum6;
2216     y[16*i+6]  = sum7;
2217     y[16*i+7]  = sum8;
2218     y[16*i+8]  = sum9;
2219     y[16*i+9]  = sum10;
2220     y[16*i+10] = sum11;
2221     y[16*i+11] = sum12;
2222     y[16*i+12] = sum13;
2223     y[16*i+13] = sum14;
2224     y[16*i+14] = sum15;
2225     y[16*i+15] = sum16;
2226   }
2227 
2228   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2229   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2230   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 #undef __FUNCT__
2235 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2236 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2237 {
2238   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2239   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2240   const PetscScalar *x,*v;
2241   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2242   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2243   PetscErrorCode    ierr;
2244   const PetscInt    m = b->AIJ->rmap->n,*idx;
2245   PetscInt          n,i;
2246 
2247   PetscFunctionBegin;
2248   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2249   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2250   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2251 
2252   for (i=0; i<m; i++) {
2253     idx    = a->j + a->i[i] ;
2254     v      = a->a + a->i[i] ;
2255     n      = a->i[i+1] - a->i[i];
2256     alpha1  = x[16*i];
2257     alpha2  = x[16*i+1];
2258     alpha3  = x[16*i+2];
2259     alpha4  = x[16*i+3];
2260     alpha5  = x[16*i+4];
2261     alpha6  = x[16*i+5];
2262     alpha7  = x[16*i+6];
2263     alpha8  = x[16*i+7];
2264     alpha9  = x[16*i+8];
2265     alpha10 = x[16*i+9];
2266     alpha11 = x[16*i+10];
2267     alpha12 = x[16*i+11];
2268     alpha13 = x[16*i+12];
2269     alpha14 = x[16*i+13];
2270     alpha15 = x[16*i+14];
2271     alpha16 = x[16*i+15];
2272     while (n-->0) {
2273       y[16*(*idx)]    += alpha1*(*v);
2274       y[16*(*idx)+1]  += alpha2*(*v);
2275       y[16*(*idx)+2]  += alpha3*(*v);
2276       y[16*(*idx)+3]  += alpha4*(*v);
2277       y[16*(*idx)+4]  += alpha5*(*v);
2278       y[16*(*idx)+5]  += alpha6*(*v);
2279       y[16*(*idx)+6]  += alpha7*(*v);
2280       y[16*(*idx)+7]  += alpha8*(*v);
2281       y[16*(*idx)+8]  += alpha9*(*v);
2282       y[16*(*idx)+9]  += alpha10*(*v);
2283       y[16*(*idx)+10] += alpha11*(*v);
2284       y[16*(*idx)+11] += alpha12*(*v);
2285       y[16*(*idx)+12] += alpha13*(*v);
2286       y[16*(*idx)+13] += alpha14*(*v);
2287       y[16*(*idx)+14] += alpha15*(*v);
2288       y[16*(*idx)+15] += alpha16*(*v);
2289       idx++; v++;
2290     }
2291   }
2292   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2293   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2294   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2295   PetscFunctionReturn(0);
2296 }
2297 
2298 #undef __FUNCT__
2299 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2300 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2301 {
2302   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2303   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2304   const PetscScalar *x,*v;
2305   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2306   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2307   PetscErrorCode    ierr;
2308   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2309   PetscInt          n,i,jrow,j;
2310 
2311   PetscFunctionBegin;
2312   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2313   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2314   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2315   idx  = a->j;
2316   v    = a->a;
2317   ii   = a->i;
2318 
2319   for (i=0; i<m; i++) {
2320     jrow = ii[i];
2321     n    = ii[i+1] - jrow;
2322     sum1  = 0.0;
2323     sum2  = 0.0;
2324     sum3  = 0.0;
2325     sum4  = 0.0;
2326     sum5  = 0.0;
2327     sum6  = 0.0;
2328     sum7  = 0.0;
2329     sum8  = 0.0;
2330     sum9  = 0.0;
2331     sum10 = 0.0;
2332     sum11 = 0.0;
2333     sum12 = 0.0;
2334     sum13 = 0.0;
2335     sum14 = 0.0;
2336     sum15 = 0.0;
2337     sum16 = 0.0;
2338     for (j=0; j<n; j++) {
2339       sum1  += v[jrow]*x[16*idx[jrow]];
2340       sum2  += v[jrow]*x[16*idx[jrow]+1];
2341       sum3  += v[jrow]*x[16*idx[jrow]+2];
2342       sum4  += v[jrow]*x[16*idx[jrow]+3];
2343       sum5  += v[jrow]*x[16*idx[jrow]+4];
2344       sum6  += v[jrow]*x[16*idx[jrow]+5];
2345       sum7  += v[jrow]*x[16*idx[jrow]+6];
2346       sum8  += v[jrow]*x[16*idx[jrow]+7];
2347       sum9  += v[jrow]*x[16*idx[jrow]+8];
2348       sum10 += v[jrow]*x[16*idx[jrow]+9];
2349       sum11 += v[jrow]*x[16*idx[jrow]+10];
2350       sum12 += v[jrow]*x[16*idx[jrow]+11];
2351       sum13 += v[jrow]*x[16*idx[jrow]+12];
2352       sum14 += v[jrow]*x[16*idx[jrow]+13];
2353       sum15 += v[jrow]*x[16*idx[jrow]+14];
2354       sum16 += v[jrow]*x[16*idx[jrow]+15];
2355       jrow++;
2356      }
2357     y[16*i]    += sum1;
2358     y[16*i+1]  += sum2;
2359     y[16*i+2]  += sum3;
2360     y[16*i+3]  += sum4;
2361     y[16*i+4]  += sum5;
2362     y[16*i+5]  += sum6;
2363     y[16*i+6]  += sum7;
2364     y[16*i+7]  += sum8;
2365     y[16*i+8]  += sum9;
2366     y[16*i+9]  += sum10;
2367     y[16*i+10] += sum11;
2368     y[16*i+11] += sum12;
2369     y[16*i+12] += sum13;
2370     y[16*i+13] += sum14;
2371     y[16*i+14] += sum15;
2372     y[16*i+15] += sum16;
2373   }
2374 
2375   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2376   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2377   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 #undef __FUNCT__
2382 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2383 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2384 {
2385   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2386   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2387   const PetscScalar *x,*v;
2388   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2389   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2390   PetscErrorCode    ierr;
2391   const PetscInt    m = b->AIJ->rmap->n,*idx;
2392   PetscInt          n,i;
2393 
2394   PetscFunctionBegin;
2395   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2396   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2397   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2398   for (i=0; i<m; i++) {
2399     idx    = a->j + a->i[i] ;
2400     v      = a->a + a->i[i] ;
2401     n      = a->i[i+1] - a->i[i];
2402     alpha1 = x[16*i];
2403     alpha2 = x[16*i+1];
2404     alpha3 = x[16*i+2];
2405     alpha4 = x[16*i+3];
2406     alpha5 = x[16*i+4];
2407     alpha6 = x[16*i+5];
2408     alpha7 = x[16*i+6];
2409     alpha8 = x[16*i+7];
2410     alpha9  = x[16*i+8];
2411     alpha10 = x[16*i+9];
2412     alpha11 = x[16*i+10];
2413     alpha12 = x[16*i+11];
2414     alpha13 = x[16*i+12];
2415     alpha14 = x[16*i+13];
2416     alpha15 = x[16*i+14];
2417     alpha16 = x[16*i+15];
2418     while (n-->0) {
2419       y[16*(*idx)]   += alpha1*(*v);
2420       y[16*(*idx)+1] += alpha2*(*v);
2421       y[16*(*idx)+2] += alpha3*(*v);
2422       y[16*(*idx)+3] += alpha4*(*v);
2423       y[16*(*idx)+4] += alpha5*(*v);
2424       y[16*(*idx)+5] += alpha6*(*v);
2425       y[16*(*idx)+6] += alpha7*(*v);
2426       y[16*(*idx)+7] += alpha8*(*v);
2427       y[16*(*idx)+8]  += alpha9*(*v);
2428       y[16*(*idx)+9]  += alpha10*(*v);
2429       y[16*(*idx)+10] += alpha11*(*v);
2430       y[16*(*idx)+11] += alpha12*(*v);
2431       y[16*(*idx)+12] += alpha13*(*v);
2432       y[16*(*idx)+13] += alpha14*(*v);
2433       y[16*(*idx)+14] += alpha15*(*v);
2434       y[16*(*idx)+15] += alpha16*(*v);
2435       idx++; v++;
2436     }
2437   }
2438   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2439   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2440   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2441   PetscFunctionReturn(0);
2442 }
2443 
2444 /*--------------------------------------------------------------------------------------------*/
2445 #undef __FUNCT__
2446 #define __FUNCT__ "MatMult_SeqMAIJ_18"
2447 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2448 {
2449   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2450   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2451   const PetscScalar *x,*v;
2452   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2453   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2454   PetscErrorCode    ierr;
2455   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2456   PetscInt          nonzerorow=0,n,i,jrow,j;
2457 
2458   PetscFunctionBegin;
2459   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2460   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2461   idx  = a->j;
2462   v    = a->a;
2463   ii   = a->i;
2464 
2465   for (i=0; i<m; i++) {
2466     jrow = ii[i];
2467     n    = ii[i+1] - jrow;
2468     sum1  = 0.0;
2469     sum2  = 0.0;
2470     sum3  = 0.0;
2471     sum4  = 0.0;
2472     sum5  = 0.0;
2473     sum6  = 0.0;
2474     sum7  = 0.0;
2475     sum8  = 0.0;
2476     sum9  = 0.0;
2477     sum10 = 0.0;
2478     sum11 = 0.0;
2479     sum12 = 0.0;
2480     sum13 = 0.0;
2481     sum14 = 0.0;
2482     sum15 = 0.0;
2483     sum16 = 0.0;
2484     sum17 = 0.0;
2485     sum18 = 0.0;
2486     nonzerorow += (n>0);
2487     for (j=0; j<n; j++) {
2488       sum1  += v[jrow]*x[18*idx[jrow]];
2489       sum2  += v[jrow]*x[18*idx[jrow]+1];
2490       sum3  += v[jrow]*x[18*idx[jrow]+2];
2491       sum4  += v[jrow]*x[18*idx[jrow]+3];
2492       sum5  += v[jrow]*x[18*idx[jrow]+4];
2493       sum6  += v[jrow]*x[18*idx[jrow]+5];
2494       sum7  += v[jrow]*x[18*idx[jrow]+6];
2495       sum8  += v[jrow]*x[18*idx[jrow]+7];
2496       sum9  += v[jrow]*x[18*idx[jrow]+8];
2497       sum10 += v[jrow]*x[18*idx[jrow]+9];
2498       sum11 += v[jrow]*x[18*idx[jrow]+10];
2499       sum12 += v[jrow]*x[18*idx[jrow]+11];
2500       sum13 += v[jrow]*x[18*idx[jrow]+12];
2501       sum14 += v[jrow]*x[18*idx[jrow]+13];
2502       sum15 += v[jrow]*x[18*idx[jrow]+14];
2503       sum16 += v[jrow]*x[18*idx[jrow]+15];
2504       sum17 += v[jrow]*x[18*idx[jrow]+16];
2505       sum18 += v[jrow]*x[18*idx[jrow]+17];
2506       jrow++;
2507      }
2508     y[18*i]    = sum1;
2509     y[18*i+1]  = sum2;
2510     y[18*i+2]  = sum3;
2511     y[18*i+3]  = sum4;
2512     y[18*i+4]  = sum5;
2513     y[18*i+5]  = sum6;
2514     y[18*i+6]  = sum7;
2515     y[18*i+7]  = sum8;
2516     y[18*i+8]  = sum9;
2517     y[18*i+9]  = sum10;
2518     y[18*i+10] = sum11;
2519     y[18*i+11] = sum12;
2520     y[18*i+12] = sum13;
2521     y[18*i+13] = sum14;
2522     y[18*i+14] = sum15;
2523     y[18*i+15] = sum16;
2524     y[18*i+16] = sum17;
2525     y[18*i+17] = sum18;
2526   }
2527 
2528   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2529   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2530   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 #undef __FUNCT__
2535 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2536 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2537 {
2538   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2539   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2540   const PetscScalar *x,*v;
2541   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2542   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2543   PetscErrorCode    ierr;
2544   const PetscInt    m = b->AIJ->rmap->n,*idx;
2545   PetscInt          n,i;
2546 
2547   PetscFunctionBegin;
2548   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2549   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2550   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2551 
2552   for (i=0; i<m; i++) {
2553     idx    = a->j + a->i[i] ;
2554     v      = a->a + a->i[i] ;
2555     n      = a->i[i+1] - a->i[i];
2556     alpha1  = x[18*i];
2557     alpha2  = x[18*i+1];
2558     alpha3  = x[18*i+2];
2559     alpha4  = x[18*i+3];
2560     alpha5  = x[18*i+4];
2561     alpha6  = x[18*i+5];
2562     alpha7  = x[18*i+6];
2563     alpha8  = x[18*i+7];
2564     alpha9  = x[18*i+8];
2565     alpha10 = x[18*i+9];
2566     alpha11 = x[18*i+10];
2567     alpha12 = x[18*i+11];
2568     alpha13 = x[18*i+12];
2569     alpha14 = x[18*i+13];
2570     alpha15 = x[18*i+14];
2571     alpha16 = x[18*i+15];
2572     alpha17 = x[18*i+16];
2573     alpha18 = x[18*i+17];
2574     while (n-->0) {
2575       y[18*(*idx)]    += alpha1*(*v);
2576       y[18*(*idx)+1]  += alpha2*(*v);
2577       y[18*(*idx)+2]  += alpha3*(*v);
2578       y[18*(*idx)+3]  += alpha4*(*v);
2579       y[18*(*idx)+4]  += alpha5*(*v);
2580       y[18*(*idx)+5]  += alpha6*(*v);
2581       y[18*(*idx)+6]  += alpha7*(*v);
2582       y[18*(*idx)+7]  += alpha8*(*v);
2583       y[18*(*idx)+8]  += alpha9*(*v);
2584       y[18*(*idx)+9]  += alpha10*(*v);
2585       y[18*(*idx)+10] += alpha11*(*v);
2586       y[18*(*idx)+11] += alpha12*(*v);
2587       y[18*(*idx)+12] += alpha13*(*v);
2588       y[18*(*idx)+13] += alpha14*(*v);
2589       y[18*(*idx)+14] += alpha15*(*v);
2590       y[18*(*idx)+15] += alpha16*(*v);
2591       y[18*(*idx)+16] += alpha17*(*v);
2592       y[18*(*idx)+17] += alpha18*(*v);
2593       idx++; v++;
2594     }
2595   }
2596   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2597   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2598   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2599   PetscFunctionReturn(0);
2600 }
2601 
2602 #undef __FUNCT__
2603 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2604 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2605 {
2606   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2607   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2608   const PetscScalar *x,*v;
2609   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2610   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2611   PetscErrorCode    ierr;
2612   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2613   PetscInt          n,i,jrow,j;
2614 
2615   PetscFunctionBegin;
2616   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2617   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2618   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2619   idx  = a->j;
2620   v    = a->a;
2621   ii   = a->i;
2622 
2623   for (i=0; i<m; i++) {
2624     jrow = ii[i];
2625     n    = ii[i+1] - jrow;
2626     sum1  = 0.0;
2627     sum2  = 0.0;
2628     sum3  = 0.0;
2629     sum4  = 0.0;
2630     sum5  = 0.0;
2631     sum6  = 0.0;
2632     sum7  = 0.0;
2633     sum8  = 0.0;
2634     sum9  = 0.0;
2635     sum10 = 0.0;
2636     sum11 = 0.0;
2637     sum12 = 0.0;
2638     sum13 = 0.0;
2639     sum14 = 0.0;
2640     sum15 = 0.0;
2641     sum16 = 0.0;
2642     sum17 = 0.0;
2643     sum18 = 0.0;
2644     for (j=0; j<n; j++) {
2645       sum1  += v[jrow]*x[18*idx[jrow]];
2646       sum2  += v[jrow]*x[18*idx[jrow]+1];
2647       sum3  += v[jrow]*x[18*idx[jrow]+2];
2648       sum4  += v[jrow]*x[18*idx[jrow]+3];
2649       sum5  += v[jrow]*x[18*idx[jrow]+4];
2650       sum6  += v[jrow]*x[18*idx[jrow]+5];
2651       sum7  += v[jrow]*x[18*idx[jrow]+6];
2652       sum8  += v[jrow]*x[18*idx[jrow]+7];
2653       sum9  += v[jrow]*x[18*idx[jrow]+8];
2654       sum10 += v[jrow]*x[18*idx[jrow]+9];
2655       sum11 += v[jrow]*x[18*idx[jrow]+10];
2656       sum12 += v[jrow]*x[18*idx[jrow]+11];
2657       sum13 += v[jrow]*x[18*idx[jrow]+12];
2658       sum14 += v[jrow]*x[18*idx[jrow]+13];
2659       sum15 += v[jrow]*x[18*idx[jrow]+14];
2660       sum16 += v[jrow]*x[18*idx[jrow]+15];
2661       sum17 += v[jrow]*x[18*idx[jrow]+16];
2662       sum18 += v[jrow]*x[18*idx[jrow]+17];
2663       jrow++;
2664      }
2665     y[18*i]    += sum1;
2666     y[18*i+1]  += sum2;
2667     y[18*i+2]  += sum3;
2668     y[18*i+3]  += sum4;
2669     y[18*i+4]  += sum5;
2670     y[18*i+5]  += sum6;
2671     y[18*i+6]  += sum7;
2672     y[18*i+7]  += sum8;
2673     y[18*i+8]  += sum9;
2674     y[18*i+9]  += sum10;
2675     y[18*i+10] += sum11;
2676     y[18*i+11] += sum12;
2677     y[18*i+12] += sum13;
2678     y[18*i+13] += sum14;
2679     y[18*i+14] += sum15;
2680     y[18*i+15] += sum16;
2681     y[18*i+16] += sum17;
2682     y[18*i+17] += sum18;
2683   }
2684 
2685   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2686   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2687   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2688   PetscFunctionReturn(0);
2689 }
2690 
2691 #undef __FUNCT__
2692 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2693 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2694 {
2695   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2696   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2697   const PetscScalar *x,*v;
2698   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2699   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2700   PetscErrorCode    ierr;
2701   const PetscInt    m = b->AIJ->rmap->n,*idx;
2702   PetscInt          n,i;
2703 
2704   PetscFunctionBegin;
2705   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2706   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2707   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2708   for (i=0; i<m; i++) {
2709     idx    = a->j + a->i[i] ;
2710     v      = a->a + a->i[i] ;
2711     n      = a->i[i+1] - a->i[i];
2712     alpha1 = x[18*i];
2713     alpha2 = x[18*i+1];
2714     alpha3 = x[18*i+2];
2715     alpha4 = x[18*i+3];
2716     alpha5 = x[18*i+4];
2717     alpha6 = x[18*i+5];
2718     alpha7 = x[18*i+6];
2719     alpha8 = x[18*i+7];
2720     alpha9  = x[18*i+8];
2721     alpha10 = x[18*i+9];
2722     alpha11 = x[18*i+10];
2723     alpha12 = x[18*i+11];
2724     alpha13 = x[18*i+12];
2725     alpha14 = x[18*i+13];
2726     alpha15 = x[18*i+14];
2727     alpha16 = x[18*i+15];
2728     alpha17 = x[18*i+16];
2729     alpha18 = x[18*i+17];
2730     while (n-->0) {
2731       y[18*(*idx)]   += alpha1*(*v);
2732       y[18*(*idx)+1] += alpha2*(*v);
2733       y[18*(*idx)+2] += alpha3*(*v);
2734       y[18*(*idx)+3] += alpha4*(*v);
2735       y[18*(*idx)+4] += alpha5*(*v);
2736       y[18*(*idx)+5] += alpha6*(*v);
2737       y[18*(*idx)+6] += alpha7*(*v);
2738       y[18*(*idx)+7] += alpha8*(*v);
2739       y[18*(*idx)+8]  += alpha9*(*v);
2740       y[18*(*idx)+9]  += alpha10*(*v);
2741       y[18*(*idx)+10] += alpha11*(*v);
2742       y[18*(*idx)+11] += alpha12*(*v);
2743       y[18*(*idx)+12] += alpha13*(*v);
2744       y[18*(*idx)+13] += alpha14*(*v);
2745       y[18*(*idx)+14] += alpha15*(*v);
2746       y[18*(*idx)+15] += alpha16*(*v);
2747       y[18*(*idx)+16] += alpha17*(*v);
2748       y[18*(*idx)+17] += alpha18*(*v);
2749       idx++; v++;
2750     }
2751   }
2752   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2753   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2754   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2755   PetscFunctionReturn(0);
2756 }
2757 
2758 #undef __FUNCT__
2759 #define __FUNCT__ "MatMult_SeqMAIJ_N"
2760 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2761 {
2762   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2763   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2764   const PetscScalar *x,*v;
2765   PetscScalar       *y,*sums;
2766   PetscErrorCode    ierr;
2767   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2768   PetscInt          n,i,jrow,j,dof = b->dof,k;
2769 
2770   PetscFunctionBegin;
2771   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2772   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2773   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2774   idx  = a->j;
2775   v    = a->a;
2776   ii   = a->i;
2777 
2778   for (i=0; i<m; i++) {
2779     jrow = ii[i];
2780     n    = ii[i+1] - jrow;
2781     sums = y + dof*i;
2782     for (j=0; j<n; j++) {
2783       for (k=0; k<dof; k++) {
2784         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2785       }
2786       jrow++;
2787     }
2788   }
2789 
2790   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2791   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2792   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2793   PetscFunctionReturn(0);
2794 }
2795 
2796 #undef __FUNCT__
2797 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
2798 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2799 {
2800   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2801   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2802   const PetscScalar *x,*v;
2803   PetscScalar       *y,*sums;
2804   PetscErrorCode    ierr;
2805   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2806   PetscInt          n,i,jrow,j,dof = b->dof,k;
2807 
2808   PetscFunctionBegin;
2809   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2810   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2811   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2812   idx  = a->j;
2813   v    = a->a;
2814   ii   = a->i;
2815 
2816   for (i=0; i<m; i++) {
2817     jrow = ii[i];
2818     n    = ii[i+1] - jrow;
2819     sums = y + dof*i;
2820     for (j=0; j<n; j++) {
2821       for (k=0; k<dof; k++) {
2822         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2823       }
2824       jrow++;
2825     }
2826   }
2827 
2828   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2829   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2830   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 #undef __FUNCT__
2835 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
2836 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2837 {
2838   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2839   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2840   const PetscScalar *x,*v,*alpha;
2841   PetscScalar       *y;
2842   PetscErrorCode    ierr;
2843   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2844   PetscInt          n,i,k;
2845 
2846   PetscFunctionBegin;
2847   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2848   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2849   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2850   for (i=0; i<m; i++) {
2851     idx    = a->j + a->i[i] ;
2852     v      = a->a + a->i[i] ;
2853     n      = a->i[i+1] - a->i[i];
2854     alpha  = x + dof*i;
2855     while (n-->0) {
2856       for (k=0; k<dof; k++) {
2857         y[dof*(*idx)+k] += alpha[k]*(*v);
2858       }
2859       idx++; v++;
2860     }
2861   }
2862   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2863   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2864   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
2870 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2871 {
2872   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2873   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2874   const PetscScalar *x,*v,*alpha;
2875   PetscScalar       *y;
2876   PetscErrorCode    ierr;
2877   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2878   PetscInt          n,i,k;
2879 
2880   PetscFunctionBegin;
2881   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2882   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2883   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2884   for (i=0; i<m; i++) {
2885     idx    = a->j + a->i[i] ;
2886     v      = a->a + a->i[i] ;
2887     n      = a->i[i+1] - a->i[i];
2888     alpha  = x + dof*i;
2889     while (n-->0) {
2890       for (k=0; k<dof; k++) {
2891         y[dof*(*idx)+k] += alpha[k]*(*v);
2892       }
2893       idx++; v++;
2894     }
2895   }
2896   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2897   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2898   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2899   PetscFunctionReturn(0);
2900 }
2901 
2902 /*===================================================================================*/
2903 #undef __FUNCT__
2904 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2905 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2906 {
2907   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2908   PetscErrorCode ierr;
2909 
2910   PetscFunctionBegin;
2911   /* start the scatter */
2912   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2913   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2914   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2915   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2916   PetscFunctionReturn(0);
2917 }
2918 
2919 #undef __FUNCT__
2920 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2921 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2922 {
2923   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2924   PetscErrorCode ierr;
2925 
2926   PetscFunctionBegin;
2927   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2928   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2929   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2930   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2931   PetscFunctionReturn(0);
2932 }
2933 
2934 #undef __FUNCT__
2935 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2936 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2937 {
2938   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2939   PetscErrorCode ierr;
2940 
2941   PetscFunctionBegin;
2942   /* start the scatter */
2943   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2944   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2945   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2946   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2947   PetscFunctionReturn(0);
2948 }
2949 
2950 #undef __FUNCT__
2951 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2952 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2953 {
2954   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2955   PetscErrorCode ierr;
2956 
2957   PetscFunctionBegin;
2958   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2959   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2960   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2961   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2962   PetscFunctionReturn(0);
2963 }
2964 
2965 /* ----------------------------------------------------------------*/
2966 #undef __FUNCT__
2967 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2968 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2969 {
2970   /* This routine requires testing -- but it's getting better. */
2971   PetscErrorCode     ierr;
2972   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2973   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2974   Mat                P=pp->AIJ;
2975   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2976   PetscInt           *pti,*ptj,*ptJ;
2977   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2978   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2979   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2980   MatScalar          *ca;
2981   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2982 
2983   PetscFunctionBegin;
2984   /* Start timer */
2985   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2986 
2987   /* Get ij structure of P^T */
2988   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2989 
2990   cn = pn*ppdof;
2991   /* Allocate ci array, arrays for fill computation and */
2992   /* free space for accumulating nonzero column info */
2993   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
2994   ci[0] = 0;
2995 
2996   /* Work arrays for rows of P^T*A */
2997   ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr);
2998   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
2999   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
3000 
3001   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3002   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3003   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3004   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
3005   current_space = free_space;
3006 
3007   /* Determine symbolic info for each row of C: */
3008   for (i=0;i<pn;i++) {
3009     ptnzi  = pti[i+1] - pti[i];
3010     ptJ    = ptj + pti[i];
3011     for (dof=0;dof<ppdof;dof++) {
3012       ptanzi = 0;
3013       /* Determine symbolic row of PtA: */
3014       for (j=0;j<ptnzi;j++) {
3015         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3016         arow = ptJ[j]*ppdof + dof;
3017         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3018         anzj = ai[arow+1] - ai[arow];
3019         ajj  = aj + ai[arow];
3020         for (k=0;k<anzj;k++) {
3021           if (!ptadenserow[ajj[k]]) {
3022             ptadenserow[ajj[k]]    = -1;
3023             ptasparserow[ptanzi++] = ajj[k];
3024           }
3025         }
3026       }
3027       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3028       ptaj = ptasparserow;
3029       cnzi   = 0;
3030       for (j=0;j<ptanzi;j++) {
3031         /* Get offset within block of P */
3032         pshift = *ptaj%ppdof;
3033         /* Get block row of P */
3034         prow = (*ptaj++)/ppdof; /* integer division */
3035         /* P has same number of nonzeros per row as the compressed form */
3036         pnzj = pi[prow+1] - pi[prow];
3037         pjj  = pj + pi[prow];
3038         for (k=0;k<pnzj;k++) {
3039           /* Locations in C are shifted by the offset within the block */
3040           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3041           if (!denserow[pjj[k]*ppdof+pshift]) {
3042             denserow[pjj[k]*ppdof+pshift] = -1;
3043             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3044           }
3045         }
3046       }
3047 
3048       /* sort sparserow */
3049       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
3050 
3051       /* If free space is not available, make more free space */
3052       /* Double the amount of total space in the list */
3053       if (current_space->local_remaining<cnzi) {
3054         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
3055       }
3056 
3057       /* Copy data into free space, and zero out denserows */
3058       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
3059       current_space->array           += cnzi;
3060       current_space->local_used      += cnzi;
3061       current_space->local_remaining -= cnzi;
3062 
3063       for (j=0;j<ptanzi;j++) {
3064         ptadenserow[ptasparserow[j]] = 0;
3065       }
3066       for (j=0;j<cnzi;j++) {
3067         denserow[sparserow[j]] = 0;
3068       }
3069       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3070       /*        For now, we will recompute what is needed. */
3071       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3072     }
3073   }
3074   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3075   /* Allocate space for cj, initialize cj, and */
3076   /* destroy list of free space and other temporary array(s) */
3077   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3078   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
3079   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
3080 
3081   /* Allocate space for ca */
3082   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
3083   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3084 
3085   /* put together the new matrix */
3086   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
3087 
3088   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3089   /* Since these are PETSc arrays, change flags to free them as necessary. */
3090   c          = (Mat_SeqAIJ *)((*C)->data);
3091   c->free_a  = PETSC_TRUE;
3092   c->free_ij = PETSC_TRUE;
3093   c->nonew   = 0;
3094 
3095   /* Clean up. */
3096   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3097 
3098   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
3099   PetscFunctionReturn(0);
3100 }
3101 
3102 #undef __FUNCT__
3103 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
3104 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3105 {
3106   /* This routine requires testing -- first draft only */
3107   PetscErrorCode  ierr;
3108   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3109   Mat             P=pp->AIJ;
3110   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *) A->data;
3111   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *) P->data;
3112   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *) C->data;
3113   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3114   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3115   const PetscInt  am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3116   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3117   const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3118   MatScalar       *ca=c->a,*caj,*apa;
3119 
3120   PetscFunctionBegin;
3121   /* Allocate temporary array for storage of one row of A*P */
3122   ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr);
3123   ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr);
3124   ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr);
3125   ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr);
3126 
3127   /* Clear old values in C */
3128   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
3129 
3130   for (i=0;i<am;i++) {
3131     /* Form sparse row of A*P */
3132     anzi  = ai[i+1] - ai[i];
3133     apnzj = 0;
3134     for (j=0;j<anzi;j++) {
3135       /* Get offset within block of P */
3136       pshift = *aj%ppdof;
3137       /* Get block row of P */
3138       prow   = *aj++/ppdof; /* integer division */
3139       pnzj = pi[prow+1] - pi[prow];
3140       pjj  = pj + pi[prow];
3141       paj  = pa + pi[prow];
3142       for (k=0;k<pnzj;k++) {
3143         poffset = pjj[k]*ppdof+pshift;
3144         if (!apjdense[poffset]) {
3145           apjdense[poffset] = -1;
3146           apj[apnzj++]      = poffset;
3147         }
3148         apa[poffset] += (*aa)*paj[k];
3149       }
3150       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3151       aa++;
3152     }
3153 
3154     /* Sort the j index array for quick sparse axpy. */
3155     /* Note: a array does not need sorting as it is in dense storage locations. */
3156     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3157 
3158     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3159     prow    = i/ppdof; /* integer division */
3160     pshift  = i%ppdof;
3161     poffset = pi[prow];
3162     pnzi = pi[prow+1] - poffset;
3163     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3164     pJ   = pj+poffset;
3165     pA   = pa+poffset;
3166     for (j=0;j<pnzi;j++) {
3167       crow   = (*pJ)*ppdof+pshift;
3168       cjj    = cj + ci[crow];
3169       caj    = ca + ci[crow];
3170       pJ++;
3171       /* Perform sparse axpy operation.  Note cjj includes apj. */
3172       for (k=0,nextap=0;nextap<apnzj;k++) {
3173         if (cjj[k]==apj[nextap]) {
3174           caj[k] += (*pA)*apa[apj[nextap++]];
3175         }
3176       }
3177       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3178       pA++;
3179     }
3180 
3181     /* Zero the current row info for A*P */
3182     for (j=0;j<apnzj;j++) {
3183       apa[apj[j]]      = 0.;
3184       apjdense[apj[j]] = 0;
3185     }
3186   }
3187 
3188   /* Assemble the final matrix and clean up */
3189   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3190   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3191   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3192   PetscFunctionReturn(0);
3193 }
3194 
3195 #undef __FUNCT__
3196 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
3197 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3198 {
3199   PetscErrorCode    ierr;
3200 
3201   PetscFunctionBegin;
3202   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3203   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
3204   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
3205   PetscFunctionReturn(0);
3206 }
3207 
3208 #undef __FUNCT__
3209 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
3210 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3211 {
3212   PetscFunctionBegin;
3213   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3214   PetscFunctionReturn(0);
3215 }
3216 
3217 EXTERN_C_BEGIN
3218 #undef __FUNCT__
3219 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
3220 PetscErrorCode  MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3221 {
3222   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
3223   Mat               a = b->AIJ,B;
3224   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
3225   PetscErrorCode    ierr;
3226   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3227   PetscInt          *cols;
3228   PetscScalar       *vals;
3229 
3230   PetscFunctionBegin;
3231   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3232   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
3233   for (i=0; i<m; i++) {
3234     nmax = PetscMax(nmax,aij->ilen[i]);
3235     for (j=0; j<dof; j++) {
3236       ilen[dof*i+j] = aij->ilen[i];
3237     }
3238   }
3239   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
3240   ierr = PetscFree(ilen);CHKERRQ(ierr);
3241   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
3242   ii   = 0;
3243   for (i=0; i<m; i++) {
3244     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3245     for (j=0; j<dof; j++) {
3246       for (k=0; k<ncols; k++) {
3247         icols[k] = dof*cols[k]+j;
3248       }
3249       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3250       ii++;
3251     }
3252     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3253   }
3254   ierr = PetscFree(icols);CHKERRQ(ierr);
3255   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3256   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3257 
3258   if (reuse == MAT_REUSE_MATRIX) {
3259     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3260   } else {
3261     *newmat = B;
3262   }
3263   PetscFunctionReturn(0);
3264 }
3265 EXTERN_C_END
3266 
3267 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3268 
3269 EXTERN_C_BEGIN
3270 #undef __FUNCT__
3271 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
3272 PetscErrorCode  MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3273 {
3274   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3275   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3276   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3277   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3278   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3279   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3280   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3281   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
3282   PetscInt          rstart,cstart,*garray,ii,k;
3283   PetscErrorCode    ierr;
3284   PetscScalar       *vals,*ovals;
3285 
3286   PetscFunctionBegin;
3287   ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr);
3288   for (i=0; i<A->rmap->n/dof; i++) {
3289     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3290     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3291     for (j=0; j<dof; j++) {
3292       dnz[dof*i+j] = AIJ->ilen[i];
3293       onz[dof*i+j] = OAIJ->ilen[i];
3294     }
3295   }
3296   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3297   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3298 
3299   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
3300   rstart = dof*maij->A->rmap->rstart;
3301   cstart = dof*maij->A->cmap->rstart;
3302   garray = mpiaij->garray;
3303 
3304   ii = rstart;
3305   for (i=0; i<A->rmap->n/dof; i++) {
3306     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3307     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3308     for (j=0; j<dof; j++) {
3309       for (k=0; k<ncols; k++) {
3310         icols[k] = cstart + dof*cols[k]+j;
3311       }
3312       for (k=0; k<oncols; k++) {
3313         oicols[k] = dof*garray[ocols[k]]+j;
3314       }
3315       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3316       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3317       ii++;
3318     }
3319     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3320     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3321   }
3322   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3323 
3324   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3325   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3326 
3327   if (reuse == MAT_REUSE_MATRIX) {
3328     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3329     ((PetscObject)A)->refct = 1;
3330     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3331     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3332   } else {
3333     *newmat = B;
3334   }
3335   PetscFunctionReturn(0);
3336 }
3337 EXTERN_C_END
3338 
3339 #undef __FUNCT__
3340 #define __FUNCT__ "MatGetSubMatrix_MAIJ"
3341 PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3342 {
3343   PetscErrorCode ierr;
3344   Mat            A;
3345 
3346   PetscFunctionBegin;
3347   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3348   ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
3349   ierr = MatDestroy(&A);CHKERRQ(ierr);
3350   PetscFunctionReturn(0);
3351 }
3352 
3353 /* ---------------------------------------------------------------------------------- */
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatCreateMAIJ"
3356 /*@C
3357   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3358   operations for multicomponent problems.  It interpolates each component the same
3359   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3360   and MATMPIAIJ for distributed matrices.
3361 
3362   Collective
3363 
3364   Input Parameters:
3365 + A - the AIJ matrix describing the action on blocks
3366 - dof - the block size (number of components per node)
3367 
3368   Output Parameter:
3369 . maij - the new MAIJ matrix
3370 
3371   Operations provided:
3372 + MatMult
3373 . MatMultTranspose
3374 . MatMultAdd
3375 . MatMultTransposeAdd
3376 - MatView
3377 
3378   Level: advanced
3379 
3380 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3381 @*/
3382 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3383 {
3384   PetscErrorCode ierr;
3385   PetscMPIInt    size;
3386   PetscInt       n;
3387   Mat            B;
3388 
3389   PetscFunctionBegin;
3390   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3391 
3392   if (dof == 1) {
3393     *maij = A;
3394   } else {
3395     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3396     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3397     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3398     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3399     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3400     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3401     B->assembled    = PETSC_TRUE;
3402 
3403     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3404     if (size == 1) {
3405       Mat_SeqMAIJ    *b;
3406 
3407       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3408       B->ops->setup   = PETSC_NULL;
3409       B->ops->destroy = MatDestroy_SeqMAIJ;
3410       B->ops->view    = MatView_SeqMAIJ;
3411       b      = (Mat_SeqMAIJ*)B->data;
3412       b->dof = dof;
3413       b->AIJ = A;
3414       if (dof == 2) {
3415         B->ops->mult             = MatMult_SeqMAIJ_2;
3416         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3417         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3418         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3419       } else if (dof == 3) {
3420         B->ops->mult             = MatMult_SeqMAIJ_3;
3421         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3422         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3423         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3424       } else if (dof == 4) {
3425         B->ops->mult             = MatMult_SeqMAIJ_4;
3426         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3427         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3428         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3429       } else if (dof == 5) {
3430         B->ops->mult             = MatMult_SeqMAIJ_5;
3431         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3432         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3433         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3434       } else if (dof == 6) {
3435         B->ops->mult             = MatMult_SeqMAIJ_6;
3436         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3437         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3438         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3439       } else if (dof == 7) {
3440         B->ops->mult             = MatMult_SeqMAIJ_7;
3441         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3442         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3443         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3444       } else if (dof == 8) {
3445         B->ops->mult             = MatMult_SeqMAIJ_8;
3446         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3447         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3448         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3449       } else if (dof == 9) {
3450         B->ops->mult             = MatMult_SeqMAIJ_9;
3451         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3452         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3453         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3454       } else if (dof == 10) {
3455         B->ops->mult             = MatMult_SeqMAIJ_10;
3456         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3457         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3458         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3459       } else if (dof == 11) {
3460         B->ops->mult             = MatMult_SeqMAIJ_11;
3461         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3462         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3463         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3464       } else if (dof == 16) {
3465         B->ops->mult             = MatMult_SeqMAIJ_16;
3466         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3467         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3468         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3469       } else if (dof == 18) {
3470         B->ops->mult             = MatMult_SeqMAIJ_18;
3471         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3472         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3473         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3474       } else {
3475         B->ops->mult             = MatMult_SeqMAIJ_N;
3476         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3477         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3478         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3479       }
3480       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
3481       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3482       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3483     } else {
3484       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3485       Mat_MPIMAIJ *b;
3486       IS          from,to;
3487       Vec         gvec;
3488 
3489       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3490       B->ops->setup   = PETSC_NULL;
3491       B->ops->destroy = MatDestroy_MPIMAIJ;
3492       B->ops->view    = MatView_MPIMAIJ;
3493       b      = (Mat_MPIMAIJ*)B->data;
3494       b->dof = dof;
3495       b->A   = A;
3496       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
3497       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
3498 
3499       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3500       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3501       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
3502       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3503       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3504 
3505       /* create two temporary Index sets for build scatter gather */
3506       ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3507       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3508 
3509       /* create temporary global vector to generate scatter context */
3510       ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
3511 
3512       /* generate the scatter context */
3513       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3514 
3515       ierr = ISDestroy(&from);CHKERRQ(ierr);
3516       ierr = ISDestroy(&to);CHKERRQ(ierr);
3517       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3518 
3519       B->ops->mult                = MatMult_MPIMAIJ_dof;
3520       B->ops->multtranspose       = MatMultTranspose_MPIMAIJ_dof;
3521       B->ops->multadd             = MatMultAdd_MPIMAIJ_dof;
3522       B->ops->multtransposeadd    = MatMultTransposeAdd_MPIMAIJ_dof;
3523       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3524       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3525       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3526     }
3527     B->ops->getsubmatrix        = MatGetSubMatrix_MAIJ;
3528     ierr = MatSetUp(B);CHKERRQ(ierr);
3529     *maij = B;
3530     ierr = MatView_Private(B);CHKERRQ(ierr);
3531   }
3532   PetscFunctionReturn(0);
3533 }
3534