xref: /petsc/src/mat/impls/maij/maij.c (revision ca4a67af8ca7cf2d1ac25cfe8f5aed45e6b9324f)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the MAIJ  matrix storage format.
5   This format is used for restriction and interpolation operations for
6   multicomponent problems. It interpolates each component the same way
7   independently.
8 
9      We provide:
10          MatMult()
11          MatMultTranspose()
12          MatMultTransposeAdd()
13          MatMultAdd()
14           and
15          MatCreateMAIJ(Mat,dof,Mat*)
16 
17      This single directory handles both the sequential and parallel codes
18 */
19 
20 #include "../src/mat/impls/maij/maij.h" /*I "petscmat.h" I*/
21 #include "../src/mat/utils/freespace.h"
22 #include "private/vecimpl.h"
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "MatMAIJGetAIJ"
26 /*@C
27    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
28 
29    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
30 
31    Input Parameter:
32 .  A - the MAIJ matrix
33 
34    Output Parameter:
35 .  B - the AIJ matrix
36 
37    Level: advanced
38 
39    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
40 
41 .seealso: MatCreateMAIJ()
42 @*/
43 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
44 {
45   PetscErrorCode ierr;
46   PetscTruth     ismpimaij,isseqmaij;
47 
48   PetscFunctionBegin;
49   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
50   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
51   if (ismpimaij) {
52     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
53 
54     *B = b->A;
55   } else if (isseqmaij) {
56     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
57 
58     *B = b->AIJ;
59   } else {
60     *B = A;
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatMAIJRedimension"
67 /*@C
68    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
69 
70    Collective
71 
72    Input Parameter:
73 +  A - the MAIJ matrix
74 -  dof - the block size for the new matrix
75 
76    Output Parameter:
77 .  B - the new MAIJ matrix
78 
79    Level: advanced
80 
81 .seealso: MatCreateMAIJ()
82 @*/
83 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
84 {
85   PetscErrorCode ierr;
86   Mat            Aij;
87 
88   PetscFunctionBegin;
89   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
90   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "MatDestroy_SeqMAIJ"
96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
97 {
98   PetscErrorCode ierr;
99   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
100 
101   PetscFunctionBegin;
102   if (b->AIJ) {
103     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
104   }
105   ierr = PetscFree(b);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "MatView_SeqMAIJ"
111 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
112 {
113   PetscErrorCode ierr;
114   Mat            B;
115 
116   PetscFunctionBegin;
117   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
118   ierr = MatView(B,viewer);CHKERRQ(ierr);
119   ierr = MatDestroy(B);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatView_MPIMAIJ"
125 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
126 {
127   PetscErrorCode ierr;
128   Mat            B;
129 
130   PetscFunctionBegin;
131   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
132   ierr = MatView(B,viewer);CHKERRQ(ierr);
133   ierr = MatDestroy(B);CHKERRQ(ierr);
134   PetscFunctionReturn(0);
135 }
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatDestroy_MPIMAIJ"
139 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
140 {
141   PetscErrorCode ierr;
142   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   if (b->AIJ) {
146     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
147   }
148   if (b->OAIJ) {
149     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
150   }
151   if (b->A) {
152     ierr = MatDestroy(b->A);CHKERRQ(ierr);
153   }
154   if (b->ctx) {
155     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
156   }
157   if (b->w) {
158     ierr = VecDestroy(b->w);CHKERRQ(ierr);
159   }
160   ierr = PetscFree(b);CHKERRQ(ierr);
161   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
162   PetscFunctionReturn(0);
163 }
164 
165 /*MC
166   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
167   multicomponent problems, interpolating or restricting each component the same way independently.
168   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
169 
170   Operations provided:
171 . MatMult
172 . MatMultTranspose
173 . MatMultAdd
174 . MatMultTransposeAdd
175 
176   Level: advanced
177 
178 .seealso: MatCreateSeqDense
179 M*/
180 
181 EXTERN_C_BEGIN
182 #undef __FUNCT__
183 #define __FUNCT__ "MatCreate_MAIJ"
184 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
185 {
186   PetscErrorCode ierr;
187   Mat_MPIMAIJ    *b;
188   PetscMPIInt    size;
189 
190   PetscFunctionBegin;
191   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
192   A->data  = (void*)b;
193   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
194   A->mapping          = 0;
195 
196   b->AIJ  = 0;
197   b->dof  = 0;
198   b->OAIJ = 0;
199   b->ctx  = 0;
200   b->w    = 0;
201   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
202   if (size == 1){
203     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
204   } else {
205     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
206   }
207   PetscFunctionReturn(0);
208 }
209 EXTERN_C_END
210 
211 /* --------------------------------------------------------------------------------------*/
212 #undef __FUNCT__
213 #define __FUNCT__ "MatMult_SeqMAIJ_2"
214 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
215 {
216   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
217   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
218   PetscScalar    *x,*y,*v,sum1, sum2;
219   PetscErrorCode ierr;
220   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
221   PetscInt       n,i,jrow,j;
222 
223   PetscFunctionBegin;
224   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
225   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
226   idx  = a->j;
227   v    = a->a;
228   ii   = a->i;
229 
230   for (i=0; i<m; i++) {
231     jrow = ii[i];
232     n    = ii[i+1] - jrow;
233     sum1  = 0.0;
234     sum2  = 0.0;
235     nonzerorow += (n>0);
236     for (j=0; j<n; j++) {
237       sum1 += v[jrow]*x[2*idx[jrow]];
238       sum2 += v[jrow]*x[2*idx[jrow]+1];
239       jrow++;
240      }
241     y[2*i]   = sum1;
242     y[2*i+1] = sum2;
243   }
244 
245   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
246   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
247   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNCT__
252 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
253 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
254 {
255   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
256   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
257   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
258   PetscErrorCode ierr;
259   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
260 
261   PetscFunctionBegin;
262   ierr = VecSet(yy,zero);CHKERRQ(ierr);
263   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
264   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
265 
266   for (i=0; i<m; i++) {
267     idx    = a->j + a->i[i] ;
268     v      = a->a + a->i[i] ;
269     n      = a->i[i+1] - a->i[i];
270     alpha1 = x[2*i];
271     alpha2 = x[2*i+1];
272     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
273   }
274   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
275   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
276   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
277   PetscFunctionReturn(0);
278 }
279 
280 #undef __FUNCT__
281 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
282 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
283 {
284   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
285   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
286   PetscScalar    *x,*y,*v,sum1, sum2;
287   PetscErrorCode ierr;
288   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
289   PetscInt       n,i,jrow,j;
290 
291   PetscFunctionBegin;
292   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
293   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
294   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
295   idx  = a->j;
296   v    = a->a;
297   ii   = a->i;
298 
299   for (i=0; i<m; i++) {
300     jrow = ii[i];
301     n    = ii[i+1] - jrow;
302     sum1  = 0.0;
303     sum2  = 0.0;
304     for (j=0; j<n; j++) {
305       sum1 += v[jrow]*x[2*idx[jrow]];
306       sum2 += v[jrow]*x[2*idx[jrow]+1];
307       jrow++;
308      }
309     y[2*i]   += sum1;
310     y[2*i+1] += sum2;
311   }
312 
313   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
314   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
315   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
316   PetscFunctionReturn(0);
317 }
318 #undef __FUNCT__
319 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
320 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
321 {
322   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
323   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
324   PetscScalar    *x,*y,*v,alpha1,alpha2;
325   PetscErrorCode ierr;
326   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
327 
328   PetscFunctionBegin;
329   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
330   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
331   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
332 
333   for (i=0; i<m; i++) {
334     idx   = a->j + a->i[i] ;
335     v     = a->a + a->i[i] ;
336     n     = a->i[i+1] - a->i[i];
337     alpha1 = x[2*i];
338     alpha2 = x[2*i+1];
339     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
340   }
341   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
342   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
343   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
344   PetscFunctionReturn(0);
345 }
346 /* --------------------------------------------------------------------------------------*/
347 #undef __FUNCT__
348 #define __FUNCT__ "MatMult_SeqMAIJ_3"
349 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
350 {
351   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
352   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
353   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
354   PetscErrorCode ierr;
355   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
356   PetscInt       n,i,jrow,j;
357 
358   PetscFunctionBegin;
359   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
360   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
361   idx  = a->j;
362   v    = a->a;
363   ii   = a->i;
364 
365   for (i=0; i<m; i++) {
366     jrow = ii[i];
367     n    = ii[i+1] - jrow;
368     sum1  = 0.0;
369     sum2  = 0.0;
370     sum3  = 0.0;
371     nonzerorow += (n>0);
372     for (j=0; j<n; j++) {
373       sum1 += v[jrow]*x[3*idx[jrow]];
374       sum2 += v[jrow]*x[3*idx[jrow]+1];
375       sum3 += v[jrow]*x[3*idx[jrow]+2];
376       jrow++;
377      }
378     y[3*i]   = sum1;
379     y[3*i+1] = sum2;
380     y[3*i+2] = sum3;
381   }
382 
383   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
384   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
385   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
391 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
392 {
393   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
394   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
395   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
396   PetscErrorCode ierr;
397   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
398 
399   PetscFunctionBegin;
400   ierr = VecSet(yy,zero);CHKERRQ(ierr);
401   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
402   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
403 
404   for (i=0; i<m; i++) {
405     idx    = a->j + a->i[i];
406     v      = a->a + a->i[i];
407     n      = a->i[i+1] - a->i[i];
408     alpha1 = x[3*i];
409     alpha2 = x[3*i+1];
410     alpha3 = x[3*i+2];
411     while (n-->0) {
412       y[3*(*idx)]   += alpha1*(*v);
413       y[3*(*idx)+1] += alpha2*(*v);
414       y[3*(*idx)+2] += alpha3*(*v);
415       idx++; v++;
416     }
417   }
418   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
419   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
420   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
426 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
427 {
428   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
430   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
431   PetscErrorCode ierr;
432   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
433   PetscInt       n,i,jrow,j;
434 
435   PetscFunctionBegin;
436   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
437   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
438   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
439   idx  = a->j;
440   v    = a->a;
441   ii   = a->i;
442 
443   for (i=0; i<m; i++) {
444     jrow = ii[i];
445     n    = ii[i+1] - jrow;
446     sum1  = 0.0;
447     sum2  = 0.0;
448     sum3  = 0.0;
449     for (j=0; j<n; j++) {
450       sum1 += v[jrow]*x[3*idx[jrow]];
451       sum2 += v[jrow]*x[3*idx[jrow]+1];
452       sum3 += v[jrow]*x[3*idx[jrow]+2];
453       jrow++;
454      }
455     y[3*i]   += sum1;
456     y[3*i+1] += sum2;
457     y[3*i+2] += sum3;
458   }
459 
460   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
461   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
462   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
463   PetscFunctionReturn(0);
464 }
465 #undef __FUNCT__
466 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
467 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
468 {
469   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
470   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
471   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
472   PetscErrorCode ierr;
473   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
474 
475   PetscFunctionBegin;
476   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
477   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
478   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
479   for (i=0; i<m; i++) {
480     idx    = a->j + a->i[i] ;
481     v      = a->a + a->i[i] ;
482     n      = a->i[i+1] - a->i[i];
483     alpha1 = x[3*i];
484     alpha2 = x[3*i+1];
485     alpha3 = x[3*i+2];
486     while (n-->0) {
487       y[3*(*idx)]   += alpha1*(*v);
488       y[3*(*idx)+1] += alpha2*(*v);
489       y[3*(*idx)+2] += alpha3*(*v);
490       idx++; v++;
491     }
492   }
493   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
494   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
495   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
496   PetscFunctionReturn(0);
497 }
498 
499 /* ------------------------------------------------------------------------------*/
500 #undef __FUNCT__
501 #define __FUNCT__ "MatMult_SeqMAIJ_4"
502 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
503 {
504   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
505   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
506   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
507   PetscErrorCode ierr;
508   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
509   PetscInt       n,i,jrow,j;
510 
511   PetscFunctionBegin;
512   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
513   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
514   idx  = a->j;
515   v    = a->a;
516   ii   = a->i;
517 
518   for (i=0; i<m; i++) {
519     jrow = ii[i];
520     n    = ii[i+1] - jrow;
521     sum1  = 0.0;
522     sum2  = 0.0;
523     sum3  = 0.0;
524     sum4  = 0.0;
525     nonzerorow += (n>0);
526     for (j=0; j<n; j++) {
527       sum1 += v[jrow]*x[4*idx[jrow]];
528       sum2 += v[jrow]*x[4*idx[jrow]+1];
529       sum3 += v[jrow]*x[4*idx[jrow]+2];
530       sum4 += v[jrow]*x[4*idx[jrow]+3];
531       jrow++;
532      }
533     y[4*i]   = sum1;
534     y[4*i+1] = sum2;
535     y[4*i+2] = sum3;
536     y[4*i+3] = sum4;
537   }
538 
539   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
540   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
541   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
542   PetscFunctionReturn(0);
543 }
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
547 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
548 {
549   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
550   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
551   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
552   PetscErrorCode ierr;
553   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
554 
555   PetscFunctionBegin;
556   ierr = VecSet(yy,zero);CHKERRQ(ierr);
557   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
558   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
559   for (i=0; i<m; i++) {
560     idx    = a->j + a->i[i] ;
561     v      = a->a + a->i[i] ;
562     n      = a->i[i+1] - a->i[i];
563     alpha1 = x[4*i];
564     alpha2 = x[4*i+1];
565     alpha3 = x[4*i+2];
566     alpha4 = x[4*i+3];
567     while (n-->0) {
568       y[4*(*idx)]   += alpha1*(*v);
569       y[4*(*idx)+1] += alpha2*(*v);
570       y[4*(*idx)+2] += alpha3*(*v);
571       y[4*(*idx)+3] += alpha4*(*v);
572       idx++; v++;
573     }
574   }
575   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
576   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
583 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
584 {
585   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
586   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
587   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
588   PetscErrorCode ierr;
589   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
590   PetscInt       n,i,jrow,j;
591 
592   PetscFunctionBegin;
593   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
594   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
595   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
596   idx  = a->j;
597   v    = a->a;
598   ii   = a->i;
599 
600   for (i=0; i<m; i++) {
601     jrow = ii[i];
602     n    = ii[i+1] - jrow;
603     sum1  = 0.0;
604     sum2  = 0.0;
605     sum3  = 0.0;
606     sum4  = 0.0;
607     for (j=0; j<n; j++) {
608       sum1 += v[jrow]*x[4*idx[jrow]];
609       sum2 += v[jrow]*x[4*idx[jrow]+1];
610       sum3 += v[jrow]*x[4*idx[jrow]+2];
611       sum4 += v[jrow]*x[4*idx[jrow]+3];
612       jrow++;
613      }
614     y[4*i]   += sum1;
615     y[4*i+1] += sum2;
616     y[4*i+2] += sum3;
617     y[4*i+3] += sum4;
618   }
619 
620   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
621   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
622   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 #undef __FUNCT__
626 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
627 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
628 {
629   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
630   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
631   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
632   PetscErrorCode ierr;
633   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
634 
635   PetscFunctionBegin;
636   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
637   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
638   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
639 
640   for (i=0; i<m; i++) {
641     idx    = a->j + a->i[i] ;
642     v      = a->a + a->i[i] ;
643     n      = a->i[i+1] - a->i[i];
644     alpha1 = x[4*i];
645     alpha2 = x[4*i+1];
646     alpha3 = x[4*i+2];
647     alpha4 = x[4*i+3];
648     while (n-->0) {
649       y[4*(*idx)]   += alpha1*(*v);
650       y[4*(*idx)+1] += alpha2*(*v);
651       y[4*(*idx)+2] += alpha3*(*v);
652       y[4*(*idx)+3] += alpha4*(*v);
653       idx++; v++;
654     }
655   }
656   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
657   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
658   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
659   PetscFunctionReturn(0);
660 }
661 /* ------------------------------------------------------------------------------*/
662 
663 #undef __FUNCT__
664 #define __FUNCT__ "MatMult_SeqMAIJ_5"
665 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
666 {
667   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
668   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
669   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
670   PetscErrorCode ierr;
671   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
672   PetscInt       n,i,jrow,j;
673 
674   PetscFunctionBegin;
675   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
676   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
677   idx  = a->j;
678   v    = a->a;
679   ii   = a->i;
680 
681   for (i=0; i<m; i++) {
682     jrow = ii[i];
683     n    = ii[i+1] - jrow;
684     sum1  = 0.0;
685     sum2  = 0.0;
686     sum3  = 0.0;
687     sum4  = 0.0;
688     sum5  = 0.0;
689     nonzerorow += (n>0);
690     for (j=0; j<n; j++) {
691       sum1 += v[jrow]*x[5*idx[jrow]];
692       sum2 += v[jrow]*x[5*idx[jrow]+1];
693       sum3 += v[jrow]*x[5*idx[jrow]+2];
694       sum4 += v[jrow]*x[5*idx[jrow]+3];
695       sum5 += v[jrow]*x[5*idx[jrow]+4];
696       jrow++;
697      }
698     y[5*i]   = sum1;
699     y[5*i+1] = sum2;
700     y[5*i+2] = sum3;
701     y[5*i+3] = sum4;
702     y[5*i+4] = sum5;
703   }
704 
705   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
706   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
707   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
713 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
714 {
715   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
716   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
717   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
718   PetscErrorCode ierr;
719   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
720 
721   PetscFunctionBegin;
722   ierr = VecSet(yy,zero);CHKERRQ(ierr);
723   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
724   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
725 
726   for (i=0; i<m; i++) {
727     idx    = a->j + a->i[i] ;
728     v      = a->a + a->i[i] ;
729     n      = a->i[i+1] - a->i[i];
730     alpha1 = x[5*i];
731     alpha2 = x[5*i+1];
732     alpha3 = x[5*i+2];
733     alpha4 = x[5*i+3];
734     alpha5 = x[5*i+4];
735     while (n-->0) {
736       y[5*(*idx)]   += alpha1*(*v);
737       y[5*(*idx)+1] += alpha2*(*v);
738       y[5*(*idx)+2] += alpha3*(*v);
739       y[5*(*idx)+3] += alpha4*(*v);
740       y[5*(*idx)+4] += alpha5*(*v);
741       idx++; v++;
742     }
743   }
744   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
745   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
746   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
752 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
753 {
754   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
755   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
756   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
757   PetscErrorCode ierr;
758   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
759   PetscInt       n,i,jrow,j;
760 
761   PetscFunctionBegin;
762   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
763   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
764   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
765   idx  = a->j;
766   v    = a->a;
767   ii   = a->i;
768 
769   for (i=0; i<m; i++) {
770     jrow = ii[i];
771     n    = ii[i+1] - jrow;
772     sum1  = 0.0;
773     sum2  = 0.0;
774     sum3  = 0.0;
775     sum4  = 0.0;
776     sum5  = 0.0;
777     for (j=0; j<n; j++) {
778       sum1 += v[jrow]*x[5*idx[jrow]];
779       sum2 += v[jrow]*x[5*idx[jrow]+1];
780       sum3 += v[jrow]*x[5*idx[jrow]+2];
781       sum4 += v[jrow]*x[5*idx[jrow]+3];
782       sum5 += v[jrow]*x[5*idx[jrow]+4];
783       jrow++;
784      }
785     y[5*i]   += sum1;
786     y[5*i+1] += sum2;
787     y[5*i+2] += sum3;
788     y[5*i+3] += sum4;
789     y[5*i+4] += sum5;
790   }
791 
792   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
793   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
794   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
800 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
801 {
802   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
803   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
804   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
805   PetscErrorCode ierr;
806   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
807 
808   PetscFunctionBegin;
809   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
810   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
811   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
812 
813   for (i=0; i<m; i++) {
814     idx    = a->j + a->i[i] ;
815     v      = a->a + a->i[i] ;
816     n      = a->i[i+1] - a->i[i];
817     alpha1 = x[5*i];
818     alpha2 = x[5*i+1];
819     alpha3 = x[5*i+2];
820     alpha4 = x[5*i+3];
821     alpha5 = x[5*i+4];
822     while (n-->0) {
823       y[5*(*idx)]   += alpha1*(*v);
824       y[5*(*idx)+1] += alpha2*(*v);
825       y[5*(*idx)+2] += alpha3*(*v);
826       y[5*(*idx)+3] += alpha4*(*v);
827       y[5*(*idx)+4] += alpha5*(*v);
828       idx++; v++;
829     }
830   }
831   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
832   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
833   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
834   PetscFunctionReturn(0);
835 }
836 
837 /* ------------------------------------------------------------------------------*/
838 #undef __FUNCT__
839 #define __FUNCT__ "MatMult_SeqMAIJ_6"
840 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
841 {
842   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
843   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
844   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
845   PetscErrorCode ierr;
846   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
847   PetscInt       n,i,jrow,j;
848 
849   PetscFunctionBegin;
850   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
851   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
852   idx  = a->j;
853   v    = a->a;
854   ii   = a->i;
855 
856   for (i=0; i<m; i++) {
857     jrow = ii[i];
858     n    = ii[i+1] - jrow;
859     sum1  = 0.0;
860     sum2  = 0.0;
861     sum3  = 0.0;
862     sum4  = 0.0;
863     sum5  = 0.0;
864     sum6  = 0.0;
865     nonzerorow += (n>0);
866     for (j=0; j<n; j++) {
867       sum1 += v[jrow]*x[6*idx[jrow]];
868       sum2 += v[jrow]*x[6*idx[jrow]+1];
869       sum3 += v[jrow]*x[6*idx[jrow]+2];
870       sum4 += v[jrow]*x[6*idx[jrow]+3];
871       sum5 += v[jrow]*x[6*idx[jrow]+4];
872       sum6 += v[jrow]*x[6*idx[jrow]+5];
873       jrow++;
874      }
875     y[6*i]   = sum1;
876     y[6*i+1] = sum2;
877     y[6*i+2] = sum3;
878     y[6*i+3] = sum4;
879     y[6*i+4] = sum5;
880     y[6*i+5] = sum6;
881   }
882 
883   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
884   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
885   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
886   PetscFunctionReturn(0);
887 }
888 
889 #undef __FUNCT__
890 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
891 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
892 {
893   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
894   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
895   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
896   PetscErrorCode ierr;
897   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
898 
899   PetscFunctionBegin;
900   ierr = VecSet(yy,zero);CHKERRQ(ierr);
901   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
902   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
903 
904   for (i=0; i<m; i++) {
905     idx    = a->j + a->i[i] ;
906     v      = a->a + a->i[i] ;
907     n      = a->i[i+1] - a->i[i];
908     alpha1 = x[6*i];
909     alpha2 = x[6*i+1];
910     alpha3 = x[6*i+2];
911     alpha4 = x[6*i+3];
912     alpha5 = x[6*i+4];
913     alpha6 = x[6*i+5];
914     while (n-->0) {
915       y[6*(*idx)]   += alpha1*(*v);
916       y[6*(*idx)+1] += alpha2*(*v);
917       y[6*(*idx)+2] += alpha3*(*v);
918       y[6*(*idx)+3] += alpha4*(*v);
919       y[6*(*idx)+4] += alpha5*(*v);
920       y[6*(*idx)+5] += alpha6*(*v);
921       idx++; v++;
922     }
923   }
924   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
925   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
926   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
927   PetscFunctionReturn(0);
928 }
929 
930 #undef __FUNCT__
931 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
932 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
933 {
934   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
935   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
936   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
937   PetscErrorCode ierr;
938   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
939   PetscInt       n,i,jrow,j;
940 
941   PetscFunctionBegin;
942   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
943   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
944   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
945   idx  = a->j;
946   v    = a->a;
947   ii   = a->i;
948 
949   for (i=0; i<m; i++) {
950     jrow = ii[i];
951     n    = ii[i+1] - jrow;
952     sum1  = 0.0;
953     sum2  = 0.0;
954     sum3  = 0.0;
955     sum4  = 0.0;
956     sum5  = 0.0;
957     sum6  = 0.0;
958     for (j=0; j<n; j++) {
959       sum1 += v[jrow]*x[6*idx[jrow]];
960       sum2 += v[jrow]*x[6*idx[jrow]+1];
961       sum3 += v[jrow]*x[6*idx[jrow]+2];
962       sum4 += v[jrow]*x[6*idx[jrow]+3];
963       sum5 += v[jrow]*x[6*idx[jrow]+4];
964       sum6 += v[jrow]*x[6*idx[jrow]+5];
965       jrow++;
966      }
967     y[6*i]   += sum1;
968     y[6*i+1] += sum2;
969     y[6*i+2] += sum3;
970     y[6*i+3] += sum4;
971     y[6*i+4] += sum5;
972     y[6*i+5] += sum6;
973   }
974 
975   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
976   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
977   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
983 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
986   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
987   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
988   PetscErrorCode ierr;
989   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
990 
991   PetscFunctionBegin;
992   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
993   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
994   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
995 
996   for (i=0; i<m; i++) {
997     idx    = a->j + a->i[i] ;
998     v      = a->a + a->i[i] ;
999     n      = a->i[i+1] - a->i[i];
1000     alpha1 = x[6*i];
1001     alpha2 = x[6*i+1];
1002     alpha3 = x[6*i+2];
1003     alpha4 = x[6*i+3];
1004     alpha5 = x[6*i+4];
1005     alpha6 = x[6*i+5];
1006     while (n-->0) {
1007       y[6*(*idx)]   += alpha1*(*v);
1008       y[6*(*idx)+1] += alpha2*(*v);
1009       y[6*(*idx)+2] += alpha3*(*v);
1010       y[6*(*idx)+3] += alpha4*(*v);
1011       y[6*(*idx)+4] += alpha5*(*v);
1012       y[6*(*idx)+5] += alpha6*(*v);
1013       idx++; v++;
1014     }
1015   }
1016   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1017   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1018   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 /* ------------------------------------------------------------------------------*/
1023 #undef __FUNCT__
1024 #define __FUNCT__ "MatMult_SeqMAIJ_7"
1025 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1026 {
1027   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1028   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1029   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1030   PetscErrorCode ierr;
1031   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1032   PetscInt       n,i,jrow,j;
1033 
1034   PetscFunctionBegin;
1035   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1036   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1037   idx  = a->j;
1038   v    = a->a;
1039   ii   = a->i;
1040 
1041   for (i=0; i<m; i++) {
1042     jrow = ii[i];
1043     n    = ii[i+1] - jrow;
1044     sum1  = 0.0;
1045     sum2  = 0.0;
1046     sum3  = 0.0;
1047     sum4  = 0.0;
1048     sum5  = 0.0;
1049     sum6  = 0.0;
1050     sum7  = 0.0;
1051     nonzerorow += (n>0);
1052     for (j=0; j<n; j++) {
1053       sum1 += v[jrow]*x[7*idx[jrow]];
1054       sum2 += v[jrow]*x[7*idx[jrow]+1];
1055       sum3 += v[jrow]*x[7*idx[jrow]+2];
1056       sum4 += v[jrow]*x[7*idx[jrow]+3];
1057       sum5 += v[jrow]*x[7*idx[jrow]+4];
1058       sum6 += v[jrow]*x[7*idx[jrow]+5];
1059       sum7 += v[jrow]*x[7*idx[jrow]+6];
1060       jrow++;
1061      }
1062     y[7*i]   = sum1;
1063     y[7*i+1] = sum2;
1064     y[7*i+2] = sum3;
1065     y[7*i+3] = sum4;
1066     y[7*i+4] = sum5;
1067     y[7*i+5] = sum6;
1068     y[7*i+6] = sum7;
1069   }
1070 
1071   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1072   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1073   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1079 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1080 {
1081   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1082   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1083   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1084   PetscErrorCode ierr;
1085   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
1086 
1087   PetscFunctionBegin;
1088   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1089   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1090   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1091 
1092   for (i=0; i<m; i++) {
1093     idx    = a->j + a->i[i] ;
1094     v      = a->a + a->i[i] ;
1095     n      = a->i[i+1] - a->i[i];
1096     alpha1 = x[7*i];
1097     alpha2 = x[7*i+1];
1098     alpha3 = x[7*i+2];
1099     alpha4 = x[7*i+3];
1100     alpha5 = x[7*i+4];
1101     alpha6 = x[7*i+5];
1102     alpha7 = x[7*i+6];
1103     while (n-->0) {
1104       y[7*(*idx)]   += alpha1*(*v);
1105       y[7*(*idx)+1] += alpha2*(*v);
1106       y[7*(*idx)+2] += alpha3*(*v);
1107       y[7*(*idx)+3] += alpha4*(*v);
1108       y[7*(*idx)+4] += alpha5*(*v);
1109       y[7*(*idx)+5] += alpha6*(*v);
1110       y[7*(*idx)+6] += alpha7*(*v);
1111       idx++; v++;
1112     }
1113   }
1114   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1115   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1116   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1122 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1123 {
1124   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1125   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1126   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1127   PetscErrorCode ierr;
1128   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1129   PetscInt       n,i,jrow,j;
1130 
1131   PetscFunctionBegin;
1132   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1133   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1134   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1135   idx  = a->j;
1136   v    = a->a;
1137   ii   = a->i;
1138 
1139   for (i=0; i<m; i++) {
1140     jrow = ii[i];
1141     n    = ii[i+1] - jrow;
1142     sum1  = 0.0;
1143     sum2  = 0.0;
1144     sum3  = 0.0;
1145     sum4  = 0.0;
1146     sum5  = 0.0;
1147     sum6  = 0.0;
1148     sum7  = 0.0;
1149     for (j=0; j<n; j++) {
1150       sum1 += v[jrow]*x[7*idx[jrow]];
1151       sum2 += v[jrow]*x[7*idx[jrow]+1];
1152       sum3 += v[jrow]*x[7*idx[jrow]+2];
1153       sum4 += v[jrow]*x[7*idx[jrow]+3];
1154       sum5 += v[jrow]*x[7*idx[jrow]+4];
1155       sum6 += v[jrow]*x[7*idx[jrow]+5];
1156       sum7 += v[jrow]*x[7*idx[jrow]+6];
1157       jrow++;
1158      }
1159     y[7*i]   += sum1;
1160     y[7*i+1] += sum2;
1161     y[7*i+2] += sum3;
1162     y[7*i+3] += sum4;
1163     y[7*i+4] += sum5;
1164     y[7*i+5] += sum6;
1165     y[7*i+6] += sum7;
1166   }
1167 
1168   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1169   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1170   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1176 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1177 {
1178   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1179   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1180   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1181   PetscErrorCode ierr;
1182   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
1183 
1184   PetscFunctionBegin;
1185   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1186   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1187   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1188   for (i=0; i<m; i++) {
1189     idx    = a->j + a->i[i] ;
1190     v      = a->a + a->i[i] ;
1191     n      = a->i[i+1] - a->i[i];
1192     alpha1 = x[7*i];
1193     alpha2 = x[7*i+1];
1194     alpha3 = x[7*i+2];
1195     alpha4 = x[7*i+3];
1196     alpha5 = x[7*i+4];
1197     alpha6 = x[7*i+5];
1198     alpha7 = x[7*i+6];
1199     while (n-->0) {
1200       y[7*(*idx)]   += alpha1*(*v);
1201       y[7*(*idx)+1] += alpha2*(*v);
1202       y[7*(*idx)+2] += alpha3*(*v);
1203       y[7*(*idx)+3] += alpha4*(*v);
1204       y[7*(*idx)+4] += alpha5*(*v);
1205       y[7*(*idx)+5] += alpha6*(*v);
1206       y[7*(*idx)+6] += alpha7*(*v);
1207       idx++; v++;
1208     }
1209   }
1210   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1211   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1212   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 #undef __FUNCT__
1217 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1218 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1219 {
1220   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1221   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1222   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1223   PetscErrorCode ierr;
1224   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1225   PetscInt       n,i,jrow,j;
1226 
1227   PetscFunctionBegin;
1228   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1229   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1230   idx  = a->j;
1231   v    = a->a;
1232   ii   = a->i;
1233 
1234   for (i=0; i<m; i++) {
1235     jrow = ii[i];
1236     n    = ii[i+1] - jrow;
1237     sum1  = 0.0;
1238     sum2  = 0.0;
1239     sum3  = 0.0;
1240     sum4  = 0.0;
1241     sum5  = 0.0;
1242     sum6  = 0.0;
1243     sum7  = 0.0;
1244     sum8  = 0.0;
1245     nonzerorow += (n>0);
1246     for (j=0; j<n; j++) {
1247       sum1 += v[jrow]*x[8*idx[jrow]];
1248       sum2 += v[jrow]*x[8*idx[jrow]+1];
1249       sum3 += v[jrow]*x[8*idx[jrow]+2];
1250       sum4 += v[jrow]*x[8*idx[jrow]+3];
1251       sum5 += v[jrow]*x[8*idx[jrow]+4];
1252       sum6 += v[jrow]*x[8*idx[jrow]+5];
1253       sum7 += v[jrow]*x[8*idx[jrow]+6];
1254       sum8 += v[jrow]*x[8*idx[jrow]+7];
1255       jrow++;
1256      }
1257     y[8*i]   = sum1;
1258     y[8*i+1] = sum2;
1259     y[8*i+2] = sum3;
1260     y[8*i+3] = sum4;
1261     y[8*i+4] = sum5;
1262     y[8*i+5] = sum6;
1263     y[8*i+6] = sum7;
1264     y[8*i+7] = sum8;
1265   }
1266 
1267   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1268   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1269   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1275 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1276 {
1277   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1279   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1280   PetscErrorCode ierr;
1281   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
1282 
1283   PetscFunctionBegin;
1284   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1285   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1286   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1287 
1288   for (i=0; i<m; i++) {
1289     idx    = a->j + a->i[i] ;
1290     v      = a->a + a->i[i] ;
1291     n      = a->i[i+1] - a->i[i];
1292     alpha1 = x[8*i];
1293     alpha2 = x[8*i+1];
1294     alpha3 = x[8*i+2];
1295     alpha4 = x[8*i+3];
1296     alpha5 = x[8*i+4];
1297     alpha6 = x[8*i+5];
1298     alpha7 = x[8*i+6];
1299     alpha8 = x[8*i+7];
1300     while (n-->0) {
1301       y[8*(*idx)]   += alpha1*(*v);
1302       y[8*(*idx)+1] += alpha2*(*v);
1303       y[8*(*idx)+2] += alpha3*(*v);
1304       y[8*(*idx)+3] += alpha4*(*v);
1305       y[8*(*idx)+4] += alpha5*(*v);
1306       y[8*(*idx)+5] += alpha6*(*v);
1307       y[8*(*idx)+6] += alpha7*(*v);
1308       y[8*(*idx)+7] += alpha8*(*v);
1309       idx++; v++;
1310     }
1311   }
1312   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1313   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1314   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #undef __FUNCT__
1319 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1320 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1321 {
1322   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1323   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1324   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1325   PetscErrorCode ierr;
1326   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1327   PetscInt       n,i,jrow,j;
1328 
1329   PetscFunctionBegin;
1330   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1331   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1332   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1333   idx  = a->j;
1334   v    = a->a;
1335   ii   = a->i;
1336 
1337   for (i=0; i<m; i++) {
1338     jrow = ii[i];
1339     n    = ii[i+1] - jrow;
1340     sum1  = 0.0;
1341     sum2  = 0.0;
1342     sum3  = 0.0;
1343     sum4  = 0.0;
1344     sum5  = 0.0;
1345     sum6  = 0.0;
1346     sum7  = 0.0;
1347     sum8  = 0.0;
1348     for (j=0; j<n; j++) {
1349       sum1 += v[jrow]*x[8*idx[jrow]];
1350       sum2 += v[jrow]*x[8*idx[jrow]+1];
1351       sum3 += v[jrow]*x[8*idx[jrow]+2];
1352       sum4 += v[jrow]*x[8*idx[jrow]+3];
1353       sum5 += v[jrow]*x[8*idx[jrow]+4];
1354       sum6 += v[jrow]*x[8*idx[jrow]+5];
1355       sum7 += v[jrow]*x[8*idx[jrow]+6];
1356       sum8 += v[jrow]*x[8*idx[jrow]+7];
1357       jrow++;
1358      }
1359     y[8*i]   += sum1;
1360     y[8*i+1] += sum2;
1361     y[8*i+2] += sum3;
1362     y[8*i+3] += sum4;
1363     y[8*i+4] += sum5;
1364     y[8*i+5] += sum6;
1365     y[8*i+6] += sum7;
1366     y[8*i+7] += sum8;
1367   }
1368 
1369   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1370   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1371   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1377 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1378 {
1379   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1380   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1381   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1382   PetscErrorCode ierr;
1383   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
1384 
1385   PetscFunctionBegin;
1386   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1387   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1388   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1389   for (i=0; i<m; i++) {
1390     idx    = a->j + a->i[i] ;
1391     v      = a->a + a->i[i] ;
1392     n      = a->i[i+1] - a->i[i];
1393     alpha1 = x[8*i];
1394     alpha2 = x[8*i+1];
1395     alpha3 = x[8*i+2];
1396     alpha4 = x[8*i+3];
1397     alpha5 = x[8*i+4];
1398     alpha6 = x[8*i+5];
1399     alpha7 = x[8*i+6];
1400     alpha8 = x[8*i+7];
1401     while (n-->0) {
1402       y[8*(*idx)]   += alpha1*(*v);
1403       y[8*(*idx)+1] += alpha2*(*v);
1404       y[8*(*idx)+2] += alpha3*(*v);
1405       y[8*(*idx)+3] += alpha4*(*v);
1406       y[8*(*idx)+4] += alpha5*(*v);
1407       y[8*(*idx)+5] += alpha6*(*v);
1408       y[8*(*idx)+6] += alpha7*(*v);
1409       y[8*(*idx)+7] += alpha8*(*v);
1410       idx++; v++;
1411     }
1412   }
1413   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1414   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1415   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 /* ------------------------------------------------------------------------------*/
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1422 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1423 {
1424   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1425   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1426   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1427   PetscErrorCode ierr;
1428   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1429   PetscInt       n,i,jrow,j;
1430 
1431   PetscFunctionBegin;
1432   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1433   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1434   idx  = a->j;
1435   v    = a->a;
1436   ii   = a->i;
1437 
1438   for (i=0; i<m; i++) {
1439     jrow = ii[i];
1440     n    = ii[i+1] - jrow;
1441     sum1  = 0.0;
1442     sum2  = 0.0;
1443     sum3  = 0.0;
1444     sum4  = 0.0;
1445     sum5  = 0.0;
1446     sum6  = 0.0;
1447     sum7  = 0.0;
1448     sum8  = 0.0;
1449     sum9  = 0.0;
1450     nonzerorow += (n>0);
1451     for (j=0; j<n; j++) {
1452       sum1 += v[jrow]*x[9*idx[jrow]];
1453       sum2 += v[jrow]*x[9*idx[jrow]+1];
1454       sum3 += v[jrow]*x[9*idx[jrow]+2];
1455       sum4 += v[jrow]*x[9*idx[jrow]+3];
1456       sum5 += v[jrow]*x[9*idx[jrow]+4];
1457       sum6 += v[jrow]*x[9*idx[jrow]+5];
1458       sum7 += v[jrow]*x[9*idx[jrow]+6];
1459       sum8 += v[jrow]*x[9*idx[jrow]+7];
1460       sum9 += v[jrow]*x[9*idx[jrow]+8];
1461       jrow++;
1462      }
1463     y[9*i]   = sum1;
1464     y[9*i+1] = sum2;
1465     y[9*i+2] = sum3;
1466     y[9*i+3] = sum4;
1467     y[9*i+4] = sum5;
1468     y[9*i+5] = sum6;
1469     y[9*i+6] = sum7;
1470     y[9*i+7] = sum8;
1471     y[9*i+8] = sum9;
1472   }
1473 
1474   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1475   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1476   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 /* ------------------------------------------------------------------------------*/
1481 
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1484 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1485 {
1486   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1488   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1489   PetscErrorCode ierr;
1490   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
1491 
1492   PetscFunctionBegin;
1493   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1494   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1495   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1496 
1497   for (i=0; i<m; i++) {
1498     idx    = a->j + a->i[i] ;
1499     v      = a->a + a->i[i] ;
1500     n      = a->i[i+1] - a->i[i];
1501     alpha1 = x[9*i];
1502     alpha2 = x[9*i+1];
1503     alpha3 = x[9*i+2];
1504     alpha4 = x[9*i+3];
1505     alpha5 = x[9*i+4];
1506     alpha6 = x[9*i+5];
1507     alpha7 = x[9*i+6];
1508     alpha8 = x[9*i+7];
1509     alpha9 = x[9*i+8];
1510     while (n-->0) {
1511       y[9*(*idx)]   += alpha1*(*v);
1512       y[9*(*idx)+1] += alpha2*(*v);
1513       y[9*(*idx)+2] += alpha3*(*v);
1514       y[9*(*idx)+3] += alpha4*(*v);
1515       y[9*(*idx)+4] += alpha5*(*v);
1516       y[9*(*idx)+5] += alpha6*(*v);
1517       y[9*(*idx)+6] += alpha7*(*v);
1518       y[9*(*idx)+7] += alpha8*(*v);
1519       y[9*(*idx)+8] += alpha9*(*v);
1520       idx++; v++;
1521     }
1522   }
1523   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1524   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1525   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 #undef __FUNCT__
1530 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1531 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1532 {
1533   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1534   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1535   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1536   PetscErrorCode ierr;
1537   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1538   PetscInt       n,i,jrow,j;
1539 
1540   PetscFunctionBegin;
1541   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1542   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1543   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1544   idx  = a->j;
1545   v    = a->a;
1546   ii   = a->i;
1547 
1548   for (i=0; i<m; i++) {
1549     jrow = ii[i];
1550     n    = ii[i+1] - jrow;
1551     sum1  = 0.0;
1552     sum2  = 0.0;
1553     sum3  = 0.0;
1554     sum4  = 0.0;
1555     sum5  = 0.0;
1556     sum6  = 0.0;
1557     sum7  = 0.0;
1558     sum8  = 0.0;
1559     sum9  = 0.0;
1560     for (j=0; j<n; j++) {
1561       sum1 += v[jrow]*x[9*idx[jrow]];
1562       sum2 += v[jrow]*x[9*idx[jrow]+1];
1563       sum3 += v[jrow]*x[9*idx[jrow]+2];
1564       sum4 += v[jrow]*x[9*idx[jrow]+3];
1565       sum5 += v[jrow]*x[9*idx[jrow]+4];
1566       sum6 += v[jrow]*x[9*idx[jrow]+5];
1567       sum7 += v[jrow]*x[9*idx[jrow]+6];
1568       sum8 += v[jrow]*x[9*idx[jrow]+7];
1569       sum9 += v[jrow]*x[9*idx[jrow]+8];
1570       jrow++;
1571      }
1572     y[9*i]   += sum1;
1573     y[9*i+1] += sum2;
1574     y[9*i+2] += sum3;
1575     y[9*i+3] += sum4;
1576     y[9*i+4] += sum5;
1577     y[9*i+5] += sum6;
1578     y[9*i+6] += sum7;
1579     y[9*i+7] += sum8;
1580     y[9*i+8] += sum9;
1581   }
1582 
1583   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1584   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1585   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1591 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1592 {
1593   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1594   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1595   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1596   PetscErrorCode ierr;
1597   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
1598 
1599   PetscFunctionBegin;
1600   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1601   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1602   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1603   for (i=0; i<m; i++) {
1604     idx    = a->j + a->i[i] ;
1605     v      = a->a + a->i[i] ;
1606     n      = a->i[i+1] - a->i[i];
1607     alpha1 = x[9*i];
1608     alpha2 = x[9*i+1];
1609     alpha3 = x[9*i+2];
1610     alpha4 = x[9*i+3];
1611     alpha5 = x[9*i+4];
1612     alpha6 = x[9*i+5];
1613     alpha7 = x[9*i+6];
1614     alpha8 = x[9*i+7];
1615     alpha9 = x[9*i+8];
1616     while (n-->0) {
1617       y[9*(*idx)]   += alpha1*(*v);
1618       y[9*(*idx)+1] += alpha2*(*v);
1619       y[9*(*idx)+2] += alpha3*(*v);
1620       y[9*(*idx)+3] += alpha4*(*v);
1621       y[9*(*idx)+4] += alpha5*(*v);
1622       y[9*(*idx)+5] += alpha6*(*v);
1623       y[9*(*idx)+6] += alpha7*(*v);
1624       y[9*(*idx)+7] += alpha8*(*v);
1625       y[9*(*idx)+8] += alpha9*(*v);
1626       idx++; v++;
1627     }
1628   }
1629   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1630   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1631   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1632   PetscFunctionReturn(0);
1633 }
1634 /*--------------------------------------------------------------------------------------------*/
1635 #undef __FUNCT__
1636 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1637 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1638 {
1639   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1640   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1641   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1642   PetscErrorCode ierr;
1643   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1644   PetscInt       n,i,jrow,j;
1645 
1646   PetscFunctionBegin;
1647   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1648   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1649   idx  = a->j;
1650   v    = a->a;
1651   ii   = a->i;
1652 
1653   for (i=0; i<m; i++) {
1654     jrow = ii[i];
1655     n    = ii[i+1] - jrow;
1656     sum1  = 0.0;
1657     sum2  = 0.0;
1658     sum3  = 0.0;
1659     sum4  = 0.0;
1660     sum5  = 0.0;
1661     sum6  = 0.0;
1662     sum7  = 0.0;
1663     sum8  = 0.0;
1664     sum9  = 0.0;
1665     sum10 = 0.0;
1666     nonzerorow += (n>0);
1667     for (j=0; j<n; j++) {
1668       sum1  += v[jrow]*x[10*idx[jrow]];
1669       sum2  += v[jrow]*x[10*idx[jrow]+1];
1670       sum3  += v[jrow]*x[10*idx[jrow]+2];
1671       sum4  += v[jrow]*x[10*idx[jrow]+3];
1672       sum5  += v[jrow]*x[10*idx[jrow]+4];
1673       sum6  += v[jrow]*x[10*idx[jrow]+5];
1674       sum7  += v[jrow]*x[10*idx[jrow]+6];
1675       sum8  += v[jrow]*x[10*idx[jrow]+7];
1676       sum9  += v[jrow]*x[10*idx[jrow]+8];
1677       sum10 += v[jrow]*x[10*idx[jrow]+9];
1678       jrow++;
1679      }
1680     y[10*i]   = sum1;
1681     y[10*i+1] = sum2;
1682     y[10*i+2] = sum3;
1683     y[10*i+3] = sum4;
1684     y[10*i+4] = sum5;
1685     y[10*i+5] = sum6;
1686     y[10*i+6] = sum7;
1687     y[10*i+7] = sum8;
1688     y[10*i+8] = sum9;
1689     y[10*i+9] = sum10;
1690   }
1691 
1692   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1693   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1694   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1700 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1701 {
1702   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1703   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1704   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1705   PetscErrorCode ierr;
1706   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1707   PetscInt       n,i,jrow,j;
1708 
1709   PetscFunctionBegin;
1710   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1711   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1712   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1713   idx  = a->j;
1714   v    = a->a;
1715   ii   = a->i;
1716 
1717   for (i=0; i<m; i++) {
1718     jrow = ii[i];
1719     n    = ii[i+1] - jrow;
1720     sum1  = 0.0;
1721     sum2  = 0.0;
1722     sum3  = 0.0;
1723     sum4  = 0.0;
1724     sum5  = 0.0;
1725     sum6  = 0.0;
1726     sum7  = 0.0;
1727     sum8  = 0.0;
1728     sum9  = 0.0;
1729     sum10 = 0.0;
1730     for (j=0; j<n; j++) {
1731       sum1  += v[jrow]*x[10*idx[jrow]];
1732       sum2  += v[jrow]*x[10*idx[jrow]+1];
1733       sum3  += v[jrow]*x[10*idx[jrow]+2];
1734       sum4  += v[jrow]*x[10*idx[jrow]+3];
1735       sum5  += v[jrow]*x[10*idx[jrow]+4];
1736       sum6  += v[jrow]*x[10*idx[jrow]+5];
1737       sum7  += v[jrow]*x[10*idx[jrow]+6];
1738       sum8  += v[jrow]*x[10*idx[jrow]+7];
1739       sum9  += v[jrow]*x[10*idx[jrow]+8];
1740       sum10 += v[jrow]*x[10*idx[jrow]+9];
1741       jrow++;
1742      }
1743     y[10*i]   += sum1;
1744     y[10*i+1] += sum2;
1745     y[10*i+2] += sum3;
1746     y[10*i+3] += sum4;
1747     y[10*i+4] += sum5;
1748     y[10*i+5] += sum6;
1749     y[10*i+6] += sum7;
1750     y[10*i+7] += sum8;
1751     y[10*i+8] += sum9;
1752     y[10*i+9] += sum10;
1753   }
1754 
1755   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1756   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1757   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1763 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1764 {
1765   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1766   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1767   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1768   PetscErrorCode ierr;
1769   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
1770 
1771   PetscFunctionBegin;
1772   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1773   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1774   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1775 
1776   for (i=0; i<m; i++) {
1777     idx    = a->j + a->i[i] ;
1778     v      = a->a + a->i[i] ;
1779     n      = a->i[i+1] - a->i[i];
1780     alpha1 = x[10*i];
1781     alpha2 = x[10*i+1];
1782     alpha3 = x[10*i+2];
1783     alpha4 = x[10*i+3];
1784     alpha5 = x[10*i+4];
1785     alpha6 = x[10*i+5];
1786     alpha7 = x[10*i+6];
1787     alpha8 = x[10*i+7];
1788     alpha9 = x[10*i+8];
1789     alpha10 = x[10*i+9];
1790     while (n-->0) {
1791       y[10*(*idx)]   += alpha1*(*v);
1792       y[10*(*idx)+1] += alpha2*(*v);
1793       y[10*(*idx)+2] += alpha3*(*v);
1794       y[10*(*idx)+3] += alpha4*(*v);
1795       y[10*(*idx)+4] += alpha5*(*v);
1796       y[10*(*idx)+5] += alpha6*(*v);
1797       y[10*(*idx)+6] += alpha7*(*v);
1798       y[10*(*idx)+7] += alpha8*(*v);
1799       y[10*(*idx)+8] += alpha9*(*v);
1800       y[10*(*idx)+9] += alpha10*(*v);
1801       idx++; v++;
1802     }
1803   }
1804   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1805   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1806   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1807   PetscFunctionReturn(0);
1808 }
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1812 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1813 {
1814   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1815   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1816   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1817   PetscErrorCode ierr;
1818   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
1819 
1820   PetscFunctionBegin;
1821   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1822   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1823   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1824   for (i=0; i<m; i++) {
1825     idx    = a->j + a->i[i] ;
1826     v      = a->a + a->i[i] ;
1827     n      = a->i[i+1] - a->i[i];
1828     alpha1 = x[10*i];
1829     alpha2 = x[10*i+1];
1830     alpha3 = x[10*i+2];
1831     alpha4 = x[10*i+3];
1832     alpha5 = x[10*i+4];
1833     alpha6 = x[10*i+5];
1834     alpha7 = x[10*i+6];
1835     alpha8 = x[10*i+7];
1836     alpha9 = x[10*i+8];
1837     alpha10 = x[10*i+9];
1838     while (n-->0) {
1839       y[10*(*idx)]   += alpha1*(*v);
1840       y[10*(*idx)+1] += alpha2*(*v);
1841       y[10*(*idx)+2] += alpha3*(*v);
1842       y[10*(*idx)+3] += alpha4*(*v);
1843       y[10*(*idx)+4] += alpha5*(*v);
1844       y[10*(*idx)+5] += alpha6*(*v);
1845       y[10*(*idx)+6] += alpha7*(*v);
1846       y[10*(*idx)+7] += alpha8*(*v);
1847       y[10*(*idx)+8] += alpha9*(*v);
1848       y[10*(*idx)+9] += alpha10*(*v);
1849       idx++; v++;
1850     }
1851   }
1852   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1853   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1854   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 
1859 /*--------------------------------------------------------------------------------------------*/
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1862 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1863 {
1864   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1865   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1866   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1867   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1868   PetscErrorCode ierr;
1869   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1870   PetscInt       n,i,jrow,j;
1871 
1872   PetscFunctionBegin;
1873   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1874   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1875   idx  = a->j;
1876   v    = a->a;
1877   ii   = a->i;
1878 
1879   for (i=0; i<m; i++) {
1880     jrow = ii[i];
1881     n    = ii[i+1] - jrow;
1882     sum1  = 0.0;
1883     sum2  = 0.0;
1884     sum3  = 0.0;
1885     sum4  = 0.0;
1886     sum5  = 0.0;
1887     sum6  = 0.0;
1888     sum7  = 0.0;
1889     sum8  = 0.0;
1890     sum9  = 0.0;
1891     sum10 = 0.0;
1892     sum11 = 0.0;
1893     sum12 = 0.0;
1894     sum13 = 0.0;
1895     sum14 = 0.0;
1896     sum15 = 0.0;
1897     sum16 = 0.0;
1898     nonzerorow += (n>0);
1899     for (j=0; j<n; j++) {
1900       sum1  += v[jrow]*x[16*idx[jrow]];
1901       sum2  += v[jrow]*x[16*idx[jrow]+1];
1902       sum3  += v[jrow]*x[16*idx[jrow]+2];
1903       sum4  += v[jrow]*x[16*idx[jrow]+3];
1904       sum5  += v[jrow]*x[16*idx[jrow]+4];
1905       sum6  += v[jrow]*x[16*idx[jrow]+5];
1906       sum7  += v[jrow]*x[16*idx[jrow]+6];
1907       sum8  += v[jrow]*x[16*idx[jrow]+7];
1908       sum9  += v[jrow]*x[16*idx[jrow]+8];
1909       sum10 += v[jrow]*x[16*idx[jrow]+9];
1910       sum11 += v[jrow]*x[16*idx[jrow]+10];
1911       sum12 += v[jrow]*x[16*idx[jrow]+11];
1912       sum13 += v[jrow]*x[16*idx[jrow]+12];
1913       sum14 += v[jrow]*x[16*idx[jrow]+13];
1914       sum15 += v[jrow]*x[16*idx[jrow]+14];
1915       sum16 += v[jrow]*x[16*idx[jrow]+15];
1916       jrow++;
1917      }
1918     y[16*i]    = sum1;
1919     y[16*i+1]  = sum2;
1920     y[16*i+2]  = sum3;
1921     y[16*i+3]  = sum4;
1922     y[16*i+4]  = sum5;
1923     y[16*i+5]  = sum6;
1924     y[16*i+6]  = sum7;
1925     y[16*i+7]  = sum8;
1926     y[16*i+8]  = sum9;
1927     y[16*i+9]  = sum10;
1928     y[16*i+10] = sum11;
1929     y[16*i+11] = sum12;
1930     y[16*i+12] = sum13;
1931     y[16*i+13] = sum14;
1932     y[16*i+14] = sum15;
1933     y[16*i+15] = sum16;
1934   }
1935 
1936   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
1937   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1938   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1944 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1945 {
1946   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1947   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1948   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1949   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1950   PetscErrorCode ierr;
1951   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
1952 
1953   PetscFunctionBegin;
1954   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1955   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1956   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1957 
1958   for (i=0; i<m; i++) {
1959     idx    = a->j + a->i[i] ;
1960     v      = a->a + a->i[i] ;
1961     n      = a->i[i+1] - a->i[i];
1962     alpha1  = x[16*i];
1963     alpha2  = x[16*i+1];
1964     alpha3  = x[16*i+2];
1965     alpha4  = x[16*i+3];
1966     alpha5  = x[16*i+4];
1967     alpha6  = x[16*i+5];
1968     alpha7  = x[16*i+6];
1969     alpha8  = x[16*i+7];
1970     alpha9  = x[16*i+8];
1971     alpha10 = x[16*i+9];
1972     alpha11 = x[16*i+10];
1973     alpha12 = x[16*i+11];
1974     alpha13 = x[16*i+12];
1975     alpha14 = x[16*i+13];
1976     alpha15 = x[16*i+14];
1977     alpha16 = x[16*i+15];
1978     while (n-->0) {
1979       y[16*(*idx)]    += alpha1*(*v);
1980       y[16*(*idx)+1]  += alpha2*(*v);
1981       y[16*(*idx)+2]  += alpha3*(*v);
1982       y[16*(*idx)+3]  += alpha4*(*v);
1983       y[16*(*idx)+4]  += alpha5*(*v);
1984       y[16*(*idx)+5]  += alpha6*(*v);
1985       y[16*(*idx)+6]  += alpha7*(*v);
1986       y[16*(*idx)+7]  += alpha8*(*v);
1987       y[16*(*idx)+8]  += alpha9*(*v);
1988       y[16*(*idx)+9]  += alpha10*(*v);
1989       y[16*(*idx)+10] += alpha11*(*v);
1990       y[16*(*idx)+11] += alpha12*(*v);
1991       y[16*(*idx)+12] += alpha13*(*v);
1992       y[16*(*idx)+13] += alpha14*(*v);
1993       y[16*(*idx)+14] += alpha15*(*v);
1994       y[16*(*idx)+15] += alpha16*(*v);
1995       idx++; v++;
1996     }
1997   }
1998   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
1999   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2000   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 #undef __FUNCT__
2005 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2006 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2007 {
2008   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2009   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2010   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2011   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2012   PetscErrorCode ierr;
2013   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
2014   PetscInt       n,i,jrow,j;
2015 
2016   PetscFunctionBegin;
2017   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2018   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2019   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2020   idx  = a->j;
2021   v    = a->a;
2022   ii   = a->i;
2023 
2024   for (i=0; i<m; i++) {
2025     jrow = ii[i];
2026     n    = ii[i+1] - jrow;
2027     sum1  = 0.0;
2028     sum2  = 0.0;
2029     sum3  = 0.0;
2030     sum4  = 0.0;
2031     sum5  = 0.0;
2032     sum6  = 0.0;
2033     sum7  = 0.0;
2034     sum8  = 0.0;
2035     sum9  = 0.0;
2036     sum10 = 0.0;
2037     sum11 = 0.0;
2038     sum12 = 0.0;
2039     sum13 = 0.0;
2040     sum14 = 0.0;
2041     sum15 = 0.0;
2042     sum16 = 0.0;
2043     for (j=0; j<n; j++) {
2044       sum1  += v[jrow]*x[16*idx[jrow]];
2045       sum2  += v[jrow]*x[16*idx[jrow]+1];
2046       sum3  += v[jrow]*x[16*idx[jrow]+2];
2047       sum4  += v[jrow]*x[16*idx[jrow]+3];
2048       sum5  += v[jrow]*x[16*idx[jrow]+4];
2049       sum6  += v[jrow]*x[16*idx[jrow]+5];
2050       sum7  += v[jrow]*x[16*idx[jrow]+6];
2051       sum8  += v[jrow]*x[16*idx[jrow]+7];
2052       sum9  += v[jrow]*x[16*idx[jrow]+8];
2053       sum10 += v[jrow]*x[16*idx[jrow]+9];
2054       sum11 += v[jrow]*x[16*idx[jrow]+10];
2055       sum12 += v[jrow]*x[16*idx[jrow]+11];
2056       sum13 += v[jrow]*x[16*idx[jrow]+12];
2057       sum14 += v[jrow]*x[16*idx[jrow]+13];
2058       sum15 += v[jrow]*x[16*idx[jrow]+14];
2059       sum16 += v[jrow]*x[16*idx[jrow]+15];
2060       jrow++;
2061      }
2062     y[16*i]    += sum1;
2063     y[16*i+1]  += sum2;
2064     y[16*i+2]  += sum3;
2065     y[16*i+3]  += sum4;
2066     y[16*i+4]  += sum5;
2067     y[16*i+5]  += sum6;
2068     y[16*i+6]  += sum7;
2069     y[16*i+7]  += sum8;
2070     y[16*i+8]  += sum9;
2071     y[16*i+9]  += sum10;
2072     y[16*i+10] += sum11;
2073     y[16*i+11] += sum12;
2074     y[16*i+12] += sum13;
2075     y[16*i+13] += sum14;
2076     y[16*i+14] += sum15;
2077     y[16*i+15] += sum16;
2078   }
2079 
2080   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2081   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2082   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 #undef __FUNCT__
2087 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2088 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2089 {
2090   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2091   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2092   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2093   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2094   PetscErrorCode ierr;
2095   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
2096 
2097   PetscFunctionBegin;
2098   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2099   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2100   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2101   for (i=0; i<m; i++) {
2102     idx    = a->j + a->i[i] ;
2103     v      = a->a + a->i[i] ;
2104     n      = a->i[i+1] - a->i[i];
2105     alpha1 = x[16*i];
2106     alpha2 = x[16*i+1];
2107     alpha3 = x[16*i+2];
2108     alpha4 = x[16*i+3];
2109     alpha5 = x[16*i+4];
2110     alpha6 = x[16*i+5];
2111     alpha7 = x[16*i+6];
2112     alpha8 = x[16*i+7];
2113     alpha9  = x[16*i+8];
2114     alpha10 = x[16*i+9];
2115     alpha11 = x[16*i+10];
2116     alpha12 = x[16*i+11];
2117     alpha13 = x[16*i+12];
2118     alpha14 = x[16*i+13];
2119     alpha15 = x[16*i+14];
2120     alpha16 = x[16*i+15];
2121     while (n-->0) {
2122       y[16*(*idx)]   += alpha1*(*v);
2123       y[16*(*idx)+1] += alpha2*(*v);
2124       y[16*(*idx)+2] += alpha3*(*v);
2125       y[16*(*idx)+3] += alpha4*(*v);
2126       y[16*(*idx)+4] += alpha5*(*v);
2127       y[16*(*idx)+5] += alpha6*(*v);
2128       y[16*(*idx)+6] += alpha7*(*v);
2129       y[16*(*idx)+7] += alpha8*(*v);
2130       y[16*(*idx)+8]  += alpha9*(*v);
2131       y[16*(*idx)+9]  += alpha10*(*v);
2132       y[16*(*idx)+10] += alpha11*(*v);
2133       y[16*(*idx)+11] += alpha12*(*v);
2134       y[16*(*idx)+12] += alpha13*(*v);
2135       y[16*(*idx)+13] += alpha14*(*v);
2136       y[16*(*idx)+14] += alpha15*(*v);
2137       y[16*(*idx)+15] += alpha16*(*v);
2138       idx++; v++;
2139     }
2140   }
2141   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2142   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2143   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 /*--------------------------------------------------------------------------------------------*/
2148 #undef __FUNCT__
2149 #define __FUNCT__ "MatMult_SeqMAIJ_18"
2150 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2151 {
2152   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2153   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2154   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2155   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2156   PetscErrorCode ierr;
2157   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
2158   PetscInt       n,i,jrow,j;
2159 
2160   PetscFunctionBegin;
2161   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2162   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2163   idx  = a->j;
2164   v    = a->a;
2165   ii   = a->i;
2166 
2167   for (i=0; i<m; i++) {
2168     jrow = ii[i];
2169     n    = ii[i+1] - jrow;
2170     sum1  = 0.0;
2171     sum2  = 0.0;
2172     sum3  = 0.0;
2173     sum4  = 0.0;
2174     sum5  = 0.0;
2175     sum6  = 0.0;
2176     sum7  = 0.0;
2177     sum8  = 0.0;
2178     sum9  = 0.0;
2179     sum10 = 0.0;
2180     sum11 = 0.0;
2181     sum12 = 0.0;
2182     sum13 = 0.0;
2183     sum14 = 0.0;
2184     sum15 = 0.0;
2185     sum16 = 0.0;
2186     sum17 = 0.0;
2187     sum18 = 0.0;
2188     nonzerorow += (n>0);
2189     for (j=0; j<n; j++) {
2190       sum1  += v[jrow]*x[18*idx[jrow]];
2191       sum2  += v[jrow]*x[18*idx[jrow]+1];
2192       sum3  += v[jrow]*x[18*idx[jrow]+2];
2193       sum4  += v[jrow]*x[18*idx[jrow]+3];
2194       sum5  += v[jrow]*x[18*idx[jrow]+4];
2195       sum6  += v[jrow]*x[18*idx[jrow]+5];
2196       sum7  += v[jrow]*x[18*idx[jrow]+6];
2197       sum8  += v[jrow]*x[18*idx[jrow]+7];
2198       sum9  += v[jrow]*x[18*idx[jrow]+8];
2199       sum10 += v[jrow]*x[18*idx[jrow]+9];
2200       sum11 += v[jrow]*x[18*idx[jrow]+10];
2201       sum12 += v[jrow]*x[18*idx[jrow]+11];
2202       sum13 += v[jrow]*x[18*idx[jrow]+12];
2203       sum14 += v[jrow]*x[18*idx[jrow]+13];
2204       sum15 += v[jrow]*x[18*idx[jrow]+14];
2205       sum16 += v[jrow]*x[18*idx[jrow]+15];
2206       sum17 += v[jrow]*x[18*idx[jrow]+16];
2207       sum18 += v[jrow]*x[18*idx[jrow]+17];
2208       jrow++;
2209      }
2210     y[18*i]    = sum1;
2211     y[18*i+1]  = sum2;
2212     y[18*i+2]  = sum3;
2213     y[18*i+3]  = sum4;
2214     y[18*i+4]  = sum5;
2215     y[18*i+5]  = sum6;
2216     y[18*i+6]  = sum7;
2217     y[18*i+7]  = sum8;
2218     y[18*i+8]  = sum9;
2219     y[18*i+9]  = sum10;
2220     y[18*i+10] = sum11;
2221     y[18*i+11] = sum12;
2222     y[18*i+12] = sum13;
2223     y[18*i+13] = sum14;
2224     y[18*i+14] = sum15;
2225     y[18*i+15] = sum16;
2226     y[18*i+16] = sum17;
2227     y[18*i+17] = sum18;
2228   }
2229 
2230   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2231   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2232   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2238 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2239 {
2240   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2241   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2242   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
2243   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2244   PetscErrorCode ierr;
2245   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
2246 
2247   PetscFunctionBegin;
2248   ierr = VecSet(yy,zero);CHKERRQ(ierr);
2249   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2250   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2251 
2252   for (i=0; i<m; i++) {
2253     idx    = a->j + a->i[i] ;
2254     v      = a->a + a->i[i] ;
2255     n      = a->i[i+1] - a->i[i];
2256     alpha1  = x[18*i];
2257     alpha2  = x[18*i+1];
2258     alpha3  = x[18*i+2];
2259     alpha4  = x[18*i+3];
2260     alpha5  = x[18*i+4];
2261     alpha6  = x[18*i+5];
2262     alpha7  = x[18*i+6];
2263     alpha8  = x[18*i+7];
2264     alpha9  = x[18*i+8];
2265     alpha10 = x[18*i+9];
2266     alpha11 = x[18*i+10];
2267     alpha12 = x[18*i+11];
2268     alpha13 = x[18*i+12];
2269     alpha14 = x[18*i+13];
2270     alpha15 = x[18*i+14];
2271     alpha16 = x[18*i+15];
2272     alpha17 = x[18*i+16];
2273     alpha18 = x[18*i+17];
2274     while (n-->0) {
2275       y[18*(*idx)]    += alpha1*(*v);
2276       y[18*(*idx)+1]  += alpha2*(*v);
2277       y[18*(*idx)+2]  += alpha3*(*v);
2278       y[18*(*idx)+3]  += alpha4*(*v);
2279       y[18*(*idx)+4]  += alpha5*(*v);
2280       y[18*(*idx)+5]  += alpha6*(*v);
2281       y[18*(*idx)+6]  += alpha7*(*v);
2282       y[18*(*idx)+7]  += alpha8*(*v);
2283       y[18*(*idx)+8]  += alpha9*(*v);
2284       y[18*(*idx)+9]  += alpha10*(*v);
2285       y[18*(*idx)+10] += alpha11*(*v);
2286       y[18*(*idx)+11] += alpha12*(*v);
2287       y[18*(*idx)+12] += alpha13*(*v);
2288       y[18*(*idx)+13] += alpha14*(*v);
2289       y[18*(*idx)+14] += alpha15*(*v);
2290       y[18*(*idx)+15] += alpha16*(*v);
2291       y[18*(*idx)+16] += alpha17*(*v);
2292       y[18*(*idx)+17] += alpha18*(*v);
2293       idx++; v++;
2294     }
2295   }
2296   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2297   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2298   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2304 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2305 {
2306   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2307   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2308   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2309   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2310   PetscErrorCode ierr;
2311   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
2312   PetscInt       n,i,jrow,j;
2313 
2314   PetscFunctionBegin;
2315   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2316   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2317   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2318   idx  = a->j;
2319   v    = a->a;
2320   ii   = a->i;
2321 
2322   for (i=0; i<m; i++) {
2323     jrow = ii[i];
2324     n    = ii[i+1] - jrow;
2325     sum1  = 0.0;
2326     sum2  = 0.0;
2327     sum3  = 0.0;
2328     sum4  = 0.0;
2329     sum5  = 0.0;
2330     sum6  = 0.0;
2331     sum7  = 0.0;
2332     sum8  = 0.0;
2333     sum9  = 0.0;
2334     sum10 = 0.0;
2335     sum11 = 0.0;
2336     sum12 = 0.0;
2337     sum13 = 0.0;
2338     sum14 = 0.0;
2339     sum15 = 0.0;
2340     sum16 = 0.0;
2341     sum17 = 0.0;
2342     sum18 = 0.0;
2343     for (j=0; j<n; j++) {
2344       sum1  += v[jrow]*x[18*idx[jrow]];
2345       sum2  += v[jrow]*x[18*idx[jrow]+1];
2346       sum3  += v[jrow]*x[18*idx[jrow]+2];
2347       sum4  += v[jrow]*x[18*idx[jrow]+3];
2348       sum5  += v[jrow]*x[18*idx[jrow]+4];
2349       sum6  += v[jrow]*x[18*idx[jrow]+5];
2350       sum7  += v[jrow]*x[18*idx[jrow]+6];
2351       sum8  += v[jrow]*x[18*idx[jrow]+7];
2352       sum9  += v[jrow]*x[18*idx[jrow]+8];
2353       sum10 += v[jrow]*x[18*idx[jrow]+9];
2354       sum11 += v[jrow]*x[18*idx[jrow]+10];
2355       sum12 += v[jrow]*x[18*idx[jrow]+11];
2356       sum13 += v[jrow]*x[18*idx[jrow]+12];
2357       sum14 += v[jrow]*x[18*idx[jrow]+13];
2358       sum15 += v[jrow]*x[18*idx[jrow]+14];
2359       sum16 += v[jrow]*x[18*idx[jrow]+15];
2360       sum17 += v[jrow]*x[18*idx[jrow]+16];
2361       sum18 += v[jrow]*x[18*idx[jrow]+17];
2362       jrow++;
2363      }
2364     y[18*i]    += sum1;
2365     y[18*i+1]  += sum2;
2366     y[18*i+2]  += sum3;
2367     y[18*i+3]  += sum4;
2368     y[18*i+4]  += sum5;
2369     y[18*i+5]  += sum6;
2370     y[18*i+6]  += sum7;
2371     y[18*i+7]  += sum8;
2372     y[18*i+8]  += sum9;
2373     y[18*i+9]  += sum10;
2374     y[18*i+10] += sum11;
2375     y[18*i+11] += sum12;
2376     y[18*i+12] += sum13;
2377     y[18*i+13] += sum14;
2378     y[18*i+14] += sum15;
2379     y[18*i+15] += sum16;
2380     y[18*i+16] += sum17;
2381     y[18*i+17] += sum18;
2382   }
2383 
2384   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2385   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2386   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2392 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2393 {
2394   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2395   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2396   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2397   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2398   PetscErrorCode ierr;
2399   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;
2400 
2401   PetscFunctionBegin;
2402   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2403   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2404   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2405   for (i=0; i<m; i++) {
2406     idx    = a->j + a->i[i] ;
2407     v      = a->a + a->i[i] ;
2408     n      = a->i[i+1] - a->i[i];
2409     alpha1 = x[18*i];
2410     alpha2 = x[18*i+1];
2411     alpha3 = x[18*i+2];
2412     alpha4 = x[18*i+3];
2413     alpha5 = x[18*i+4];
2414     alpha6 = x[18*i+5];
2415     alpha7 = x[18*i+6];
2416     alpha8 = x[18*i+7];
2417     alpha9  = x[18*i+8];
2418     alpha10 = x[18*i+9];
2419     alpha11 = x[18*i+10];
2420     alpha12 = x[18*i+11];
2421     alpha13 = x[18*i+12];
2422     alpha14 = x[18*i+13];
2423     alpha15 = x[18*i+14];
2424     alpha16 = x[18*i+15];
2425     alpha17 = x[18*i+16];
2426     alpha18 = x[18*i+17];
2427     while (n-->0) {
2428       y[18*(*idx)]   += alpha1*(*v);
2429       y[18*(*idx)+1] += alpha2*(*v);
2430       y[18*(*idx)+2] += alpha3*(*v);
2431       y[18*(*idx)+3] += alpha4*(*v);
2432       y[18*(*idx)+4] += alpha5*(*v);
2433       y[18*(*idx)+5] += alpha6*(*v);
2434       y[18*(*idx)+6] += alpha7*(*v);
2435       y[18*(*idx)+7] += alpha8*(*v);
2436       y[18*(*idx)+8]  += alpha9*(*v);
2437       y[18*(*idx)+9]  += alpha10*(*v);
2438       y[18*(*idx)+10] += alpha11*(*v);
2439       y[18*(*idx)+11] += alpha12*(*v);
2440       y[18*(*idx)+12] += alpha13*(*v);
2441       y[18*(*idx)+13] += alpha14*(*v);
2442       y[18*(*idx)+14] += alpha15*(*v);
2443       y[18*(*idx)+15] += alpha16*(*v);
2444       y[18*(*idx)+16] += alpha17*(*v);
2445       y[18*(*idx)+17] += alpha18*(*v);
2446       idx++; v++;
2447     }
2448   }
2449   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2450   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2451   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 /*===================================================================================*/
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2458 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2459 {
2460   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2461   PetscErrorCode ierr;
2462 
2463   PetscFunctionBegin;
2464   /* start the scatter */
2465   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2466   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2467   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2468   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2474 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2475 {
2476   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2477   PetscErrorCode ierr;
2478 
2479   PetscFunctionBegin;
2480   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2481   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2482   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2483   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 #undef __FUNCT__
2488 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2489 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2490 {
2491   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2492   PetscErrorCode ierr;
2493 
2494   PetscFunctionBegin;
2495   /* start the scatter */
2496   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2497   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2498   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2499   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 #undef __FUNCT__
2504 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2505 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2506 {
2507   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2508   PetscErrorCode ierr;
2509 
2510   PetscFunctionBegin;
2511   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2512   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2513   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2514   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 /* ----------------------------------------------------------------*/
2519 #undef __FUNCT__
2520 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2521 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2522 {
2523   /* This routine requires testing -- but it's getting better. */
2524   PetscErrorCode     ierr;
2525   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2526   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2527   Mat                P=pp->AIJ;
2528   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2529   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2530   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2531   PetscInt           an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof,cn;
2532   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2533   MatScalar          *ca;
2534 
2535   PetscFunctionBegin;
2536   /* Start timer */
2537   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2538 
2539   /* Get ij structure of P^T */
2540   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2541 
2542   cn = pn*ppdof;
2543   /* Allocate ci array, arrays for fill computation and */
2544   /* free space for accumulating nonzero column info */
2545   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
2546   ci[0] = 0;
2547 
2548   /* Work arrays for rows of P^T*A */
2549   ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
2550   ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
2551   ptasparserow = ptadenserow  + an;
2552   denserow     = ptasparserow + an;
2553   sparserow    = denserow     + cn;
2554 
2555   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2556   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2557   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2558   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2559   current_space = free_space;
2560 
2561   /* Determine symbolic info for each row of C: */
2562   for (i=0;i<pn;i++) {
2563     ptnzi  = pti[i+1] - pti[i];
2564     ptJ    = ptj + pti[i];
2565     for (dof=0;dof<ppdof;dof++) {
2566       ptanzi = 0;
2567       /* Determine symbolic row of PtA: */
2568       for (j=0;j<ptnzi;j++) {
2569         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2570         arow = ptJ[j]*ppdof + dof;
2571         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2572         anzj = ai[arow+1] - ai[arow];
2573         ajj  = aj + ai[arow];
2574         for (k=0;k<anzj;k++) {
2575           if (!ptadenserow[ajj[k]]) {
2576             ptadenserow[ajj[k]]    = -1;
2577             ptasparserow[ptanzi++] = ajj[k];
2578           }
2579         }
2580       }
2581       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2582       ptaj = ptasparserow;
2583       cnzi   = 0;
2584       for (j=0;j<ptanzi;j++) {
2585         /* Get offset within block of P */
2586         pshift = *ptaj%ppdof;
2587         /* Get block row of P */
2588         prow = (*ptaj++)/ppdof; /* integer division */
2589         /* P has same number of nonzeros per row as the compressed form */
2590         pnzj = pi[prow+1] - pi[prow];
2591         pjj  = pj + pi[prow];
2592         for (k=0;k<pnzj;k++) {
2593           /* Locations in C are shifted by the offset within the block */
2594           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2595           if (!denserow[pjj[k]*ppdof+pshift]) {
2596             denserow[pjj[k]*ppdof+pshift] = -1;
2597             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2598           }
2599         }
2600       }
2601 
2602       /* sort sparserow */
2603       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
2604 
2605       /* If free space is not available, make more free space */
2606       /* Double the amount of total space in the list */
2607       if (current_space->local_remaining<cnzi) {
2608         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
2609       }
2610 
2611       /* Copy data into free space, and zero out denserows */
2612       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
2613       current_space->array           += cnzi;
2614       current_space->local_used      += cnzi;
2615       current_space->local_remaining -= cnzi;
2616 
2617       for (j=0;j<ptanzi;j++) {
2618         ptadenserow[ptasparserow[j]] = 0;
2619       }
2620       for (j=0;j<cnzi;j++) {
2621         denserow[sparserow[j]] = 0;
2622       }
2623       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2624       /*        For now, we will recompute what is needed. */
2625       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2626     }
2627   }
2628   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2629   /* Allocate space for cj, initialize cj, and */
2630   /* destroy list of free space and other temporary array(s) */
2631   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
2632   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
2633   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
2634 
2635   /* Allocate space for ca */
2636   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
2637   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
2638 
2639   /* put together the new matrix */
2640   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2641 
2642   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2643   /* Since these are PETSc arrays, change flags to free them as necessary. */
2644   c          = (Mat_SeqAIJ *)((*C)->data);
2645   c->free_a  = PETSC_TRUE;
2646   c->free_ij = PETSC_TRUE;
2647   c->nonew   = 0;
2648 
2649   /* Clean up. */
2650   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2651 
2652   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 #undef __FUNCT__
2657 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
2658 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2659 {
2660   /* This routine requires testing -- first draft only */
2661   PetscErrorCode ierr;
2662   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2663   Mat            P=pp->AIJ;
2664   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2665   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2666   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2667   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2668   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2669   PetscInt       am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
2670   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2671   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2672 
2673   PetscFunctionBegin;
2674   /* Allocate temporary array for storage of one row of A*P */
2675   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
2676   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
2677 
2678   apj      = (PetscInt *)(apa + cn);
2679   apjdense = apj + cn;
2680 
2681   /* Clear old values in C */
2682   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
2683 
2684   for (i=0;i<am;i++) {
2685     /* Form sparse row of A*P */
2686     anzi  = ai[i+1] - ai[i];
2687     apnzj = 0;
2688     for (j=0;j<anzi;j++) {
2689       /* Get offset within block of P */
2690       pshift = *aj%ppdof;
2691       /* Get block row of P */
2692       prow   = *aj++/ppdof; /* integer division */
2693       pnzj = pi[prow+1] - pi[prow];
2694       pjj  = pj + pi[prow];
2695       paj  = pa + pi[prow];
2696       for (k=0;k<pnzj;k++) {
2697         poffset = pjj[k]*ppdof+pshift;
2698         if (!apjdense[poffset]) {
2699           apjdense[poffset] = -1;
2700           apj[apnzj++]      = poffset;
2701         }
2702         apa[poffset] += (*aa)*paj[k];
2703       }
2704       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
2705       aa++;
2706     }
2707 
2708     /* Sort the j index array for quick sparse axpy. */
2709     /* Note: a array does not need sorting as it is in dense storage locations. */
2710     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
2711 
2712     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2713     prow    = i/ppdof; /* integer division */
2714     pshift  = i%ppdof;
2715     poffset = pi[prow];
2716     pnzi = pi[prow+1] - poffset;
2717     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2718     pJ   = pj+poffset;
2719     pA   = pa+poffset;
2720     for (j=0;j<pnzi;j++) {
2721       crow   = (*pJ)*ppdof+pshift;
2722       cjj    = cj + ci[crow];
2723       caj    = ca + ci[crow];
2724       pJ++;
2725       /* Perform sparse axpy operation.  Note cjj includes apj. */
2726       for (k=0,nextap=0;nextap<apnzj;k++) {
2727         if (cjj[k]==apj[nextap]) {
2728           caj[k] += (*pA)*apa[apj[nextap++]];
2729         }
2730       }
2731       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
2732       pA++;
2733     }
2734 
2735     /* Zero the current row info for A*P */
2736     for (j=0;j<apnzj;j++) {
2737       apa[apj[j]]      = 0.;
2738       apjdense[apj[j]] = 0;
2739     }
2740   }
2741 
2742   /* Assemble the final matrix and clean up */
2743   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2744   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2745   ierr = PetscFree(apa);CHKERRQ(ierr);
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 #undef __FUNCT__
2750 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
2751 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2752 {
2753   PetscErrorCode    ierr;
2754 
2755   PetscFunctionBegin;
2756   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
2757   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
2758   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
2759   PetscFunctionReturn(0);
2760 }
2761 
2762 #undef __FUNCT__
2763 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
2764 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
2765 {
2766   PetscFunctionBegin;
2767   SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
2768   PetscFunctionReturn(0);
2769 }
2770 
2771 EXTERN_C_BEGIN
2772 #undef __FUNCT__
2773 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
2774 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2775 {
2776   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2777   Mat               a = b->AIJ,B;
2778   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2779   PetscErrorCode    ierr;
2780   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2781   PetscInt          *cols;
2782   PetscScalar       *vals;
2783 
2784   PetscFunctionBegin;
2785   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
2786   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
2787   for (i=0; i<m; i++) {
2788     nmax = PetscMax(nmax,aij->ilen[i]);
2789     for (j=0; j<dof; j++) {
2790       ilen[dof*i+j] = aij->ilen[i];
2791     }
2792   }
2793   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
2794   ierr = PetscFree(ilen);CHKERRQ(ierr);
2795   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
2796   ii   = 0;
2797   for (i=0; i<m; i++) {
2798     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2799     for (j=0; j<dof; j++) {
2800       for (k=0; k<ncols; k++) {
2801         icols[k] = dof*cols[k]+j;
2802       }
2803       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2804       ii++;
2805     }
2806     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2807   }
2808   ierr = PetscFree(icols);CHKERRQ(ierr);
2809   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2810   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2811 
2812   if (reuse == MAT_REUSE_MATRIX) {
2813     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2814   } else {
2815     *newmat = B;
2816   }
2817   PetscFunctionReturn(0);
2818 }
2819 EXTERN_C_END
2820 
2821 #include "../src/mat/impls/aij/mpi/mpiaij.h"
2822 
2823 EXTERN_C_BEGIN
2824 #undef __FUNCT__
2825 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
2826 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2827 {
2828   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2829   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2830   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2831   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2832   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2833   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2834   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2835   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2836   PetscInt          rstart,cstart,*garray,ii,k;
2837   PetscErrorCode    ierr;
2838   PetscScalar       *vals,*ovals;
2839 
2840   PetscFunctionBegin;
2841   ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr);
2842   for (i=0; i<A->rmap->n/dof; i++) {
2843     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2844     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2845     for (j=0; j<dof; j++) {
2846       dnz[dof*i+j] = AIJ->ilen[i];
2847       onz[dof*i+j] = OAIJ->ilen[i];
2848     }
2849   }
2850   ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
2851   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
2852 
2853   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
2854   rstart = dof*maij->A->rmap->rstart;
2855   cstart = dof*maij->A->cmap->rstart;
2856   garray = mpiaij->garray;
2857 
2858   ii = rstart;
2859   for (i=0; i<A->rmap->n/dof; i++) {
2860     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2861     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2862     for (j=0; j<dof; j++) {
2863       for (k=0; k<ncols; k++) {
2864         icols[k] = cstart + dof*cols[k]+j;
2865       }
2866       for (k=0; k<oncols; k++) {
2867         oicols[k] = dof*garray[ocols[k]]+j;
2868       }
2869       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2870       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
2871       ii++;
2872     }
2873     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2874     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2875   }
2876   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
2877 
2878   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2879   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2880 
2881   if (reuse == MAT_REUSE_MATRIX) {
2882     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
2883     ((PetscObject)A)->refct = 1;
2884     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2885     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
2886   } else {
2887     *newmat = B;
2888   }
2889   PetscFunctionReturn(0);
2890 }
2891 EXTERN_C_END
2892 
2893 
2894 /* ---------------------------------------------------------------------------------- */
2895 #undef __FUNCT__
2896 #define __FUNCT__ "MatCreateMAIJ"
2897 /*@C
2898   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2899   operations for multicomponent problems.  It interpolates each component the same
2900   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2901   and MATMPIAIJ for distributed matrices.
2902 
2903   Collective
2904 
2905   Input Parameters:
2906 + A - the AIJ matrix describing the action on blocks
2907 - dof - the block size (number of components per node)
2908 
2909   Output Parameter:
2910 . maij - the new MAIJ matrix
2911 
2912   Operations provided:
2913 + MatMult
2914 . MatMultTranspose
2915 . MatMultAdd
2916 . MatMultTransposeAdd
2917 - MatView
2918 
2919   Level: advanced
2920 
2921 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension()
2922 @*/
2923 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2924 {
2925   PetscErrorCode ierr;
2926   PetscMPIInt    size;
2927   PetscInt       n;
2928   Mat_MPIMAIJ    *b;
2929   Mat            B;
2930 
2931   PetscFunctionBegin;
2932   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
2933 
2934   if (dof == 1) {
2935     *maij = A;
2936   } else {
2937     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2938     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
2939     B->assembled    = PETSC_TRUE;
2940 
2941     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2942     if (size == 1) {
2943       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
2944       B->ops->destroy = MatDestroy_SeqMAIJ;
2945       B->ops->view    = MatView_SeqMAIJ;
2946       b      = (Mat_MPIMAIJ*)B->data;
2947       b->dof = dof;
2948       b->AIJ = A;
2949       if (dof == 2) {
2950         B->ops->mult             = MatMult_SeqMAIJ_2;
2951         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2952         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2953         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2954       } else if (dof == 3) {
2955         B->ops->mult             = MatMult_SeqMAIJ_3;
2956         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2957         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2958         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2959       } else if (dof == 4) {
2960         B->ops->mult             = MatMult_SeqMAIJ_4;
2961         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2962         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2963         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2964       } else if (dof == 5) {
2965         B->ops->mult             = MatMult_SeqMAIJ_5;
2966         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2967         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2968         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2969       } else if (dof == 6) {
2970         B->ops->mult             = MatMult_SeqMAIJ_6;
2971         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2972         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2973         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2974       } else if (dof == 7) {
2975         B->ops->mult             = MatMult_SeqMAIJ_7;
2976         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2977         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2978         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2979       } else if (dof == 8) {
2980         B->ops->mult             = MatMult_SeqMAIJ_8;
2981         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2982         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2983         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2984       } else if (dof == 9) {
2985         B->ops->mult             = MatMult_SeqMAIJ_9;
2986         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2987         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2988         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2989       } else if (dof == 10) {
2990         B->ops->mult             = MatMult_SeqMAIJ_10;
2991         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2992         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2993         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2994       } else if (dof == 16) {
2995         B->ops->mult             = MatMult_SeqMAIJ_16;
2996         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2997         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2998         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2999       } else if (dof == 18) {
3000         B->ops->mult             = MatMult_SeqMAIJ_18;
3001         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3002         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3003         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3004       } else {
3005         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
3006       }
3007       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
3008       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3009       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3010     } else {
3011       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
3012       IS         from,to;
3013       Vec        gvec;
3014       PetscInt   *garray,i;
3015 
3016       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3017       B->ops->destroy = MatDestroy_MPIMAIJ;
3018       B->ops->view    = MatView_MPIMAIJ;
3019       b      = (Mat_MPIMAIJ*)B->data;
3020       b->dof = dof;
3021       b->A   = A;
3022       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
3023       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
3024 
3025       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3026       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
3027       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3028 
3029       /* create two temporary Index sets for build scatter gather */
3030       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
3031       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
3032       ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr);
3033       ierr = PetscFree(garray);CHKERRQ(ierr);
3034       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3035 
3036       /* create temporary global vector to generate scatter context */
3037       ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
3038       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
3039 
3040       /* generate the scatter context */
3041       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3042 
3043       ierr = ISDestroy(from);CHKERRQ(ierr);
3044       ierr = ISDestroy(to);CHKERRQ(ierr);
3045       ierr = VecDestroy(gvec);CHKERRQ(ierr);
3046 
3047       B->ops->mult             = MatMult_MPIMAIJ_dof;
3048       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3049       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3050       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3051       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3052       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3053       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3054     }
3055     *maij = B;
3056     ierr = MatView_Private(B);CHKERRQ(ierr);
3057   }
3058   PetscFunctionReturn(0);
3059 }
3060